diff --git a/riscv/decode.h b/riscv/decode.h index be310c3b..c3feec3e 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -282,21 +282,29 @@ class wait_for_interrupt_t {}; #define invalid_pc(pc) ((pc) & 1) /* Convenience wrappers to simplify softfloat code sequences */ +#define isBoxedF16(r) (isBoxedF32(r) && ((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48))) +#define unboxF16(r) (isBoxedF16(r) ? (uint32_t)r.v[0] : defaultNaNF16UI) #define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0)) #define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI) #define isBoxedF64(r) ((r.v[1] + 1) == 0) #define unboxF64(r) (isBoxedF64(r) ? r.v[0] : defaultNaNF64UI) typedef float128_t freg_t; +inline float16_t f16(uint16_t v) { return { v }; } inline float32_t f32(uint32_t v) { return { v }; } inline float64_t f64(uint64_t v) { return { v }; } +inline float16_t f16(freg_t r) { return f16(unboxF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } +inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; } inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; } inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; } inline freg_t freg(float128_t f) { return f; } +#define F16_SIGN ((uint16_t)1 << 15) #define F32_SIGN ((uint32_t)1 << 31) #define F64_SIGN ((uint64_t)1 << 63) +#define fsgnj16(a, b, n, x) \ + f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN)) #define fsgnj32(a, b, n, x) \ f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? 
F32_SIGN : 0) ^ f32(b).v) & F32_SIGN)) #define fsgnj64(a, b, n, x) \ @@ -1654,7 +1662,8 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ // #define VI_VFP_COMMON \ require_fp; \ - require((P.VU.vsew == e32 && p->supports_extension('F')) || \ + require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e32 && p->supports_extension('F')) || \ (P.VU.vsew == e64 && p->supports_extension('D'))); \ require_vector;\ reg_t vl = P.VU.vl; \ @@ -1698,11 +1707,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ } \ P.VU.vstart = 0; \ -#define VI_VFP_LOOP_WIDE_END \ - } \ - P.VU.vstart = 0; \ - set_fp_exceptions; - #define VI_VFP_LOOP_REDUCTION_END(x) \ } \ P.VU.vstart = 0; \ @@ -1712,24 +1716,31 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ #define VI_VFP_LOOP_CMP_END \ switch(P.VU.vsew) { \ + case e16: \ case e32: \ case e64: { \ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ break; \ } \ - case e16: \ default: \ require(0); \ break; \ }; \ } \ - P.VU.vstart = 0; \ - set_fp_exceptions; + P.VU.vstart = 0; -#define VI_VFP_VV_LOOP(BODY32, BODY64) \ +#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t &vd = P.VU.elt(rd_num, i, true); \ float32_t vs1 = P.VU.elt(rs1_num, i); \ @@ -1746,7 +1757,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ @@ -1754,10 +1764,17 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END -#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \ +#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ VI_CHECK_REDUCTION(false) \ VI_VFP_COMMON \ switch(P.VU.vsew) { \ + case e16: {\ + 
VI_VFP_LOOP_REDUCTION_BASE(16) \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e16) \ + break; \ + }\ case e32: {\ VI_VFP_LOOP_REDUCTION_BASE(32) \ BODY32; \ @@ -1772,24 +1789,54 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ VI_VFP_LOOP_REDUCTION_END(e64) \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ }; \ -#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \ - VI_VFP_LOOP_WIDE_REDUCTION_BASE \ - float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ - BODY; \ - set_fp_exceptions; \ - DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_REDUCTION_END(e64) +#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \ + VI_CHECK_REDUCTION(true) \ + VI_VFP_COMMON \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e32: {\ + float64_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ -#define VI_VFP_VF_LOOP(BODY32, BODY64) \ +#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t &vd = P.VU.elt(rd_num, i, true); \ float32_t rs1 = f32(READ_FREG(rs1_num)); \ @@ -1806,8 +1853,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ break; \ @@ -1815,10 +1860,18 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define 
VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \ +#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ VI_CHECK_MSS(is_vs1); \ VI_VFP_LOOP_CMP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t vs2 = P.VU.elt(rs2_num, i); \ float32_t vs1 = P.VU.elt(rs1_num, i); \ @@ -1835,96 +1888,146 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ }; \ VI_VFP_LOOP_CMP_END \ -#define VI_VFP_VF_LOOP_WIDE(BODY) \ +#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + } \ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ break; \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_VV_LOOP_WIDE(BODY) \ +#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); 
\ break; \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_WF_LOOP_WIDE(BODY) \ +#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = P.VU.elt(rs2_num, i); \ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_WV_LOOP_WIDE(BODY) \ +#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = P.VU.elt(rs2_num, i); \ float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END +#define VI_VFP_CVT_SCALE(BODY16, BODY32, is_widen) \ + if (is_widen) { \ + VI_CHECK_DSS(false);\ + } else { \ + VI_CHECK_SDS(false); \ + } \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + switch(P.VU.vsew) { \ + case e16: {\ + VI_VFP_LOOP_BASE \ + BODY16 \ + set_fp_exceptions; \ + VI_VFP_LOOP_END \ + } \ + break; \ + case e32: {\ + VI_VFP_LOOP_BASE \ + BODY32 \ + set_fp_exceptions; \ + VI_VFP_LOOP_END \ + } \ + break; \ + default: \ + require(0); \ + break; \ + } #define 
DEBUG_START 0x0 #define DEBUG_END (0x1000 - 1) diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h index bdb7f75c..2b808e0c 100644 --- a/riscv/insns/vfadd_vf.h +++ b/riscv/insns/vfadd_vf.h @@ -1,6 +1,9 @@ // vfadd.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_add(rs1, vs2); +}, +{ vd = f32_add(rs1, vs2); }, { diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h index b333a8a3..ce94921d 100644 --- a/riscv/insns/vfadd_vv.h +++ b/riscv/insns/vfadd_vv.h @@ -1,6 +1,9 @@ // vfadd.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_add(vs1, vs2); +}, +{ vd = f32_add(vs1, vs2); }, { diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h index 8ee092f5..1bd5f5ff 100644 --- a/riscv/insns/vfclass_v.h +++ b/riscv/insns/vfclass_v.h @@ -1,6 +1,9 @@ // vfclass.v vd, vs2, vm VI_VFP_VV_LOOP ({ + vd.v = f16_classify(vs2); +}, +{ vd.v = f32_classify(vs2); }, { diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h index fdaa697f..c53b0e1f 100644 --- a/riscv/insns/vfcvt_f_x_v.h +++ b/riscv/insns/vfcvt_f_x_v.h @@ -1,6 +1,10 @@ // vfcvt.f.x.v vd, vd2, vm VI_VFP_VF_LOOP ({ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f16(vs2_i); +}, +{ auto vs2_i = P.VU.elt(rs2_num, i); vd = i32_to_f32(vs2_i); }, diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h index 01ea61ca..bd03768d 100644 --- a/riscv/insns/vfcvt_f_xu_v.h +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -1,6 +1,10 @@ // vfcvt.f.xu.v vd, vd2, vm VI_VFP_VF_LOOP ({ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f16(vs2_u); +}, +{ auto vs2_u = P.VU.elt(rs2_num, i); vd = ui32_to_f32(vs2_u); }, diff --git a/riscv/insns/vfcvt_rtz_x_f_v.h b/riscv/insns/vfcvt_rtz_x_f_v.h index 89c88edb..e7241bd0 100644 --- a/riscv/insns/vfcvt_rtz_x_f_v.h +++ b/riscv/insns/vfcvt_rtz_x_f_v.h @@ -1,6 +1,9 @@ -// vfcvt.x.f.v vd, vd2, vm +// vfcvt.rtz.x.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, softfloat_round_minMag, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_i32(vs2, 
softfloat_round_minMag, true); }, { diff --git a/riscv/insns/vfcvt_rtz_xu_f_v.h b/riscv/insns/vfcvt_rtz_xu_f_v.h index fd75fd0c..d3d266d0 100644 --- a/riscv/insns/vfcvt_rtz_xu_f_v.h +++ b/riscv/insns/vfcvt_rtz_xu_f_v.h @@ -1,6 +1,9 @@ -// vfcvt.xu.f.v vd, vd2, vm +// vfcvt.rtz.xu.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_ui32(vs2, softfloat_round_minMag, true); }, { diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h index 96bc481d..01e5ca17 100644 --- a/riscv/insns/vfcvt_x_f_v.h +++ b/riscv/insns/vfcvt_x_f_v.h @@ -1,6 +1,9 @@ // vfcvt.x.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, STATE.frm, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); }, { diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h index 5f19f900..725cbda2 100644 --- a/riscv/insns/vfcvt_xu_f_v.h +++ b/riscv/insns/vfcvt_xu_f_v.h @@ -1,6 +1,9 @@ // vfcvt.xu.f.v vd, vd2, vm VI_VFP_VV_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true); }, { diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h index ce217304..a703ef02 100644 --- a/riscv/insns/vfdiv_vf.h +++ b/riscv/insns/vfdiv_vf.h @@ -1,6 +1,9 @@ // vfdiv.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_div(vs2, rs1); +}, +{ vd = f32_div(vs2, rs1); }, { diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h index 8a49a917..c66d7516 100644 --- a/riscv/insns/vfdiv_vv.h +++ b/riscv/insns/vfdiv_vv.h @@ -1,6 +1,9 @@ // vfdiv.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_div(vs2, vs1); +}, +{ vd = f32_div(vs2, vs1); }, { diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h index 85d0b8ac..8f5225ac 100644 --- a/riscv/insns/vfdot_vv.h +++ b/riscv/insns/vfdot_vv.h @@ -1,6 +1,9 @@ // vfdot.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_add(vd, f16_mul(vs2, vs1)); +}, +{ vd = f32_add(vd, 
f32_mul(vs2, vs1)); }, { diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h index fca41840..61578d33 100644 --- a/riscv/insns/vfmacc_vf.h +++ b/riscv/insns/vfmacc_vf.h @@ -1,6 +1,9 @@ // vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, vs2, vd); +}, +{ vd = f32_mulAdd(rs1, vs2, vd); }, { diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h index f1caf33f..499b1d4d 100644 --- a/riscv/insns/vfmacc_vv.h +++ b/riscv/insns/vfmacc_vv.h @@ -1,6 +1,9 @@ // vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vs1, vs2, vd); +}, +{ vd = f32_mulAdd(vs1, vs2, vd); }, { diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h index 7707daec..2a014295 100644 --- a/riscv/insns/vfmadd_vf.h +++ b/riscv/insns/vfmadd_vf.h @@ -1,6 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(vd, rs1, vs2); +}, +{ vd = f32_mulAdd(vd, rs1, vs2); }, { diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h index a095c38d..7ef734f8 100644 --- a/riscv/insns/vfmadd_vv.h +++ b/riscv/insns/vfmadd_vv.h @@ -1,6 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vd, vs1, vs2); +}, +{ vd = f32_mulAdd(vd, vs1, vs2); }, { diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h index a8df880f..c4b74cbd 100644 --- a/riscv/insns/vfmax_vf.h +++ b/riscv/insns/vfmax_vf.h @@ -1,6 +1,9 @@ // vfmax VI_VFP_VF_LOOP ({ + vd = f16_max(vs2, rs1); +}, +{ vd = f32_max(vs2, rs1); }, { diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h index 2329e746..6439c899 100644 --- a/riscv/insns/vfmax_vv.h +++ b/riscv/insns/vfmax_vv.h @@ -1,6 +1,9 @@ // vfmax VI_VFP_VV_LOOP ({ + vd = f16_max(vs2, vs1); +}, +{ vd = f32_max(vs2, vs1); }, { diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h index 33dacfae..bd00e326 100644 --- a/riscv/insns/vfmerge_vfm.h +++ b/riscv/insns/vfmerge_vfm.h @@ -3,7 +3,20 
@@ VI_CHECK_SSS(false); VI_VFP_COMMON; switch(P.VU.vsew) { - case 32: + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + case e32: for (reg_t i=P.VU.vstart; i(rd_num, i, true); auto rs1 = f32(READ_FREG(rs1_num)); @@ -16,7 +29,7 @@ switch(P.VU.vsew) { vd = use_first ? rs1 : vs2; } break; - case 64: + case e64: for (reg_t i=P.VU.vstart; i(rd_num, i, true); auto rs1 = f64(READ_FREG(rs1_num)); diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h index a55462b6..1560cdf7 100644 --- a/riscv/insns/vfmin_vf.h +++ b/riscv/insns/vfmin_vf.h @@ -1,6 +1,9 @@ // vfmin vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_min(vs2, rs1); +}, +{ vd = f32_min(vs2, rs1); }, { diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h index 399b5631..882a7740 100644 --- a/riscv/insns/vfmin_vv.h +++ b/riscv/insns/vfmin_vv.h @@ -1,6 +1,9 @@ // vfmin vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_min(vs2, vs1); +}, +{ vd = f32_min(vs2, vs1); }, { diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h index 0f42560e..8af397b9 100644 --- a/riscv/insns/vfmsac_vf.h +++ b/riscv/insns/vfmsac_vf.h @@ -1,6 +1,9 @@ // vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h index 9b4ed9f1..3bb50e50 100644 --- a/riscv/insns/vfmsac_vv.h +++ b/riscv/insns/vfmsac_vv.h @@ -1,6 +1,9 @@ // vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h index bd968e3d..ab77b4c6 100644 --- 
a/riscv/insns/vfmsub_vf.h +++ b/riscv/insns/vfmsub_vf.h @@ -1,6 +1,9 @@ // vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h index f8e0b3dc..3cac937f 100644 --- a/riscv/insns/vfmsub_vv.h +++ b/riscv/insns/vfmsub_vv.h @@ -1,6 +1,9 @@ // vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h index 9e7d481a..f5f63e49 100644 --- a/riscv/insns/vfmul_vf.h +++ b/riscv/insns/vfmul_vf.h @@ -1,6 +1,9 @@ // vfmul.vf vd, vs2, rs1, vm VI_VFP_VF_LOOP ({ + vd = f16_mul(vs2, rs1); +}, +{ vd = f32_mul(vs2, rs1); }, { diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h index 0e4d499e..7930fd03 100644 --- a/riscv/insns/vfmul_vv.h +++ b/riscv/insns/vfmul_vv.h @@ -1,6 +1,9 @@ // vfmul.vv vd, vs1, vs2, vm VI_VFP_VV_LOOP ({ + vd = f16_mul(vs1, vs2); +}, +{ vd = f32_mul(vs1, vs2); }, { diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index 586b80ee..2f82ce8e 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -1,24 +1,31 @@ // vfmv_f_s: rd = vs2[0] (rs1=0) require_vector; require_fp; -require_extension('F'); -require(P.VU.vsew == e32 || P.VU.vsew == e64); +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); reg_t rs2_num = insn.rs2(); uint64_t vs2_0 = 0; const reg_t sew = P.VU.vsew; switch(sew) { -case e32: - vs2_0 = P.VU.elt(rs2_num, 0); - break; -default: - vs2_0 = P.VU.elt(rs2_num, 0); - break; + case e16: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e32: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e64: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + default: + require(0); 
+ break; } // nan_extened if (FLEN > sew) { - vs2_0 = vs2_0 | ~((uint64_t(1) << sew) - 1); + vs2_0 = vs2_0 | (UINT64_MAX << sew); } if (FLEN == 64) { diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index 84c5a3f1..d29e2457 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -1,21 +1,29 @@ // vfmv_s_f: vd[0] = rs1 (vs2=0) require_vector; require_fp; -require_extension('F'); -require(P.VU.vsew >= e32 && P.VU.vsew <= 64); +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); + reg_t vl = P.VU.vl; if (vl > 0 && P.VU.vstart < vl) { reg_t rd_num = insn.rd(); switch(P.VU.vsew) { - case 32: + case e16: + if (FLEN == 64) + P.VU.elt(rd_num, 0, true) = f64(FRS1).v; + else + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + case e32: if (FLEN == 64) P.VU.elt(rd_num, 0, true) = f64(FRS1).v; else P.VU.elt(rd_num, 0, true) = f32(FRS1).v; break; - case 64: + case e64: if (FLEN == 64) P.VU.elt(rd_num, 0, true) = f64(FRS1).v; else diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h index f85a26a6..e4cdec4c 100644 --- a/riscv/insns/vfmv_v_f.h +++ b/riscv/insns/vfmv_v_f.h @@ -2,6 +2,14 @@ require((insn.rd() & (P.VU.vlmul - 1)) == 0); VI_VFP_COMMON switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + + vd = rs1; + } + break; case e32: for (reg_t i=P.VU.vstart; i(rd_num, i, true); diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h index 3a9ead32..d5d5de63 100644 --- a/riscv/insns/vfncvt_f_f_w.h +++ b/riscv/insns/vfncvt_f_f_w.h @@ -1,10 +1,11 @@ // vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = 
f64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) + diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h index c3283953..25faa1e0 100644 --- a/riscv/insns/vfncvt_f_x_w.h +++ b/riscv/insns/vfncvt_f_x_w.h @@ -1,10 +1,10 @@ // vfncvt.f.x.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = i64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h index c6746440..a8b62fec 100644 --- a/riscv/insns/vfncvt_f_xu_w.h +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -1,10 +1,10 @@ // vfncvt.f.xu.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = ui64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h index 0eae343f..864b7846 100644 --- a/riscv/insns/vfncvt_rod_f_f_w.h +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -1,11 +1,12 @@ -// vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rod.f.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ softfloat_roundingMode = softfloat_round_odd; auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h index 2fc8edf2..70c66d19 100644 --- 
a/riscv/insns/vfncvt_rtz_x_f_w.h +++ b/riscv/insns/vfncvt_rtz_x_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.x.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rtz.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h index 8cd68e29..98f1c4b6 100644 --- a/riscv/insns/vfncvt_rtz_xu_f_w.h +++ b/riscv/insns/vfncvt_rtz_xu_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.xu.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rtz.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h index d6728bd1..3ddbb875 100644 --- a/riscv/insns/vfncvt_x_f_w.h +++ b/riscv/insns/vfncvt_x_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.x.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h index f2cce241..3b7d4735 100644 --- a/riscv/insns/vfncvt_xu_f_w.h +++ 
b/riscv/insns/vfncvt_xu_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.xu.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h index da58d3aa..1b99302c 100644 --- a/riscv/insns/vfnmacc_vf.h +++ b/riscv/insns/vfnmacc_vf.h @@ -1,6 +1,9 @@ // vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h index 62a14861..7200e063 100644 --- a/riscv/insns/vfnmacc_vv.h +++ b/riscv/insns/vfnmacc_vv.h @@ -1,6 +1,9 @@ // vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h index b26f3775..cb9c217f 100644 --- a/riscv/insns/vfnmadd_vf.h +++ b/riscv/insns/vfnmadd_vf.h @@ -1,6 +1,9 @@ // vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h index fc705743..7160ed7d 100644 --- a/riscv/insns/vfnmadd_vv.h +++ b/riscv/insns/vfnmadd_vv.h @@ -1,6 +1,9 @@ // vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vd.v ^ 
F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h index b78d0cac..aa6baa30 100644 --- a/riscv/insns/vfnmsac_vf.h +++ b/riscv/insns/vfnmsac_vf.h @@ -1,6 +1,9 @@ // vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); +}, +{ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); }, { diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h index 795dc384..47db61d2 100644 --- a/riscv/insns/vfnmsac_vv.h +++ b/riscv/insns/vfnmsac_vv.h @@ -1,6 +1,9 @@ // vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd); +}, +{ vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); }, { diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h index 6c6dc27d..43aa9e26 100644 --- a/riscv/insns/vfnmsub_vf.h +++ b/riscv/insns/vfnmsub_vf.h @@ -1,6 +1,9 @@ // vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); }, { diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h index ff4a9b59..2a45c8fc 100644 --- a/riscv/insns/vfnmsub_vv.h +++ b/riscv/insns/vfnmsub_vv.h @@ -1,6 +1,9 @@ // vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); }, { diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h index 73ec5346..b283343c 100644 --- a/riscv/insns/vfrdiv_vf.h +++ b/riscv/insns/vfrdiv_vf.h @@ -1,6 +1,9 @@ // vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_div(rs1, vs2); +}, +{ vd = f32_div(rs1, vs2); }, { diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h index cb03dbb0..1ab856b0 100644 --- a/riscv/insns/vfredmax_vs.h +++ b/riscv/insns/vfredmax_vs.h @@ -1,6 +1,9 @@ // vfredmax vd, vs2, vs1 
VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_max(vd_0, vs2); +}, +{ vd_0 = f32_max(vd_0, vs2); }, { diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h index 51c0bcb0..37256cbc 100644 --- a/riscv/insns/vfredmin_vs.h +++ b/riscv/insns/vfredmin_vs.h @@ -1,6 +1,9 @@ // vfredmin vd, vs2, vs1 VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_min(vd_0, vs2); +}, +{ vd_0 = f32_min(vd_0, vs2); }, { diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h index 7de6dbb4..4564f9c6 100644 --- a/riscv/insns/vfredosum_vs.h +++ b/riscv/insns/vfredosum_vs.h @@ -1,6 +1,9 @@ // vfredosum: vd[0] = sum( vs2[*] , vs1[0] ) VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_add(vd_0, vs2); +}, +{ vd_0 = f32_add(vd_0, vs2); }, { diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h index 7b5cccce..d18b63ed 100644 --- a/riscv/insns/vfredsum_vs.h +++ b/riscv/insns/vfredsum_vs.h @@ -1,6 +1,9 @@ // vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_add(vd_0, vs2); +}, +{ vd_0 = f32_add(vd_0, vs2); }, { diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h index d9a19863..7fb26a5b 100644 --- a/riscv/insns/vfrsub_vf.h +++ b/riscv/insns/vfrsub_vf.h @@ -1,6 +1,9 @@ // vfsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_sub(rs1, vs2); +}, +{ vd = f32_sub(rs1, vs2); }, { diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h index c7f731ba..ce06185e 100644 --- a/riscv/insns/vfsgnj_vf.h +++ b/riscv/insns/vfsgnj_vf.h @@ -1,6 +1,9 @@ // vfsgnj vd, vs2, vs1 VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, false, false); +}, +{ vd = fsgnj32(vs2.v, rs1.v, false, false); }, { diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h index 12d3d437..722cb29c 100644 --- a/riscv/insns/vfsgnj_vv.h +++ b/riscv/insns/vfsgnj_vv.h @@ -1,6 +1,9 @@ // vfsgnj VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, false, false); +}, +{ vd = fsgnj32(vs2.v, vs1.v, false, false); }, { diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h index 
45117481..e4894124 100644 --- a/riscv/insns/vfsgnjn_vf.h +++ b/riscv/insns/vfsgnjn_vf.h @@ -1,6 +1,9 @@ // vfsgnn VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, true, false); +}, +{ vd = fsgnj32(vs2.v, rs1.v, true, false); }, { diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h index a16acf7a..1d91f691 100644 --- a/riscv/insns/vfsgnjn_vv.h +++ b/riscv/insns/vfsgnjn_vv.h @@ -1,6 +1,9 @@ // vfsgnn VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, true, false); +}, +{ vd = fsgnj32(vs2.v, vs1.v, true, false); }, { diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h index c4230600..7be164c7 100644 --- a/riscv/insns/vfsgnjx_vf.h +++ b/riscv/insns/vfsgnjx_vf.h @@ -1,6 +1,9 @@ // vfsgnx VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, false, true); +}, +{ vd = fsgnj32(vs2.v, rs1.v, false, true); }, { diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h index 9dbe0780..b04b8454 100644 --- a/riscv/insns/vfsgnjx_vv.h +++ b/riscv/insns/vfsgnjx_vv.h @@ -1,6 +1,9 @@ // vfsgnx VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, false, true); +}, +{ vd = fsgnj32(vs2.v, vs1.v, false, true); }, { diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h index 43fedb0d..b2ae3457 100644 --- a/riscv/insns/vfslide1down_vf.h +++ b/riscv/insns/vfslide1down_vf.h @@ -4,6 +4,11 @@ VI_CHECK_SLIDE(false); VI_VFP_LOOP_BASE if (i != vl - 1) { switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; case e32: { VI_XI_SLIDEDOWN_PARAMS(e32, 1); vd = vs2; @@ -17,6 +22,9 @@ if (i != vl - 1) { } } else { switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, vl - 1) = f16(FRS1); + break; case e32: P.VU.elt(rd_num, vl - 1, true) = f32(FRS1); break; diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h index e0174d65..7012fc1e 100644 --- a/riscv/insns/vfslide1up_vf.h +++ b/riscv/insns/vfslide1up_vf.h @@ -4,6 +4,11 @@ VI_CHECK_SLIDE(true); VI_VFP_LOOP_BASE if (i != 0) { switch (P.VU.vsew) { + case e16: { + 
VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } + break; case e32: { VI_XI_SLIDEUP_PARAMS(e32, 1); vd = vs2; @@ -17,6 +22,9 @@ if (i != 0) { } } else { switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1); + break; case e32: P.VU.elt(rd_num, 0, true) = f32(FRS1); break; diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h index 4a36932e..f1213088 100644 --- a/riscv/insns/vfsqrt_v.h +++ b/riscv/insns/vfsqrt_v.h @@ -1,6 +1,9 @@ // vsqrt.v vd, vd2, vm VI_VFP_VV_LOOP ({ + vd = f16_sqrt(vs2); +}, +{ vd = f32_sqrt(vs2); }, { diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h index a4702d04..fc6877ca 100644 --- a/riscv/insns/vfsub_vf.h +++ b/riscv/insns/vfsub_vf.h @@ -1,6 +1,9 @@ // vfsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_sub(vs2, rs1); +}, +{ vd = f32_sub(vs2, rs1); }, { diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h index 40545fb6..b0403f11 100644 --- a/riscv/insns/vfsub_vv.h +++ b/riscv/insns/vfsub_vv.h @@ -1,6 +1,9 @@ // vfsub.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_sub(vs2, vs1); +}, +{ vd = f32_sub(vs2, vs1); }, { diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h index ecac2029..b8249001 100644 --- a/riscv/insns/vfwadd_vf.h +++ b/riscv/insns/vfwadd_vf.h @@ -1,5 +1,8 @@ // vfwadd.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_add(vs2, rs1); +}, +{ vd = f64_add(vs2, rs1); }) diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h index 0665cdcd..7255a50e 100644 --- a/riscv/insns/vfwadd_vv.h +++ b/riscv/insns/vfwadd_vv.h @@ -1,5 +1,8 @@ // vfwadd.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_add(vs2, vs1); +}, +{ vd = f64_add(vs2, vs1); }) diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h index eb38d0db..021b17f0 100644 --- a/riscv/insns/vfwadd_wf.h +++ b/riscv/insns/vfwadd_wf.h @@ -1,5 +1,8 @@ // vfwadd.wf vd, vs2, vs1 VI_VFP_WF_LOOP_WIDE ({ + vd = f32_add(vs2, rs1); +}, +{ vd = f64_add(vs2, rs1); }) diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h index 
675ef228..c1ed0389 100644 --- a/riscv/insns/vfwadd_wv.h +++ b/riscv/insns/vfwadd_wv.h @@ -1,5 +1,8 @@ // vfwadd.wv vd, vs2, vs1 VI_VFP_WV_LOOP_WIDE ({ + vd = f32_add(vs2, vs1); +}, +{ vd = f64_add(vs2, vs1); }) diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h index 424f0f41..abb68a42 100644 --- a/riscv/insns/vfwcvt_f_f_v.h +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h index 1c05ab75..62cd8e8f 100644 --- a/riscv/insns/vfwcvt_f_x_v.h +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.x.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = i32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h index fcb8c0c2..36a81edb 100644 --- a/riscv/insns/vfwcvt_f_xu_v.h +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.xu.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = ui32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h index afbe939f..ad3a90d4 
100644 --- a/riscv/insns/vfwcvt_rtz_x_f_v.h +++ b/riscv/insns/vfwcvt_rtz_x_f_v.h @@ -1,10 +1,10 @@ -// vfwcvt.x.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfwcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h index e3e78fff..297008f8 100644 --- a/riscv/insns/vfwcvt_rtz_xu_f_v.h +++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h @@ -1,10 +1,10 @@ -// vfwcvt.xu.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfwcvt.rtz.xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h index 3df8256b..9b798939 100644 --- a/riscv/insns/vfwcvt_x_f_v.h +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.x.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h index 6e39b7dc..bfe16ff0 100644 --- a/riscv/insns/vfwcvt_xu_f_v.h +++ 
b/riscv/insns/vfwcvt_xu_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.xu.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h index 6ee011e7..441fa0a7 100644 --- a/riscv/insns/vfwmacc_vf.h +++ b/riscv/insns/vfwmacc_vf.h @@ -1,5 +1,8 @@ // vfwmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ vd = f64_mulAdd(rs1, vs2, vd); }) diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h index 99839afc..a654198b 100644 --- a/riscv/insns/vfwmacc_vv.h +++ b/riscv/insns/vfwmacc_vv.h @@ -1,5 +1,8 @@ // vfwmacc.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ vd = f64_mulAdd(vs1, vs2, vd); }) diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h index ea8f0500..18010ff4 100644 --- a/riscv/insns/vfwmsac_vf.h +++ b/riscv/insns/vfwmsac_vf.h @@ -1,5 +1,8 @@ // vfwmsac.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h index 81571706..9dc4073f 100644 --- a/riscv/insns/vfwmsac_vv.h +++ b/riscv/insns/vfwmsac_vv.h @@ -1,5 +1,8 @@ // vfwmsac.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h index 884e66fe..2bb543f6 100644 --- a/riscv/insns/vfwmul_vf.h +++ b/riscv/insns/vfwmul_vf.h @@ -1,5 +1,8 @@ // vfwmul.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mul(vs2, rs1); +}, +{ vd = f64_mul(vs2, 
rs1); }) diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h index f8e717e9..2ce38e62 100644 --- a/riscv/insns/vfwmul_vv.h +++ b/riscv/insns/vfwmul_vv.h @@ -1,5 +1,8 @@ // vfwmul.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mul(vs2, vs1); +}, +{ vd = f64_mul(vs2, vs1); }) diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h index bccc24ff..038bda08 100644 --- a/riscv/insns/vfwnmacc_vf.h +++ b/riscv/insns/vfwnmacc_vf.h @@ -1,5 +1,8 @@ // vfwnmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h index 3dcba1d7..bf863e04 100644 --- a/riscv/insns/vfwnmacc_vv.h +++ b/riscv/insns/vfwnmacc_vv.h @@ -1,5 +1,8 @@ // vfwnmacc.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h index 32ef6241..1e288e1b 100644 --- a/riscv/insns/vfwnmsac_vf.h +++ b/riscv/insns/vfwnmsac_vf.h @@ -1,5 +1,8 @@ // vfwnmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, vd); +}, +{ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd); }) diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h index d2447e1a..ce97749e 100644 --- a/riscv/insns/vfwnmsac_vv.h +++ b/riscv/insns/vfwnmsac_vv.h @@ -1,5 +1,8 @@ // vfwnmsac.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); }) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h index 49c9ebfa..22fb4dfb 100644 --- a/riscv/insns/vfwredosum_vs.h +++ b/riscv/insns/vfwredosum_vs.h @@ -1,8 +1,8 @@ // vfwredosum.vs vd, vs2, vs1 -require_vector; -require(P.VU.vsew * 2 <= P.VU.ELEN); 
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_VFP_VV_LOOP_WIDE_REDUCTION ({ + vd_0 = f32_add(vd_0, vs2); +}, +{ vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h index 3426ef85..277cf417 100644 --- a/riscv/insns/vfwredsum_vs.h +++ b/riscv/insns/vfwredsum_vs.h @@ -1,8 +1,8 @@ // vfwredsum.vs vd, vs2, vs1 -require_vector; -require(P.VU.vsew * 2 <= P.VU.ELEN); -require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_VFP_VV_LOOP_WIDE_REDUCTION ({ + vd_0 = f32_add(vd_0, vs2); +}, +{ vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h index 1d20c389..8c376884 100644 --- a/riscv/insns/vfwsub_vf.h +++ b/riscv/insns/vfwsub_vf.h @@ -1,5 +1,8 @@ // vfwsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_sub(vs2, rs1); +}, +{ vd = f64_sub(vs2, rs1); }) diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h index 0a72feae..ce08e36a 100644 --- a/riscv/insns/vfwsub_vv.h +++ b/riscv/insns/vfwsub_vv.h @@ -1,5 +1,8 @@ // vfwsub.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_sub(vs2, vs1); +}, +{ vd = f64_sub(vs2, vs1); }) diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h index fa3d7470..f6f47ca5 100644 --- a/riscv/insns/vfwsub_wf.h +++ b/riscv/insns/vfwsub_wf.h @@ -1,5 +1,8 @@ // vfwsub.wf vd, vs2, rs1 VI_VFP_WF_LOOP_WIDE ({ + vd = f32_sub(vs2, rs1); +}, +{ vd = f64_sub(vs2, rs1); }) diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h index 4c6fcf60..eef904dc 100644 --- a/riscv/insns/vfwsub_wv.h +++ b/riscv/insns/vfwsub_wv.h @@ -1,5 +1,8 @@ // vfwsub.wv vd, vs2, vs1 VI_VFP_WV_LOOP_WIDE ({ + vd = f32_sub(vs2, vs1); +}, +{ vd = f64_sub(vs2, vs1); }) diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index 766f0ab3..040f2b0b 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -1,6 +1,9 @@ // vmfeq.vf vd, vs2, fs1 VI_VFP_LOOP_CMP ({ + res = f16_eq(vs2, rs1); +}, +{ res = f32_eq(vs2, rs1); }, { diff --git a/riscv/insns/vmfeq_vv.h 
b/riscv/insns/vmfeq_vv.h index 19117fc7..fb24d132 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -1,6 +1,9 @@ // vmfeq.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ + res = f16_eq(vs2, vs1); +}, +{ res = f32_eq(vs2, vs1); }, { diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index c5f4c831..9e69855b 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,6 +1,9 @@ // vmfge.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(rs1, vs2); +}, +{ res = f32_le(rs1, vs2); }, { diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index 53873002..bd5d99b7 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,6 +1,9 @@ // vmfgt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(rs1, vs2); +}, +{ res = f32_lt(rs1, vs2); }, { diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index 1a3a7c4a..3d2852fc 100644 --- a/riscv/insns/vmfle_vf.h +++ b/riscv/insns/vmfle_vf.h @@ -1,6 +1,9 @@ // vmfle.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(vs2, rs1); +}, +{ res = f32_le(vs2, rs1); }, { diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index 067f1a96..203ef210 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,6 +1,9 @@ // vmfle.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(vs2, vs1); +}, +{ res = f32_le(vs2, vs1); }, { diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index 248071d8..4780adc5 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,6 +1,9 @@ // vmflt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(vs2, rs1); +}, +{ res = f32_lt(vs2, rs1); }, { diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index 71895df6..cdfc3fae 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,6 +1,9 @@ // vmflt.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(vs2, vs1); +}, +{ res = f32_lt(vs2, vs1); }, { diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index afccbcb3..84016993 100644 --- a/riscv/insns/vmfne_vf.h 
+++ b/riscv/insns/vmfne_vf.h @@ -1,6 +1,9 @@ // vmfne.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = !f16_eq(vs2, rs1); +}, +{ res = !f32_eq(vs2, rs1); }, { diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index d5df60c8..50dfa9c0 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -1,6 +1,9 @@ // vmfne.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = !f16_eq(vs2, vs1); +}, +{ res = !f32_eq(vs2, vs1); }, { diff --git a/riscv/processor.cc b/riscv/processor.cc index 4240fcd6..bf3c40a1 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -273,6 +273,9 @@ void processor_t::parse_isa_string(const char* str) if (!supports_extension('I')) bad_isa_string(str, "'I' extension is required"); + if (supports_extension(EXT_ZFH) && !supports_extension('F')) + bad_isa_string(str, "'Zfh' extension requires 'F'"); + if (supports_extension('D') && !supports_extension('F')) bad_isa_string(str, "'D' extension requires 'F'"); diff --git a/softfloat/f16_classify.c b/softfloat/f16_classify.c new file mode 100755 index 00000000..9402ff13 --- /dev/null +++ b/softfloat/f16_classify.c @@ -0,0 +1,36 @@ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && 
fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + diff --git a/softfloat/f16_to_i16.c b/softfloat/f16_to_i16.c new file mode 100644 index 00000000..0ec7ce14 --- /dev/null +++ b/softfloat/f16_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui16.c b/softfloat/f16_to_ui16.c new file mode 100644 index 00000000..818328a1 --- /dev/null +++ b/softfloat/f16_to_ui16.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_i16.c b/softfloat/f32_to_i16.c new file mode 100644 index 00000000..14ebc6b4 --- /dev/null +++ b/softfloat/f32_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. 
+ +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_ui16.c b/softfloat/f32_to_ui16.c new file mode 100644 index 00000000..a8f458da --- /dev/null +++ b/softfloat/f32_to_ui16.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index ddc39e34..5579c68a 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -141,8 +141,10 @@ void i64_to_f128M( int64_t, float128_t * ); /*---------------------------------------------------------------------------- | 16-bit (half-precision) floating-point operations. 
*----------------------------------------------------------------------------*/ +uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool ); uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool ); +int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool ); int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool ); int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool ); @@ -174,12 +176,15 @@ bool f16_eq_signaling( float16_t, float16_t ); bool f16_le_quiet( float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); +uint_fast16_t f16_classify( float16_t ); /*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool ); +int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool ); int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool ); int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index 52ee1dd0..56fc5602 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -38,6 +38,7 @@ softfloat_c_srcs = \ f128_to_ui64.c \ f128_to_ui64_r_minMag.c \ f16_add.c \ + f16_classify.c \ f16_div.c \ f16_eq.c \ f16_eq_signaling.c \ @@ -55,10 +56,12 @@ softfloat_c_srcs = \ f16_to_f128.c \ f16_to_f32.c \ f16_to_f64.c \ + f16_to_i16.c \ f16_to_i32.c \ f16_to_i32_r_minMag.c \ f16_to_i64.c \ f16_to_i64_r_minMag.c \ + f16_to_ui16.c \ f16_to_ui32.c \ f16_to_ui32_r_minMag.c \ f16_to_ui64.c \ @@ -82,10 +85,12 @@ softfloat_c_srcs = \ f32_to_f128.c \ f32_to_f16.c \ 
f32_to_f64.c \ + f32_to_i16.c \ f32_to_i32.c \ f32_to_i32_r_minMag.c \ f32_to_i64.c \ f32_to_i64_r_minMag.c \ + f32_to_ui16.c \ f32_to_ui32.c \ f32_to_ui32_r_minMag.c \ f32_to_ui64.c \ diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 629d5185..8bd98570 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -55,6 +55,13 @@ extern "C" { | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0xFFFFFFFF diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc index d8c9eb42..fe6be509 100644 --- a/spike_main/disasm.cc +++ b/spike_main/disasm.cc @@ -990,10 +990,10 @@ disassembler_t::disassembler_t(int xlen) match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.rtz.xu.f." #suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + match_##name##cvt_rtz_xu_f_##suf, mask_##name##cvt_rtz_xu_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.rtz.x.f." #suf, \ - match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \ + match_##name##cvt_rtz_x_f_##suf, mask_##name##cvt_rtz_x_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ //OPFVV/OPFVF