
rvv: add floating-point instructions

Based on v-spec 0.7.1. Supported:
  sections: 14, 15.3 ~ 15.4
  element size: 32

Signed-off-by: Bruce Hoult <bruce@hoult.org>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Signed-off-by: Dave Wen <dave.wen@sifive.com>
pull/303/head
Chih-Min Chao, 7 years ago
commit 833b965679
83 changed files (lines changed in parentheses):

  1. riscv/decode.h (265)
  2. riscv/insns/vfadd_vf.h (5)
  3. riscv/insns/vfadd_vv.h (5)
  4. riscv/insns/vfclass_v.h (5)
  5. riscv/insns/vfcvt_f_x_v.h (6)
  6. riscv/insns/vfcvt_f_xu_v.h (6)
  7. riscv/insns/vfcvt_x_f_v.h (5)
  8. riscv/insns/vfcvt_xu_f_v.h (5)
  9. riscv/insns/vfdiv_vf.h (5)
  10. riscv/insns/vfdiv_vv.h (5)
  11. riscv/insns/vfdot_vv.h (5)
  12. riscv/insns/vfmacc_vf.h (5)
  13. riscv/insns/vfmacc_vv.h (5)
  14. riscv/insns/vfmadd_vf.h (5)
  15. riscv/insns/vfmadd_vv.h (5)
  16. riscv/insns/vfmax_vf.h (5)
  17. riscv/insns/vfmax_vv.h (5)
  18. riscv/insns/vfmerge_vfm.h (25)
  19. riscv/insns/vfmin_vf.h (5)
  20. riscv/insns/vfmin_vv.h (5)
  21. riscv/insns/vfmsac_vf.h (5)
  22. riscv/insns/vfmsac_vv.h (5)
  23. riscv/insns/vfmsub_vf.h (5)
  24. riscv/insns/vfmsub_vv.h (5)
  25. riscv/insns/vfmul_vf.h (5)
  26. riscv/insns/vfmul_vv.h (5)
  27. riscv/insns/vfmv_f_s.h (33)
  28. riscv/insns/vfmv_s_f.h (29)
  29. riscv/insns/vfmv_v_f.h (20)
  30. riscv/insns/vfncvt_f_f_v.h (6)
  31. riscv/insns/vfncvt_f_x_v.h (6)
  32. riscv/insns/vfncvt_f_xu_v.h (6)
  33. riscv/insns/vfncvt_x_f_v.h (6)
  34. riscv/insns/vfncvt_xu_f_v.h (6)
  35. riscv/insns/vfnmacc_vf.h (5)
  36. riscv/insns/vfnmacc_vv.h (5)
  37. riscv/insns/vfnmadd_vf.h (5)
  38. riscv/insns/vfnmadd_vv.h (5)
  39. riscv/insns/vfnmsac_vf.h (5)
  40. riscv/insns/vfnmsac_vv.h (5)
  41. riscv/insns/vfnmsub_vf.h (5)
  42. riscv/insns/vfnmsub_vv.h (5)
  43. riscv/insns/vfrdiv_vf.h (5)
  44. riscv/insns/vfredmax_vs.h (5)
  45. riscv/insns/vfredmin_vs.h (5)
  46. riscv/insns/vfredosum_vs.h (5)
  47. riscv/insns/vfredsum_vs.h (5)
  48. riscv/insns/vfrsub_vf.h (5)
  49. riscv/insns/vfsgnj_vf.h (5)
  50. riscv/insns/vfsgnj_vv.h (5)
  51. riscv/insns/vfsgnjn_vf.h (5)
  52. riscv/insns/vfsgnjn_vv.h (5)
  53. riscv/insns/vfsgnjx_vf.h (5)
  54. riscv/insns/vfsgnjx_vv.h (5)
  55. riscv/insns/vfsqrt_v.h (5)
  56. riscv/insns/vfsub_vf.h (5)
  57. riscv/insns/vfsub_vv.h (5)
  58. riscv/insns/vfwadd_vf.h (5)
  59. riscv/insns/vfwadd_vv.h (5)
  60. riscv/insns/vfwadd_wf.h (5)
  61. riscv/insns/vfwadd_wv.h (5)
  62. riscv/insns/vfwcvt_f_f_v.h (7)
  63. riscv/insns/vfwcvt_f_x_v.h (7)
  64. riscv/insns/vfwcvt_f_xu_v.h (7)
  65. riscv/insns/vfwcvt_x_f_v.h (7)
  66. riscv/insns/vfwcvt_xu_f_v.h (7)
  67. riscv/insns/vfwmacc_vf.h (5)
  68. riscv/insns/vfwmacc_vv.h (5)
  69. riscv/insns/vfwmsac_vf.h (5)
  70. riscv/insns/vfwmsac_vv.h (5)
  71. riscv/insns/vfwmul_vf.h (5)
  72. riscv/insns/vfwmul_vv.h (5)
  73. riscv/insns/vfwnmacc_vf.h (5)
  74. riscv/insns/vfwnmacc_vv.h (5)
  75. riscv/insns/vfwnmsac_vf.h (5)
  76. riscv/insns/vfwnmsac_vv.h (5)
  77. riscv/insns/vfwredosum_vs.h (5)
  78. riscv/insns/vfwredsum_vs.h (5)
  79. riscv/insns/vfwsub_vf.h (5)
  80. riscv/insns/vfwsub_vv.h (5)
  81. riscv/insns/vfwsub_wf.h (5)
  82. riscv/insns/vfwsub_wv.h (5)
  83. riscv/riscv.mk.in (96)

riscv/decode.h (265)

@@ -148,6 +148,7 @@ private:
#define MMU (*p->get_mmu())
#define STATE (*p->get_state())
#define P (*p)
#define FLEN (p->get_flen())
#define READ_REG(reg) STATE.XPR[reg]
#define READ_FREG(reg) STATE.FPR[reg]
#define RD READ_REG(insn.rd())
@@ -314,6 +315,24 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew))
#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew))
#define DEBUG_RVV 0
#if DEBUG_RVV
#define DEBUG_RVV_FP_VV \
printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2));
#define DEBUG_RVV_FP_VF \
printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2));
#define DEBUG_RVV_FMA_VV \
printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2), to_f(vd_old));
#define DEBUG_RVV_FMA_VF \
printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old));
#else
#define DEBUG_RVV_FP_VV 0
#define DEBUG_RVV_FP_VF 0
#define DEBUG_RVV_FMA_VV 0
#define DEBUG_RVV_FMA_VF 0
#endif
//
// vector: masking skip helper
//
@@ -1480,8 +1499,252 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
} \
p->VU.vstart = 0;
//
// vector: vfp helper
//
#define VI_VFP_COMMON \
require_extension('F'); \
require_fp; \
require(P.VU.vsew == 32); \
require(!P.VU.vill);\
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
softfloat_roundingMode = STATE.frm;
#define VI_VFP_LOOP_BASE \
VI_VFP_COMMON \
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_LOOP_CMP_BASE \
VI_VFP_COMMON \
for (reg_t i = P.VU.vstart; i < vl; ++i) { \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
VI_LOOP_ELEMENT_SKIP(); \
uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx); \
uint64_t res = 0;
#define VI_VFP_LOOP_REDUCTION_BASE \
VI_VFP_COMMON \
float32_t vd_0 = P.VU.elt<float32_t>(rd_num, 0); \
float32_t vs1_0 = P.VU.elt<float32_t>(rs1_num, 0); \
vd_0 = vs1_0;\
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
int32_t &vd = P.VU.elt<int32_t>(rd_num, i); \
#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \
VI_VFP_COMMON \
float64_t vd_0 = f64(P.VU.elt<float64_t>(rs1_num, 0).v); \
for (reg_t i=P.VU.vstart; i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_LOOP_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 1)); \
memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 1)); \
}\
P.VU.vstart = 0; \
#define VI_VFP_LOOP_WIDE_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 2)); \
memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 2)); \
}\
P.VU.vstart = 0; \
set_fp_exceptions;
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
P.VU.vstart = 0; \
set_fp_exceptions; \
if (vl > 0 && TAIL_ZEROING) { \
P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \
P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \
} \
}
#define VI_VFP_LOOP_CMP_END \
switch(P.VU.vsew) { \
case e32: { \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
break; \
} \
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
} \
if (vl != 0 && TAIL_ZEROING){ \
for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
const int mlen = P.VU.vmlen; \
const int midx = (mlen * i) / 64; \
const int mpos = (mlen * i) % 64; \
uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \
vdi = (vdi & ~mmask);\
}\
}\
P.VU.vstart = 0; \
set_fp_exceptions;
#define VI_VFP_VV_LOOP(BODY) \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
#define VI_VFP_VV_LOOP_REDUCTION(BODY) \
VI_VFP_LOOP_REDUCTION_BASE \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
BODY; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e32)
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \
VI_VFP_LOOP_WIDE_REDUCTION_BASE \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
BODY; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e64)
#define VI_VFP_VF_LOOP(BODY) \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
#define VI_VFP_LOOP_CMP(BODY) \
VI_VFP_LOOP_CMP_BASE \
BODY; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_WIDE(BODY) \
VI_VFP_LOOP_BASE \
VI_CHECK_DSS(false); \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
#define VI_VFP_VV_LOOP_WIDE(BODY) \
VI_VFP_LOOP_BASE \
VI_CHECK_DSS(true); \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
#define VI_VFP_WF_LOOP_WIDE(BODY) \
VI_VFP_LOOP_BASE \
VI_CHECK_DDS(false); \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
#define VI_VFP_WV_LOOP_WIDE(BODY) \
VI_VFP_LOOP_BASE \
VI_CHECK_DDS(true); \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
BODY; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
// Seems that 0x0 doesn't work.
#define DEBUG_START 0x100
#define DEBUG_END (0x1000 - 1)
#endif
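
For orientation, the following is a hand-expanded sketch of what `VI_VFP_VV_LOOP({ vd = f32_add(vs1, vs2); })` (the body of vfadd.vv below) boils down to for SEW=32, with masking, the e16/e8 trap, and tail zeroing elided. It is illustrative only, not code from the patch:

    // Sketch: per-element expansion of VI_VFP_VV_LOOP for SEW=32 (simplified).
    require_extension('F');                  // vector FP needs the F extension
    require_fp;                              // and mstatus.FS enabled
    require(P.VU.vsew == 32);                // this patch handles 32-bit elements only
    softfloat_roundingMode = STATE.frm;      // dynamic rounding mode from fcsr
    for (reg_t i = P.VU.vstart; i < P.VU.vl; ++i) {
      float32_t &vd = P.VU.elt<float32_t>(insn.rd(), i);
      float32_t vs1 = P.VU.elt<float32_t>(insn.rs1(), i);
      float32_t vs2 = P.VU.elt<float32_t>(insn.rs2(), i);
      vd = f32_add(vs1, vs2);                // Berkeley SoftFloat single-precision add
      set_fp_exceptions;                     // fold softfloat flags into fflags
    }
    P.VU.vstart = 0;                         // a completed instruction resets vstart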

riscv/insns/vfadd_vf.h (5)

@@ -0,0 +1,5 @@
// vfadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f32_add(rs1, vs2);
})

riscv/insns/vfadd_vv.h (5)

@@ -0,0 +1,5 @@
// vfadd.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f32_add(vs1, vs2);
})

riscv/insns/vfclass_v.h (5)

@@ -0,0 +1,5 @@
// vfclass.v vd, vs2, vm
VI_VFP_VV_LOOP
({
vd.v = f32_classify(vs2);
})

riscv/insns/vfcvt_f_x_v.h (6)

@@ -0,0 +1,6 @@
// vfcvt.f.x.v vd, vs2, vm
VI_VFP_VV_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
})

riscv/insns/vfcvt_f_xu_v.h (6)

@@ -0,0 +1,6 @@
// vfcvt.f.xu.v vd, vs2, vm
VI_VFP_VV_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
})

riscv/insns/vfcvt_x_f_v.h (5)

@@ -0,0 +1,5 @@
// vfcvt.x.f.v vd, vs2, vm
VI_VFP_VV_LOOP
({
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
})

riscv/insns/vfcvt_xu_f_v.h (5)

@@ -0,0 +1,5 @@
// vfcvt.xu.f.v vd, vs2, vm
VI_VFP_VV_LOOP
({
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
})
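
A note on the conversion helpers: these are Berkeley SoftFloat-3 calls, in which f32_to_i32(a, roundingMode, exact) takes the rounding mode explicitly (here the dynamic frm field from fcsr) plus a final bool that, when true, raises the inexact flag on lossy conversions. The vfncvt and vfwcvt files below use the f64/i64 variants of the same pattern.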

riscv/insns/vfdiv_vf.h (5)

@@ -0,0 +1,5 @@
// vfdiv.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f32_div(vs2, rs1);
})

riscv/insns/vfdiv_vv.h (5)

@@ -0,0 +1,5 @@
// vfdiv.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f32_div(vs2, vs1);
})

riscv/insns/vfdot_vv.h (5)

@@ -0,0 +1,5 @@
// vfdot.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f32_add(vd, f32_mul(vs2, vs1));
})

riscv/insns/vfmacc_vf.h (5)

@@ -0,0 +1,5 @@
// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * f[rs1]) + vd[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, vd);
})

riscv/insns/vfmacc_vv.h (5)

@@ -0,0 +1,5 @@
// vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, vd);
})
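
All of the multiply-add families here (vfmacc, vfmsac, vfnmacc, vfnmsub, and their widening forms) go through f32_mulAdd, SoftFloat's fused multiply-add, rather than a separate f32_mul and f32_add. The difference is observable because an FMA rounds only once. A standalone illustration using the C library's fmaf (not part of this patch; assumes standard float evaluation):

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = 1.0f + 0x1p-12f;     // 1 + 2^-12, exactly representable
      float c = -1.0f;
      float prod  = a * a;           // rounded to float here: 1 + 2^-11
      float split = prod + c;        // second rounding: 2^-11 (the 2^-24 term is lost)
      float fused = fmaf(a, a, c);   // single rounding: 2^-11 + 2^-24
      printf("fused=%a split=%a\n", fused, split);
      return 0;
    }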

riscv/insns/vfmadd_vf.h (5)

@@ -0,0 +1,5 @@
// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, vs2);
})

riscv/insns/vfmadd_vv.h (5)

@@ -0,0 +1,5 @@
// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, vs2);
})

riscv/insns/vfmax_vf.h (5)

@@ -0,0 +1,5 @@
// vfmax
VI_VFP_VF_LOOP
({
vd = f32_max(vs2, rs1);
})

riscv/insns/vfmax_vv.h (5)

@@ -0,0 +1,5 @@
// vfmax
VI_VFP_VV_LOOP
({
vd = f32_max(vs2, vs1);
})

riscv/insns/vfmerge_vfm.h (25)

@@ -0,0 +1,25 @@
// vfmerge.vfm vd, vs2, rs1, v0
require_extension('F');
require_fp;
require(P.VU.vsew == 32);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
int midx = (P.VU.vmlen * i) / 64;
int mpos = (P.VU.vmlen * i) % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
}
VI_TAIL_ZERO(1);
P.VU.vstart = 0;
set_fp_exceptions;
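
The `(P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1` expression reads the predicate for element i out of mask register v0: with vmlen mask bits reserved per element, the bit sits at offset (vmlen*i)%64 within 64-bit word (vmlen*i)/64. A standalone sketch of the same addressing (names are ours, not spike's):

    #include <cstdint>

    // Return element i's mask bit from a v0 stored as 64-bit words,
    // with vmlen mask bits reserved per element.
    bool mask_bit(const uint64_t *v0, int vmlen, int i) {
      int midx = (vmlen * i) / 64;   // which 64-bit word
      int mpos = (vmlen * i) % 64;   // bit offset within that word
      return (v0[midx] >> mpos) & 0x1;
    }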

riscv/insns/vfmin_vf.h (5)

@@ -0,0 +1,5 @@
// vfmin vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f32_min(vs2, rs1);
})

riscv/insns/vfmin_vv.h (5)

@@ -0,0 +1,5 @@
// vfmin vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f32_min(vs2, vs1);
})

riscv/insns/vfmsac_vf.h (5)

@@ -0,0 +1,5 @@
// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
})

riscv/insns/vfmsac_vv.h (5)

@@ -0,0 +1,5 @@
// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
})
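
The recurring `f32(vd.v ^ F32_SIGN)` idiom negates an operand by XORing the IEEE-754 sign bit of its raw pattern. That is exact for every value, including NaN, infinity, and ±0, and raises no exceptions, which is why the msac/nmacc/nmadd variants are built this way instead of with a subtraction. A standalone demonstration (kF32Sign mirrors spike's F32_SIGN constant):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static const uint32_t kF32Sign = UINT32_C(0x80000000);  // f32 sign-bit mask

    int main() {
      float x = 3.5f;
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);  // raw IEEE-754 bit pattern
      bits ^= kF32Sign;                     // flip only the sign bit
      std::memcpy(&x, &bits, sizeof x);
      printf("%f\n", x);                    // prints -3.500000
      return 0;
    }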

riscv/insns/vfmsub_vf.h (5)

@@ -0,0 +1,5 @@
// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
})

riscv/insns/vfmsub_vv.h (5)

@@ -0,0 +1,5 @@
// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
})

riscv/insns/vfmul_vf.h (5)

@@ -0,0 +1,5 @@
// vfmul.vf vd, vs2, rs1, vm
VI_VFP_VF_LOOP
({
vd = f32_mul(vs2, rs1);
})

riscv/insns/vfmul_vv.h (5)

@@ -0,0 +1,5 @@
// vfmul.vv vd, vs1, vs2, vm
VI_VFP_VV_LOOP
({
vd = f32_mul(vs1, vs2);
})

riscv/insns/vfmv_f_s.h (33)

@@ -0,0 +1,33 @@
// vfmv_f_s: rd = vs2[0] (rs1=0)
require(insn.v_vm() == 1);
require_fp;
require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
reg_t rs2_num = insn.rs2();
uint64_t vs2_0 = 0;
const reg_t sew = P.VU.vsew;
switch(sew) {
case e8:
vs2_0 = P.VU.elt<uint8_t>(rs2_num, 0);
break;
case e16:
vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0);
break;
case e32:
vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
break;
default:
vs2_0 = P.VU.elt<uint64_t>(rs2_num, 0);
break;
}
// NaN-box the value: set all bits above SEW when FLEN > SEW
if (FLEN > sew) {
vs2_0 = vs2_0 | ~((1ul << sew) - 1);
}
if (FLEN == 64) {
WRITE_FRD(f64(vs2_0));
} else {
WRITE_FRD(f32(vs2_0));
}
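
The `vs2_0 | ~((1ul << sew) - 1)` step above is NaN-boxing: a value narrower than the FP register must have every bit above SEW set to 1, or a consumer reading the register at the wider width treats it as a canonical NaN. A standalone illustration for sew = 32 (the sample value is ours):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned sew = 32;
      uint64_t vs2_0 = 0x3f800000;  // raw bits of 1.0f
      uint64_t boxed = vs2_0 | ~(((uint64_t)1 << sew) - 1);
      printf("%016llx\n", (unsigned long long)boxed);  // ffffffff3f800000
      return 0;
    }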

riscv/insns/vfmv_s_f.h (29)

@@ -0,0 +1,29 @@
// vfmv_s_f: vd[0] = rs1 (vs2=0)
require(insn.v_vm() == 1);
require_fp;
require(P.VU.vsew == e32);
reg_t vl = P.VU.vl;
if (vl > 0) {
reg_t rd_num = insn.rd();
reg_t sew = P.VU.vsew;
if (FLEN == 64)
P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
else
P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
const reg_t max_len = P.VU.VLEN / sew;
for (reg_t i = 1; i < max_len; ++i) {
switch(sew) {
case e32:
P.VU.elt<uint32_t>(rd_num, i) = 0;
break;
default:
require(false);
break;
}
}
vl = 0;
}

riscv/insns/vfmv_v_f.h (20)

@@ -0,0 +1,20 @@
// vfmv.v.f vd, rs1
require_extension('F');
require_fp;
require(P.VU.vsew == 32);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
vd = rs1;
}
VI_TAIL_ZERO(1);
P.VU.vstart = 0;
set_fp_exceptions;

riscv/insns/vfncvt_f_f_v.h (6)

@@ -0,0 +1,6 @@
// vfncvt.f.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
VI_VFP_LOOP_END

riscv/insns/vfncvt_f_x_v.h (6)

@@ -0,0 +1,6 @@
// vfncvt.f.x.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_SD;
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = i64_to_f32(vs2);
VI_VFP_LOOP_END

riscv/insns/vfncvt_f_xu_v.h (6)

@@ -0,0 +1,6 @@
// vfncvt.f.xu.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_SD;
auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = ui64_to_f32(vs2);
VI_VFP_LOOP_END

riscv/insns/vfncvt_x_f_v.h (6)

@@ -0,0 +1,6 @@
// vfncvt.x.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i) = f64_to_i32(vs2, STATE.frm, true);
VI_VFP_LOOP_END

riscv/insns/vfncvt_xu_f_v.h (6)

@@ -0,0 +1,6 @@
// vfncvt.xu.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true);
VI_VFP_LOOP_END

riscv/insns/vfnmacc_vf.h (5)

@@ -0,0 +1,5 @@
// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
})

riscv/insns/vfnmacc_vv.h (5)

@@ -0,0 +1,5 @@
// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
})

riscv/insns/vfnmadd_vf.h (5)

@@ -0,0 +1,5 @@
// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
})

riscv/insns/vfnmadd_vv.h (5)

@@ -0,0 +1,5 @@
// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
})

riscv/insns/vfnmsac_vf.h (5)

@@ -0,0 +1,5 @@
// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
})

riscv/insns/vfnmsac_vv.h (5)

@@ -0,0 +1,5 @@
// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
})

riscv/insns/vfnmsub_vf.h (5)

@@ -0,0 +1,5 @@
// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
})

riscv/insns/vfnmsub_vv.h (5)

@@ -0,0 +1,5 @@
// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
})

riscv/insns/vfrdiv_vf.h (5)

@@ -0,0 +1,5 @@
// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_div(rs1, vs2);
})

riscv/insns/vfredmax_vs.h (5)

@@ -0,0 +1,5 @@
// vfredmax vd, vs2, vs1
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_max(vd_0, vs2);
})

riscv/insns/vfredmin_vs.h (5)

@@ -0,0 +1,5 @@
// vfredmin vd, vs2, vs1
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_min(vd_0, vs2);
})

riscv/insns/vfredosum_vs.h (5)

@@ -0,0 +1,5 @@
// vfredosum: vd[0] = sum( vs2[*] , vs1[0] )
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
})

riscv/insns/vfredsum_vs.h (5)

@@ -0,0 +1,5 @@
// vfredsum: vd[0] = sum( vs2[*] , vs1[0] )
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
})
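
Note that vfredsum and vfredosum share the same body in this commit; both reduce in element order. A hand-expanded sketch of `VI_VFP_VV_LOOP_REDUCTION({ vd_0 = f32_add(vd_0, vs2); })`, masking and tail zeroing elided:

    // Sketch: ordered single-width FP reduction (simplified).
    float32_t vd_0 = P.VU.elt<float32_t>(insn.rs1(), 0);  // seed from vs1[0]
    for (reg_t i = P.VU.vstart; i < P.VU.vl; ++i) {
      float32_t vs2 = P.VU.elt<float32_t>(insn.rs2(), i);
      vd_0 = f32_add(vd_0, vs2);                          // running sum
    }
    P.VU.elt<uint32_t>(insn.rd(), 0) = vd_0.v;            // scalar result in vd[0]
    set_fp_exceptions;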

riscv/insns/vfrsub_vf.h (5)

@@ -0,0 +1,5 @@
// vfrsub.vf vd, vs2, rs1 # vd[i] = f[rs1] - vs2[i]
VI_VFP_VF_LOOP
({
vd = f32_sub(rs1, vs2);
})

riscv/insns/vfsgnj_vf.h (5)

@@ -0,0 +1,5 @@
// vfsgnj.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = fsgnj32(rs1.v, vs2.v, false, false);
})

riscv/insns/vfsgnj_vv.h (5)

@@ -0,0 +1,5 @@
// vfsgnj.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs1.v, vs2.v, false, false);
})

riscv/insns/vfsgnjn_vf.h (5)

@@ -0,0 +1,5 @@
// vfsgnjn.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = fsgnj32(rs1.v, vs2.v, true, false);
})

riscv/insns/vfsgnjn_vv.h (5)

@@ -0,0 +1,5 @@
// vfsgnjn.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs1.v, vs2.v, true, false);
})

riscv/insns/vfsgnjx_vf.h (5)

@@ -0,0 +1,5 @@
// vfsgnjx.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = fsgnj32(rs1.v, vs2.v, false, true);
})

riscv/insns/vfsgnjx_vv.h (5)

@@ -0,0 +1,5 @@
// vfsgnjx.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs1.v, vs2.v, false, true);
})

riscv/insns/vfsqrt_v.h (5)

@@ -0,0 +1,5 @@
// vfsqrt.v vd, vs2, vm
VI_VFP_VV_LOOP
({
vd = f32_sqrt(vs2);
})

riscv/insns/vfsub_vf.h (5)

@@ -0,0 +1,5 @@
// vfsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f32_sub(vs2, rs1);
})

riscv/insns/vfsub_vv.h (5)

@@ -0,0 +1,5 @@
// vfsub.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f32_sub(vs2, vs1);
})

riscv/insns/vfwadd_vf.h (5)

@@ -0,0 +1,5 @@
// vfwadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_add(vs2, rs1);
})

riscv/insns/vfwadd_vv.h (5)

@@ -0,0 +1,5 @@
// vfwadd.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_add(vs2, vs1);
})
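
The widening forms first convert each 32-bit source element with f32_to_f64 (exact, since every binary32 value is representable in binary64) and then perform the operation once at double precision, so only one rounding occurs. A hand-expanded per-element sketch of `VI_VFP_VV_LOOP_WIDE({ vd = f64_add(vs2, vs1); })`:

    // Sketch: one iteration of a widening vector FP add (simplified).
    float64_t &vd = P.VU.elt<float64_t>(insn.rd(), i);               // 2*SEW destination
    float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(insn.rs1(), i));  // exact widen
    float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(insn.rs2(), i));  // exact widen
    vd = f64_add(vs2, vs1);                                          // single rounding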

riscv/insns/vfwadd_wf.h (5)

@@ -0,0 +1,5 @@
// vfwadd.wf vd, vs2, rs1
VI_VFP_WF_LOOP_WIDE
({
vd = f64_add(vs2, rs1);
})

riscv/insns/vfwadd_wv.h (5)

@@ -0,0 +1,5 @@
// vfwadd.wv vd, vs2, vs1
VI_VFP_WV_LOOP_WIDE
({
vd = f64_add(vs2, vs1);
})

riscv/insns/vfwcvt_f_f_v.h (7)

@@ -0,0 +1,7 @@
// vfwcvt.f.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = f32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END

riscv/insns/vfwcvt_f_x_v.h (7)

@@ -0,0 +1,7 @@
// vfwcvt.f.x.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = i32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END

riscv/insns/vfwcvt_f_xu_v.h (7)

@@ -0,0 +1,7 @@
// vfwcvt.f.xu.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = ui32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END

riscv/insns/vfwcvt_x_f_v.h (7)

@@ -0,0 +1,7 @@
// vfwcvt.x.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int64_t>(rd_num, i) = f32_to_i64(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END

riscv/insns/vfwcvt_xu_f_v.h (7)

@@ -0,0 +1,7 @@
// vfwcvt.xu.f.v vd, vs2, vm
VI_VFP_LOOP_BASE
VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint64_t>(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END

riscv/insns/vfwmacc_vf.h (5)

@@ -0,0 +1,5 @@
// vfwmacc.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_mulAdd(rs1, vs2, vd);
})

riscv/insns/vfwmacc_vv.h (5)

@@ -0,0 +1,5 @@
// vfwmacc.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_mulAdd(vs1, vs2, vd);
})

riscv/insns/vfwmsac_vf.h (5)

@@ -0,0 +1,5 @@
// vfwmsac.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN));
})

riscv/insns/vfwmsac_vv.h (5)

@@ -0,0 +1,5 @@
// vfwmsac.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN));
})

riscv/insns/vfwmul_vf.h (5)

@@ -0,0 +1,5 @@
// vfwmul.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_mul(vs2, rs1);
})

riscv/insns/vfwmul_vv.h (5)

@@ -0,0 +1,5 @@
// vfwmul.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_mul(vs2, vs1);
})

riscv/insns/vfwnmacc_vf.h (5)

@@ -0,0 +1,5 @@
// vfwnmacc.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
})

riscv/insns/vfwnmacc_vv.h (5)

@@ -0,0 +1,5 @@
// vfwnmacc.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
})

riscv/insns/vfwnmsac_vf.h (5)

@@ -0,0 +1,5 @@
// vfwnmsac.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd);
})

riscv/insns/vfwnmsac_vv.h (5)

@@ -0,0 +1,5 @@
// vfwnmsac.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd);
})

riscv/insns/vfwredosum_vs.h (5)

@@ -0,0 +1,5 @@
// vfwredosum.vs vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
})

riscv/insns/vfwredsum_vs.h (5)

@@ -0,0 +1,5 @@
// vfwredsum.vs vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
})

riscv/insns/vfwsub_vf.h (5)

@@ -0,0 +1,5 @@
// vfwsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f64_sub(vs2, rs1);
})

riscv/insns/vfwsub_vv.h (5)

@@ -0,0 +1,5 @@
// vfwsub.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f64_sub(vs2, vs1);
})

riscv/insns/vfwsub_wf.h (5)

@@ -0,0 +1,5 @@
// vfwsub.wf vd, vs2, rs1
VI_VFP_WF_LOOP_WIDE
({
vd = f64_sub(vs2, rs1);
})

riscv/insns/vfwsub_wv.h (5)

@@ -0,0 +1,5 @@
// vfwsub.wv vd, vs2, vs1
VI_VFP_WV_LOOP_WIDE
({
vd = f64_sub(vs2, vs1);
})

riscv/riscv.mk.in (96)

@@ -496,6 +496,101 @@ riscv_insn_ext_v_alu_int = \
vxor_vv \
vxor_vx \
riscv_insn_ext_v_alu_fp = \
vfadd_vf \
vfadd_vv \
vfclass_v \
vfcvt_f_x_v \
vfcvt_f_xu_v \
vfcvt_x_f_v \
vfcvt_xu_f_v \
vfdiv_vf \
vfdiv_vv \
vfdot_vv \
vfmacc_vf \
vfmacc_vv \
vfmadd_vf \
vfmadd_vv \
vfmax_vf \
vfmax_vv \
vfmerge_vfm \
vfmin_vf \
vfmin_vv \
vfmsac_vf \
vfmsac_vv \
vfmsub_vf \
vfmsub_vv \
vfmul_vf \
vfmul_vv \
vfmv_f_s \
vfmv_s_f \
vfmv_v_f \
vfncvt_f_f_v \
vfncvt_f_x_v \
vfncvt_f_xu_v \
vfncvt_x_f_v \
vfncvt_xu_f_v \
vfnmacc_vf \
vfnmacc_vv \
vfnmadd_vf \
vfnmadd_vv \
vfnmsac_vf \
vfnmsac_vv \
vfnmsub_vf \
vfnmsub_vv \
vfrdiv_vf \
vfredmax_vs \
vfredmin_vs \
vfredosum_vs \
vfredsum_vs \
vfrsub_vf \
vfsgnj_vf \
vfsgnj_vv \
vfsgnjn_vf \
vfsgnjn_vv \
vfsgnjx_vf \
vfsgnjx_vv \
vfsqrt_v \
vfsub_vf \
vfsub_vv \
vfwadd_vf \
vfwadd_vv \
vfwadd_wf \
vfwadd_wv \
vfwcvt_f_f_v \
vfwcvt_f_x_v \
vfwcvt_f_xu_v \
vfwcvt_x_f_v \
vfwcvt_xu_f_v \
vfwmacc_vf \
vfwmacc_vv \
vfwmsac_vf \
vfwmsac_vv \
vfwmul_vf \
vfwmul_vv \
vfwnmacc_vf \
vfwnmacc_vv \
vfwnmsac_vf \
vfwnmsac_vv \
vfwredosum_vs \
vfwredsum_vs \
vfwsub_vf \
vfwsub_vv \
vfwsub_wf \
vfwsub_wv \
vmfeq_vf \
vmfeq_vv \
vmfge_vf \
vmfgt_vf \
vmfle_vf \
vmfle_vv \
vmflt_vf \
vmflt_vv \
vmfne_vf \
vmfne_vv \
vmford_vf \
vmford_vv \
riscv_insn_ext_v_ldst = \
vlb_v \
vlh_v \
@@ -547,6 +642,7 @@ riscv_insn_ext_v_ctrl = \
vsetvl \
riscv_insn_ext_v = \
$(riscv_insn_ext_v_alu_fp) \
$(riscv_insn_ext_v_alu_int) \
$(riscv_insn_ext_v_ctrl) \
$(riscv_insn_ext_v_ldst) \
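
Each name in these make variables corresponds to a riscv/insns/<name>.h file: the build generates one translation unit per listed instruction that wraps the header in the decode/execute boilerplate, so adding an instruction is exactly the pair of changes seen in this commit, a new header plus a new entry here.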
