
Merge pull request #458 from chihminchao/rvv-fp16

Rvv fp16
Andrew Waterman, 6 years ago (committed by GitHub)
parent commit: b1de71f464
Files changed (changed-line count in parentheses):

  1. riscv/decode.h (193)
  2. riscv/insns/vfadd_vf.h (3)
  3. riscv/insns/vfadd_vv.h (3)
  4. riscv/insns/vfclass_v.h (3)
  5. riscv/insns/vfcvt_f_x_v.h (4)
  6. riscv/insns/vfcvt_f_xu_v.h (4)
  7. riscv/insns/vfcvt_rtz_x_f_v.h (5)
  8. riscv/insns/vfcvt_rtz_xu_f_v.h (5)
  9. riscv/insns/vfcvt_x_f_v.h (3)
  10. riscv/insns/vfcvt_xu_f_v.h (3)
  11. riscv/insns/vfdiv_vf.h (3)
  12. riscv/insns/vfdiv_vv.h (3)
  13. riscv/insns/vfdot_vv.h (3)
  14. riscv/insns/vfmacc_vf.h (3)
  15. riscv/insns/vfmacc_vv.h (3)
  16. riscv/insns/vfmadd_vf.h (3)
  17. riscv/insns/vfmadd_vv.h (3)
  18. riscv/insns/vfmax_vf.h (3)
  19. riscv/insns/vfmax_vv.h (3)
  20. riscv/insns/vfmerge_vfm.h (17)
  21. riscv/insns/vfmin_vf.h (3)
  22. riscv/insns/vfmin_vv.h (3)
  23. riscv/insns/vfmsac_vf.h (3)
  24. riscv/insns/vfmsac_vv.h (3)
  25. riscv/insns/vfmsub_vf.h (3)
  26. riscv/insns/vfmsub_vv.h (3)
  27. riscv/insns/vfmul_vf.h (3)
  28. riscv/insns/vfmul_vv.h (3)
  29. riscv/insns/vfmv_f_s.h (25)
  30. riscv/insns/vfmv_s_f.h (16)
  31. riscv/insns/vfmv_v_f.h (8)
  32. riscv/insns/vfncvt_f_f_w.h (15)
  33. riscv/insns/vfncvt_f_x_w.h (14)
  34. riscv/insns/vfncvt_f_xu_w.h (14)
  35. riscv/insns/vfncvt_rod_f_f_w.h (17)
  36. riscv/insns/vfncvt_rtz_x_f_w.h (16)
  37. riscv/insns/vfncvt_rtz_xu_f_w.h (16)
  38. riscv/insns/vfncvt_x_f_w.h (16)
  39. riscv/insns/vfncvt_xu_f_w.h (16)
  40. riscv/insns/vfnmacc_vf.h (3)
  41. riscv/insns/vfnmacc_vv.h (3)
  42. riscv/insns/vfnmadd_vf.h (3)
  43. riscv/insns/vfnmadd_vv.h (3)
  44. riscv/insns/vfnmsac_vf.h (3)
  45. riscv/insns/vfnmsac_vv.h (3)
  46. riscv/insns/vfnmsub_vf.h (3)
  47. riscv/insns/vfnmsub_vv.h (3)
  48. riscv/insns/vfrdiv_vf.h (3)
  49. riscv/insns/vfredmax_vs.h (3)
  50. riscv/insns/vfredmin_vs.h (3)
  51. riscv/insns/vfredosum_vs.h (3)
  52. riscv/insns/vfredsum_vs.h (3)
  53. riscv/insns/vfrsub_vf.h (3)
  54. riscv/insns/vfsgnj_vf.h (3)
  55. riscv/insns/vfsgnj_vv.h (3)
  56. riscv/insns/vfsgnjn_vf.h (3)
  57. riscv/insns/vfsgnjn_vv.h (3)
  58. riscv/insns/vfsgnjx_vf.h (3)
  59. riscv/insns/vfsgnjx_vv.h (3)
  60. riscv/insns/vfslide1down_vf.h (8)
  61. riscv/insns/vfslide1up_vf.h (8)
  62. riscv/insns/vfsqrt_v.h (3)
  63. riscv/insns/vfsub_vf.h (3)
  64. riscv/insns/vfsub_vv.h (3)
  65. riscv/insns/vfwadd_vf.h (3)
  66. riscv/insns/vfwadd_vv.h (3)
  67. riscv/insns/vfwadd_wf.h (3)
  68. riscv/insns/vfwadd_wv.h (3)
  69. riscv/insns/vfwcvt_f_f_v.h (14)
  70. riscv/insns/vfwcvt_f_x_v.h (14)
  71. riscv/insns/vfwcvt_f_xu_v.h (14)
  72. riscv/insns/vfwcvt_rtz_x_f_v.h (16)
  73. riscv/insns/vfwcvt_rtz_xu_f_v.h (16)
  74. riscv/insns/vfwcvt_x_f_v.h (14)
  75. riscv/insns/vfwcvt_xu_f_v.h (14)
  76. riscv/insns/vfwmacc_vf.h (3)
  77. riscv/insns/vfwmacc_vv.h (3)
  78. riscv/insns/vfwmsac_vf.h (3)
  79. riscv/insns/vfwmsac_vv.h (3)
  80. riscv/insns/vfwmul_vf.h (3)
  81. riscv/insns/vfwmul_vv.h (3)
  82. riscv/insns/vfwnmacc_vf.h (3)
  83. riscv/insns/vfwnmacc_vv.h (3)
  84. riscv/insns/vfwnmsac_vf.h (3)
  85. riscv/insns/vfwnmsac_vv.h (3)
  86. riscv/insns/vfwredosum_vs.h (6)
  87. riscv/insns/vfwredsum_vs.h (6)
  88. riscv/insns/vfwsub_vf.h (3)
  89. riscv/insns/vfwsub_vv.h (3)
  90. riscv/insns/vfwsub_wf.h (3)
  91. riscv/insns/vfwsub_wv.h (3)
  92. riscv/insns/vmfeq_vf.h (3)
  93. riscv/insns/vmfeq_vv.h (3)
  94. riscv/insns/vmfge_vf.h (3)
  95. riscv/insns/vmfgt_vf.h (3)
  96. riscv/insns/vmfle_vf.h (3)
  97. riscv/insns/vmfle_vv.h (3)
  98. riscv/insns/vmflt_vf.h (3)
  99. riscv/insns/vmflt_vv.h (3)
  100. riscv/insns/vmfne_vf.h (3)

193
riscv/decode.h

@ -282,21 +282,29 @@ class wait_for_interrupt_t {};
#define invalid_pc(pc) ((pc) & 1)
/* Convenience wrappers to simplify softfloat code sequences */
#define isBoxedF16(r) (isBoxedF32(r) && ((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48)))
#define unboxF16(r) (isBoxedF16(r) ? (uint32_t)r.v[0] : defaultNaNF16UI)
#define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0))
#define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI)
#define isBoxedF64(r) ((r.v[1] + 1) == 0)
#define unboxF64(r) (isBoxedF64(r) ? r.v[0] : defaultNaNF64UI)
typedef float128_t freg_t;
inline float16_t f16(uint16_t v) { return { v }; }
inline float32_t f32(uint32_t v) { return { v }; }
inline float64_t f64(uint64_t v) { return { v }; }
inline float16_t f16(freg_t r) { return f16(unboxF16(r)); }
inline float32_t f32(freg_t r) { return f32(unboxF32(r)); }
inline float64_t f64(freg_t r) { return f64(unboxF64(r)); }
inline float128_t f128(freg_t r) { return r; }
inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; }
inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; }
inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; }
inline freg_t freg(float128_t f) { return f; }
#define F16_SIGN ((uint16_t)1 << 15)
#define F32_SIGN ((uint32_t)1 << 31)
#define F64_SIGN ((uint64_t)1 << 63)
#define fsgnj16(a, b, n, x) \
f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN))
#define fsgnj32(a, b, n, x) \
f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? F32_SIGN : 0) ^ f32(b).v) & F32_SIGN))
#define fsgnj64(a, b, n, x) \
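Note: the new F16 helpers follow the same NaN-boxing scheme as the existing F32/F64 ones: a 16-bit value sits in the low bits of the 128-bit freg_t with every higher bit set to 1, and unboxF16 substitutes defaultNaNF16UI whenever that pattern is violated. A standalone sketch of the round trip (plain C++, mirroring the definitions above; freg_t here is a stand-in for softfloat's float128_t):

#include <cstdint>
#include <cstdio>

struct freg_t { uint64_t v[2]; };   // stand-in for softfloat's float128_t layout

freg_t box_f16(uint16_t h) {
  // freg(float16_t): low 16 bits hold the value, all other bits are 1
  return { { (~UINT64_C(0) << 16) | h, ~UINT64_C(0) } };
}

bool is_boxed_f16(freg_t r) {
  // isBoxedF64 && isBoxedF32 && isBoxedF16 collapsed into one predicate
  return r.v[1] + 1 == 0 &&
         (uint32_t)((r.v[0] >> 32) + 1) == 0 &&
         (uint64_t)((r.v[0] >> 16) + 1) == (UINT64_C(1) << 48);
}

int main() {
  freg_t r = box_f16(0x3c00);        // 1.0 in IEEE half precision
  printf("%d 0x%04x\n", is_boxed_f16(r), (unsigned)(uint16_t)r.v[0]);  // 1 0x3c00
  r.v[0] ^= UINT64_C(1) << 20;       // corrupt the boxing pattern
  printf("%d\n", is_boxed_f16(r));   // 0 -> unboxF16 would yield defaultNaNF16UI
}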
@ -1654,7 +1662,8 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
//
#define VI_VFP_COMMON \
require_fp; \
require((P.VU.vsew == e32 && p->supports_extension('F')) || \
require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \
(P.VU.vsew == e32 && p->supports_extension('F')) || \
(P.VU.vsew == e64 && p->supports_extension('D'))); \
require_vector;\
reg_t vl = P.VU.vl; \
@ -1698,11 +1707,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
} \
P.VU.vstart = 0; \
#define VI_VFP_LOOP_WIDE_END \
} \
P.VU.vstart = 0; \
set_fp_exceptions;
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
P.VU.vstart = 0; \
@ -1712,24 +1716,31 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
#define VI_VFP_LOOP_CMP_END \
switch(P.VU.vsew) { \
case e16: \
case e32: \
case e64: { \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
break; \
} \
case e16: \
default: \
require(0); \
break; \
}; \
} \
P.VU.vstart = 0; \
set_fp_exceptions;
P.VU.vstart = 0;
#define VI_VFP_VV_LOOP(BODY32, BODY64) \
#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
@ -1746,7 +1757,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
set_fp_exceptions; \
break; \
}\
case e16: \
default: \
require(0); \
break; \
@ -1754,10 +1764,17 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \
#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \
VI_CHECK_REDUCTION(false) \
VI_VFP_COMMON \
switch(P.VU.vsew) { \
case e16: {\
VI_VFP_LOOP_REDUCTION_BASE(16) \
BODY16; \
set_fp_exceptions; \
VI_VFP_LOOP_REDUCTION_END(e16) \
break; \
}\
case e32: {\
VI_VFP_LOOP_REDUCTION_BASE(32) \
BODY32; \
@ -1772,24 +1789,54 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
VI_VFP_LOOP_REDUCTION_END(e64) \
break; \
}\
case e16: \
default: \
require(0); \
break; \
}; \
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \
VI_VFP_LOOP_WIDE_REDUCTION_BASE \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
BODY; \
set_fp_exceptions; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e64)
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \
VI_CHECK_REDUCTION(true) \
VI_VFP_COMMON \
require((P.VU.vsew == e16 && p->supports_extension('F')) || \
(P.VU.vsew == e32 && p->supports_extension('D'))); \
switch(P.VU.vsew) { \
case e16: {\
float32_t vd_0 = P.VU.elt<float32_t>(rs1_num, 0); \
for (reg_t i=P.VU.vstart; i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
BODY16; \
set_fp_exceptions; \
VI_VFP_LOOP_REDUCTION_END(e32) \
break; \
}\
case e32: {\
float64_t vd_0 = P.VU.elt<float64_t>(rs1_num, 0); \
for (reg_t i=P.VU.vstart; i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
BODY32; \
set_fp_exceptions; \
VI_VFP_LOOP_REDUCTION_END(e64) \
break; \
}\
default: \
require(0); \
break; \
}; \
#define VI_VFP_VF_LOOP(BODY32, BODY64) \
#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
float16_t rs1 = f16(READ_FREG(rs1_num)); \
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
@ -1806,8 +1853,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
@ -1815,10 +1860,18 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
#define VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \
#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \
VI_CHECK_MSS(is_vs1); \
VI_VFP_LOOP_CMP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \
float16_t rs1 = f16(READ_FREG(rs1_num)); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
@ -1835,96 +1888,146 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
set_fp_exceptions; \
break; \
}\
case e16: \
default: \
require(0); \
break; \
}; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_WIDE(BODY) \
#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \
BODY16; \
set_fp_exceptions; \
break; \
} \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
BODY; \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
VI_VFP_LOOP_END
#define VI_VFP_VV_LOOP_WIDE(BODY) \
#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DSS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
BODY; \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
VI_VFP_LOOP_END
#define VI_VFP_WF_LOOP_WIDE(BODY) \
#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DDS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
BODY; \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
VI_VFP_LOOP_END
#define VI_VFP_WV_LOOP_WIDE(BODY) \
#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DDS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
BODY; \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e16: \
case e8: \
default: \
require(0); \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_WIDE_END
VI_VFP_LOOP_END
#define VI_VFP_CVT_SCALE(BODY16, BODY32, is_widen) \
if (is_widen) { \
VI_CHECK_DSS(false);\
} else { \
VI_CHECK_SDS(false); \
} \
require((P.VU.vsew == e16 && p->supports_extension('F')) || \
(P.VU.vsew == e32 && p->supports_extension('D'))); \
switch(P.VU.vsew) { \
case e16: {\
VI_VFP_LOOP_BASE \
BODY16 \
set_fp_exceptions; \
VI_VFP_LOOP_END \
} \
break; \
case e32: {\
VI_VFP_LOOP_BASE \
BODY32 \
set_fp_exceptions; \
VI_VFP_LOOP_END \
} \
break; \
default: \
require(0); \
break; \
}
#define DEBUG_START 0x0
#define DEBUG_END (0x1000 - 1)
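Note: every arithmetic instruction header now supplies one body per supported SEW, and the per-file excerpts below are cut off by the diff viewer after the second body. For orientation, a complete call presumably has this shape (sketch only; the e64 body here is an assumption mirroring the visible e32 one):

// vfadd.vv vd, vs2, vs1 -- full three-body form (sketch)
VI_VFP_VV_LOOP
({
  vd = f16_add(vs1, vs2);   // SEW = 16, gated on Zfh by VI_VFP_COMMON
},
{
  vd = f32_add(vs1, vs2);   // SEW = 32, gated on F
},
{
  vd = f64_add(vs1, vs2);   // SEW = 64, gated on D (assumed, not visible in the excerpt)
})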

3
riscv/insns/vfadd_vf.h

@ -1,6 +1,9 @@
// vfadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f16_add(rs1, vs2);
},
{
vd = f32_add(rs1, vs2);
},
{

3
riscv/insns/vfadd_vv.h

@ -1,6 +1,9 @@
// vfadd.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f16_add(vs1, vs2);
},
{
vd = f32_add(vs1, vs2);
},
{

3
riscv/insns/vfclass_v.h

@ -1,6 +1,9 @@
// vfclass.v vd, vs2, vm
VI_VFP_VV_LOOP
({
vd.v = f16_classify(vs2);
},
{
vd.v = f32_classify(vs2);
},
{

4
riscv/insns/vfcvt_f_x_v.h

@ -1,6 +1,10 @@
// vfcvt.f.x.v vd, vd2, vm
VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int16_t>(rs2_num, i);
vd = i32_to_f16(vs2_i);
},
{
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
},

4
riscv/insns/vfcvt_f_xu_v.h

@ -1,6 +1,10 @@
// vfcvt.f.xu.v vd, vd2, vm
VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint16_t>(rs2_num, i);
vd = ui32_to_f16(vs2_u);
},
{
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
},

5
riscv/insns/vfcvt_rtz_x_f_v.h

@ -1,6 +1,9 @@
// vfcvt.x.f.v vd, vd2, vm
// vfcvt.rtz.x.f.v vd, vd2, vm
VI_VFP_VF_LOOP
({
P.VU.elt<int16_t>(rd_num, i) = f16_to_i16(vs2, softfloat_round_minMag, true);
},
{
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, softfloat_round_minMag, true);
},
{
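Note: the .rtz. variants hard-code softfloat_round_minMag (round toward zero) instead of the dynamic rounding mode held in STATE.frm that vfcvt.x.f.v uses. A quick numeric illustration (values chosen for the example):

// softfloat_round_minMag truncates toward zero regardless of frm:
//   f16_to_i16(/* -1.75 as float16_t */, softfloat_round_minMag, true) == -1
//   f16_to_i16(/*  2.5  as float16_t */, softfloat_round_minMag, true) ==  2
// The non-rtz form rounds these according to STATE.frm instead.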

5
riscv/insns/vfcvt_rtz_xu_f_v.h

@ -1,6 +1,9 @@
// vfcvt.xu.f.v vd, vd2, vm
// vfcvt.rtz.xu.f.v vd, vd2, vm
VI_VFP_VF_LOOP
({
P.VU.elt<uint16_t>(rd_num, i) = f16_to_ui16(vs2, softfloat_round_minMag, true);
},
{
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, softfloat_round_minMag, true);
},
{

3
riscv/insns/vfcvt_x_f_v.h

@ -1,6 +1,9 @@
// vfcvt.x.f.v vd, vd2, vm
VI_VFP_VF_LOOP
({
P.VU.elt<int16_t>(rd_num, i) = f16_to_i16(vs2, STATE.frm, true);
},
{
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
},
{

3
riscv/insns/vfcvt_xu_f_v.h

@ -1,6 +1,9 @@
// vfcvt.xu.f.v vd, vd2, vm
VI_VFP_VV_LOOP
({
P.VU.elt<uint16_t>(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true);
},
{
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
},
{

3
riscv/insns/vfdiv_vf.h

@ -1,6 +1,9 @@
// vfdiv.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f16_div(vs2, rs1);
},
{
vd = f32_div(vs2, rs1);
},
{

3
riscv/insns/vfdiv_vv.h

@ -1,6 +1,9 @@
// vfdiv.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f16_div(vs2, vs1);
},
{
vd = f32_div(vs2, vs1);
},
{

3
riscv/insns/vfdot_vv.h

@ -1,6 +1,9 @@
// vfdot.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f16_add(vd, f16_mul(vs2, vs1));
},
{
vd = f32_add(vd, f32_mul(vs2, vs1));
},
{

3
riscv/insns/vfmacc_vf.h

@ -1,6 +1,9 @@
// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(rs1, vs2, vd);
},
{
vd = f32_mulAdd(rs1, vs2, vd);
},
{

3
riscv/insns/vfmacc_vv.h

@ -1,6 +1,9 @@
// vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(vs1, vs2, vd);
},
{
vd = f32_mulAdd(vs1, vs2, vd);
},
{

3
riscv/insns/vfmadd_vf.h

@ -1,6 +1,9 @@
// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(vd, rs1, vs2);
},
{
vd = f32_mulAdd(vd, rs1, vs2);
},
{

3
riscv/insns/vfmadd_vv.h

@ -1,6 +1,9 @@
// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(vd, vs1, vs2);
},
{
vd = f32_mulAdd(vd, vs1, vs2);
},
{

3
riscv/insns/vfmax_vf.h

@ -1,6 +1,9 @@
// vfmax
VI_VFP_VF_LOOP
({
vd = f16_max(vs2, rs1);
},
{
vd = f32_max(vs2, rs1);
},
{

3
riscv/insns/vfmax_vv.h

@ -1,6 +1,9 @@
// vfmax
VI_VFP_VV_LOOP
({
vd = f16_max(vs2, vs1);
},
{
vd = f32_max(vs2, vs1);
},
{

17
riscv/insns/vfmerge_vfm.h

@ -3,7 +3,20 @@ VI_CHECK_SSS(false);
VI_VFP_COMMON;
switch(P.VU.vsew) {
case 32:
case e16:
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float16_t>(rd_num, i, true);
auto rs1 = f16(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
int midx = (P.VU.vmlen * i) / 64;
int mpos = (P.VU.vmlen * i) % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
}
break;
case e32:
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i, true);
auto rs1 = f32(READ_FREG(rs1_num));
@ -16,7 +29,7 @@ switch(P.VU.vsew) {
vd = use_first ? rs1 : vs2;
}
break;
case 64:
case e64:
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float64_t>(rd_num, i, true);
auto rs1 = f64(READ_FREG(rs1_num));
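Note: the new e16 branch extracts its mask bit exactly like the wider widths: element i's bit lives in 64-bit word midx = (vmlen * i) / 64 of v0, at bit position mpos = (vmlen * i) % 64. A standalone sketch of that lookup (hypothetical helper; mask[] stands in for the v0 storage and vmlen for P.VU.vmlen):

#include <cstdint>

inline bool mask_bit(const uint64_t* mask, unsigned vmlen, unsigned i) {
  unsigned midx = (vmlen * i) / 64;   // which 64-bit word of v0
  unsigned mpos = (vmlen * i) % 64;   // which bit inside that word
  return (mask[midx] >> mpos) & 0x1;  // same test as use_first above
}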

3
riscv/insns/vfmin_vf.h

@ -1,6 +1,9 @@
// vfmin vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f16_min(vs2, rs1);
},
{
vd = f32_min(vs2, rs1);
},
{

3
riscv/insns/vfmin_vv.h

@ -1,6 +1,9 @@
// vfmin vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f16_min(vs2, vs1);
},
{
vd = f32_min(vs2, vs1);
},
{

3
riscv/insns/vfmsac_vf.h

@ -1,6 +1,9 @@
// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
},
{
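Note: all of the fused multiply-add variants negate an operand by XOR-ing its IEEE sign bit before calling the softfloat mulAdd, so vfmsac computes rs1 * vs2 - vd without a separate negate step. The idiom, spelled out for the new f16 lane:

// softfloat types are plain bit containers, so toggling the top bit negates the value
float16_t neg16(float16_t x) { return f16(x.v ^ F16_SIGN); }
// vfmsac (e16): vd = rs1*vs2 - vd  ==  f16_mulAdd(rs1, vs2, neg16(vd))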

3
riscv/insns/vfmsac_vv.h

@ -1,6 +1,9 @@
// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
},
{

3
riscv/insns/vfmsub_vf.h

@ -1,6 +1,9 @@
// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
},
{

3
riscv/insns/vfmsub_vv.h

@ -1,6 +1,9 @@
// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
},
{

3
riscv/insns/vfmul_vf.h

@ -1,6 +1,9 @@
// vfmul.vf vd, vs2, rs1, vm
VI_VFP_VF_LOOP
({
vd = f16_mul(vs2, rs1);
},
{
vd = f32_mul(vs2, rs1);
},
{

3
riscv/insns/vfmul_vv.h

@ -1,6 +1,9 @@
// vfmul.vv vd, vs1, vs2, vm
VI_VFP_VV_LOOP
({
vd = f16_mul(vs1, vs2);
},
{
vd = f32_mul(vs1, vs2);
},
{

25
riscv/insns/vfmv_f_s.h

@ -1,24 +1,31 @@
// vfmv_f_s: rd = vs2[0] (rs1=0)
require_vector;
require_fp;
require_extension('F');
require(P.VU.vsew == e32 || P.VU.vsew == e64);
require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) ||
(P.VU.vsew == e32 && p->supports_extension('F')) ||
(P.VU.vsew == e64 && p->supports_extension('D')));
reg_t rs2_num = insn.rs2();
uint64_t vs2_0 = 0;
const reg_t sew = P.VU.vsew;
switch(sew) {
case e32:
vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
break;
default:
vs2_0 = P.VU.elt<uint64_t>(rs2_num, 0);
break;
case e16:
vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0);
break;
case e32:
vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
break;
case e64:
vs2_0 = P.VU.elt<uint64_t>(rs2_num, 0);
break;
default:
require(0);
break;
}
// nan_extened
if (FLEN > sew) {
vs2_0 = vs2_0 | ~((uint64_t(1) << sew) - 1);
vs2_0 = vs2_0 | (UINT64_MAX << sew);
}
if (FLEN == 64) {
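Note: UINT64_MAX << sew sets every bit above the element, which is exactly the NaN-boxed form the scalar FP registers expect, and is equivalent to (but simpler than) the old ~((uint64_t(1) << sew) - 1). Worked through for sew = 16 with an example element value:

// vs2_0 = 0x3c00 (1.0 as f16), sew = 16
// UINT64_MAX << 16            == 0xFFFFFFFFFFFF0000
// vs2_0 | (UINT64_MAX << 16)  == 0xFFFFFFFFFFFF3C00   // NaN-boxed half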

16
riscv/insns/vfmv_s_f.h

@ -1,21 +1,29 @@
// vfmv_s_f: vd[0] = rs1 (vs2=0)
require_vector;
require_fp;
require_extension('F');
require(P.VU.vsew >= e32 && P.VU.vsew <= 64);
require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) ||
(P.VU.vsew == e32 && p->supports_extension('F')) ||
(P.VU.vsew == e64 && p->supports_extension('D')));
reg_t vl = P.VU.vl;
if (vl > 0 && P.VU.vstart < vl) {
reg_t rd_num = insn.rd();
switch(P.VU.vsew) {
case 32:
case e16:
if (FLEN == 64)
P.VU.elt<uint16_t>(rd_num, 0, true) = f64(FRS1).v;
else
P.VU.elt<uint16_t>(rd_num, 0, true) = f32(FRS1).v;
break;
case e32:
if (FLEN == 64)
P.VU.elt<uint32_t>(rd_num, 0, true) = f64(FRS1).v;
else
P.VU.elt<uint32_t>(rd_num, 0, true) = f32(FRS1).v;
break;
case 64:
case e64:
if (FLEN == 64)
P.VU.elt<uint64_t>(rd_num, 0, true) = f64(FRS1).v;
else

8
riscv/insns/vfmv_v_f.h

@ -2,6 +2,14 @@
require((insn.rd() & (P.VU.vlmul - 1)) == 0);
VI_VFP_COMMON
switch(P.VU.vsew) {
case e16:
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float16_t>(rd_num, i, true);
auto rs1 = f16(READ_FREG(rs1_num));
vd = rs1;
}
break;
case e32:
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i, true);

15
riscv/insns/vfncvt_f_f_w.h

@ -1,10 +1,11 @@
// vfncvt.f.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float16_t>(rd_num, i, true) = f32_to_f16(vs2);
},
{
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = f64_to_f32(vs2);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)
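Note: the trailing boolean is VI_VFP_CVT_SCALE's is_widen argument. The narrowing vfncvt_* files pass false, which selects VI_CHECK_SDS; the widening vfwcvt_* files further down pass true, selecting VI_CHECK_DSS. In both cases the macro's own require (e16 with 'F', e32 with 'D') replaces the old explicit 'D' check:

// vfncvt_* (narrowing): VI_VFP_CVT_SCALE({ SEW=16 body }, { SEW=32 body }, false) -> VI_CHECK_SDS
// vfwcvt_* (widening) : VI_VFP_CVT_SCALE({ SEW=16 body }, { SEW=32 body }, true)  -> VI_CHECK_DSS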

14
riscv/insns/vfncvt_f_x_w.h

@ -1,10 +1,10 @@
// vfncvt.f.x.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
P.VU.elt<float16_t>(rd_num, i, true) = i32_to_f16(vs2);
},
{
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = i64_to_f32(vs2);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

14
riscv/insns/vfncvt_f_xu_w.h

@ -1,10 +1,10 @@
// vfncvt.f.xu.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
P.VU.elt<float16_t>(rd_num, i, true) = ui32_to_f16(vs2);
},
{
auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = ui64_to_f32(vs2);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

17
riscv/insns/vfncvt_rod_f_f_w.h

@ -1,11 +1,12 @@
// vfncvt.f.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfncvt.rod.f.f.v vd, vs2, vm
VI_VFP_CVT_SCALE
({
softfloat_roundingMode = softfloat_round_odd;
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float16_t>(rd_num, i, true) = f32_to_f16(vs2);
},
{
softfloat_roundingMode = softfloat_round_odd;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = f64_to_f32(vs2);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

16
riscv/insns/vfncvt_rtz_x_f_w.h

@ -1,10 +1,10 @@
// vfncvt.x.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfncvt.rtz.x.f.w vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int16_t>(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true);
},
{
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

16
riscv/insns/vfncvt_rtz_xu_f_w.h

@ -1,10 +1,10 @@
// vfncvt.xu.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfncvt.rtz.xu.f.w vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint16_t>(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true);
},
{
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

16
riscv/insns/vfncvt_x_f_w.h

@ -1,10 +1,10 @@
// vfncvt.x.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfncvt.x.f.w vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int16_t>(rd_num, i, true) = f32_to_i16(vs2, STATE.frm, true);
},
{
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i, true) = f64_to_i32(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

16
riscv/insns/vfncvt_xu_f_w.h

@ -1,10 +1,10 @@
// vfncvt.xu.f.v vd, vs2, vm
VI_CHECK_SDS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfncvt.xu.f.w vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint16_t>(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm, true);
},
{
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_END
}, false)

3
riscv/insns/vfnmacc_vf.h

@ -1,6 +1,9 @@
// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
},
{

3
riscv/insns/vfnmacc_vv.h

@ -1,6 +1,9 @@
// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
},
{

3
riscv/insns/vfnmadd_vf.h

@ -1,6 +1,9 @@
// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
},
{

3
riscv/insns/vfnmadd_vv.h

@ -1,6 +1,9 @@
// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
},
{

3
riscv/insns/vfnmsac_vf.h

@ -1,6 +1,9 @@
// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd);
},
{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
},
{

3
riscv/insns/vfnmsac_vv.h

@ -1,6 +1,9 @@
// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd);
},
{
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
},
{

3
riscv/insns/vfnmsub_vf.h

@ -1,6 +1,9 @@
// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2);
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
},
{

3
riscv/insns/vfnmsub_vv.h

@ -1,6 +1,9 @@
// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
VI_VFP_VV_LOOP
({
vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2);
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
},
{

3
riscv/insns/vfrdiv_vf.h

@ -1,6 +1,9 @@
// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
VI_VFP_VF_LOOP
({
vd = f16_div(rs1, vs2);
},
{
vd = f32_div(rs1, vs2);
},
{

3
riscv/insns/vfredmax_vs.h

@ -1,6 +1,9 @@
// vfredmax vd, vs2, vs1
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f16_max(vd_0, vs2);
},
{
vd_0 = f32_max(vd_0, vs2);
},
{

3
riscv/insns/vfredmin_vs.h

@ -1,6 +1,9 @@
// vfredmin vd, vs2, vs1
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f16_min(vd_0, vs2);
},
{
vd_0 = f32_min(vd_0, vs2);
},
{

3
riscv/insns/vfredosum_vs.h

@ -1,6 +1,9 @@
// vfredosum: vd[0] = sum( vs2[*] , vs1[0] )
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f16_add(vd_0, vs2);
},
{
vd_0 = f32_add(vd_0, vs2);
},
{

3
riscv/insns/vfredsum_vs.h

@ -1,6 +1,9 @@
// vfredsum: vd[0] = sum( vs2[*] , vs1[0] )
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f16_add(vd_0, vs2);
},
{
vd_0 = f32_add(vd_0, vs2);
},
{

3
riscv/insns/vfrsub_vf.h

@ -1,6 +1,9 @@
// vfsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f16_sub(rs1, vs2);
},
{
vd = f32_sub(rs1, vs2);
},
{

3
riscv/insns/vfsgnj_vf.h

@ -1,6 +1,9 @@
// vfsgnj vd, vs2, vs1
VI_VFP_VF_LOOP
({
vd = fsgnj16(vs2.v, rs1.v, false, false);
},
{
vd = fsgnj32(vs2.v, rs1.v, false, false);
},
{

3
riscv/insns/vfsgnj_vv.h

@ -1,6 +1,9 @@
// vfsgnj
VI_VFP_VV_LOOP
({
vd = fsgnj16(vs2.v, vs1.v, false, false);
},
{
vd = fsgnj32(vs2.v, vs1.v, false, false);
},
{

3
riscv/insns/vfsgnjn_vf.h

@ -1,6 +1,9 @@
// vfsgnn
VI_VFP_VF_LOOP
({
vd = fsgnj16(vs2.v, rs1.v, true, false);
},
{
vd = fsgnj32(vs2.v, rs1.v, true, false);
},
{

3
riscv/insns/vfsgnjn_vv.h

@ -1,6 +1,9 @@
// vfsgnn
VI_VFP_VV_LOOP
({
vd = fsgnj16(vs2.v, vs1.v, true, false);
},
{
vd = fsgnj32(vs2.v, vs1.v, true, false);
},
{

3
riscv/insns/vfsgnjx_vf.h

@ -1,6 +1,9 @@
// vfsgnx
VI_VFP_VF_LOOP
({
vd = fsgnj16(vs2.v, rs1.v, false, true);
},
{
vd = fsgnj32(vs2.v, rs1.v, false, true);
},
{

3
riscv/insns/vfsgnjx_vv.h

@ -1,6 +1,9 @@
// vfsgnx
VI_VFP_VV_LOOP
({
vd = fsgnj16(vs2.v, vs1.v, false, true);
},
{
vd = fsgnj32(vs2.v, vs1.v, false, true);
},
{

8
riscv/insns/vfslide1down_vf.h

@ -4,6 +4,11 @@ VI_CHECK_SLIDE(false);
VI_VFP_LOOP_BASE
if (i != vl - 1) {
switch (P.VU.vsew) {
case e16: {
VI_XI_SLIDEDOWN_PARAMS(e16, 1);
vd = vs2;
}
break;
case e32: {
VI_XI_SLIDEDOWN_PARAMS(e32, 1);
vd = vs2;
@ -17,6 +22,9 @@ if (i != vl - 1) {
}
} else {
switch (P.VU.vsew) {
case e16:
P.VU.elt<float16_t>(rd_num, vl - 1) = f16(FRS1);
break;
case e32:
P.VU.elt<float32_t>(rd_num, vl - 1, true) = f32(FRS1);
break;

8
riscv/insns/vfslide1up_vf.h

@ -4,6 +4,11 @@ VI_CHECK_SLIDE(true);
VI_VFP_LOOP_BASE
if (i != 0) {
switch (P.VU.vsew) {
case e16: {
VI_XI_SLIDEUP_PARAMS(e16, 1);
vd = vs2;
}
break;
case e32: {
VI_XI_SLIDEUP_PARAMS(e32, 1);
vd = vs2;
@ -17,6 +22,9 @@ if (i != 0) {
}
} else {
switch (P.VU.vsew) {
case e16:
P.VU.elt<float16_t>(rd_num, 0) = f16(FRS1);
break;
case e32:
P.VU.elt<float32_t>(rd_num, 0, true) = f32(FRS1);
break;

3
riscv/insns/vfsqrt_v.h

@ -1,6 +1,9 @@
// vsqrt.v vd, vd2, vm
VI_VFP_VV_LOOP
({
vd = f16_sqrt(vs2);
},
{
vd = f32_sqrt(vs2);
},
{

3
riscv/insns/vfsub_vf.h

@ -1,6 +1,9 @@
// vfsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
vd = f16_sub(vs2, rs1);
},
{
vd = f32_sub(vs2, rs1);
},
{

3
riscv/insns/vfsub_vv.h

@ -1,6 +1,9 @@
// vfsub.vv vd, vs2, vs1
VI_VFP_VV_LOOP
({
vd = f16_sub(vs2, vs1);
},
{
vd = f32_sub(vs2, vs1);
},
{

3
riscv/insns/vfwadd_vf.h

@ -1,5 +1,8 @@
// vfwadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_add(vs2, rs1);
},
{
vd = f64_add(vs2, rs1);
})
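Note: in the widening VF loops the e16 body already operates on single-precision values, because VI_VFP_VF_LOOP_WIDE converts both the vector element and the scalar with f16_to_f32 before invoking BODY16. Expanded for the e16 lane of vfwadd.vf (sketch, following the decode.h hunk above):

float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true);
float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i));
float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num)));
vd = f32_add(vs2, rs1);   // BODY16 from vfwadd_vf.h
set_fp_exceptions;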

3
riscv/insns/vfwadd_vv.h

@ -1,5 +1,8 @@
// vfwadd.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_add(vs2, vs1);
},
{
vd = f64_add(vs2, vs1);
})

3
riscv/insns/vfwadd_wf.h

@ -1,5 +1,8 @@
// vfwadd.wf vd, vs2, vs1
VI_VFP_WF_LOOP_WIDE
({
vd = f32_add(vs2, rs1);
},
{
vd = f64_add(vs2, rs1);
})

3
riscv/insns/vfwadd_wv.h

@ -1,5 +1,8 @@
// vfwadd.wv vd, vs2, vs1
VI_VFP_WV_LOOP_WIDE
({
vd = f32_add(vs2, vs1);
},
{
vd = f64_add(vs2, vs1);
})

14
riscv/insns/vfwcvt_f_f_v.h

@ -1,10 +1,10 @@
// vfwcvt.f.f.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = f16_to_f32(vs2);
},
{
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i, true) = f32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

14
riscv/insns/vfwcvt_f_x_v.h

@ -1,10 +1,10 @@
// vfwcvt.f.x.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = i32_to_f32(vs2);
},
{
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i, true) = i32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

14
riscv/insns/vfwcvt_f_xu_v.h

@ -1,10 +1,10 @@
// vfwcvt.f.xu.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<uint16_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i, true) = ui32_to_f32(vs2);
},
{
auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i, true) = ui32_to_f64(vs2);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

16
riscv/insns/vfwcvt_rtz_x_f_v.h

@ -1,10 +1,10 @@
// vfwcvt.x.f.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfwcvt.rtz.x.f.v vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i, true) = f16_to_i32(vs2, softfloat_round_minMag, true);
},
{
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int64_t>(rd_num, i, true) = f32_to_i64(vs2, softfloat_round_minMag, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

16
riscv/insns/vfwcvt_rtz_xu_f_v.h

@ -1,10 +1,10 @@
// vfwcvt.xu.f.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
// vfwcvt.rtz,xu.f.v vd, vs2, vm
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i, true) = f16_to_ui32(vs2, softfloat_round_minMag, true);
},
{
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint64_t>(rd_num, i, true) = f32_to_ui64(vs2, softfloat_round_minMag, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

14
riscv/insns/vfwcvt_x_f_v.h

@ -1,10 +1,10 @@
// vfwcvt.x.f.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i, true) = f16_to_i32(vs2, STATE.frm, true);
},
{
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int64_t>(rd_num, i, true) = f32_to_i64(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

14
riscv/insns/vfwcvt_xu_f_v.h

@ -1,10 +1,10 @@
// vfwcvt.xu.f.v vd, vs2, vm
VI_CHECK_DSS(false);
if (P.VU.vsew == e32)
require(p->supports_extension('D'));
VI_VFP_LOOP_BASE
VI_VFP_CVT_SCALE
({
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i, true) = f16_to_ui32(vs2, STATE.frm, true);
},
{
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint64_t>(rd_num, i, true) = f32_to_ui64(vs2, STATE.frm, true);
set_fp_exceptions;
VI_VFP_LOOP_WIDE_END
}, true)

3
riscv/insns/vfwmacc_vf.h

@ -1,5 +1,8 @@
// vfwmacc.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_mulAdd(rs1, vs2, vd);
},
{
vd = f64_mulAdd(rs1, vs2, vd);
})

3
riscv/insns/vfwmacc_vv.h

@ -1,5 +1,8 @@
// vfwmacc.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_mulAdd(vs1, vs2, vd);
},
{
vd = f64_mulAdd(vs1, vs2, vd);
})

3
riscv/insns/vfwmsac_vf.h

@ -1,5 +1,8 @@
// vfwmsac.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
},
{
vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN));
})

3
riscv/insns/vfwmsac_vv.h

@ -1,5 +1,8 @@
// vfwmsac.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
},
{
vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN));
})

3
riscv/insns/vfwmul_vf.h

@ -1,5 +1,8 @@
// vfwmul.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_mul(vs2, rs1);
},
{
vd = f64_mul(vs2, rs1);
})

3
riscv/insns/vfwmul_vv.h

@ -1,5 +1,8 @@
// vfwmul.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_mul(vs2, vs1);
},
{
vd = f64_mul(vs2, vs1);
})

3
riscv/insns/vfwnmacc_vf.h

@ -1,5 +1,8 @@
// vfwnmacc.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN));
},
{
vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
})

3
riscv/insns/vfwnmacc_vv.h

@ -1,5 +1,8 @@
// vfwnmacc.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN));
},
{
vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
})

3
riscv/insns/vfwnmsac_vf.h

@ -1,5 +1,8 @@
// vfwnmacc.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, vd);
},
{
vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd);
})

3
riscv/insns/vfwnmsac_vv.h

@ -1,5 +1,8 @@
// vfwnmsac.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
},
{
vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd);
})

6
riscv/insns/vfwredosum_vs.h

@ -1,8 +1,8 @@
// vfwredosum.vs vd, vs2, vs1
require_vector;
require(P.VU.vsew * 2 <= P.VU.ELEN);
require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
},
{
vd_0 = f64_add(vd_0, vs2);
})
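Note: the widening reductions accumulate at 2*SEW, and their gating differs from the plain reductions: e16 needs 'F' and e32 needs 'D', since the accumulator is twice as wide. For SEW = 16 the macro reads vd_0 as a float32_t from vs1[0], converts each vs2 element with f16_to_f32, and applies BODY16, after which VI_VFP_LOOP_REDUCTION_END(e32) writes vd_0 back to element 0 of vd (my reading of the macro; the excerpt above interleaves the removed code). Roughly:

// SEW = 16 lane of vfwredosum.vs after macro expansion (sketch)
float32_t vd_0 = P.VU.elt<float32_t>(rs1_num, 0);
for (reg_t i = P.VU.vstart; i < vl; ++i) {
  VI_LOOP_ELEMENT_SKIP();
  float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i));
  vd_0 = f32_add(vd_0, vs2);   // BODY16
  set_fp_exceptions;
}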

6
riscv/insns/vfwredsum_vs.h

@ -1,8 +1,8 @@
// vfwredsum.vs vd, vs2, vs1
require_vector;
require(P.VU.vsew * 2 <= P.VU.ELEN);
require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
},
{
vd_0 = f64_add(vd_0, vs2);
})

3
riscv/insns/vfwsub_vf.h

@ -1,5 +1,8 @@
// vfwsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP_WIDE
({
vd = f32_sub(vs2, rs1);
},
{
vd = f64_sub(vs2, rs1);
})

3
riscv/insns/vfwsub_vv.h

@ -1,5 +1,8 @@
// vfwsub.vv vd, vs2, vs1
VI_VFP_VV_LOOP_WIDE
({
vd = f32_sub(vs2, vs1);
},
{
vd = f64_sub(vs2, vs1);
})

3
riscv/insns/vfwsub_wf.h

@ -1,5 +1,8 @@
// vfwsub.wf vd, vs2, rs1
VI_VFP_WF_LOOP_WIDE
({
vd = f32_sub(vs2, rs1);
},
{
vd = f64_sub(vs2, rs1);
})

3
riscv/insns/vfwsub_wv.h

@ -1,5 +1,8 @@
// vfwsub.wv vd, vs2, vs1
VI_VFP_WV_LOOP_WIDE
({
vd = f32_sub(vs2, vs1);
},
{
vd = f64_sub(vs2, vs1);
})

3
riscv/insns/vmfeq_vf.h

@ -1,6 +1,9 @@
// vmfeq.vf vd, vs2, fs1
VI_VFP_LOOP_CMP
({
res = f16_eq(vs2, rs1);
},
{
res = f32_eq(vs2, rs1);
},
{

3
riscv/insns/vmfeq_vv.h

@ -1,6 +1,9 @@
// vmfeq.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f16_eq(vs2, vs1);
},
{
res = f32_eq(vs2, vs1);
},
{

3
riscv/insns/vmfge_vf.h

@ -1,6 +1,9 @@
// vmfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f16_le(rs1, vs2);
},
{
res = f32_le(rs1, vs2);
},
{
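Note: softfloat only provides eq/le/lt comparisons, so vmfge and vmfgt reuse le/lt with the operands swapped; the new f16 bodies follow the existing trick:

// ge(vs2, rs1) == le(rs1, vs2)  ->  res = f16_le(rs1, vs2);
// gt(vs2, rs1) == lt(rs1, vs2)  ->  res = f16_lt(rs1, vs2);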

3
riscv/insns/vmfgt_vf.h

@ -1,6 +1,9 @@
// vmfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f16_lt(rs1, vs2);
},
{
res = f32_lt(rs1, vs2);
},
{

3
riscv/insns/vmfle_vf.h

@ -1,6 +1,9 @@
// vmfle.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f16_le(vs2, rs1);
},
{
res = f32_le(vs2, rs1);
},
{

3
riscv/insns/vmfle_vv.h

@ -1,6 +1,9 @@
// vmfle.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f16_le(vs2, vs1);
},
{
res = f32_le(vs2, vs1);
},
{

3
riscv/insns/vmflt_vf.h

@ -1,6 +1,9 @@
// vmflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f16_lt(vs2, rs1);
},
{
res = f32_lt(vs2, rs1);
},
{

3
riscv/insns/vmflt_vv.h

@ -1,6 +1,9 @@
// vmflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f16_lt(vs2, vs1);
},
{
res = f32_lt(vs2, vs1);
},
{

3
riscv/insns/vmfne_vf.h

@ -1,6 +1,9 @@
// vmfne.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f16_eq(vs2, rs1);
},
{
res = !f32_eq(vs2, rs1);
},
{

Some files were not shown because too many files changed in this diff
