Browse Source

Merge pull request #879 from eopXD/simply-insts

Simplify floating-point parameters and merge operations
pull/880/head
Andrew Waterman 4 years ago
committed by GitHub
parent
commit
9b3b305e42
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 166
      riscv/decode.h
  2. 52
      riscv/insns/vfmerge_vfm.h
  3. 8
      riscv/insns/vmerge_vim.h
  4. 8
      riscv/insns/vmerge_vvm.h
  5. 8
      riscv/insns/vmerge_vxm.h
  6. 5
      riscv/insns/vmfeq_vf.h
  7. 5
      riscv/insns/vmfeq_vv.h
  8. 5
      riscv/insns/vmfge_vf.h
  9. 5
      riscv/insns/vmfgt_vf.h
  10. 5
      riscv/insns/vmfle_vf.h
  11. 5
      riscv/insns/vmfle_vv.h
  12. 5
      riscv/insns/vmflt_vf.h
  13. 5
      riscv/insns/vmflt_vv.h
  14. 5
      riscv/insns/vmfne_vf.h
  15. 5
      riscv/insns/vmfne_vv.h
  16. 4
      riscv/insns/vmv_v_i.h
  17. 4
      riscv/insns/vmv_v_v.h
  18. 4
      riscv/insns/vmv_v_x.h

166
riscv/decode.h

@ -838,6 +838,20 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
auto vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \
auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true);
#define VFP_V_PARAMS(width) \
float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \
float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i);
#define VFP_VV_PARAMS(width) \
float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \
float##width##_t vs1 = P.VU.elt<float##width##_t>(rs1_num, i); \
float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i);
#define VFP_VF_PARAMS(width) \
float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \
float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \
float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i);
//
// vector: integer and masking operation loop
//
@ -952,23 +966,84 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
VI_LOOP_CMP_END
// merge and copy loop
#define VI_VVXI_MERGE_LOOP(BODY) \
#define VI_MERGE_VARS \
VI_MASK_VARS \
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
#define VI_MERGE_LOOP_BASE \
require_vector(true); \
VI_GENERAL_LOOP_BASE \
VI_MERGE_VARS
#define VI_VV_MERGE_LOOP(BODY) \
VI_CHECK_SSS(true); \
VI_MERGE_LOOP_BASE \
if (sew == e8){ \
VV_PARAMS(e8); \
BODY; \
}else if(sew == e16){ \
VV_PARAMS(e16); \
BODY; \
}else if(sew == e32){ \
VV_PARAMS(e32); \
BODY; \
}else if(sew == e64){ \
VV_PARAMS(e64); \
BODY; \
} \
VI_LOOP_END
#define VI_VX_MERGE_LOOP(BODY) \
VI_CHECK_SSS(false); \
VI_MERGE_LOOP_BASE \
if (sew == e8){ \
VXI_PARAMS(e8); \
VX_PARAMS(e8); \
BODY; \
}else if(sew == e16){ \
VXI_PARAMS(e16); \
VX_PARAMS(e16); \
BODY; \
}else if(sew == e32){ \
VXI_PARAMS(e32); \
VX_PARAMS(e32); \
BODY; \
}else if(sew == e64){ \
VXI_PARAMS(e64); \
VX_PARAMS(e64); \
BODY; \
} \
VI_LOOP_END
#define VI_VI_MERGE_LOOP(BODY) \
VI_CHECK_SSS(false); \
VI_MERGE_LOOP_BASE \
if (sew == e8){ \
VI_PARAMS(e8); \
BODY; \
}else if(sew == e16){ \
VI_PARAMS(e16); \
BODY; \
}else if(sew == e32){ \
VI_PARAMS(e32); \
BODY; \
}else if(sew == e64){ \
VI_PARAMS(e64); \
BODY; \
} \
VI_LOOP_END
#define VI_VF_MERGE_LOOP(BODY) \
VI_CHECK_SSS(false); \
VI_MERGE_LOOP_BASE \
if(sew == e16){ \
VFP_VF_PARAMS(16); \
BODY; \
}else if(sew == e32){ \
VFP_VF_PARAMS(32); \
BODY; \
}else if(sew == e64){ \
VFP_VF_PARAMS(64); \
BODY; \
} \
VI_LOOP_END
// reduction loop - signed
#define VI_LOOP_REDUCTION_BASE(x) \
require(x >= e8 && x <= e64); \
@ -1879,7 +1954,7 @@ reg_t index[P.VU.vlmax]; \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
uint64_t mmask = UINT64_C(1) << mpos; \
uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx, true); \
uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \
uint64_t res = 0;
#define VI_VFP_LOOP_REDUCTION_BASE(width) \
@ -1958,7 +2033,7 @@ reg_t index[P.VU.vlmax]; \
case e16: \
case e32: \
case e64: { \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
vd = (vd & ~mmask) | (((res) << mpos) & mmask); \
break; \
} \
default: \
@ -1973,25 +2048,19 @@ reg_t index[P.VU.vlmax]; \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
VFP_VV_PARAMS(16); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
VFP_VV_PARAMS(32); \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e64: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
VFP_VV_PARAMS(64); \
BODY64; \
set_fp_exceptions; \
break; \
@ -2008,20 +2077,17 @@ reg_t index[P.VU.vlmax]; \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
VFP_V_PARAMS(16); \
BODY16; \
break; \
}\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
VFP_V_PARAMS(32); \
BODY32; \
break; \
}\
case e64: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
VFP_V_PARAMS(64); \
BODY64; \
break; \
}\
@ -2101,25 +2167,19 @@ reg_t index[P.VU.vlmax]; \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
float16_t rs1 = f16(READ_FREG(rs1_num)); \
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
VFP_VF_PARAMS(16); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
VFP_VF_PARAMS(32); \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e64: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \
float64_t rs1 = f64(READ_FREG(rs1_num)); \
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
VFP_VF_PARAMS(64); \
BODY64; \
set_fp_exceptions; \
break; \
@ -2131,30 +2191,52 @@ reg_t index[P.VU.vlmax]; \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \
VI_CHECK_MSS(is_vs1); \
#define VI_VFP_VV_LOOP_CMP(BODY16, BODY32, BODY64) \
VI_CHECK_MSS(true); \
VI_VFP_LOOP_CMP_BASE \
switch(P.VU.vsew) { \
case e16: {\
float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \
float16_t rs1 = f16(READ_FREG(rs1_num)); \
VFP_VV_PARAMS(16); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
VFP_VV_PARAMS(32); \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e64: {\
float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
float64_t rs1 = f64(READ_FREG(rs1_num)); \
VFP_VV_PARAMS(64); \
BODY64; \
set_fp_exceptions; \
break; \
}\
default: \
require(0); \
break; \
}; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_CMP(BODY16, BODY32, BODY64) \
VI_CHECK_MSS(false); \
VI_VFP_LOOP_CMP_BASE \
switch(P.VU.vsew) { \
case e16: {\
VFP_VF_PARAMS(16); \
BODY16; \
set_fp_exceptions; \
break; \
}\
case e32: {\
VFP_VF_PARAMS(32); \
BODY32; \
set_fp_exceptions; \
break; \
}\
case e64: {\
VFP_VF_PARAMS(64); \
BODY64; \
set_fp_exceptions; \
break; \

52
riscv/insns/vfmerge_vfm.h

@ -1,50 +1,4 @@
// vfmerge.vfm vd, vs2, rs1, vm
VI_CHECK_SSS(false);
VI_VFP_COMMON;
switch(P.VU.vsew) {
case e16:
for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float16_t>(rd_num, i, true);
auto rs1 = f16(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
}
break;
case e32:
for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i, true);
auto rs1 = f32(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
}
break;
case e64:
for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float64_t>(rd_num, i, true);
auto rs1 = f64(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
}
break;
default:
require(0);
break;
}
P.VU.vstart->write(0);
VI_VF_MERGE_LOOP({
vd = use_first ? rs1 : vs2;
})

8
riscv/insns/vmerge_vim.h

@ -1,11 +1,5 @@
// vmerge.vim vd, vs2, simm5
require_vector(true);
VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
VI_VI_MERGE_LOOP
({
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? simm5 : vs2;
})

8
riscv/insns/vmerge_vvm.h

@ -1,11 +1,5 @@
// vmerge.vvm vd, vs2, vs1
require_vector(true);
VI_CHECK_SSS(true);
VI_VVXI_MERGE_LOOP
VI_VV_MERGE_LOOP
({
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? vs1 : vs2;
})

8
riscv/insns/vmerge_vxm.h

@ -1,11 +1,5 @@
// vmerge.vxm vd, vs2, rs1
require_vector(true);
VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
VI_VX_MERGE_LOOP
({
int midx = i / 64;
int mpos = i % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
})

5
riscv/insns/vmfeq_vf.h

@ -1,5 +1,5 @@
// vmfeq.vf vd, vs2, fs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = f16_eq(vs2, rs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_eq(vs2, rs1);
},
false)
})

5
riscv/insns/vmfeq_vv.h

@ -1,5 +1,5 @@
// vmfeq.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
VI_VFP_VV_LOOP_CMP
({
res = f16_eq(vs2, vs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_eq(vs2, vs1);
},
true)
})

5
riscv/insns/vmfge_vf.h

@ -1,5 +1,5 @@
// vmfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = f16_le(rs1, vs2);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_le(rs1, vs2);
},
false)
})

5
riscv/insns/vmfgt_vf.h

@ -1,5 +1,5 @@
// vmfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = f16_lt(rs1, vs2);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_lt(rs1, vs2);
},
false)
})

5
riscv/insns/vmfle_vf.h

@ -1,5 +1,5 @@
// vmfle.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = f16_le(vs2, rs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_le(vs2, rs1);
},
false)
})

5
riscv/insns/vmfle_vv.h

@ -1,5 +1,5 @@
// vmfle.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
VI_VFP_VV_LOOP_CMP
({
res = f16_le(vs2, vs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_le(vs2, vs1);
},
true)
})

5
riscv/insns/vmflt_vf.h

@ -1,5 +1,5 @@
// vmflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = f16_lt(vs2, rs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_lt(vs2, rs1);
},
false)
})

5
riscv/insns/vmflt_vv.h

@ -1,5 +1,5 @@
// vmflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
VI_VFP_VV_LOOP_CMP
({
res = f16_lt(vs2, vs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = f64_lt(vs2, vs1);
},
true)
})

5
riscv/insns/vmfne_vf.h

@ -1,5 +1,5 @@
// vmfne.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
VI_VFP_VF_LOOP_CMP
({
res = !f16_eq(vs2, rs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = !f64_eq(vs2, rs1);
},
false)
})

5
riscv/insns/vmfne_vv.h

@ -1,5 +1,5 @@
// vmfne.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
VI_VFP_VV_LOOP_CMP
({
res = !f16_eq(vs2, vs1);
},
@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP
},
{
res = !f64_eq(vs2, vs1);
},
true)
})

4
riscv/insns/vmv_v_i.h

@ -1,7 +1,5 @@
// vmv.v.i vd, simm5
require_vector(true);
VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
VI_VI_MERGE_LOOP
({
vd = simm5;
})

4
riscv/insns/vmv_v_v.h

@ -1,7 +1,5 @@
// vmv.v.v vd, vs1
require_vector(true);
VI_CHECK_SSS(true);
VI_VVXI_MERGE_LOOP
VI_VV_MERGE_LOOP
({
vd = vs1;
})

4
riscv/insns/vmv_v_x.h

@ -1,7 +1,5 @@
// vmv.v.x vd, rs1
require_vector(true);
VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
VI_VX_MERGE_LOOP
({
vd = rs1;
})

Loading…
Cancel
Save