From 6507ccc30f29948a81661048e8a0ac3ae8e9a436 Mon Sep 17 00:00:00 2001
From: "Yueh-Ting (eop) Chen"
Date: Wed, 1 Dec 2021 03:37:05 +0800
Subject: [PATCH] Simplify mulhsu (#870)

---
Reviewer notes (placed after the `---` marker, so not part of the commit
message):

* riscv/decode.h gains VV_SU_PARAMS / VX_SU_PARAMS. Both bind `vd` (by
  reference) and `vs2` with the signed element type for the given width;
  the second operand (`vs1` read from the register group, or `rs1` cast
  from RS1) uses the unsigned element type.
* riscv/decode.h gains VI_VV_SU_LOOP / VI_VX_SU_LOOP. Each runs
  VI_CHECK_SSS, opens VI_LOOP_BASE, picks the e8/e16/e32/e64 branch from
  `sew`, declares the operands with the matching *_SU_PARAMS macro,
  expands BODY, and closes with VI_LOOP_END.
* vmulhsu_vv.h and vmulhsu_vx.h drop their hand-written per-width switch
  and keep a single body: both operands are cast to 128-bit integers,
  multiplied, and shifted right by `sew` before being stored to `vd`.

 riscv/decode.h           | 47 ++++++++++++++++++++++++++++++++++++++++
 riscv/insns/vmulhsu_vv.h | 38 ++------------------------------
 riscv/insns/vmulhsu_vx.h | 38 ++------------------------------
 3 files changed, 51 insertions(+), 72 deletions(-)

diff --git a/riscv/decode.h b/riscv/decode.h
index 2eccce2d..47df4511 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -738,6 +738,16 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
   type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, RS1);
 
+#define VV_SU_PARAMS(x) \
+  type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
+  type_usew_t<x>::type vs1 = P.VU.elt<type_usew_t<x>::type>(rs1_num, i); \
+  type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+
+#define VX_SU_PARAMS(x) \
+  type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
+  type_usew_t<x>::type rs1 = (type_usew_t<x>::type)RS1; \
+  type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+
 #define VV_UCMP_PARAMS(x) \
   type_usew_t<x>::type vs1 = P.VU.elt<type_usew_t<x>::type>(rs1_num, i); \
   type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
@@ -1112,6 +1122,43 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   } \
   VI_LOOP_END
 
+// signed unsigned operation loop (e.g. mulhsu)
+#define VI_VV_SU_LOOP(BODY) \
+  VI_CHECK_SSS(true) \
+  VI_LOOP_BASE \
+  if (sew == e8){ \
+    VV_SU_PARAMS(e8); \
+    BODY; \
+  }else if(sew == e16){ \
+    VV_SU_PARAMS(e16); \
+    BODY; \
+  }else if(sew == e32){ \
+    VV_SU_PARAMS(e32); \
+    BODY; \
+  }else if(sew == e64){ \
+    VV_SU_PARAMS(e64); \
+    BODY; \
+  } \
+  VI_LOOP_END
+
+#define VI_VX_SU_LOOP(BODY) \
+  VI_CHECK_SSS(false) \
+  VI_LOOP_BASE \
+  if (sew == e8){ \
+    VX_SU_PARAMS(e8); \
+    BODY; \
+  }else if(sew == e16){ \
+    VX_SU_PARAMS(e16); \
+    BODY; \
+  }else if(sew == e32){ \
+    VX_SU_PARAMS(e32); \
+    BODY; \
+  }else if(sew == e64){ \
+    VX_SU_PARAMS(e64); \
+    BODY; \
+  } \
+  VI_LOOP_END
+
 // narrow operation loop
 #define VI_VV_LOOP_NARROW(BODY) \
   VI_NARROW_CHECK_COMMON; \
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
index f77a7d3f..e1c0ba60 100644
--- a/riscv/insns/vmulhsu_vv.h
+++ b/riscv/insns/vmulhsu_vv.h
@@ -1,38 +1,4 @@
 // vmulhsu.vv vd, vs2, vs1
-VI_CHECK_SSS(true);
-VI_LOOP_BASE
-switch(sew) {
-case e8: {
-  auto &vd = P.VU.elt<int8_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
-  auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
-
-  vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew;
-  break;
-}
-case e16: {
-  auto &vd = P.VU.elt<int16_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
-  auto vs1 = P.VU.elt<uint16_t>(rs1_num, i);
-
-  vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew;
-  break;
-}
-case e32: {
-  auto &vd = P.VU.elt<int32_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
-  auto vs1 = P.VU.elt<uint32_t>(rs1_num, i);
-
-  vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew;
-  break;
-}
-default: {
-  auto &vd = P.VU.elt<int64_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
-  auto vs1 = P.VU.elt<uint64_t>(rs1_num, i);
-
+VI_VV_SU_LOOP({
   vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew;
-  break;
-}
-}
-VI_LOOP_END
+})
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
index b0699f6f..4619ea89 100644
--- a/riscv/insns/vmulhsu_vx.h
+++ b/riscv/insns/vmulhsu_vx.h
@@ -1,38 +1,4 @@
 // vmulhsu.vx vd, vs2, rs1
-VI_CHECK_SSS(false);
-VI_LOOP_BASE
-switch(sew) {
-case e8: {
-  auto &vd = P.VU.elt<int8_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
-  uint8_t rs1 = RS1;
-
-  vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew;
-  break;
-}
-case e16: {
-  auto &vd = P.VU.elt<int16_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
-  uint16_t rs1 = RS1;
-
-  vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew;
-  break;
-}
-case e32: {
-  auto &vd = P.VU.elt<int32_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
-  uint32_t rs1 = RS1;
-
-  vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew;
-  break;
-}
-default: {
-  auto &vd = P.VU.elt<int64_t>(rd_num, i, true);
-  auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
-  uint64_t rs1 = RS1;
-
+VI_VX_SU_LOOP({
   vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew;
-  break;
-}
-}
-VI_LOOP_END
+})