diff --git a/riscv/decode.h b/riscv/decode.h index 4d375307..313d9f0f 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -794,19 +794,19 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) auto &vd = P.VU.elt::type>(rd_num, i, true); \ auto vs2 = P.VU.elt::type>(rs2_num, i - offset); -#define VI_NSHIFT_PARAMS(sew1, sew2) \ +#define VI_NARROW_PARAMS(sew1, sew2) \ auto &vd = P.VU.elt::type>(rd_num, i, true); \ auto vs2_u = P.VU.elt::type>(rs2_num, i); \ auto vs2 = P.VU.elt::type>(rs2_num, i); \ auto zimm5 = (type_usew_t::type)insn.v_zimm5(); -#define VX_NSHIFT_PARAMS(sew1, sew2) \ +#define VX_NARROW_PARAMS(sew1, sew2) \ auto &vd = P.VU.elt::type>(rd_num, i, true); \ auto vs2_u = P.VU.elt::type>(rs2_num, i); \ auto vs2 = P.VU.elt::type>(rs2_num, i); \ auto rs1 = (type_sew_t::type)RS1; -#define VV_NSHIFT_PARAMS(sew1, sew2) \ +#define VV_NARROW_PARAMS(sew1, sew2) \ auto &vd = P.VU.elt::type>(rd_num, i, true); \ auto vs2_u = P.VU.elt::type>(rs2_num, i); \ auto vs2 = P.VU.elt::type>(rs2_num, i); \ @@ -1174,84 +1174,91 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) // narrow operation loop #define VI_VV_LOOP_NARROW(BODY) \ -VI_NARROW_CHECK_COMMON; \ -VI_LOOP_BASE \ -if (sew == e8){ \ - VI_NARROW_SHIFT(e8, e16) \ - BODY; \ -}else if(sew == e16){ \ - VI_NARROW_SHIFT(e16, e32) \ - BODY; \ -}else if(sew == e32){ \ - VI_NARROW_SHIFT(e32, e64) \ - BODY; \ -} \ -VI_LOOP_END - -#define VI_NARROW_SHIFT(sew1, sew2) \ - type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ - type_usew_t::type vs2_u = P.VU.elt::type>(rs2_num, i); \ - type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ - type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ - type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ - type_sew_t::type rs1 = (type_sew_t::type)RS1; - -#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \ - VI_CHECK_SDS(is_vs1); \ + VI_CHECK_SDS(true); \ VI_LOOP_BASE \ if (sew == e8){ \ - VI_NARROW_SHIFT(e8, e16) \ + VV_NARROW_PARAMS(e8, e16) \ BODY; \ - } else if (sew == e16) { \ - VI_NARROW_SHIFT(e16, e32) \ + }else if(sew == e16){ \ + VV_NARROW_PARAMS(e16, e32) \ BODY; \ - } else if (sew == e32) { \ - VI_NARROW_SHIFT(e32, e64) \ + }else if(sew == e32){ \ + VV_NARROW_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_NARROW(BODY) \ + VI_CHECK_SDS(false); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_NARROW_PARAMS(e8, e16) \ + BODY; \ + }else if(sew == e16){ \ + VX_NARROW_PARAMS(e16, e32) \ + BODY; \ + }else if(sew == e32){ \ + VX_NARROW_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP_NARROW(BODY) \ + VI_CHECK_SDS(false); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_NARROW_PARAMS(e8, e16) \ + BODY; \ + }else if(sew == e16){ \ + VI_NARROW_PARAMS(e16, e32) \ + BODY; \ + }else if(sew == e32){ \ + VI_NARROW_PARAMS(e32, e64) \ BODY; \ } \ VI_LOOP_END -#define VI_VI_LOOP_NSHIFT(BODY, is_vs1) \ - VI_CHECK_SDS(is_vs1); \ +#define VI_VI_LOOP_NSHIFT(BODY) \ + VI_CHECK_SDS(false); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ - VI_NSHIFT_PARAMS(e8, e16) \ + VI_NARROW_PARAMS(e8, e16) \ BODY; \ } else if (sew == e16) { \ - VI_NSHIFT_PARAMS(e16, e32) \ + VI_NARROW_PARAMS(e16, e32) \ BODY; \ } else if (sew == e32) { \ - VI_NSHIFT_PARAMS(e32, e64) \ + VI_NARROW_PARAMS(e32, e64) \ BODY; \ } \ VI_LOOP_END -#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \ - VI_CHECK_SDS(is_vs1); \ +#define VI_VX_LOOP_NSHIFT(BODY) \ + VI_CHECK_SDS(false); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ - VX_NSHIFT_PARAMS(e8, e16) \ + VX_NARROW_PARAMS(e8, e16) \ BODY; \ } else if (sew == e16) { \ - VX_NSHIFT_PARAMS(e16, e32) \ + VX_NARROW_PARAMS(e16, e32) \ BODY; \ } else if (sew == e32) { \ - VX_NSHIFT_PARAMS(e32, e64) \ + VX_NARROW_PARAMS(e32, e64) \ BODY; \ } \ VI_LOOP_END -#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \ - VI_CHECK_SDS(is_vs1); \ +#define VI_VV_LOOP_NSHIFT(BODY) \ + VI_CHECK_SDS(true); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ - VV_NSHIFT_PARAMS(e8, e16) \ + VV_NARROW_PARAMS(e8, e16) \ BODY; \ } else if (sew == e16) { \ - VV_NSHIFT_PARAMS(e16, e32) \ + VV_NARROW_PARAMS(e16, e32) \ BODY; \ } else if (sew == e32) { \ - VV_NSHIFT_PARAMS(e32, e64) \ + VV_NARROW_PARAMS(e32, e64) \ BODY; \ } \ VI_LOOP_END diff --git a/riscv/insns/vnclip_wi.h b/riscv/insns/vnclip_wi.h index 6b68e1df..ea6898cf 100644 --- a/riscv/insns/vnclip_wi.h +++ b/riscv/insns/vnclip_wi.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); -VI_VVXI_LOOP_NARROW +VI_VI_LOOP_NARROW ({ int128_t result = vs2; unsigned shift = zimm5 & ((sew * 2) - 1); @@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, false) +}) diff --git a/riscv/insns/vnclip_wv.h b/riscv/insns/vnclip_wv.h index 5f876976..63b84c65 100644 --- a/riscv/insns/vnclip_wv.h +++ b/riscv/insns/vnclip_wv.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); -VI_VVXI_LOOP_NARROW +VI_VV_LOOP_NARROW ({ int128_t result = vs2; unsigned shift = vs1 & ((sew * 2) - 1); @@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, true) +}) diff --git a/riscv/insns/vnclip_wx.h b/riscv/insns/vnclip_wx.h index 5436936a..482eace4 100644 --- a/riscv/insns/vnclip_wx.h +++ b/riscv/insns/vnclip_wx.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); -VI_VVXI_LOOP_NARROW +VI_VX_LOOP_NARROW ({ int128_t result = vs2; unsigned shift = rs1 & ((sew * 2) - 1); @@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, false) +}) diff --git a/riscv/insns/vnclipu_wi.h b/riscv/insns/vnclipu_wi.h index ba39905c..441a3a7d 100644 --- a/riscv/insns/vnclipu_wi.h +++ b/riscv/insns/vnclipu_wi.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t sign_mask = UINT64_MAX << P.VU.vsew; -VI_VVXI_LOOP_NARROW +VI_VI_LOOP_NARROW ({ uint128_t result = vs2_u; unsigned shift = zimm5 & ((sew * 2) - 1); @@ -20,4 +20,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, false) +}) diff --git a/riscv/insns/vnclipu_wv.h b/riscv/insns/vnclipu_wv.h index d6647c62..80724899 100644 --- a/riscv/insns/vnclipu_wv.h +++ b/riscv/insns/vnclipu_wv.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t sign_mask = UINT64_MAX << P.VU.vsew; -VI_VVXI_LOOP_NARROW +VI_VV_LOOP_NARROW ({ uint128_t result = vs2_u; unsigned shift = vs1 & ((sew * 2) - 1); @@ -19,4 +19,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, true) +}) diff --git a/riscv/insns/vnclipu_wx.h b/riscv/insns/vnclipu_wx.h index bf442334..b2d91c33 100644 --- a/riscv/insns/vnclipu_wx.h +++ b/riscv/insns/vnclipu_wx.h @@ -2,7 +2,7 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t sign_mask = UINT64_MAX << P.VU.vsew; -VI_VVXI_LOOP_NARROW +VI_VX_LOOP_NARROW ({ uint128_t result = vs2_u; unsigned shift = rs1 & ((sew * 2) - 1); @@ -19,4 +19,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}, false) +}) diff --git a/riscv/insns/vnsra_wi.h b/riscv/insns/vnsra_wi.h index f41979ed..0502ff1a 100644 --- a/riscv/insns/vnsra_wi.h +++ b/riscv/insns/vnsra_wi.h @@ -2,4 +2,4 @@ VI_VI_LOOP_NSHIFT ({ vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f); -}, false) +}) diff --git a/riscv/insns/vnsra_wv.h b/riscv/insns/vnsra_wv.h index 59f255ef..555ce3fb 100644 --- a/riscv/insns/vnsra_wv.h +++ b/riscv/insns/vnsra_wv.h @@ -2,4 +2,4 @@ VI_VV_LOOP_NSHIFT ({ vd = vs2 >> (vs1 & (sew * 2 - 1)); -}, true) +}) diff --git a/riscv/insns/vnsra_wx.h b/riscv/insns/vnsra_wx.h index adaa24c3..05a55e3e 100644 --- a/riscv/insns/vnsra_wx.h +++ b/riscv/insns/vnsra_wx.h @@ -2,4 +2,4 @@ VI_VX_LOOP_NSHIFT ({ vd = vs2 >> (rs1 & (sew * 2 - 1)); -}, false) +}) diff --git a/riscv/insns/vnsrl_wi.h b/riscv/insns/vnsrl_wi.h index 91402c0c..d4dfcf07 100644 --- a/riscv/insns/vnsrl_wi.h +++ b/riscv/insns/vnsrl_wi.h @@ -2,4 +2,4 @@ VI_VI_LOOP_NSHIFT ({ vd = vs2_u >> (zimm5 & (sew * 2 - 1)); -}, false) +}) diff --git a/riscv/insns/vnsrl_wv.h b/riscv/insns/vnsrl_wv.h index 609299fa..ab72b849 100644 --- a/riscv/insns/vnsrl_wv.h +++ b/riscv/insns/vnsrl_wv.h @@ -2,4 +2,4 @@ VI_VV_LOOP_NSHIFT ({ vd = vs2_u >> (vs1 & (sew * 2 - 1)); -}, true) +}) diff --git a/riscv/insns/vnsrl_wx.h b/riscv/insns/vnsrl_wx.h index 8356a2bd..e149b38d 100644 --- a/riscv/insns/vnsrl_wx.h +++ b/riscv/insns/vnsrl_wx.h @@ -2,4 +2,4 @@ VI_VX_LOOP_NSHIFT ({ vd = vs2_u >> (rs1 & (sew * 2 - 1)); -}, false) +})