Browse Source

Have nclip_{wv/wx/wi} use different macros

This allows them to share the PARAM macros with the narrowing right-shift instructions.
Rename VV_NSHIFT_PARAMS -> VV_NARROW_PARAMS so that nclip, nsra, and nsrl can share it.
(The same applies to VX_NSHIFT_PARAMS and VI_NSHIFT_PARAMS.)
pull/868/head
eopXD 4 years ago
parent
commit
d177f05b7b
  1. 89
      riscv/decode.h
  2. 4
      riscv/insns/vnclip_wi.h
  3. 4
      riscv/insns/vnclip_wv.h
  4. 4
      riscv/insns/vnclip_wx.h
  5. 4
      riscv/insns/vnclipu_wi.h
  6. 4
      riscv/insns/vnclipu_wv.h
  7. 4
      riscv/insns/vnclipu_wx.h

89
riscv/decode.h

@ -769,19 +769,19 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \ auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i - offset); auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i - offset);
#define VI_NSHIFT_PARAMS(sew1, sew2) \ #define VI_NARROW_PARAMS(sew1, sew2) \
auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \
auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
auto zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5(); auto zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5();
#define VX_NSHIFT_PARAMS(sew1, sew2) \ #define VX_NARROW_PARAMS(sew1, sew2) \
auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \
auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
auto rs1 = (type_sew_t<sew1>::type)RS1; auto rs1 = (type_sew_t<sew1>::type)RS1;
#define VV_NSHIFT_PARAMS(sew1, sew2) \ #define VV_NARROW_PARAMS(sew1, sew2) \
auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \
auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
@ -1114,39 +1114,46 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
// narrow operation loop // narrow operation loop
#define VI_VV_LOOP_NARROW(BODY) \ #define VI_VV_LOOP_NARROW(BODY) \
VI_NARROW_CHECK_COMMON; \ VI_CHECK_SDS(true); \
VI_LOOP_BASE \
if (sew == e8){ \
VI_NARROW_SHIFT(e8, e16) \
BODY; \
}else if(sew == e16){ \
VI_NARROW_SHIFT(e16, e32) \
BODY; \
}else if(sew == e32){ \
VI_NARROW_SHIFT(e32, e64) \
BODY; \
} \
VI_LOOP_END
#define VI_NARROW_SHIFT(sew1, sew2) \
type_usew_t<sew1>::type &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i, true); \
type_usew_t<sew2>::type vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
type_usew_t<sew1>::type zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5(); \
type_sew_t<sew2>::type vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1;
#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \
VI_CHECK_SDS(is_vs1); \
VI_LOOP_BASE \ VI_LOOP_BASE \
if (sew == e8){ \ if (sew == e8){ \
VI_NARROW_SHIFT(e8, e16) \ VV_NARROW_PARAMS(e8, e16) \
BODY; \ BODY; \
} else if (sew == e16) { \ }else if(sew == e16){ \
VI_NARROW_SHIFT(e16, e32) \ VV_NARROW_PARAMS(e16, e32) \
BODY; \ BODY; \
} else if (sew == e32) { \ }else if(sew == e32){ \
VI_NARROW_SHIFT(e32, e64) \ VV_NARROW_PARAMS(e32, e64) \
BODY; \
} \
VI_LOOP_END
#define VI_VX_LOOP_NARROW(BODY) \
VI_CHECK_SDS(false); \
VI_LOOP_BASE \
if (sew == e8){ \
VX_NARROW_PARAMS(e8, e16) \
BODY; \
}else if(sew == e16){ \
VX_NARROW_PARAMS(e16, e32) \
BODY; \
}else if(sew == e32){ \
VX_NARROW_PARAMS(e32, e64) \
BODY; \
} \
VI_LOOP_END
#define VI_VI_LOOP_NARROW(BODY) \
VI_CHECK_SDS(false); \
VI_LOOP_BASE \
if (sew == e8){ \
VI_NARROW_PARAMS(e8, e16) \
BODY; \
}else if(sew == e16){ \
VI_NARROW_PARAMS(e16, e32) \
BODY; \
}else if(sew == e32){ \
VI_NARROW_PARAMS(e32, e64) \
BODY; \ BODY; \
} \ } \
VI_LOOP_END VI_LOOP_END
@ -1155,13 +1162,13 @@ VI_LOOP_END
VI_CHECK_SDS(false); \ VI_CHECK_SDS(false); \
VI_LOOP_NSHIFT_BASE \ VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \ if (sew == e8){ \
VI_NSHIFT_PARAMS(e8, e16) \ VI_NARROW_PARAMS(e8, e16) \
BODY; \ BODY; \
} else if (sew == e16) { \ } else if (sew == e16) { \
VI_NSHIFT_PARAMS(e16, e32) \ VI_NARROW_PARAMS(e16, e32) \
BODY; \ BODY; \
} else if (sew == e32) { \ } else if (sew == e32) { \
VI_NSHIFT_PARAMS(e32, e64) \ VI_NARROW_PARAMS(e32, e64) \
BODY; \ BODY; \
} \ } \
VI_LOOP_END VI_LOOP_END
@ -1170,13 +1177,13 @@ VI_LOOP_END
VI_CHECK_SDS(false); \ VI_CHECK_SDS(false); \
VI_LOOP_NSHIFT_BASE \ VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \ if (sew == e8){ \
VX_NSHIFT_PARAMS(e8, e16) \ VX_NARROW_PARAMS(e8, e16) \
BODY; \ BODY; \
} else if (sew == e16) { \ } else if (sew == e16) { \
VX_NSHIFT_PARAMS(e16, e32) \ VX_NARROW_PARAMS(e16, e32) \
BODY; \ BODY; \
} else if (sew == e32) { \ } else if (sew == e32) { \
VX_NSHIFT_PARAMS(e32, e64) \ VX_NARROW_PARAMS(e32, e64) \
BODY; \ BODY; \
} \ } \
VI_LOOP_END VI_LOOP_END
@ -1185,13 +1192,13 @@ VI_LOOP_END
VI_CHECK_SDS(true); \ VI_CHECK_SDS(true); \
VI_LOOP_NSHIFT_BASE \ VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \ if (sew == e8){ \
VV_NSHIFT_PARAMS(e8, e16) \ VV_NARROW_PARAMS(e8, e16) \
BODY; \ BODY; \
} else if (sew == e16) { \ } else if (sew == e16) { \
VV_NSHIFT_PARAMS(e16, e32) \ VV_NARROW_PARAMS(e16, e32) \
BODY; \ BODY; \
} else if (sew == e32) { \ } else if (sew == e32) { \
VV_NSHIFT_PARAMS(e32, e64) \ VV_NARROW_PARAMS(e32, e64) \
BODY; \ BODY; \
} \ } \
VI_LOOP_END VI_LOOP_END

4
riscv/insns/vnclip_wi.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
VI_VVXI_LOOP_NARROW VI_VI_LOOP_NARROW
({ ({
int128_t result = vs2; int128_t result = vs2;
unsigned shift = zimm5 & ((sew * 2) - 1); unsigned shift = zimm5 & ((sew * 2) - 1);
@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, false) })

4
riscv/insns/vnclip_wv.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
VI_VVXI_LOOP_NARROW VI_VV_LOOP_NARROW
({ ({
int128_t result = vs2; int128_t result = vs2;
unsigned shift = vs1 & ((sew * 2) - 1); unsigned shift = vs1 & ((sew * 2) - 1);
@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, true) })

4
riscv/insns/vnclip_wx.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
VI_VVXI_LOOP_NARROW VI_VX_LOOP_NARROW
({ ({
int128_t result = vs2; int128_t result = vs2;
unsigned shift = rs1 & ((sew * 2) - 1); unsigned shift = rs1 & ((sew * 2) - 1);
@ -22,4 +22,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, false) })

4
riscv/insns/vnclipu_wi.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
uint64_t sign_mask = UINT64_MAX << P.VU.vsew; uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
VI_VVXI_LOOP_NARROW VI_VI_LOOP_NARROW
({ ({
uint128_t result = vs2_u; uint128_t result = vs2_u;
unsigned shift = zimm5 & ((sew * 2) - 1); unsigned shift = zimm5 & ((sew * 2) - 1);
@ -20,4 +20,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, false) })

4
riscv/insns/vnclipu_wv.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
uint64_t sign_mask = UINT64_MAX << P.VU.vsew; uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
VI_VVXI_LOOP_NARROW VI_VV_LOOP_NARROW
({ ({
uint128_t result = vs2_u; uint128_t result = vs2_u;
unsigned shift = vs1 & ((sew * 2) - 1); unsigned shift = vs1 & ((sew * 2) - 1);
@ -19,4 +19,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, true) })

4
riscv/insns/vnclipu_wx.h

@ -2,7 +2,7 @@
VRM xrm = P.VU.get_vround_mode(); VRM xrm = P.VU.get_vround_mode();
uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
uint64_t sign_mask = UINT64_MAX << P.VU.vsew; uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
VI_VVXI_LOOP_NARROW VI_VX_LOOP_NARROW
({ ({
uint128_t result = vs2_u; uint128_t result = vs2_u;
unsigned shift = rs1 & ((sew * 2) - 1); unsigned shift = rs1 & ((sew * 2) - 1);
@ -19,4 +19,4 @@ VI_VVXI_LOOP_NARROW
} }
vd = result; vd = result;
}, false) })

Loading…
Cancel
Save