Browse Source

vector: Check if there is any vector extension before using vector CSRs

pull/1750/head
YenHaoChen 2 years ago
parent
commit
e9f620ffb5
  1. 6
      riscv/insns/vnclip_wi.h
  2. 6
      riscv/insns/vnclip_wv.h
  3. 6
      riscv/insns/vnclipu_wi.h
  4. 6
      riscv/insns/vnclipu_wv.h
  5. 6
      riscv/insns/vnclipu_wx.h
  6. 8
      riscv/insns/vsmul_vv.h
  7. 8
      riscv/insns/vsmul_vx.h
  8. 2
      riscv/insns/vssra_vi.h
  9. 2
      riscv/insns/vssra_vv.h
  10. 2
      riscv/insns/vssra_vx.h
  11. 2
      riscv/insns/vssrl_vi.h
  12. 2
      riscv/insns/vssrl_vv.h
  13. 2
      riscv/insns/vssrl_vx.h
  14. 8
      riscv/v_ext_macros.h

6
riscv/insns/vnclip_wi.h

@@ -1,9 +1,9 @@
 // vnclip: vd[i] = clip(round(vs2[i] + rnd) >> simm)
-VRM xrm = P.VU.get_vround_mode();
-int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
-int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
 VI_VI_LOOP_NARROW
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
+  int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
   int128_t result = vs2;
   unsigned shift = zimm5 & ((sew * 2) - 1);

6
riscv/insns/vnclip_wv.h

@@ -1,9 +1,9 @@
 // vnclip: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
-VRM xrm = P.VU.get_vround_mode();
-int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
-int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
 VI_VV_LOOP_NARROW
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
+  int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
   int128_t result = vs2;
   unsigned shift = vs1 & ((sew * 2) - 1);

6
riscv/insns/vnclipu_wi.h

@@ -1,9 +1,9 @@
 // vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> simm)
-VRM xrm = P.VU.get_vround_mode();
-uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
-uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
 VI_VI_LOOP_NARROW
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
+  uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
   uint128_t result = vs2_u;
   unsigned shift = zimm5 & ((sew * 2) - 1);

6
riscv/insns/vnclipu_wv.h

@@ -1,9 +1,9 @@
 // vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
-VRM xrm = P.VU.get_vround_mode();
-uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
-uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
 VI_VV_LOOP_NARROW
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
+  uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
   uint128_t result = vs2_u;
   unsigned shift = vs1 & ((sew * 2) - 1);

6
riscv/insns/vnclipu_wx.h

@@ -1,9 +1,9 @@
 // vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> rs1[i])
-VRM xrm = P.VU.get_vround_mode();
-uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
-uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
 VI_VX_LOOP_NARROW
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew);
+  uint64_t sign_mask = UINT64_MAX << P.VU.vsew;
   uint128_t result = vs2_u;
   unsigned shift = rs1 & ((sew * 2) - 1);

8
riscv/insns/vsmul_vv.h

@@ -1,10 +1,10 @@
 // vsmul.vv vd, vs2, vs1
-VRM xrm = P.VU.get_vround_mode();
-int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
-int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
 VI_VV_LOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
+  int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
   bool overflow = vs1 == vs2 && vs1 == int_min;
   int128_t result = (int128_t)vs1 * (int128_t)vs2;

8
riscv/insns/vsmul_vx.h

@@ -1,10 +1,10 @@
 // vsmul.vx vd, vs2, rs1
-VRM xrm = P.VU.get_vround_mode();
-int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
-int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
 VI_VX_LOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
+  int64_t int_max = INT64_MAX >> (64 - P.VU.vsew);
+  int64_t int_min = INT64_MIN >> (64 - P.VU.vsew);
   bool overflow = rs1 == vs2 && rs1 == int_min;
   int128_t result = (int128_t)rs1 * (int128_t)vs2;

2
riscv/insns/vssra_vi.h

@@ -1,7 +1,7 @@
 // vssra.vi vd, vs2, simm5
-VRM xrm = P.VU.get_vround_mode();
 VI_VI_LOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = simm5 & (sew - 1) & 0x1f;
   int128_t val = vs2;

2
riscv/insns/vssra_vv.h

@@ -1,7 +1,7 @@
 // vssra.vv vd, vs2, vs1
-VRM xrm = P.VU.get_vround_mode();
 VI_VV_LOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = vs1 & (sew - 1);
   int128_t val = vs2;

2
riscv/insns/vssra_vx.h

@@ -1,7 +1,7 @@
 // vssra.vx vd, vs2, rs1
-VRM xrm = P.VU.get_vround_mode();
 VI_VX_LOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = rs1 & (sew - 1);
   int128_t val = vs2;

2
riscv/insns/vssrl_vi.h

@@ -1,7 +1,7 @@
 // vssra.vi vd, vs2, simm5
-VRM xrm = P.VU.get_vround_mode();
 VI_VI_ULOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = zimm5 & (sew - 1) & 0x1f;
   uint128_t val = vs2;

2
riscv/insns/vssrl_vv.h

@@ -1,7 +1,7 @@
 // vssrl.vv vd, vs2, vs1
-VRM xrm = P.VU.get_vround_mode();
 VI_VV_ULOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = vs1 & (sew - 1);
   uint128_t val = vs2;

2
riscv/insns/vssrl_vx.h

@@ -1,7 +1,7 @@
 // vssrl.vx vd, vs2, rs1
-VRM xrm = P.VU.get_vround_mode();
 VI_VX_ULOOP
 ({
+  VRM xrm = P.VU.get_vround_mode();
   int sh = rs1 & (sew - 1);
   uint128_t val = vs2;

8
riscv/v_ext_macros.h

@@ -1144,32 +1144,32 @@ static inline bool is_overlapped_widen(const int astart, int asize,
 // average loop
 #define VI_VV_LOOP_AVG(op) \
-VRM xrm = p->VU.get_vround_mode(); \
 VI_VV_LOOP({ \
+  VRM xrm = p->VU.get_vround_mode(); \
   uint128_t res = ((uint128_t)vs2) op vs1; \
   INT_ROUNDING(res, xrm, 1); \
   vd = res >> 1; \
 })
 #define VI_VX_LOOP_AVG(op) \
-VRM xrm = p->VU.get_vround_mode(); \
 VI_VX_LOOP({ \
+  VRM xrm = p->VU.get_vround_mode(); \
   uint128_t res = ((uint128_t)vs2) op rs1; \
   INT_ROUNDING(res, xrm, 1); \
   vd = res >> 1; \
 })
 #define VI_VV_ULOOP_AVG(op) \
-VRM xrm = p->VU.get_vround_mode(); \
 VI_VV_ULOOP({ \
+  VRM xrm = p->VU.get_vround_mode(); \
   uint128_t res = ((uint128_t)vs2) op vs1; \
   INT_ROUNDING(res, xrm, 1); \
   vd = res >> 1; \
 })
 #define VI_VX_ULOOP_AVG(op) \
-VRM xrm = p->VU.get_vround_mode(); \
 VI_VX_ULOOP({ \
+  VRM xrm = p->VU.get_vround_mode(); \
   uint128_t res = ((uint128_t)vs2) op rs1; \
   INT_ROUNDING(res, xrm, 1); \
   vd = res >> 1; \

Loading…
Cancel
Save