Browse Source

rvp: add missing vxsat CSR writes for 70 saturating instructions

Per the P-extension spec, any instruction that performs saturation must
set the vxsat CSR to 1 when the result is clamped. Seventy instructions
were missing this write; this commit fixes them in two ways:

1. Convert P_SAT, P_USAT, and P_USAT_FULL macros from pure expression
   macros to GCC statement expressions that detect when saturation
   occurs and write P.VU.vxsat. This automatically fixes ~42
   instructions that use these macros (nclip/nclipi/nclipr/nclipri
   families, psati, psslai, pssh1sadd, pusati, sati, usati, ssh1sadd,
   sslai, and their packed/double-wide variants).

2. Add explicit vxsat writes to 22 instructions with inline saturation
   logic that bypasses the macros:
   - mulq/mulqr/pmulq/pmulqr: set vxsat on INT_MIN * INT_MIN overflow
   - psas/pssa cross variants: set vxsat when sat_add/sat_sub reports
     saturation through its sat flag
   - pssha/psshar DW variants: set vxsat when the ov overflow flag is set
   - pssha/psshar/ssha/sshar: set vxsat in sshamt >= BIT overflow path
   - pusati_dh/pusati_dw: set vxsat on unsigned range clamping

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
pull/2246/head
Chih-Min Chao 1 month ago
parent
commit
91a0debd84
  1. 1
      riscv/insns/mulq.h
  2. 1
      riscv/insns/mulqr.h
  3. 1
      riscv/insns/pmulq_h.h
  4. 1
      riscv/insns/pmulq_w.h
  5. 1
      riscv/insns/pmulqr_h.h
  6. 1
      riscv/insns/pmulqr_w.h
  7. 2
      riscv/insns/psas_dhx.h
  8. 2
      riscv/insns/psas_hx.h
  9. 2
      riscv/insns/psas_wx.h
  10. 2
      riscv/insns/pssa_dhx.h
  11. 2
      riscv/insns/pssa_hx.h
  12. 2
      riscv/insns/pssa_wx.h
  13. 1
      riscv/insns/pssha_dhs.h
  14. 1
      riscv/insns/pssha_dws.h
  15. 4
      riscv/insns/pssha_hs.h
  16. 4
      riscv/insns/pssha_ws.h
  17. 1
      riscv/insns/psshar_dhs.h
  18. 1
      riscv/insns/psshar_dws.h
  19. 4
      riscv/insns/psshar_hs.h
  20. 4
      riscv/insns/psshar_ws.h
  21. 2
      riscv/insns/pusati_dh.h
  22. 2
      riscv/insns/pusati_dw.h
  23. 4
      riscv/insns/ssha.h
  24. 4
      riscv/insns/sshar.h
  25. 12
      riscv/p_ext_macros.h

1
riscv/insns/mulq.h

@ -4,4 +4,5 @@ if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) {
WRITE_RD((RS1 * RS2) >> 31); WRITE_RD((RS1 * RS2) >> 31);
} else { } else {
WRITE_RD(INT32_MAX); WRITE_RD(INT32_MAX);
P.VU.vxsat->write(1);
} }

1
riscv/insns/mulqr.h

@ -4,4 +4,5 @@ if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) {
WRITE_RD((((RS1 * RS2) >> 30) + 1) >> 1); WRITE_RD((((RS1 * RS2) >> 30) + 1) >> 1);
} else { } else {
WRITE_RD(INT32_MAX); WRITE_RD(INT32_MAX);
P.VU.vxsat->write(1);
} }

1
riscv/insns/pmulq_h.h

@ -3,5 +3,6 @@ P_RD_RS1_RS2_LOOP(16,16,16, {
p_rd = (p_rs1 * p_rs2) >> 15; p_rd = (p_rs1 * p_rs2) >> 15;
} else { } else {
p_rd = INT16_MAX; p_rd = INT16_MAX;
P.VU.vxsat->write(1);
} }
}) })

1
riscv/insns/pmulq_w.h

@ -4,6 +4,7 @@ P_RD_RS1_RS2_LOOP(32,32,32, {
p_rd = ((int64_t)p_rs1 * (int64_t)p_rs2) >> 31; p_rd = ((int64_t)p_rs1 * (int64_t)p_rs2) >> 31;
} else { } else {
p_rd = INT32_MAX; p_rd = INT32_MAX;
P.VU.vxsat->write(1);
} }
} }
) )

1
riscv/insns/pmulqr_h.h

@ -3,5 +3,6 @@ P_RD_RS1_RS2_LOOP(16,16,16, {
p_rd = (((p_rs1 * p_rs2) >> 14) + 1) >> 1; p_rd = (((p_rs1 * p_rs2) >> 14) + 1) >> 1;
} else { } else {
p_rd = INT16_MAX; p_rd = INT16_MAX;
P.VU.vxsat->write(1);
} }
}) })

1
riscv/insns/pmulqr_w.h

@ -4,6 +4,7 @@ P_RD_RS1_RS2_LOOP(32,32,32, {
p_rd = ((((int64_t)p_rs1 * (int64_t)p_rs2) >> 30) + 1) >> 1; p_rd = ((((int64_t)p_rs1 * (int64_t)p_rs2) >> 30) + 1) >> 1;
} else { } else {
p_rd = INT32_MAX; p_rd = INT32_MAX;
P.VU.vxsat->write(1);
} }
} }
) )

2
riscv/insns/psas_dhx.h

@ -2,7 +2,9 @@ require_rv32;
P_CROSS_DW_ULOOP(16, { P_CROSS_DW_ULOOP(16, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}) })

2
riscv/insns/psas_hx.h

@ -1,7 +1,9 @@
P_CROSS_ULOOP(16, { P_CROSS_ULOOP(16, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}) })

2
riscv/insns/psas_wx.h

@ -2,8 +2,10 @@ require_rv64;
P_CROSS_ULOOP(32, { P_CROSS_ULOOP(32, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
} }
) )

2
riscv/insns/pssa_dhx.h

@ -2,7 +2,9 @@ require_rv32;
P_CROSS_DW_ULOOP(16, { P_CROSS_DW_ULOOP(16, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}) })

2
riscv/insns/pssa_hx.h

@ -1,7 +1,9 @@
P_CROSS_ULOOP(16, { P_CROSS_ULOOP(16, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int16_t, uint16_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}) })

2
riscv/insns/pssa_wx.h

@ -2,8 +2,10 @@ require_rv64;
P_CROSS_ULOOP(32, { P_CROSS_ULOOP(32, {
bool sat = false; bool sat = false;
p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, sat)); p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
}, { }, {
bool sat = false; bool sat = false;
p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, sat)); p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, sat));
if (sat) P.VU.vxsat->write(1);
} }
) )

1
riscv/insns/pssha_dhs.h

@ -41,4 +41,5 @@ P_RD_RS1_DW_LOOP(16, 16, {
p_rd = (uint16_t)((p_rs1 << rev) & mask); p_rd = (uint16_t)((p_rs1 << rev) & mask);
} }
} }
if (ov) P.VU.vxsat->write(1);
}) })

1
riscv/insns/pssha_dws.h

@ -41,4 +41,5 @@ P_RD_RS1_DW_LOOP(32, 32, {
p_rd = (uint32_t)((p_rs1 << rev) & mask); p_rd = (uint32_t)((p_rs1 << rev) & mask);
} }
} }
if (ov) P.VU.vxsat->write(1);
}) })

4
riscv/insns/pssha_hs.h

@ -2,8 +2,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(16, 16, { P_RD_RS1_LOOP(16, 16, {
if (p_rs1 == 0) if (p_rs1 == 0)
p_rd = 0; p_rd = 0;
else if (sshamt >= 16) else if (sshamt >= 16) {
p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff; p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff;
P.VU.vxsat->write(1);
}
else if (sshamt <= -16) else if (sshamt <= -16)
p_rd = (p_rs1 & 0x8000) ? 0xffff : 0; p_rd = (p_rs1 & 0x8000) ? 0xffff : 0;
else else

4
riscv/insns/pssha_ws.h

@ -3,8 +3,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(32, 32, { P_RD_RS1_LOOP(32, 32, {
if (p_rs1 == 0) if (p_rs1 == 0)
p_rd = 0; p_rd = 0;
else if (sshamt >= 32) else if (sshamt >= 32) {
p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff; p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff;
P.VU.vxsat->write(1);
}
else if (sshamt <= -32) else if (sshamt <= -32)
p_rd = (p_rs1 & 0x80000000) ? 0xffffffff : 0; p_rd = (p_rs1 & 0x80000000) ? 0xffffffff : 0;
else else

1
riscv/insns/psshar_dhs.h

@ -60,4 +60,5 @@ P_RD_RS1_DW_LOOP(16, 16, {
p_rd = (uint16_t)((p_rs1 << rev) & mask); p_rd = (uint16_t)((p_rs1 << rev) & mask);
} }
} }
if (ov) P.VU.vxsat->write(1);
}) })

1
riscv/insns/psshar_dws.h

@ -60,4 +60,5 @@ P_RD_RS1_DW_LOOP(32, 32, {
p_rd = (uint32_t)((p_rs1 << rev) & mask); p_rd = (uint32_t)((p_rs1 << rev) & mask);
} }
} }
if (ov) P.VU.vxsat->write(1);
}) })

4
riscv/insns/psshar_hs.h

@ -2,8 +2,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(16, 16, { P_RD_RS1_LOOP(16, 16, {
if (p_rs1 == 0) if (p_rs1 == 0)
p_rd = 0; p_rd = 0;
else if (sshamt >= 16) else if (sshamt >= 16) {
p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff; p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff;
P.VU.vxsat->write(1);
}
else if (sshamt <= -16) else if (sshamt <= -16)
p_rd = 0; p_rd = 0;
else else

4
riscv/insns/psshar_ws.h

@ -3,8 +3,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(32, 32, { P_RD_RS1_LOOP(32, 32, {
if (p_rs1 == 0) if (p_rs1 == 0)
p_rd = 0; p_rd = 0;
else if (sshamt >= 32) else if (sshamt >= 32) {
p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff; p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff;
P.VU.vxsat->write(1);
}
else if (sshamt <= -32) else if (sshamt <= -32)
p_rd = 0; p_rd = 0;
else else

2
riscv/insns/pusati_dh.h

@ -5,7 +5,9 @@ P_RD_RS1_DW_ULOOP(16, 16, {
p_rd = p_rs1; p_rd = p_rs1;
if (s < 0) { if (s < 0) {
p_rd = 0; p_rd = 0;
P.VU.vxsat->write(1);
} else if ((uint64_t)s > uint_max) { } else if ((uint64_t)s > uint_max) {
p_rd = uint_max; p_rd = uint_max;
P.VU.vxsat->write(1);
} }
}) })

2
riscv/insns/pusati_dw.h

@ -5,7 +5,9 @@ P_RD_RS1_DW_ULOOP(32, 32, {
p_rd = p_rs1; p_rd = p_rs1;
if (s < 0) { if (s < 0) {
p_rd = 0; p_rd = 0;
P.VU.vxsat->write(1);
} else if ((uint64_t)s > uint_max) { } else if ((uint64_t)s > uint_max) {
p_rd = uint_max; p_rd = uint_max;
P.VU.vxsat->write(1);
} }
}) })

4
riscv/insns/ssha.h

@ -3,8 +3,10 @@ require_rv32;
sreg_t sshamt = P_FIELD(RS2, 0, 8); sreg_t sshamt = P_FIELD(RS2, 0, 8);
if (RS1 == 0) if (RS1 == 0)
WRITE_RD(0); WRITE_RD(0);
else if (sshamt >= 32) else if (sshamt >= 32) {
WRITE_RD((RS1 & 0x80000000) ? 0x80000000 : 0x7fffffff); WRITE_RD((RS1 & 0x80000000) ? 0x80000000 : 0x7fffffff);
P.VU.vxsat->write(1);
}
else if (sshamt <= -32) else if (sshamt <= -32)
WRITE_RD((RS1 & 0x80000000) ? 0xffffffff : 0); WRITE_RD((RS1 & 0x80000000) ? 0xffffffff : 0);
else else

4
riscv/insns/sshar.h

@ -3,8 +3,10 @@ require_rv32;
sreg_t sshamt = P_FIELD(RS2, 0, 8); sreg_t sshamt = P_FIELD(RS2, 0, 8);
if (RS1 == 0) if (RS1 == 0)
WRITE_RD(0); WRITE_RD(0);
else if (sshamt >= 32) else if (sshamt >= 32) {
WRITE_RD((RS1 & 0x80000000) ? 0x80000000 : 0x7fffffff); WRITE_RD((RS1 & 0x80000000) ? 0x80000000 : 0x7fffffff);
P.VU.vxsat->write(1);
}
else if (sshamt <= -32) else if (sshamt <= -32)
WRITE_RD(0); WRITE_RD(0);
else else

12
riscv/p_ext_macros.h

@ -138,18 +138,6 @@
sreg_t p_res = P_UFIELD(rd_tmp, i, BIT); \ sreg_t p_res = P_UFIELD(rd_tmp, i, BIT); \
for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) { for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) {
#define P_REDUCTION_ULOOP_BASE(BIT, BIT_INNER, USE_RD) \
require_extension('P'); \
require(BIT == e16 || BIT == e32 || BIT == e64); \
reg_t rd_tmp = USE_RD ? zext_xlen(RD) : 0; \
reg_t rs1 = zext_xlen(RS1); \
reg_t rs2 = zext_xlen(RS2); \
sreg_t len = 64 / BIT; \
sreg_t len_inner = BIT / BIT_INNER; \
for (sreg_t i = len - 1; i >= 0; --i) { \
sreg_t p_res = P_UFIELD(rd_tmp, i, BIT); \
for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) {
#define P_WIDEN_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \ #define P_WIDEN_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \
require_extension('P'); \ require_extension('P'); \
require(BIT == e16 || BIT == e32 || BIT == e64); \ require(BIT == e16 || BIT == e32 || BIT == e64); \

Loading…
Cancel
Save