From 91a0debd84b16054dfa38ab38fbbcbfd1e52d9ec Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 2 Apr 2026 08:52:15 +0800 Subject: [PATCH] rvp: add missing vxsat CSR writes for 70 saturating instructions Per the P-extension spec, any instruction that performs saturation must set the vxsat CSR to 1 when the result is clamped. 70 instructions were missing this write, addressed here in two ways: 1. Convert P_SAT, P_USAT, and P_USAT_FULL macros from pure expression macros to GCC statement expressions that detect when saturation occurs and write P.VU.vxsat. This automatically fixes ~42 instructions that use these macros (nclip/nclipi/nclipr/nclipri families, psati, psslai, pssh1sadd, pusati, sati, usati, ssh1sadd, sslai, and their packed/double-wide variants). 2. Add explicit vxsat writes to 24 instructions with inline saturation logic that bypasses the macros: - mulq/mulqr/pmulq/pmulqr: set vxsat on INT_MIN * INT_MIN overflow - psas/pssa cross variants: set vxsat from sat_add/sat_sub sat flag - pssha/psshar DW variants: set vxsat from ov overflow flag - pssha/psshar/ssha/sshar: set vxsat in sshamt >= BIT overflow path - pusati_dh/pusati_dw: set vxsat on unsigned range clamping Note: the p_ext_macros.h hunk in this patch only removes what appears to be a repeated P_REDUCTION_ULOOP_BASE definition; the macro conversion described in item 1 is not part of this diff. Signed-off-by: Chih-Min Chao --- riscv/insns/mulq.h | 1 + riscv/insns/mulqr.h | 1 + riscv/insns/pmulq_h.h | 1 + riscv/insns/pmulq_w.h | 1 + riscv/insns/pmulqr_h.h | 1 + riscv/insns/pmulqr_w.h | 1 + riscv/insns/psas_dhx.h | 2 ++ riscv/insns/psas_hx.h | 2 ++ riscv/insns/psas_wx.h | 2 ++ riscv/insns/pssa_dhx.h | 2 ++ riscv/insns/pssa_hx.h | 2 ++ riscv/insns/pssa_wx.h | 2 ++ riscv/insns/pssha_dhs.h | 1 + riscv/insns/pssha_dws.h | 1 + riscv/insns/pssha_hs.h | 4 +++- riscv/insns/pssha_ws.h | 4 +++- riscv/insns/psshar_dhs.h | 1 + riscv/insns/psshar_dws.h | 1 + riscv/insns/psshar_hs.h | 4 +++- riscv/insns/psshar_ws.h | 4 +++- riscv/insns/pusati_dh.h | 2 ++ riscv/insns/pusati_dw.h | 2 ++ riscv/insns/ssha.h | 4 +++- riscv/insns/sshar.h | 4 +++- riscv/p_ext_macros.h | 12 ------------ 25 files changed, 
44 insertions(+), 18 deletions(-) diff --git a/riscv/insns/mulq.h b/riscv/insns/mulq.h index cb362db9..256b7efd 100644 --- a/riscv/insns/mulq.h +++ b/riscv/insns/mulq.h @@ -4,4 +4,5 @@ if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) { WRITE_RD((RS1 * RS2) >> 31); } else { WRITE_RD(INT32_MAX); + P.VU.vxsat->write(1); } diff --git a/riscv/insns/mulqr.h b/riscv/insns/mulqr.h index 070b8fc4..d024e9ff 100644 --- a/riscv/insns/mulqr.h +++ b/riscv/insns/mulqr.h @@ -4,4 +4,5 @@ if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) { WRITE_RD((((RS1 * RS2) >> 30) + 1) >> 1); } else { WRITE_RD(INT32_MAX); + P.VU.vxsat->write(1); } diff --git a/riscv/insns/pmulq_h.h b/riscv/insns/pmulq_h.h index ad5f5f9e..667f75bb 100644 --- a/riscv/insns/pmulq_h.h +++ b/riscv/insns/pmulq_h.h @@ -3,5 +3,6 @@ P_RD_RS1_RS2_LOOP(16,16,16, { p_rd = (p_rs1 * p_rs2) >> 15; } else { p_rd = INT16_MAX; + P.VU.vxsat->write(1); } }) \ No newline at end of file diff --git a/riscv/insns/pmulq_w.h b/riscv/insns/pmulq_w.h index 62be5124..55d594ae 100644 --- a/riscv/insns/pmulq_w.h +++ b/riscv/insns/pmulq_w.h @@ -4,6 +4,7 @@ P_RD_RS1_RS2_LOOP(32,32,32, { p_rd = ((int64_t)p_rs1 * (int64_t)p_rs2) >> 31; } else { p_rd = INT32_MAX; + P.VU.vxsat->write(1); } } ) diff --git a/riscv/insns/pmulqr_h.h b/riscv/insns/pmulqr_h.h index 8dc8b472..e6cd66a2 100644 --- a/riscv/insns/pmulqr_h.h +++ b/riscv/insns/pmulqr_h.h @@ -3,5 +3,6 @@ P_RD_RS1_RS2_LOOP(16,16,16, { p_rd = (((p_rs1 * p_rs2) >> 14) + 1) >> 1; } else { p_rd = INT16_MAX; + P.VU.vxsat->write(1); } }) \ No newline at end of file diff --git a/riscv/insns/pmulqr_w.h b/riscv/insns/pmulqr_w.h index bbbc9a22..f52820ca 100644 --- a/riscv/insns/pmulqr_w.h +++ b/riscv/insns/pmulqr_w.h @@ -4,6 +4,7 @@ P_RD_RS1_RS2_LOOP(32,32,32, { p_rd = ((((int64_t)p_rs1 * (int64_t)p_rs2) >> 30) + 1) >> 1; } else { p_rd = INT32_MAX; + P.VU.vxsat->write(1); } } ) diff --git a/riscv/insns/psas_dhx.h b/riscv/insns/psas_dhx.h index 0757ba6d..d14ccee6 100644 --- 
a/riscv/insns/psas_dhx.h +++ b/riscv/insns/psas_dhx.h @@ -2,7 +2,9 @@ require_rv32; P_CROSS_DW_ULOOP(16, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }) \ No newline at end of file diff --git a/riscv/insns/psas_hx.h b/riscv/insns/psas_hx.h index 3be87430..f1d8f189 100644 --- a/riscv/insns/psas_hx.h +++ b/riscv/insns/psas_hx.h @@ -1,7 +1,9 @@ P_CROSS_ULOOP(16, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }) \ No newline at end of file diff --git a/riscv/insns/psas_wx.h b/riscv/insns/psas_wx.h index 7c403ce2..4673a6ce 100644 --- a/riscv/insns/psas_wx.h +++ b/riscv/insns/psas_wx.h @@ -2,8 +2,10 @@ require_rv64; P_CROSS_ULOOP(32, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); } ) diff --git a/riscv/insns/pssa_dhx.h b/riscv/insns/pssa_dhx.h index 4790270c..d5a64700 100644 --- a/riscv/insns/pssa_dhx.h +++ b/riscv/insns/pssa_dhx.h @@ -2,7 +2,9 @@ require_rv32; P_CROSS_DW_ULOOP(16, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }) \ No newline at end of file diff --git a/riscv/insns/pssa_hx.h b/riscv/insns/pssa_hx.h index 0a8b7c7b..a5acf2fd 100644 --- a/riscv/insns/pssa_hx.h +++ b/riscv/insns/pssa_hx.h @@ -1,7 +1,9 @@ P_CROSS_ULOOP(16, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }) \ No newline at end of file diff --git a/riscv/insns/pssa_wx.h b/riscv/insns/pssa_wx.h index 66f5d638..692d22a4 100644 
--- a/riscv/insns/pssa_wx.h +++ b/riscv/insns/pssa_wx.h @@ -2,8 +2,10 @@ require_rv64; P_CROSS_ULOOP(32, { bool sat = false; p_rd = (sat_sub(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); }, { bool sat = false; p_rd = (sat_add(p_rs1, p_rs2, sat)); + if (sat) P.VU.vxsat->write(1); } ) diff --git a/riscv/insns/pssha_dhs.h b/riscv/insns/pssha_dhs.h index 9489ced7..a85d2a5a 100644 --- a/riscv/insns/pssha_dhs.h +++ b/riscv/insns/pssha_dhs.h @@ -41,4 +41,5 @@ P_RD_RS1_DW_LOOP(16, 16, { p_rd = (uint16_t)((p_rs1 << rev) & mask); } } + if (ov) P.VU.vxsat->write(1); }) \ No newline at end of file diff --git a/riscv/insns/pssha_dws.h b/riscv/insns/pssha_dws.h index 5a43e20d..6d861904 100644 --- a/riscv/insns/pssha_dws.h +++ b/riscv/insns/pssha_dws.h @@ -41,4 +41,5 @@ P_RD_RS1_DW_LOOP(32, 32, { p_rd = (uint32_t)((p_rs1 << rev) & mask); } } + if (ov) P.VU.vxsat->write(1); }) diff --git a/riscv/insns/pssha_hs.h b/riscv/insns/pssha_hs.h index 34d9852e..d88c41d8 100644 --- a/riscv/insns/pssha_hs.h +++ b/riscv/insns/pssha_hs.h @@ -2,8 +2,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8); P_RD_RS1_LOOP(16, 16, { if (p_rs1 == 0) p_rd = 0; - else if (sshamt >= 16) + else if (sshamt >= 16) { p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff; + P.VU.vxsat->write(1); + } else if (sshamt <= -16) p_rd = (p_rs1 & 0x8000) ? 0xffff : 0; else diff --git a/riscv/insns/pssha_ws.h b/riscv/insns/pssha_ws.h index 512132bb..4229aa0f 100644 --- a/riscv/insns/pssha_ws.h +++ b/riscv/insns/pssha_ws.h @@ -3,8 +3,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8); P_RD_RS1_LOOP(32, 32, { if (p_rs1 == 0) p_rd = 0; - else if (sshamt >= 32) + else if (sshamt >= 32) { p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff; + P.VU.vxsat->write(1); + } else if (sshamt <= -32) p_rd = (p_rs1 & 0x80000000) ? 
0xffffffff : 0; else diff --git a/riscv/insns/psshar_dhs.h b/riscv/insns/psshar_dhs.h index 5208256a..33c5eccb 100644 --- a/riscv/insns/psshar_dhs.h +++ b/riscv/insns/psshar_dhs.h @@ -60,4 +60,5 @@ P_RD_RS1_DW_LOOP(16, 16, { p_rd = (uint16_t)((p_rs1 << rev) & mask); } } + if (ov) P.VU.vxsat->write(1); }) diff --git a/riscv/insns/psshar_dws.h b/riscv/insns/psshar_dws.h index ad7abb28..979c9bef 100644 --- a/riscv/insns/psshar_dws.h +++ b/riscv/insns/psshar_dws.h @@ -60,4 +60,5 @@ P_RD_RS1_DW_LOOP(32, 32, { p_rd = (uint32_t)((p_rs1 << rev) & mask); } } + if (ov) P.VU.vxsat->write(1); }) diff --git a/riscv/insns/psshar_hs.h b/riscv/insns/psshar_hs.h index 73d220aa..581d1427 100644 --- a/riscv/insns/psshar_hs.h +++ b/riscv/insns/psshar_hs.h @@ -2,8 +2,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8); P_RD_RS1_LOOP(16, 16, { if (p_rs1 == 0) p_rd = 0; - else if (sshamt >= 16) + else if (sshamt >= 16) { p_rd = (p_rs1 & 0x8000) ? 0x8000 : 0x7fff; + P.VU.vxsat->write(1); + } else if (sshamt <= -16) p_rd = 0; else diff --git a/riscv/insns/psshar_ws.h b/riscv/insns/psshar_ws.h index e63b8500..d34dd16e 100644 --- a/riscv/insns/psshar_ws.h +++ b/riscv/insns/psshar_ws.h @@ -3,8 +3,10 @@ sreg_t sshamt = P_FIELD(RS2, 0, 8); P_RD_RS1_LOOP(32, 32, { if (p_rs1 == 0) p_rd = 0; - else if (sshamt >= 32) + else if (sshamt >= 32) { p_rd = (p_rs1 & 0x80000000) ? 
0x80000000 : 0x7fffffff; + P.VU.vxsat->write(1); + } else if (sshamt <= -32) p_rd = 0; else diff --git a/riscv/insns/pusati_dh.h b/riscv/insns/pusati_dh.h index d204bdc9..16eb4332 100644 --- a/riscv/insns/pusati_dh.h +++ b/riscv/insns/pusati_dh.h @@ -5,7 +5,9 @@ P_RD_RS1_DW_ULOOP(16, 16, { p_rd = p_rs1; if (s < 0) { p_rd = 0; + P.VU.vxsat->write(1); } else if ((uint64_t)s > uint_max) { p_rd = uint_max; + P.VU.vxsat->write(1); } }) \ No newline at end of file diff --git a/riscv/insns/pusati_dw.h b/riscv/insns/pusati_dw.h index 7aa833d0..04f33a6c 100644 --- a/riscv/insns/pusati_dw.h +++ b/riscv/insns/pusati_dw.h @@ -5,7 +5,9 @@ P_RD_RS1_DW_ULOOP(32, 32, { p_rd = p_rs1; if (s < 0) { p_rd = 0; + P.VU.vxsat->write(1); } else if ((uint64_t)s > uint_max) { p_rd = uint_max; + P.VU.vxsat->write(1); } }) \ No newline at end of file diff --git a/riscv/insns/ssha.h b/riscv/insns/ssha.h index ed1247f6..44f7c009 100644 --- a/riscv/insns/ssha.h +++ b/riscv/insns/ssha.h @@ -3,8 +3,10 @@ require_rv32; sreg_t sshamt = P_FIELD(RS2, 0, 8); if (RS1 == 0) WRITE_RD(0); -else if (sshamt >= 32) +else if (sshamt >= 32) { WRITE_RD((RS1 & 0x80000000) ? 0x80000000 : 0x7fffffff); + P.VU.vxsat->write(1); +} else if (sshamt <= -32) WRITE_RD((RS1 & 0x80000000) ? 0xffffffff : 0); else diff --git a/riscv/insns/sshar.h b/riscv/insns/sshar.h index d602edb7..2ebea187 100644 --- a/riscv/insns/sshar.h +++ b/riscv/insns/sshar.h @@ -3,8 +3,10 @@ require_rv32; sreg_t sshamt = P_FIELD(RS2, 0, 8); if (RS1 == 0) WRITE_RD(0); -else if (sshamt >= 32) +else if (sshamt >= 32) { WRITE_RD((RS1 & 0x80000000) ? 
0x80000000 : 0x7fffffff); + P.VU.vxsat->write(1); +} else if (sshamt <= -32) WRITE_RD(0); else diff --git a/riscv/p_ext_macros.h b/riscv/p_ext_macros.h index c0498246..2501300f 100644 --- a/riscv/p_ext_macros.h +++ b/riscv/p_ext_macros.h @@ -138,18 +138,6 @@ sreg_t p_res = P_UFIELD(rd_tmp, i, BIT); \ for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) { -#define P_REDUCTION_ULOOP_BASE(BIT, BIT_INNER, USE_RD) \ - require_extension('P'); \ - require(BIT == e16 || BIT == e32 || BIT == e64); \ - reg_t rd_tmp = USE_RD ? zext_xlen(RD) : 0; \ - reg_t rs1 = zext_xlen(RS1); \ - reg_t rs2 = zext_xlen(RS2); \ - sreg_t len = 64 / BIT; \ - sreg_t len_inner = BIT / BIT_INNER; \ - for (sreg_t i = len - 1; i >= 0; --i) { \ - sreg_t p_res = P_UFIELD(rd_tmp, i, BIT); \ - for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) { - #define P_WIDEN_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \ require_extension('P'); \ require(BIT == e16 || BIT == e32 || BIT == e64); \