Browse Source

Merge pull request #2207 from riscv-software-src/fix-2206

Only set mstatus.VS for legal vector instructions
pull/2210/merge
Andrew Waterman 2 months ago
committed by GitHub
parent
commit
591cff1610
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 4
      riscv/csrs.cc
  2. 3
      riscv/decode_macros.h
  3. 3
      riscv/insns/vcompress_vm.h
  4. 2
      riscv/insns/vcpop_m.h
  5. 2
      riscv/insns/vfbdot_vv.h
  6. 2
      riscv/insns/vfirst_m.h
  7. 2
      riscv/insns/vfmv_f_s.h
  8. 2
      riscv/insns/vfmv_s_f.h
  9. 2
      riscv/insns/vfqbdot_alt_vv.h
  10. 2
      riscv/insns/vfqbdot_vv.h
  11. 2
      riscv/insns/vfqldot_alt_vv.h
  12. 2
      riscv/insns/vfqldot_vv.h
  13. 2
      riscv/insns/vfwbdot_vv.h
  14. 2
      riscv/insns/vfwldot_vv.h
  15. 2
      riscv/insns/vid_v.h
  16. 1
      riscv/insns/viota_m.h
  17. 2
      riscv/insns/vmsbf_m.h
  18. 2
      riscv/insns/vmsif_m.h
  19. 2
      riscv/insns/vmsof_m.h
  20. 2
      riscv/insns/vmv_s_x.h
  21. 2
      riscv/insns/vmv_x_s.h
  22. 2
      riscv/insns/vmvnfr_v.h
  23. 2
      riscv/insns/vqbdots_vv.h
  24. 2
      riscv/insns/vqbdotu_vv.h
  25. 2
      riscv/insns/vqldots_vv.h
  26. 2
      riscv/insns/vqldotu_vv.h
  27. 1
      riscv/insns/vsetivli.h
  28. 1
      riscv/insns/vsetvl.h
  29. 1
      riscv/insns/vsetvli.h
  30. 84
      riscv/v_ext_macros.h
  31. 1
      riscv/vector_unit.cc
  32. 16
      riscv/zvk_ext_macros.h

4
riscv/csrs.cc

@ -1598,7 +1598,7 @@ void vector_csr_t::write_raw(const reg_t val) noexcept {
// Writes `val`, restricted to this CSR's writable `mask`, through
// basic_csr_t::unlogged_write and returns that call's result.
// Returns false without touching any state when `mask` is zero.
bool vector_csr_t::unlogged_write(const reg_t val) noexcept {
  if (mask == 0) return false;
  // The write modifies vector state, so flag sstatus.VS as dirty. This is
  // done exactly once: the previous code issued the same dirty() call twice
  // in a row (once through a macro, once spelled out).
  STATE.sstatus->dirty(SSTATUS_VS);
  return basic_csr_t::unlogged_write(val & mask);
}
@ -1612,7 +1612,7 @@ void vxsat_csr_t::verify_permissions(insn_t insn, bool write) const {
}
// Writes `val` through masked_csr_t::unlogged_write and returns that call's
// result, after flagging the vector state as dirty.
bool vxsat_csr_t::unlogged_write(const reg_t val) noexcept {
  // Flag sstatus.VS as dirty exactly once; the previous code issued the same
  // dirty() call twice in a row (once through a macro, once spelled out).
  STATE.sstatus->dirty(SSTATUS_VS);
  return masked_csr_t::unlogged_write(val);
}

3
riscv/decode_macros.h

@ -110,7 +110,6 @@
// Operand selected by the instruction's rs3 field, read through READ_FREG_D.
#define FRS3_D READ_FREG_D(insn.rs3())
// Flag the floating-point state (SSTATUS_FS) as dirty in sstatus.
#define dirty_fp_state STATE.sstatus->dirty(SSTATUS_FS)
// Flag the extension state (SSTATUS_XS) as dirty in sstatus.
#define dirty_ext_state STATE.sstatus->dirty(SSTATUS_XS)
// Flag the vector state (SSTATUS_VS) as dirty in sstatus.
#define dirty_vs_state STATE.sstatus->dirty(SSTATUS_VS)
// Write `value` to FP register `reg`, then flag FP state dirty. Written as a
// comma expression so the pair can be used where one expression is expected.
#define DO_WRITE_FREG(reg, value) (STATE.FPR.write(reg, value), dirty_fp_state)
// Write `value` to the FP register selected by the instruction's rd field.
#define WRITE_FRD(value) WRITE_FREG(insn.rd(), value)
#define WRITE_FRD_H(value) \
@ -173,14 +172,12 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
if (alu && !P.VU.vstart_alu) \
require(P.VU.vstart->read() == 0); \
WRITE_VSTATUS; \
dirty_vs_state; \
} while (0);
// Entry check for vector instructions: runs require_vector_vs and, when
// `is_log` is true, WRITE_VSTATUS.
//
// This macro intentionally does NOT mark sstatus.VS dirty. It runs before the
// instruction's remaining `require` checks, so dirtying here would flag VS
// even for an instruction that is subsequently rejected as illegal. VS is
// dirtied when the instruction completes and writes vstart (VECTOR_END).
//
// NOTE(review): the trailing ';' after `while (0)` is kept for compatibility
// with existing call sites; it means the macro cannot be used as the sole
// statement of an `if` that has an `else`.
#define require_vector_novtype(is_log) \
  do { \
    require_vector_vs; \
    if (is_log) \
      WRITE_VSTATUS; \
  } while (0);
#define require_align(val, pos) require(is_aligned(val, pos))
#define require_noover(astart, asize, bstart, bsize) \

3
riscv/insns/vcompress_vm.h

@ -27,4 +27,5 @@ VI_GENERAL_LOOP_BASE
++pos;
}
VI_LOOP_END_BASE;
VI_LOOP_END;

2
riscv/insns/vcpop_m.h

@ -10,3 +10,5 @@ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i));
}
WRITE_RD(popcount);
VECTOR_END;

2
riscv/insns/vfbdot_vv.h

@ -14,3 +14,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfirst_m.h

@ -14,3 +14,5 @@ for (reg_t i=P.VU.vstart->read(); i < vl; ++i) {
}
}
WRITE_RD(pos);
VECTOR_END;

2
riscv/insns/vfmv_f_s.h

@ -29,4 +29,4 @@ if (FLEN == 64) {
WRITE_FRD(f32(vs2_0));
}
P.VU.vstart->write(0);
VECTOR_END;

2
riscv/insns/vfmv_s_f.h

@ -21,4 +21,4 @@ if (vl > 0 && P.VU.vstart->read() < vl) {
break;
}
}
P.VU.vstart->write(0);
VECTOR_END;

2
riscv/insns/vfqbdot_alt_vv.h

@ -15,3 +15,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfqbdot_vv.h

@ -15,3 +15,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfqldot_alt_vv.h

@ -15,3 +15,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfqldot_vv.h

@ -15,3 +15,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfwbdot_vv.h

@ -13,3 +13,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vfwldot_vv.h

@ -13,3 +13,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vid_v.h

@ -25,4 +25,4 @@ for (reg_t i = P.VU.vstart->read() ; i < P.VU.vl->read(); ++i) {
}
}
P.VU.vstart->write(0);
VECTOR_END;

1
riscv/insns/viota_m.h

@ -45,3 +45,4 @@ for (reg_t i = 0; i < vl; ++i) {
}
}
VECTOR_END;

2
riscv/insns/vmsbf_m.h

@ -25,3 +25,5 @@ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
P.VU.set_mask_elt(rd_num, i, res);
}
}
VECTOR_END;

2
riscv/insns/vmsif_m.h

@ -26,3 +26,5 @@ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
P.VU.set_mask_elt(rd_num, i, res);
}
}
VECTOR_END;

2
riscv/insns/vmsof_m.h

@ -24,3 +24,5 @@ for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) {
P.VU.set_mask_elt(rd_num, i, res);
}
}
VECTOR_END;

2
riscv/insns/vmv_s_x.h

@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart->read() < vl) {
vl = 0;
}
P.VU.vstart->write(0);
VECTOR_END;

2
riscv/insns/vmv_x_s.h

@ -24,4 +24,4 @@ default:
WRITE_RD(sext_xlen(res));
P.VU.vstart->write(0);
VECTOR_END;

2
riscv/insns/vmvnfr_v.h

@ -24,4 +24,4 @@ if (vd != vs2 && start < size) {
}
}
P.VU.vstart->write(0);
VECTOR_END;

2
riscv/insns/vqbdots_vv.h

@ -21,3 +21,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vqbdotu_vv.h

@ -21,3 +21,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vqldots_vv.h

@ -21,3 +21,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

2
riscv/insns/vqldotu_vv.h

@ -21,3 +21,5 @@ switch (P.VU.vsew) {
}
default: require(false);
}
VECTOR_END;

1
riscv/insns/vsetivli.h

@ -1,2 +1,3 @@
// vsetivli: reconfigure the vector unit and hand the value returned by
// set_vl to WRITE_RD.
// The vector-enable check runs without the WRITE_VSTATUS logging step
// (is_log == false).
require_vector_novtype(false);
// set_vl is called with -1 in its rs1 slot; the instruction's rs1 field is
// passed as the requested length, and zimm10 is passed as the last argument.
WRITE_RD(P.VU.set_vl(insn.rd(), -1, insn.rs1(), insn.v_zimm10()));
// Instruction completed: reset vstart to 0.
VECTOR_END;

1
riscv/insns/vsetvl.h

@ -1,2 +1,3 @@
// vsetvl: reconfigure the vector unit and hand the value returned by set_vl
// to WRITE_RD.
// The vector-enable check runs without the WRITE_VSTATUS logging step
// (is_log == false).
require_vector_novtype(false);
// The requested length comes from register operand RS1 and the last argument
// from register operand RS2; the rd and rs1 register numbers are passed too.
WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, RS2));
// Instruction completed: reset vstart to 0.
VECTOR_END;

1
riscv/insns/vsetvli.h

@ -1,2 +1,3 @@
// vsetvli: reconfigure the vector unit and hand the value returned by set_vl
// to WRITE_RD.
// The vector-enable check runs without the WRITE_VSTATUS logging step
// (is_log == false).
require_vector_novtype(false);
// The requested length comes from register operand RS1 and the last argument
// from the zimm11 immediate; the rd and rs1 register numbers are passed too.
WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, insn.v_zimm11()));
// Instruction completed: reset vstart to 0.
VECTOR_END;

84
riscv/v_ext_macros.h

@ -233,16 +233,19 @@ static inline bool is_overlapped_widen(const int astart, int asize,
// Emits a single closing brace. Presumably closes the element loop opened by
// a matching *_LOOP_BASE macro (e.g. VI_GENERAL_LOOP_BASE) — confirm at the
// definition of those macros.
#define VI_LOOP_END_BASE \
}
// Common epilogue for a completed vector instruction: resets vstart to 0.
// The expansion has no trailing semicolon; call sites supply it.
// NOTE(review): the vstart write is also what appears to mark VS dirty (see
// the "dirty VS" remark in VI_LDST_FF) — confirm against the vstart CSR class.
#define VECTOR_END \
P.VU.vstart->write(0)
// Ends a vector loop: closes the loop body (VI_LOOP_END_BASE) and then runs
// the common instruction epilogue (VECTOR_END, which resets vstart to 0).
// The epilogue is spelled only as VECTOR_END so the whole sequence stays
// inside the macro; a leftover explicit vstart write without a continuation
// backslash had ended the macro early and stranded VECTOR_END at file scope.
#define VI_LOOP_END \
  VI_LOOP_END_BASE \
  VECTOR_END;
// Ends a reduction loop: closes the loop body, commits the accumulated
// result `vd_0_res` into `vd_0_des` when vl is non-zero, and then runs the
// common instruction epilogue (VECTOR_END, which resets vstart to 0).
// The parameter `x` is not used by this expansion.
// The epilogue is spelled only as VECTOR_END so the whole sequence stays
// inside the macro; a leftover explicit vstart write without a continuation
// backslash had ended the macro early and stranded VECTOR_END at file scope.
#define VI_LOOP_REDUCTION_END(x) \
  } \
  if (vl > 0) { \
    vd_0_des = vd_0_res; \
  } \
  VECTOR_END;
#define VI_LOOP_CARRY_BASE \
VI_GENERAL_LOOP_BASE \
@ -253,7 +256,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
// Ends a carry-producing loop: stores the per-element result `res` into mask
// bit `i` of the destination register, closes the loop body, and then runs
// the common instruction epilogue (VECTOR_END, which resets vstart to 0).
// The epilogue is spelled only as VECTOR_END so the whole sequence stays
// inside the macro; a leftover explicit vstart write without a continuation
// backslash had ended the macro early and stranded VECTOR_END at file scope.
#define VI_LOOP_CARRY_END \
    P.VU.set_mask_elt(insn.rd(), i, res); \
  } \
  VECTOR_END;
#define VI_LOOP_WITH_CARRY_BASE \
VI_GENERAL_LOOP_BASE \
const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \
@ -274,7 +277,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
// Ends a compare loop: stores the per-element result `res` into mask bit `i`
// of the destination register, closes the loop body, and then runs the
// common instruction epilogue (VECTOR_END, which resets vstart to 0).
// The epilogue is spelled only as VECTOR_END so the whole sequence stays
// inside the macro; a leftover explicit vstart write without a continuation
// backslash had ended the macro early and stranded VECTOR_END at file scope.
#define VI_LOOP_CMP_END \
    P.VU.set_mask_elt(insn.rd(), i, res); \
  } \
  VECTOR_END;
#define VI_LOOP_MASK(op) \
require(P.VU.vsew <= e64); \
@ -285,7 +288,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
bool vs1 = P.VU.mask_elt(insn.rs1(), i); \
P.VU.set_mask_elt(insn.rd(), i, (op)); \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_LOOP_NSHIFT_BASE \
VI_GENERAL_LOOP_BASE; \
@ -1199,7 +1202,7 @@ VI_VX_ULOOP({ \
P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
} \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_LDST_GET_INDEX(elt_width) \
reg_t index; \
@ -1252,7 +1255,7 @@ VI_VX_ULOOP({ \
} \
} \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_ST(stride, offset, elt_width, is_mask_ldst) \
const reg_t nf = insn.v_nf() + 1; \
@ -1270,7 +1273,7 @@ VI_VX_ULOOP({ \
baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \
} \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_ST_INDEX(elt_width, is_seg) \
const reg_t nf = insn.v_nf() + 1; \
@ -1306,7 +1309,7 @@ VI_VX_ULOOP({ \
} \
} \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_LDST_FF(elt_width) \
const reg_t nf = insn.v_nf() + 1; \
@ -1325,8 +1328,10 @@ VI_VX_ULOOP({ \
val = MMU.load<elt_width##_t>( \
baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \
} catch (trap_t& t) { \
if (i == 0) \
if (i == 0) { \
P.VU.vstart->write(0); /* dirty VS */ \
throw; /* Only take exception on zeroth element */ \
} \
/* Reduce VL if an exception occurs on a later element */ \
early_stop = true; \
P.VU.vl->write_raw(i); \
@ -1339,7 +1344,7 @@ VI_VX_ULOOP({ \
break; \
} \
} \
p->VU.vstart->write(0);
VECTOR_END;
#define VI_LD_WHOLE(elt_width) \
require_vector_novtype(true); \
@ -1350,28 +1355,12 @@ VI_VX_ULOOP({ \
require_align(vd, len); \
const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \
const reg_t size = len * elt_per_reg; \
if (P.VU.vstart->read() < size) { \
reg_t i = P.VU.vstart->read() / elt_per_reg; \
reg_t off = P.VU.vstart->read() % elt_per_reg; \
if (off) { \
for (reg_t pos = off; pos < elt_per_reg; ++pos) { \
auto val = MMU.load<elt_width##_t>(baseAddr + \
P.VU.vstart->read() * sizeof(elt_width ## _t)); \
P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \
P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
++i; \
} \
for (; i < len; ++i) { \
for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \
auto val = MMU.load<elt_width##_t>(baseAddr + \
P.VU.vstart->read() * sizeof(elt_width ## _t)); \
P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \
P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
} \
for (reg_t i = P.VU.vstart->read(); i < size; i++) { \
P.VU.vstart->write(i); \
auto val = MMU.load<elt_width##_t>(baseAddr + i * sizeof(elt_width ## _t)); \
P.VU.elt<elt_width ## _t>(vd, i, true) = val; \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_ST_WHOLE \
require_vector_novtype(true); \
@ -1380,27 +1369,12 @@ VI_VX_ULOOP({ \
const reg_t len = insn.v_nf() + 1; \
require_align(vs3, len); \
const reg_t size = len * P.VU.vlenb; \
\
if (P.VU.vstart->read() < size) { \
reg_t i = P.VU.vstart->read() / P.VU.vlenb; \
reg_t off = P.VU.vstart->read() % P.VU.vlenb; \
if (off) { \
for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \
auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \
MMU.store<uint8_t>(baseAddr + P.VU.vstart->read(), val); \
P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
i++; \
} \
for (; i < len; ++i) { \
for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \
auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \
MMU.store<uint8_t>(baseAddr + P.VU.vstart->read(), val); \
P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
} \
for (reg_t i = P.VU.vstart->read(); i < size; i++) { \
P.VU.vstart->write(i); \
auto val = P.VU.elt<uint8_t>(vs3, i); \
MMU.store<uint8_t>(baseAddr + i, val); \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_EXT_CHECK(div) \
require(insn.rd() != insn.rs2()); \
@ -1509,11 +1483,11 @@ VI_VX_ULOOP({ \
#define VI_VFP_LOOP_END \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
if (vl > 0) { \
if (is_propagate && !is_active) { \
switch (x) { \
@ -1575,7 +1549,7 @@ VI_VX_ULOOP({ \
break; \
}; \
} \
P.VU.vstart->write(0);
VECTOR_END;
#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(true); \

1
riscv/vector_unit.cc

@ -89,7 +89,6 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new
vl->write_raw(std::min(reqVL, vlmax));
}
vstart->write_raw(0);
return vl->read();
}

16
riscv/zvk_ext_macros.h

@ -322,7 +322,7 @@
VV_VD_VS1_VS2_EGU32x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \
EG_BODY \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*8 element groups available in the vector register
@ -377,7 +377,7 @@
VV_VD_VS1_VS2_EGU32x8_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \
EG_BODY \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*4 element groups available in the vector register
@ -445,7 +445,7 @@
EG_BODY \
} \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*4 element groups available in the vector register
@ -513,7 +513,7 @@
EG_BODY \
} \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*4 element groups available in the vector registers
@ -560,7 +560,7 @@
VV_VD_VS2_EGU32x4_PARAMS(vd_num, vs2_num, idx_eg); \
EG_BODY \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*4 element groups available in the vector registers
@ -616,7 +616,7 @@
EG_BODY \
} \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 32b*8 element groups available in the vector registers
@ -672,7 +672,7 @@
EG_BODY \
} \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)
// Processes all 64b*4 element groups available in the vector registers
@ -726,7 +726,7 @@
VV_VD_VS1_VS2_EGU64x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \
EG_BODY \
} \
P.VU.vstart->write(0); \
VECTOR_END; \
} while (0)

Loading…
Cancel
Save