rvv: leave only SEW-bit segment store

new features in spec 0.9

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
6 years ago · f5be48f027
17 changed files with 58 additions and 156 deletions
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -1486,13 +1486,15 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
  } \
 }

-#define VI_ST_COMMON(stride, offset, st_width, elt_byte) \
+#define VI_ST_COMMON(stride, offset, st_width, elt_byte, is_seg) \
  const reg_t nf = insn.v_nf() + 1; \
  const reg_t vl = P.VU.vl; \
  const reg_t baseAddr = RS1; \
  const reg_t vs3 = insn.rd(); \
  require((nf * P.VU.vlmul) <= (NVPR / 4) && \
          vs3 + nf * P.VU.vlmul <= NVPR); \
+  if (!is_seg) \
+    require(nf == 1); \
  const reg_t vlmul = P.VU.vlmul; \
  for (reg_t i = 0; i < vl; ++i) { \
    VI_STRIP(i) \
@@ -1563,13 +1565,13 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
  if (nf >= 2) \
    require(!is_overlapped(vd, nf, insn.rs2(), 1));

-#define VI_ST(stride, offset, st_width, elt_byte) \
+#define VI_ST(stride, offset, st_width, elt_byte, is_seg) \
  VI_CHECK_STORE_SXX; \
-  VI_ST_COMMON(stride, offset, st_width, elt_byte) \
+  VI_ST_COMMON(stride, offset, st_width, elt_byte, is_seg) \

-#define VI_ST_INDEX(stride, offset, st_width, elt_byte) \
+#define VI_ST_INDEX(stride, offset, st_width, elt_byte, is_seg) \
  VI_CHECK_ST_INDEX; \
-  VI_ST_COMMON(stride, offset, st_width, elt_byte) \
+  VI_ST_COMMON(stride, offset, st_width, elt_byte, is_seg) \

 #define VI_LDST_FF(itype, tsew, is_seg) \
  require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \
--- a/riscv/insns/vsb_v.h
+++ b/riscv/insns/vsb_v.h
@@ -1,3 +1,3 @@
-// vsb.v and vsseg[2-8]b.v
+// vsb.v
 require(P.VU.vsew >= e8);
-VI_ST(0, i * nf + fn, uint8, 1);
+VI_ST(0, i * nf + fn, uint8, 1, false);
--- a/riscv/insns/vse_v.h
+++ b/riscv/insns/vse_v.h
@@ -1,13 +1,13 @@
-// vsw.v and vsseg[2-8]w.v
+// vse.v and vsseg[2-8]e.v
 reg_t sew = P.VU.vsew;

 if (sew == e8) {
-  VI_ST(0, (i * nf + fn), uint8, 1);
+  VI_ST(0, (i * nf + fn), uint8,  1, true);
 } else if (sew == e16) {
-  VI_ST(0, (i * nf + fn), uint16, 2);
+  VI_ST(0, (i * nf + fn), uint16, 2, true);
 } else if (sew == e32) {
-  VI_ST(0, (i * nf + fn), uint32, 4);
+  VI_ST(0, (i * nf + fn), uint32, 4, true);
 } else if (sew == e64) {
-  VI_ST(0, (i * nf + fn), uint64, 8);
+  VI_ST(0, (i * nf + fn), uint64, 8, true);
 }

--- a/riscv/insns/vsh_v.h
+++ b/riscv/insns/vsh_v.h
@@ -1,3 +1,3 @@
-// vsh.v and vsseg[2-8]h.v
+// vsh.v
 require(P.VU.vsew >= e16);
-VI_ST(0, i * nf + fn, uint16, 2);
+VI_ST(0, i * nf + fn, uint16, 2, false);
--- a/riscv/insns/vssb_v.h
+++ b/riscv/insns/vssb_v.h
@@ -1,3 +1,3 @@
-// vssb.v and vssseg[2-8]b.v
+// vssb.v
 require(P.VU.vsew >= e8);
-VI_ST(i * RS2, fn, uint8, 1);
+VI_ST(i * RS2, fn, uint8, 1, false);
--- a/riscv/insns/vsse_v.h
+++ b/riscv/insns/vsse_v.h
@@ -2,12 +2,12 @@
 reg_t sew = P.VU.vsew;

 if (sew == e8) {
-  VI_ST(i * RS2, fn, uint8, 1);
+  VI_ST(i * RS2, fn, uint8,  1, true);
 } else if (sew == e16) {
-  VI_ST(i * RS2, fn, uint16, 2);
+  VI_ST(i * RS2, fn, uint16, 2, true);
 } else if (sew == e32) {
-  VI_ST(i * RS2, fn, uint32, 4);
+  VI_ST(i * RS2, fn, uint32, 4, true);
 } else if (sew == e64) {
-  VI_ST(i * RS2, fn, uint64, 8);
+  VI_ST(i * RS2, fn, uint64, 8, true);
 }

--- a/riscv/insns/vssh_v.h
+++ b/riscv/insns/vssh_v.h
@@ -1,3 +1,3 @@
-// vssh.v and vssseg[2-8]h.v
+// vssh.v
 require(P.VU.vsew >= e16);
-VI_ST(i * RS2, fn, uint16, 2);
+VI_ST(i * RS2, fn, uint16, 2, false);
--- a/riscv/insns/vssw_v.h
+++ b/riscv/insns/vssw_v.h
@@ -1,3 +1,3 @@
-// vssw.v and vssseg[2-8]w.v
+// vssw.v
 require(P.VU.vsew >= e32);
-VI_ST(i * RS2, fn, uint32, 4);
+VI_ST(i * RS2, fn, uint32, 4, false);
--- a/riscv/insns/vsuxb_v.h
+++ b/riscv/insns/vsuxb_v.h
@@ -1,34 +1,4 @@
-// vsuxb.v and vsxseg[2-8]b.v
+// vsuxb.v
 require(P.VU.vsew >= e8);
-VI_CHECK_STORE_SXX;
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
-reg_t vl = P.VU.vl;
-reg_t baseAddr = RS1;
-reg_t stride = insn.rs2();
-reg_t vs3 = insn.rd();
-reg_t vlmax = P.VU.vlmax;
-VI_DUPLICATE_VREG(stride, vlmax);
-for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  VI_ELEMENT_SKIP(i);
-  VI_STRIP(i)
-
-  switch (P.VU.vsew) {
-  case e8:
-    MMU.store_uint8(baseAddr + index[i],
-                    P.VU.elt<uint8_t>(vs3, vreg_inx));
-    break;
-  case e16:
-    MMU.store_uint8(baseAddr + index[i],
-                    P.VU.elt<uint16_t>(vs3, vreg_inx));
-    break;
-  case e32:
-    MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint32_t>(vs3, vreg_inx));
-    break;
-  case e64:
-    MMU.store_uint8(baseAddr + index[i],
-                    P.VU.elt<uint64_t>(vs3, vreg_inx));
-    break;
-  }
-}
-P.VU.vstart = 0;
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST_INDEX(index[i], fn, uint8, 1, false);
--- a/riscv/insns/vsuxe_v.h
+++ b/riscv/insns/vsuxe_v.h
@@ -1,35 +1,13 @@
-// vsxe.v and vsxseg[2-8]e.v
-const reg_t sew = P.VU.vsew;
-const reg_t vl = P.VU.vl;
+// vsuxe.v
+reg_t sew = P.VU.vsew;
 require(sew >= e8 && sew <= e64);
-VI_CHECK_STORE_SXX;
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
-reg_t baseAddr = RS1;
-reg_t stride = insn.rs2();
-reg_t vs3 = insn.rd();
-reg_t vlmax = P.VU.vlmax;
-VI_DUPLICATE_VREG(stride, vlmax);
-for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  VI_ELEMENT_SKIP(i);
-  VI_STRIP(i)
-
-  switch (sew) {
-  case e8:
-    MMU.store_uint8(baseAddr + index[i],
-                    P.VU.elt<uint8_t>(vs3, vreg_inx));
-    break;
-  case e16:
-    MMU.store_uint16(baseAddr + index[i],
-                     P.VU.elt<uint16_t>(vs3, vreg_inx));
-    break;
-  case e32:
-    MMU.store_uint32(baseAddr + index[i],
-                     P.VU.elt<uint32_t>(vs3, vreg_inx));
-    break;
-  case e64:
-    MMU.store_uint64(baseAddr + index[i],
-                     P.VU.elt<uint64_t>(vs3, vreg_inx));
-    break;
-  }
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+if (sew == e8) {
+  VI_ST_INDEX(index[i], fn, uint8,  1, false);
+} else if (sew == e16) {
+  VI_ST_INDEX(index[i], fn, uint16, 2, false);
+} else if (sew == e32) {
+  VI_ST_INDEX(index[i], fn, uint32, 4, false);
+} else if (sew == e64) {
+  VI_ST_INDEX(index[i], fn, uint64, 8, false);
 }
-P.VU.vstart = 0;
--- a/riscv/insns/vsuxh_v.h
+++ b/riscv/insns/vsuxh_v.h
@@ -1,30 +1,4 @@
-// vsxh.v and vsxseg[2-8]h.v
+// vsuxh.v
 require(P.VU.vsew >= e16);
-VI_CHECK_STORE_SXX;
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
-reg_t vl = P.VU.vl;
-reg_t baseAddr = RS1;
-reg_t stride = insn.rs2();
-reg_t vs3 = insn.rd();
-reg_t vlmax = P.VU.vlmax;
-VI_DUPLICATE_VREG(stride, vlmax);
-for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  VI_ELEMENT_SKIP(i);
-  VI_STRIP(i)
-
-  switch (P.VU.vsew) {
-  case e16:
-    MMU.store_uint16(baseAddr + index[i],
-                     P.VU.elt<uint16_t>(vs3, vreg_inx));
-    break;
-  case e32:
-    MMU.store_uint16(baseAddr + index[i],
-                     P.VU.elt<uint32_t>(vs3, vreg_inx));
-    break;
-  case e64:
-    MMU.store_uint16(baseAddr + index[i],
-                     P.VU.elt<uint64_t>(vs3, vreg_inx));
-    break;
-  }
-}
-P.VU.vstart = 0;
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST_INDEX(index[i], fn, uint16, 2, false);
--- a/riscv/insns/vsuxw_v.h
+++ b/riscv/insns/vsuxw_v.h
@@ -1,26 +1,4 @@
-// vsxw.v and vsxseg[2-8]w.v
+// vsuxw.v
 require(P.VU.vsew >= e32);
-VI_CHECK_STORE_SXX;
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
-reg_t vl = P.VU.vl;
-reg_t baseAddr = RS1;
-reg_t stride = insn.rs2();
-reg_t vs3 = insn.rd();
-reg_t vlmax = P.VU.vlmax;
-VI_DUPLICATE_VREG(stride, vlmax);
-for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  VI_ELEMENT_SKIP(i);
-  VI_STRIP(i)
-
-  switch (P.VU.vsew) {
-  case e32:
-    MMU.store_uint32(baseAddr + index[i],
-                     P.VU.elt<uint32_t>(vs3, vreg_inx));
-    break;
-  case e64:
-    MMU.store_uint32(baseAddr + index[i],
-                     P.VU.elt<uint64_t>(vs3, vreg_inx));
-    break;
-  }
-}
-P.VU.vstart = 0;
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST_INDEX(index[i], fn, uint32, 4, false);
--- a/riscv/insns/vsw_v.h
+++ b/riscv/insns/vsw_v.h
@@ -1,3 +1,3 @@
-// vsw.v and vsseg[2-8]w.v
+// vsw.v
 require(P.VU.vsew >= e32);
-VI_ST(0, i * nf + fn, uint32, 4);
+VI_ST(0, i * nf + fn, uint32, 4, false);
--- a/riscv/insns/vsxb_v.h
+++ b/riscv/insns/vsxb_v.h
@@ -1,4 +1,4 @@
-// vsxb.v and vsxseg[2-8]b.v
+// vsxb.v
 require(P.VU.vsew >= e8);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
-VI_ST_INDEX(index[i], fn, uint8, 1);
+VI_ST_INDEX(index[i], fn, uint8, 1, false);
--- a/riscv/insns/vsxe_v.h
+++ b/riscv/insns/vsxe_v.h
@@ -3,12 +3,12 @@ reg_t sew = P.VU.vsew;
 require(sew >= e8 && sew <= e64);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 if (sew == e8) {
-  VI_ST_INDEX(index[i], fn, uint8, 1);
+  VI_ST_INDEX(index[i], fn, uint8,  1, true);
 } else if (sew == e16) {
-  VI_ST_INDEX(index[i], fn, uint16, 2);
+  VI_ST_INDEX(index[i], fn, uint16, 2, true);
 } else if (sew == e32) {
-  VI_ST_INDEX(index[i], fn, uint32, 4);
+  VI_ST_INDEX(index[i], fn, uint32, 4, true);
 } else if (sew == e64) {
-  VI_ST_INDEX(index[i], fn, uint64, 8);
+  VI_ST_INDEX(index[i], fn, uint64, 8, true);
 }

--- a/riscv/insns/vsxh_v.h
+++ b/riscv/insns/vsxh_v.h
@@ -1,4 +1,4 @@
-// vsxh.v and vsxseg[2-8]h.v
+// vsxh.v
 require(P.VU.vsew >= e16);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
-VI_ST_INDEX(index[i], fn, uint16, 2);
+VI_ST_INDEX(index[i], fn, uint16, 2, false);
--- a/riscv/insns/vsxw_v.h
+++ b/riscv/insns/vsxw_v.h
@@ -1,4 +1,4 @@
-// vsxw.v and vsxseg[2-8]w.v
+// vsxw.v
 require(P.VU.vsew >= e32);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
-VI_ST_INDEX(index[i], fn, uint32, 4);
+VI_ST_INDEX(index[i], fn, uint32, 4, false);