
rvv: add integer/fixed-point/mask/reduction/permutation instructions

Based on v-spec 0.7.1, supporting:
  sections: 12/13/15.1 ~ 15.2/16/17
  element sizes: 8/16/32/64
  ediv: 1

Signed-off-by: Bruce Hoult <bruce@hoult.org>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Signed-off-by: Dave Wen <dave.wen@sifive.com>
Chih-Min Chao committed 7 years ago (branch: pull/303/head)
parent commit 655aedc0eb
  1. riscv/decode.h (+1041)
  2. riscv/insns/vaadd_vi.h (+9)
  3. riscv/insns/vaadd_vv.h (+2)
  4. riscv/insns/vaadd_vx.h (+2)
  5. riscv/insns/vadc_vim.h (+11)
  6. riscv/insns/vadc_vvm.h (+11)
  7. riscv/insns/vadc_vxm.h (+11)
  8. riscv/insns/vadd_vi.h (+5)
  9. riscv/insns/vadd_vv.h (+5)
  10. riscv/insns/vadd_vx.h (+5)
  11. riscv/insns/vand_vi.h (+5)
  12. riscv/insns/vand_vv.h (+5)
  13. riscv/insns/vand_vx.h (+5)
  14. riscv/insns/vasub_vv.h (+2)
  15. riscv/insns/vasub_vx.h (+2)
  16. riscv/insns/vcompress_vm.h (+41)
  17. riscv/insns/vdiv_vv.h (+10)
  18. riscv/insns/vdiv_vx.h (+10)
  19. riscv/insns/vdivu_vv.h (+8)
  20. riscv/insns/vdivu_vx.h (+8)
  21. riscv/insns/vdot_vv.h (+5)
  22. riscv/insns/vdotu_vv.h (+5)
  23. riscv/insns/vext_x_v.h (+30)
  24. riscv/insns/vid_v.h (+30)
  25. riscv/insns/viota_m.h (+52)
  26. riscv/insns/vmacc_vv.h (+5)
  27. riscv/insns/vmacc_vx.h (+5)
  28. riscv/insns/vmadc_vim.h (+14)
  29. riscv/insns/vmadc_vvm.h (+14)
  30. riscv/insns/vmadc_vxm.h (+14)
  31. riscv/insns/vmadd_vv.h (+5)
  32. riscv/insns/vmadd_vx.h (+5)
  33. riscv/insns/vmand_mm.h (+2)
  34. riscv/insns/vmandnot_mm.h (+2)
  35. riscv/insns/vmax_vv.h (+10)
  36. riscv/insns/vmax_vx.h (+10)
  37. riscv/insns/vmaxu_vv.h (+9)
  38. riscv/insns/vmaxu_vx.h (+9)
  39. riscv/insns/vmerge_vim.h (+9)
  40. riscv/insns/vmerge_vvm.h (+9)
  41. riscv/insns/vmerge_vxm.h (+9)
  42. riscv/insns/vmfeq_vf.h (+5)
  43. riscv/insns/vmfeq_vv.h (+5)
  44. riscv/insns/vmfge_vf.h (+5)
  45. riscv/insns/vmfgt_vf.h (+5)
  46. riscv/insns/vmfirst_m.h (+20)
  47. riscv/insns/vmfle_vf.h (+5)
  48. riscv/insns/vmfle_vv.h (+5)
  49. riscv/insns/vmflt_vf.h (+5)
  50. riscv/insns/vmflt_vv.h (+5)
  51. riscv/insns/vmfne_vf.h (+5)
  52. riscv/insns/vmfne_vv.h (+5)
  53. riscv/insns/vmford_vf.h (+5)
  54. riscv/insns/vmford_vv.h (+5)
  55. riscv/insns/vmin_vv.h (+11)
  56. riscv/insns/vmin_vx.h (+11)
  57. riscv/insns/vminu_vv.h (+9)
  58. riscv/insns/vminu_vx.h (+10)
  59. riscv/insns/vmnand_mm.h (+2)
  60. riscv/insns/vmnor_mm.h (+2)
  61. riscv/insns/vmor_mm.h (+2)
  62. riscv/insns/vmornot_mm.h (+2)
  63. riscv/insns/vmpopc_m.h (+24)
  64. riscv/insns/vmsbc_vvm.h (+14)
  65. riscv/insns/vmsbc_vxm.h (+14)
  66. riscv/insns/vmsbf_m.h (+34)
  67. riscv/insns/vmseq_vi.h (+5)
  68. riscv/insns/vmseq_vv.h (+6)
  69. riscv/insns/vmseq_vx.h (+5)
  70. riscv/insns/vmsgt_vi.h (+5)
  71. riscv/insns/vmsgt_vx.h (+5)
  72. riscv/insns/vmsgtu_vi.h (+5)
  73. riscv/insns/vmsgtu_vx.h (+5)
  74. riscv/insns/vmsif_m.h (+34)
  75. riscv/insns/vmsle_vi.h (+5)
  76. riscv/insns/vmsle_vv.h (+5)
  77. riscv/insns/vmsle_vx.h (+5)
  78. riscv/insns/vmsleu_vi.h (+5)
  79. riscv/insns/vmsleu_vv.h (+5)
  80. riscv/insns/vmsleu_vx.h (+5)
  81. riscv/insns/vmslt_vv.h (+5)
  82. riscv/insns/vmslt_vx.h (+5)
  83. riscv/insns/vmsltu_vv.h (+5)
  84. riscv/insns/vmsltu_vx.h (+5)
  85. riscv/insns/vmsne_vi.h (+5)
  86. riscv/insns/vmsne_vv.h (+5)
  87. riscv/insns/vmsne_vx.h (+5)
  88. riscv/insns/vmsof_m.h (+32)
  89. riscv/insns/vmul_vv.h (+5)
  90. riscv/insns/vmul_vx.h (+5)
  91. riscv/insns/vmulh_vv.h (+5)
  92. riscv/insns/vmulh_vx.h (+5)
  93. riscv/insns/vmulhsu_vv.h (+37)
  94. riscv/insns/vmulhsu_vx.h (+37)
  95. riscv/insns/vmulhu_vv.h (+5)
  96. riscv/insns/vmulhu_vx.h (+5)
  97. riscv/insns/vmv_s_x.h (+45)
  98. riscv/insns/vmv_v_i.h (+5)
  99. riscv/insns/vmv_v_v.h (+5)
  100. riscv/insns/vmv_v_x.h (+5)

riscv/decode.h (+1041)

File diff suppressed because it is too large

riscv/insns/vaadd_vi.h (+9)

@@ -0,0 +1,9 @@
// vaadd.vi vd, vs2, simm5: averaging add
VRM xrm = P.VU.get_vround_mode();
VI_VI_LOOP
({
int64_t result = simm5 + vs2;
INT_ROUNDING(result, xrm, 1);
result = vzext(result >> 1, sew);
vd = result;
})
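
INT_ROUNDING applies the vxrm rounding mode to the bit that the final ">> 1" discards. A minimal standalone sketch of the round-to-nearest-up case (avg_add_rnu is a hypothetical helper, not part of this commit):

#include <cstdint>
#include <cassert>

// Round-to-nearest-up averaging add: add the discarded LSB back before halving.
static int64_t avg_add_rnu(int64_t a, int64_t b) {
  int64_t sum = a + b;           // assume no int64 overflow in this sketch
  return (sum + (sum & 1)) >> 1; // arithmetic shift keeps the sign
}

int main() {
  assert(avg_add_rnu(3, 4) == 4);    // 3.5 rounds up to 4
  assert(avg_add_rnu(3, 3) == 3);    // exact average is unchanged
  assert(avg_add_rnu(-3, -4) == -3); // (-7 + 1) >> 1 = -3
  return 0;
}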

riscv/insns/vaadd_vv.h (+2)

@@ -0,0 +1,2 @@
// vaadd.vv vd, vs2, vs1
VI_VVX_LOOP_AVG(vs1, +);

riscv/insns/vaadd_vx.h (+2)

@@ -0,0 +1,2 @@
// vaadd.vx vd, vs2, rs1
VI_VVX_LOOP_AVG(rs1, +);

riscv/insns/vadc_vim.h (+11)

@@ -0,0 +1,11 @@
// vadc.vim vd, vs2, simm5
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VI_LOOP
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry;
vd = res;
})

riscv/insns/vadc_vvm.h (+11)

@@ -0,0 +1,11 @@
// vadc.vvm vd, vs2, vs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry;
vd = res;
})

riscv/insns/vadc_vxm.h (+11)

@@ -0,0 +1,11 @@
// vadc.vxm vd, vs2, rs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VX_LOOP
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry;
vd = res;
})
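
All three vadc forms share one pattern: mask both operands down to SEW bits, add the carry bit read from v0 at the element's mask position, and write back the truncated sum. A scalar model of a single element (the local uint128_t mirrors the one used above):

#include <cstdint>

typedef unsigned __int128 uint128_t;

// One vadc element: (a + b + carry_in) mod 2^sew.
static uint64_t adc_elem(uint64_t a, uint64_t b, int carry_in, int sew) {
  const uint128_t op_mask = UINT64_MAX >> (64 - sew);
  uint128_t res = (op_mask & a) + (op_mask & b) + (uint128_t)carry_in;
  return (uint64_t)(res & op_mask); // sum truncated to SEW bits
}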

riscv/insns/vadd_vi.h (+5)

@@ -0,0 +1,5 @@
// vadd.vi vd, simm5, vs2, vm
VI_VI_LOOP
({
vd = simm5 + vs2;
})

riscv/insns/vadd_vv.h (+5)

@@ -0,0 +1,5 @@
// vadd.vv vd, vs1, vs2, vm
VI_VV_LOOP
({
vd = vs1 + vs2;
})

riscv/insns/vadd_vx.h (+5)

@@ -0,0 +1,5 @@
// vadd.vx vd, rs1, vs2, vm
VI_VX_LOOP
({
vd = rs1 + vs2;
})

riscv/insns/vand_vi.h (+5)

@@ -0,0 +1,5 @@
// vand.vi vd, simm5, vs2, vm
VI_VI_LOOP
({
vd = simm5 & vs2;
})

riscv/insns/vand_vv.h (+5)

@@ -0,0 +1,5 @@
// vand.vv vd, vs1, vs2, vm
VI_VV_LOOP
({
vd = vs1 & vs2;
})

riscv/insns/vand_vx.h (+5)

@@ -0,0 +1,5 @@
// vand.vx vd, rs1, vs2, vm
VI_VX_LOOP
({
vd = rs1 & vs2;
})

riscv/insns/vasub_vv.h (+2)

@@ -0,0 +1,2 @@
// vasub.vv vd, vs2, vs1
VI_VVX_LOOP_AVG(vs1, -);

riscv/insns/vasub_vx.h (+2)

@@ -0,0 +1,2 @@
// vasub.vx vd, vs2, rs1
VI_VVX_LOOP_AVG(rs1, -);

riscv/insns/vcompress_vm.h (+41)

@@ -0,0 +1,41 @@
// vcompress vd, vs2, vs1
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
require(P.VU.vstart == 0);
reg_t sew = P.VU.vsew;
reg_t vl = P.VU.vl;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
reg_t pos = 0;
for (reg_t i = P.VU.vstart ; i < vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1;
if (do_mask) {
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, pos) = P.VU.elt<uint8_t>(rs2_num, i);
break;
case e16:
P.VU.elt<uint16_t>(rd_num, pos) = P.VU.elt<uint16_t>(rs2_num, i);
break;
case e32:
P.VU.elt<uint32_t>(rd_num, pos) = P.VU.elt<uint32_t>(rs2_num, i);
break;
default:
P.VU.elt<uint64_t>(rd_num, pos) = P.VU.elt<uint64_t>(rs2_num, i);
break;
}
++pos;
}
}
if (vl > 0 && TAIL_ZEROING) {
uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * (sew >> 3));
memset(tail, 0, (P.VU.vlmax - pos) * (sew >> 3));
}
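
vcompress packs the elements of vs2 whose mask bit in vs1 is set into consecutive low-numbered elements of vd, with pos as the write cursor, then zeroes the tail per the 0.7.1 tail-zeroing rule. A container-level sketch of the same packing:

#include <cstdint>
#include <vector>

// Pack src[i] where mask[i] is set into the low elements of dst; tail stays zero.
static std::vector<uint64_t> compress(const std::vector<uint64_t> &src,
                                      const std::vector<bool> &mask) {
  std::vector<uint64_t> dst(src.size(), 0);
  size_t pos = 0;
  for (size_t i = 0; i < src.size(); ++i)
    if (mask[i])
      dst[pos++] = src[i];
  return dst;
}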

riscv/insns/vdiv_vv.h (+10)

@@ -0,0 +1,10 @@
// vdiv.vv vd, vs2, vs1
VI_VV_LOOP
({
if (vs1 == 0)
vd = -1;
else if (vs2 == -(INT64_C(1) << (sew - 1)) && vs1 == -1)
vd = vs2;
else
vd = vs2 / vs1;
})

riscv/insns/vdiv_vx.h (+10)

@@ -0,0 +1,10 @@
// vdiv.vx vd, vs2, rs1
VI_VX_LOOP
({
if (rs1 == 0)
vd = -1;
else if (vs2 == -(INT64_C(1) << (sew - 1)) && rs1 == -1)
vd = vs2;
else
vd = vs2 / rs1;
})
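
Both signed divides implement the two RISC-V corner cases: a zero divisor yields -1 (all ones) and the most-negative-value / -1 overflow returns the dividend unchanged. The overflow test needs a 64-bit shift (INT64_C), since a plain int "1 << (sew - 1)" is undefined for sew >= 32. As a scalar reference:

#include <cstdint>

// Reference semantics of one vdiv element (dividend = vs2, divisor = vs1 or rs1).
static int64_t div_elem(int64_t dividend, int64_t divisor, int sew) {
  const int64_t int_min = -(INT64_C(1) << (sew - 1)); // most negative SEW-bit value
  if (divisor == 0)
    return -1;               // divide by zero -> all ones
  if (dividend == int_min && divisor == -1)
    return dividend;         // signed overflow -> return the dividend
  return dividend / divisor;
}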

riscv/insns/vdivu_vv.h (+8)

@@ -0,0 +1,8 @@
// vdivu.vv vd, vs2, vs1
VI_VV_ULOOP
({
if (vs1 == 0)
vd = -1;
else
vd = vs2 / vs1;
})

riscv/insns/vdivu_vx.h (+8)

@@ -0,0 +1,8 @@
// vdivu.vx vd, vs2, rs1
VI_VX_ULOOP
({
if (rs1 == 0)
vd = -1;
else
vd = vs2 / rs1;
})

riscv/insns/vdot_vv.h (+5)

@@ -0,0 +1,5 @@
// vdot.vv vd, vs2, vs1
VI_VV_LOOP
({
vd += vs2 * vs1;
})

riscv/insns/vdotu_vv.h (+5)

@@ -0,0 +1,5 @@
// vdotu.vv vd, vs2, vs1
VI_VV_ULOOP
({
vd += vs2 * vs1;
})

riscv/insns/vext_x_v.h (+30)

@@ -0,0 +1,30 @@
// vext_x_v: rd = vs2[rs1]
require(insn.v_vm() == 1);
uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
reg_t rs1 = RS1;
VI_LOOP_BASE
VI_LOOP_END_NO_TAIL_ZERO
if (rs1 >= P.VU.get_vlen() / sew) {
WRITE_RD(0);
} else {
switch(sew) {
case e8:
WRITE_RD(P.VU.elt<uint8_t>(rs2_num, rs1));
break;
case e16:
WRITE_RD(P.VU.elt<uint16_t>(rs2_num, rs1));
break;
case e32:
if (P.get_max_xlen() == 32)
WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1));
else
WRITE_RD(P.VU.elt<uint32_t>(rs2_num, rs1));
break;
case e64:
if (P.get_max_xlen() <= sew)
WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask);
else
WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1));
break;
}
}

riscv/insns/vid_v.h (+30)

@@ -0,0 +1,30 @@
// vid.v vd, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, i) = i;
break;
case e16:
P.VU.elt<uint16_t>(rd_num, i) = i;
break;
case e32:
P.VU.elt<uint32_t>(rd_num, i) = i;
break;
default:
P.VU.elt<uint64_t>(rd_num, i) = i;
break;
}
}
VI_TAIL_ZERO(1);
P.VU.vstart = 0;

riscv/insns/viota_m.h (+52)

@@ -0,0 +1,52 @@
// viota.m vd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
require(P.VU.vstart == 0);
int cnt = 0;
for (reg_t i = 0; i < vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
bool has_one = false;
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
if (vs2_lsb) {
has_one = true;
}
}
bool use_ori = (insn.v_vm() == 0) && !do_mask;
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, i) = use_ori ?
P.VU.elt<uint8_t>(rd_num, i) : cnt;
break;
case e16:
P.VU.elt<uint16_t>(rd_num, i) = use_ori ?
P.VU.elt<uint16_t>(rd_num, i) : cnt;
break;
case e32:
P.VU.elt<uint32_t>(rd_num, i) = use_ori ?
P.VU.elt<uint32_t>(rd_num, i) : cnt;
break;
default:
P.VU.elt<uint64_t>(rd_num, i) = use_ori ?
P.VU.elt<uint64_t>(rd_num, i) : cnt;
break;
}
if (has_one) {
cnt++;
}
}
VI_TAIL_ZERO(1);
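
viota.m writes into each active element the number of set vs2 mask bits strictly before it, i.e. an exclusive prefix sum of the mask; inactive elements (the use_ori case above) keep their previous value. Sketch of the unmasked case:

#include <cstdint>
#include <vector>

// Exclusive prefix sum of mask bits: dst[i] = set bits in m[0..i-1].
static std::vector<uint32_t> iota_m(const std::vector<bool> &m) {
  std::vector<uint32_t> dst(m.size());
  uint32_t cnt = 0;
  for (size_t i = 0; i < m.size(); ++i) {
    dst[i] = cnt;
    cnt += m[i] ? 1 : 0;
  }
  return dst; // iota_m({1,0,1,1,0}) == {0,1,1,2,3}
}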

riscv/insns/vmacc_vv.h (+5)

@@ -0,0 +1,5 @@
// vmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i]
VI_VV_LOOP
({
vd = vs1 * vs2 + vd;
})

riscv/insns/vmacc_vx.h (+5)

@@ -0,0 +1,5 @@
// vmacc.vx: vd[i] = +(x[rs1] * vs2[i]) + vd[i]
VI_VX_LOOP
({
vd = rs1 * vs2 + vd;
})

riscv/insns/vmadc_vim.h (+14)

@@ -0,0 +1,14 @@
// vmadc.vim vd, vs2, simm5
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry;
carry = (res >> sew) & 0x1u;
vd = (vd & ~mmask) | ((carry << mpos) & mmask);
})

riscv/insns/vmadc_vvm.h (+14)

@@ -0,0 +1,14 @@
// vmadc.vvm vd, vs2, vs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry;
carry = (res >> sew) & 0x1u;
vd = (vd & ~mmask) | ((carry << mpos) & mmask);
})

riscv/insns/vmadc_vxm.h (+14)

@@ -0,0 +1,14 @@
// vmadc.vxm vd, vs2, rs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry;
carry = (res >> sew) & 0x1u;
vd = (vd & ~mmask) | ((carry << mpos) & mmask);
})
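
The vmadc forms compute the same widened sum as vadc but keep bit sew, the carry-out, and deposit it at the element's mask position in vd. Scalar sketch of the carry extraction:

#include <cstdint>

typedef unsigned __int128 uint128_t;

// Carry-out of the SEW-bit addition a + b + carry_in.
static int madc_elem(uint64_t a, uint64_t b, int carry_in, int sew) {
  const uint128_t op_mask = UINT64_MAX >> (64 - sew);
  uint128_t res = (op_mask & a) + (op_mask & b) + (uint128_t)carry_in;
  return (int)((res >> sew) & 1); // first bit above the SEW-bit result
}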

riscv/insns/vmadd_vv.h (+5)

@@ -0,0 +1,5 @@
// vmadd: vd[i] = (vd[i] * vs1[i]) + vs2[i]
VI_VV_LOOP
({
vd = vd * vs1 + vs2;
})

riscv/insns/vmadd_vx.h (+5)

@@ -0,0 +1,5 @@
// vmadd: vd[i] = (vd[i] * x[rs1]) + vs2[i]
VI_VX_LOOP
({
vd = vd * rs1 + vs2;
})

riscv/insns/vmand_mm.h (+2)

@@ -0,0 +1,2 @@
// vmand.mm vd, vs2, vs1
VI_LOOP_MASK(vs2 & vs1);

riscv/insns/vmandnot_mm.h (+2)

@@ -0,0 +1,2 @@
// vmandnot.mm vd, vs2, vs1
VI_LOOP_MASK(vs2 & ~vs1);

riscv/insns/vmax_vv.h (+10)

@@ -0,0 +1,10 @@
// vmax.vv vd, vs2, vs1, vm # Vector-vector
VI_VV_LOOP
({
if (vs1 >= vs2) {
vd = vs1;
} else {
vd = vs2;
}
})

riscv/insns/vmax_vx.h (+10)

@@ -0,0 +1,10 @@
// vmax.vx vd, vs2, rs1, vm # vector-scalar
VI_VX_LOOP
({
if (rs1 >= vs2) {
vd = rs1;
} else {
vd = vs2;
}
})

riscv/insns/vmaxu_vv.h (+9)

@@ -0,0 +1,9 @@
// vmaxu.vv vd, vs2, vs1, vm # Vector-vector
VI_VV_ULOOP
({
if (vs1 >= vs2) {
vd = vs1;
} else {
vd = vs2;
}
})

riscv/insns/vmaxu_vx.h (+9)

@@ -0,0 +1,9 @@
// vmaxu.vx vd, vs2, rs1, vm # vector-scalar
VI_VX_ULOOP
({
if (rs1 >= vs2) {
vd = rs1;
} else {
vd = vs2;
}
})

riscv/insns/vmerge_vim.h (+9)

@@ -0,0 +1,9 @@
// vmerge.vim vd, vs2, simm5
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
int mpos = (P.VU.vmlen * i) % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? simm5 : vs2;
})

riscv/insns/vmerge_vvm.h (+9)

@@ -0,0 +1,9 @@
// vmerge.vvm vd, vs2, vs1
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
int mpos = (P.VU.vmlen * i) % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? vs1 : vs2;
})

riscv/insns/vmerge_vxm.h (+9)

@@ -0,0 +1,9 @@
// vmerge.vxm vd, vs2, rs1
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
int mpos = (P.VU.vmlen * i) % 64;
bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
vd = use_first ? rs1 : vs2;
})
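
The three merge forms (and the vmv.v.* moves at the end of this diff) all run through VI_VVXI_MERGE_LOOP: each destination element takes the first operand where the v0 mask bit is set and the vs2 element otherwise. Vector-level sketch:

#include <cstdint>
#include <vector>

// dst[i] = mask[i] ? a[i] : b[i], the vmerge.vvm selection rule.
static std::vector<uint64_t> merge(const std::vector<bool> &mask,
                                   const std::vector<uint64_t> &a,
                                   const std::vector<uint64_t> &b) {
  std::vector<uint64_t> dst(a.size());
  for (size_t i = 0; i < a.size(); ++i)
    dst[i] = mask[i] ? a[i] : b[i];
  return dst;
}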

riscv/insns/vmfeq_vf.h (+5)

@@ -0,0 +1,5 @@
// vmfeq.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, rs1);
})

riscv/insns/vmfeq_vv.h (+5)

@@ -0,0 +1,5 @@
// vmfeq.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, vs1);
})

riscv/insns/vmfge_vf.h (+5)

@@ -0,0 +1,5 @@
// vmfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le_quiet(rs1, vs2);
})

riscv/insns/vmfgt_vf.h (+5)

@@ -0,0 +1,5 @@
// vmfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt_quiet(rs1, vs2);
})

riscv/insns/vmfirst_m.h (+20)

@@ -0,0 +1,20 @@
// vmfirst.m rd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
require(P.VU.vstart == 0);
reg_t pos = -1;
for (reg_t i=P.VU.vstart; i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP()
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
if (vs2_lsb) {
pos = i;
break;
}
}
P.VU.vstart = 0;
WRITE_RD(pos);
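
vmfirst returns the index of the first set mask bit among active elements, or -1 when none is set; pos is a reg_t, so the -1 writes back as all ones. Equivalent scalar loop:

#include <cstdint>
#include <vector>

// Index of the first set bit of m, or -1 if none.
static int64_t first_set(const std::vector<bool> &m) {
  for (size_t i = 0; i < m.size(); ++i)
    if (m[i])
      return (int64_t)i;
  return -1;
}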

riscv/insns/vmfle_vf.h (+5)

@@ -0,0 +1,5 @@
// vmfle.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, rs1);
})

riscv/insns/vmfle_vv.h (+5)

@@ -0,0 +1,5 @@
// vmfle.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_le_quiet(vs2, vs1);
})

riscv/insns/vmflt_vf.h (+5)

@@ -0,0 +1,5 @@
// vmflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt_quiet(vs2, rs1);
})

riscv/insns/vmflt_vv.h (+5)

@@ -0,0 +1,5 @@
// vmflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_lt_quiet(vs2, vs1);
})

riscv/insns/vmfne_vf.h (+5)

@@ -0,0 +1,5 @@
// vmfne.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, rs1);
})

riscv/insns/vmfne_vv.h (+5)

@@ -0,0 +1,5 @@
// vmfne.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, vs1);
})

riscv/insns/vmford_vf.h (+5)

@@ -0,0 +1,5 @@
// vmford.vf vd, vs2, rs1, vm
VI_VFP_LOOP_CMP
({
res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
})

riscv/insns/vmford_vv.h (+5)

@@ -0,0 +1,5 @@
// vmford.vv vd, vs2, vs1, vm
VI_VFP_LOOP_CMP
({
res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
})

riscv/insns/vmin_vv.h (+11)

@@ -0,0 +1,11 @@
// vmin.vv vd, vs2, vs1, vm # Vector-vector
VI_VV_LOOP
({
if (vs1 <= vs2) {
vd = vs1;
} else {
vd = vs2;
}
})

riscv/insns/vmin_vx.h (+11)

@@ -0,0 +1,11 @@
// vmin.vx vd, vs2, rs1, vm # vector-scalar
VI_VX_LOOP
({
if (rs1 <= vs2) {
vd = rs1;
} else {
vd = vs2;
}
})

riscv/insns/vminu_vv.h (+9)

@@ -0,0 +1,9 @@
// vminu.vv vd, vs2, vs1, vm # Vector-vector
VI_VV_ULOOP
({
if (vs1 <= vs2) {
vd = vs1;
} else {
vd = vs2;
}
})

riscv/insns/vminu_vx.h (+10)

@@ -0,0 +1,10 @@
// vminu.vx vd, vs2, rs1, vm # vector-scalar
VI_VX_ULOOP
({
if (rs1 <= vs2) {
vd = rs1;
} else {
vd = vs2;
}
})

riscv/insns/vmnand_mm.h (+2)

@@ -0,0 +1,2 @@
// vmnand.mm vd, vs2, vs1
VI_LOOP_MASK(~(vs2 & vs1));

riscv/insns/vmnor_mm.h (+2)

@@ -0,0 +1,2 @@
// vmnor.mm vd, vs2, vs1
VI_LOOP_MASK(~(vs2 | vs1));

riscv/insns/vmor_mm.h (+2)

@@ -0,0 +1,2 @@
// vmor.mm vd, vs2, vs1
VI_LOOP_MASK(vs2 | vs1);

riscv/insns/vmornot_mm.h (+2)

@@ -0,0 +1,2 @@
// vmornot.mm vd, vs2, vs1
VI_LOOP_MASK(vs2 | ~vs1);

riscv/insns/vmpopc_m.h (+24)

@@ -0,0 +1,24 @@
// vmpopc rd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
require(P.VU.vstart == 0);
reg_t popcount = 0;
for (reg_t i=P.VU.vstart; i<vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 32;
const int mpos = (mlen * i) % 32;
bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
if (insn.v_vm() == 1) {
popcount += vs2_lsb;
} else {
bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1;
popcount += (vs2_lsb && do_mask);
}
}
P.VU.vstart = 0;
WRITE_RD(popcount);
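
This file walks the mask in 32-bit chunks (uint32_t, / 32, % 32) where the rest of the commit uses 64-bit chunks; both visit the same bits. The operation itself is a popcount of vs2's mask bits, gated by v0 only when vm == 0:

#include <cstdint>
#include <vector>

// Count set bits of vs2, restricted to active elements when masked.
static uint64_t mask_popcount(const std::vector<bool> &vs2,
                              const std::vector<bool> &v0, bool masked) {
  uint64_t popcount = 0;
  for (size_t i = 0; i < vs2.size(); ++i)
    if (!masked || v0[i])
      popcount += vs2[i] ? 1 : 0;
  return popcount;
}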

riscv/insns/vmsbc_vvm.h (+14)

@@ -0,0 +1,14 @@
// vmsbc.vvm vd, vs2, vs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & vs2) - (op_mask & vs1) - carry;
carry = (res >> sew) & 0x1u;
vd = (vd & ~mmask) | ((carry << mpos) & mmask);
})

riscv/insns/vmsbc_vxm.h (+14)

@@ -0,0 +1,14 @@
// vmsbc.vxm vd, vs2, rs1
require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
uint64_t carry = (v0 >> mpos) & 0x1;
uint128_t res = (op_mask & vs2) - (op_mask & rs1) - carry;
carry = (res >> sew) & 0x1u;
vd = (vd & ~mmask) | ((carry << mpos) & mmask);
})

riscv/insns/vmsbf_m.h (+34)

@@ -0,0 +1,34 @@
// vmsbf.m vd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart; i < vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
uint64_t res = 0;
if (!has_one && !vs2_lsb) {
res = 1;
} else if (!has_one && vs2_lsb) {
has_one = true;
}
vd = (vd & ~mmask) | ((res << mpos) & mmask);
}
}
VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
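
vmsbf (set-before-first), vmsif (set-including-first, below), and vmsof (set-only-first) differ only at the element holding the first set bit; past it, every output bit is 0. A compact sketch of all three side by side (the output vectors are assumed pre-sized to m.size()):

#include <vector>

// Fill sbf/sif/sof from mask m in one pass.
static void sbf_sif_sof(const std::vector<bool> &m, std::vector<bool> &sbf,
                        std::vector<bool> &sif, std::vector<bool> &sof) {
  bool has_one = false;
  for (size_t i = 0; i < m.size(); ++i) {
    sbf[i] = !has_one && !m[i]; // set strictly before the first 1
    sif[i] = !has_one;          // set up to and including the first 1
    sof[i] = !has_one && m[i];  // set only at the first 1
    if (m[i])
      has_one = true;
  }
}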

riscv/insns/vmseq_vi.h (+5)

@@ -0,0 +1,5 @@
// vmseq.vi vd, vs2, simm5
VI_VI_LOOP_CMP
({
res = simm5 == vs2;
})

riscv/insns/vmseq_vv.h (+6)

@@ -0,0 +1,6 @@
// vmseq.vv vd, vs2, vs1
VI_VV_LOOP_CMP
({
res = vs2 == vs1;
})

riscv/insns/vmseq_vx.h (+5)

@@ -0,0 +1,5 @@
// vmseq.vx vd, vs2, rs1
VI_VX_LOOP_CMP
({
res = rs1 == vs2;
})

riscv/insns/vmsgt_vi.h (+5)

@@ -0,0 +1,5 @@
// vmsgt.vi vd, vs2, simm5
VI_VI_LOOP_CMP
({
res = vs2 > simm5;
})

riscv/insns/vmsgt_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsgt.vx vd, vs2, rs1
VI_VX_LOOP_CMP
({
res = vs2 > rs1;
})

riscv/insns/vmsgtu_vi.h (+5)

@@ -0,0 +1,5 @@
// vmsgtu.vi vd, vs2, zimm5
VI_VI_ULOOP_CMP
({
res = vs2 > simm5;
})

riscv/insns/vmsgtu_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsgtu.vx vd, vs2, rs1
VI_VX_ULOOP_CMP
({
res = vs2 > rs1;
})

riscv/insns/vmsif_m.h (+34)

@@ -0,0 +1,34 @@
// vmsif.m vd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart ; i < vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
uint64_t res = 0;
if (!has_one && !vs2_lsb) {
res = 1;
} else if (!has_one && vs2_lsb) {
has_one = true;
res = 1;
}
vd = (vd & ~mmask) | ((res << mpos) & mmask);
}
}
VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;

riscv/insns/vmsle_vi.h (+5)

@@ -0,0 +1,5 @@
// vmsle.vi vd, vs2, simm5
VI_VI_LOOP_CMP
({
res = vs2 <= simm5;
})

riscv/insns/vmsle_vv.h (+5)

@@ -0,0 +1,5 @@
// vmsle.vv vd, vs2, vs1
VI_VV_LOOP_CMP
({
res = vs2 <= vs1;
})

riscv/insns/vmsle_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsle.vx vd, vs2, rs1
VI_VX_LOOP_CMP
({
res = vs2 <= rs1;
})

riscv/insns/vmsleu_vi.h (+5)

@@ -0,0 +1,5 @@
// vmsleu.vi vd, vs2, zimm5
VI_VI_ULOOP_CMP
({
res = vs2 <= simm5;
})

riscv/insns/vmsleu_vv.h (+5)

@@ -0,0 +1,5 @@
// vmsleu.vv vd, vs2, vs1
VI_VV_ULOOP_CMP
({
res = vs2 <= vs1;
})

riscv/insns/vmsleu_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsleu.vx vd, vs2, rs1
VI_VX_ULOOP_CMP
({
res = vs2 <= rs1;
})

riscv/insns/vmslt_vv.h (+5)

@@ -0,0 +1,5 @@
// vmslt.vv vd, vs2, vs1
VI_VV_LOOP_CMP
({
res = vs2 < vs1;
})

riscv/insns/vmslt_vx.h (+5)

@@ -0,0 +1,5 @@
// vmslt.vx vd, vs2, rs1
VI_VX_LOOP_CMP
({
res = vs2 < rs1;
})

riscv/insns/vmsltu_vv.h (+5)

@@ -0,0 +1,5 @@
// vmsltu.vv vd, vs2, vs1
VI_VV_ULOOP_CMP
({
res = vs2 < vs1;
})

riscv/insns/vmsltu_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsltu.vx vd, vs2, rs1
VI_VX_ULOOP_CMP
({
res = vs2 < rs1;
})

riscv/insns/vmsne_vi.h (+5)

@@ -0,0 +1,5 @@
// vmsne.vi vd, vs2, simm5
VI_VI_LOOP_CMP
({
res = vs2 != simm5;
})

riscv/insns/vmsne_vv.h (+5)

@@ -0,0 +1,5 @@
// vmsne.vv vd, vs2, vs1
VI_VV_LOOP_CMP
({
res = vs2 != vs1;
})

riscv/insns/vmsne_vx.h (+5)

@@ -0,0 +1,5 @@
// vmsne.vx vd, vs2, rs1
VI_VX_LOOP_CMP
({
res = vs2 != rs1;
})

riscv/insns/vmsof_m.h (+32)

@@ -0,0 +1,32 @@
// vmsof.m vd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require(!P.VU.vill);
reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart ; i < vl; ++i) {
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
uint64_t res = 0;
if (!has_one && vs2_lsb) {
has_one = true;
res = 1;
}
vd = (vd & ~mmask) | ((res << mpos) & mmask);
}
}
VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;

riscv/insns/vmul_vv.h (+5)

@@ -0,0 +1,5 @@
// vmul.vv vd, vs2, vs1
VI_VV_LOOP
({
vd = vs2 * vs1;
})

riscv/insns/vmul_vx.h (+5)

@@ -0,0 +1,5 @@
// vmul.vx vd, vs2, rs1
VI_VX_LOOP
({
vd = vs2 * rs1;
})

riscv/insns/vmulh_vv.h (+5)

@@ -0,0 +1,5 @@
// vmulh.vv vd, vs2, vs1
VI_VV_LOOP
({
vd = ((int128_t)vs2 * vs1) >> sew;
})

riscv/insns/vmulh_vx.h (+5)

@@ -0,0 +1,5 @@
// vmulh.vx vd, vs2, rs1
VI_VX_LOOP
({
vd = ((int128_t)vs2 * rs1) >> sew;
})

riscv/insns/vmulhsu_vv.h (+37)

@@ -0,0 +1,37 @@
// vmulhsu.vv vd, vs2, vs1
VI_LOOP_BASE
switch(sew) {
case e8: {
auto &vd = P.VU.elt<int8_t>(rd_num, i);
auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew;
break;
}
case e16: {
auto &vd = P.VU.elt<int16_t>(rd_num, i);
auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
auto vs1 = P.VU.elt<uint16_t>(rs1_num, i);
vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew;
break;
}
case e32: {
auto &vd = P.VU.elt<int32_t>(rd_num, i);
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
auto vs1 = P.VU.elt<uint32_t>(rs1_num, i);
vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew;
break;
}
default: {
auto &vd = P.VU.elt<int64_t>(rd_num, i);
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
auto vs1 = P.VU.elt<uint64_t>(rs1_num, i);
vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew;
break;
}
}
VI_LOOP_END
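
vmulhsu takes the high SEW bits of a signed x unsigned product. The cast pattern above is sound because unsigned conversion is modular: the product is exact modulo 2^(2*SEW), and the logical right shift recovers the true high half of the two's-complement result. Reference for e64, using the GCC/Clang __int128 extension:

#include <cstdint>

// High 64 bits of (signed a) * (unsigned b).
static int64_t mulhsu64(int64_t a, uint64_t b) {
  return (int64_t)(((__int128)a * (unsigned __int128)b) >> 64);
}
// mulhsu64(-1, 2) == -1: the full product is -2, whose high half is all ones.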

riscv/insns/vmulhsu_vx.h (+37)

@@ -0,0 +1,37 @@
// vmulhsu.vx vd, vs2, rs1
VI_LOOP_BASE
switch(sew) {
case e8: {
auto &vd = P.VU.elt<int8_t>(rd_num, i);
auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
uint8_t rs1 = RS1;
vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew;
break;
}
case e16: {
auto &vd = P.VU.elt<int16_t>(rd_num, i);
auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
uint16_t rs1 = RS1;
vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew;
break;
}
case e32: {
auto &vd = P.VU.elt<int32_t>(rd_num, i);
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
uint32_t rs1 = RS1;
vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew;
break;
}
default: {
auto &vd = P.VU.elt<int64_t>(rd_num, i);
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
uint64_t rs1 = RS1;
vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew;
break;
}
}
VI_LOOP_END

riscv/insns/vmulhu_vv.h (+5)

@@ -0,0 +1,5 @@
// vmulhu.vv vd, vs2, vs1
VI_VV_ULOOP
({
vd = ((uint128_t)vs2 * vs1) >> sew;
})

riscv/insns/vmulhu_vx.h (+5)

@@ -0,0 +1,5 @@
// vmulhu.vx vd, vs2, rs1
VI_VX_ULOOP
({
vd = ((uint128_t)vs2 * rs1) >> sew;
})

riscv/insns/vmv_s_x.h (+45)

@@ -0,0 +1,45 @@
// vmv.s.x vd, rs1: vd[0] = x[rs1]
require(insn.v_vm() == 1);
require(P.VU.vsew == e8 || P.VU.vsew == e16 ||
P.VU.vsew == e32 || P.VU.vsew == e64);
reg_t vl = P.VU.vl;
if (vl > 0) {
reg_t rd_num = insn.rd();
reg_t sew = P.VU.vsew;
switch(sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, 0) = RS1;
break;
case e16:
P.VU.elt<uint16_t>(rd_num, 0) = RS1;
break;
case e32:
P.VU.elt<uint32_t>(rd_num, 0) = RS1;
break;
default:
P.VU.elt<uint64_t>(rd_num, 0) = RS1;
break;
}
const reg_t max_len = P.VU.VLEN / sew;
for (reg_t i = 1; i < max_len; ++i) {
switch(sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, i) = 0;
break;
case e16:
P.VU.elt<uint16_t>(rd_num, i) = 0;
break;
case e32:
P.VU.elt<uint32_t>(rd_num, i) = 0;
break;
default:
P.VU.elt<uint64_t>(rd_num, i) = 0;
break;
}
}
vl = 0;
}
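
Under the 0.7.1 tail-zeroing rule, vmv.s.x writes x[rs1] to element 0 and zeroes every remaining element of vd; the trailing "vl = 0" only updates the local copy and has no architectural effect. Sketch (assuming vl > 0):

#include <cstdint>
#include <vector>

// v0.7.1 vmv.s.x: element 0 takes the scalar, the rest of the register is zeroed.
static void vmv_s_x(std::vector<uint64_t> &vd, uint64_t rs1) {
  if (vd.empty()) return;
  vd.assign(vd.size(), 0);
  vd[0] = rs1;
}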

riscv/insns/vmv_v_i.h (+5)

@@ -0,0 +1,5 @@
// vmv.v.i vd, simm5
VI_VVXI_MERGE_LOOP
({
vd = simm5;
})

riscv/insns/vmv_v_v.h (+5)

@@ -0,0 +1,5 @@
// vmv.v.v vd, vs1
VI_VVXI_MERGE_LOOP
({
vd = vs1;
})

riscv/insns/vmv_v_x.h (+5)

@@ -0,0 +1,5 @@
// vmv.v.x vd, rs1
VI_VVXI_MERGE_LOOP
({
vd = rs1;
})

Some files were not shown because too many files changed in this diff
