Browse Source

rvp: add packed instructions for rv64 - part1

Add the following categories

  * RD-only element-wise instructions (_w suffix only)
  * packed add/sub instructions (_w suffix only)
  * packed cross instructions (_wx suffix only)
  * packed accumulation instructions (_ws suffix only)
  * packed sign-extend and saturating instructions
  * packed shift instructions (_w/_ws suffix and sha/shar)
  * packed compare instructions (_w suffix only)
  * packed pack instructions (_h/_w suffix only)
  * rev_rv32/rev/rev16
pull/2246/head
Jason 4 months ago
committed by Chih-Min Chao
parent
commit
71b7473b5f
  1. 5
      riscv/insns/paadd_w.h
  2. 5
      riscv/insns/paaddu_w.h
  3. 7
      riscv/insns/paas_wx.h
  4. 5
      riscv/insns/padd_w.h
  5. 5
      riscv/insns/padd_ws.h
  6. 7
      riscv/insns/pas_wx.h
  7. 7
      riscv/insns/pasa_wx.h
  8. 5
      riscv/insns/pasub_w.h
  9. 5
      riscv/insns/pasubu_w.h
  10. 5
      riscv/insns/pli_w.h
  11. 5
      riscv/insns/plui_w.h
  12. 5
      riscv/insns/pmax_w.h
  13. 5
      riscv/insns/pmaxu_w.h
  14. 5
      riscv/insns/pmin_w.h
  15. 5
      riscv/insns/pminu_w.h
  16. 5
      riscv/insns/pmseq_w.h
  17. 5
      riscv/insns/pmslt_w.h
  18. 5
      riscv/insns/pmsltu_w.h
  19. 1
      riscv/insns/ppaire_h.h
  20. 2
      riscv/insns/ppaireo_w.h
  21. 2
      riscv/insns/ppairo_w.h
  22. 2
      riscv/insns/ppairoe_w.h
  23. 6
      riscv/insns/predsum_ws.h
  24. 6
      riscv/insns/predsumu_ws.h
  25. 7
      riscv/insns/psa_wx.h
  26. 6
      riscv/insns/psadd_w.h
  27. 6
      riscv/insns/psaddu_w.h
  28. 9
      riscv/insns/psas_wx.h
  29. 5
      riscv/insns/psati_w.h
  30. 5
      riscv/insns/psext_w_b.h
  31. 5
      riscv/insns/psext_w_h.h
  32. 5
      riscv/insns/psh1add_w.h
  33. 5
      riscv/insns/psll_ws.h
  34. 5
      riscv/insns/pslli_w.h
  35. 5
      riscv/insns/psra_ws.h
  36. 5
      riscv/insns/psrai_w.h
  37. 5
      riscv/insns/psrari_w.h
  38. 5
      riscv/insns/psrl_ws.h
  39. 5
      riscv/insns/psrli_w.h
  40. 9
      riscv/insns/pssa_wx.h
  41. 5
      riscv/insns/pssh1sadd_w.h
  42. 13
      riscv/insns/pssha_ws.h
  43. 13
      riscv/insns/psshar_ws.h
  44. 5
      riscv/insns/psslai_w.h
  45. 6
      riscv/insns/pssub_w.h
  46. 6
      riscv/insns/pssubu_w.h
  47. 5
      riscv/insns/psub_w.h
  48. 5
      riscv/insns/pusati_w.h
  49. 3
      riscv/insns/rev.h
  50. 4
      riscv/insns/rev16.h
  51. 3
      riscv/insns/rev_rv32.h
  52. 11
      riscv/insns/sha.h
  53. 11
      riscv/insns/shar.h
  54. 20
      riscv/overlap_list.h
  55. 53
      riscv/riscv.mk.in

5
riscv/insns/paadd_w.h

@ -0,0 +1,5 @@
// paadd_w: halving add, p_rd = (p_rs1 + p_rs2) >> 1, evaluated for each
// element handed to the body by P_RD_RS1_RS2_LOOP(32, 32, 32, ...).
// The loop macro is defined elsewhere; presumably it walks 32-bit elements.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  // Sum in 64 bits so the carry out of bit 31 is still there for the shift.
  const auto wide_sum = (int64_t)p_rs1 + p_rs2;
  p_rd = wide_sum >> 1;
})

5
riscv/insns/paaddu_w.h

@ -0,0 +1,5 @@
// paaddu_w: halving add using a uint64_t intermediate, evaluated for each
// element handed to the body by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...).
// The loop macro is defined elsewhere; presumably the U variant is unsigned.
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  // Sum in 64 bits so the carry out of bit 31 is still there for the shift.
  const auto wide_sum = (uint64_t)p_rs1 + p_rs2;
  p_rd = wide_sum >> 1;
})

7
riscv/insns/paas_wx.h

@ -0,0 +1,7 @@
// paas_wx: hands two bodies to P_CROSS_LOOP(32, ...): a halving add and a
// halving subtract. Which element each body applies to, and how the operands
// are crossed, is decided by P_CROSS_LOOP (not visible here).
require_rv64;
P_CROSS_LOOP(32, {
  // First body: halving add with a 64-bit intermediate.
  const auto wide_sum = (int64_t)p_rs1 + p_rs2;
  p_rd = wide_sum >> 1;
}, {
  // Second body: halving subtract with a 64-bit intermediate.
  const auto wide_diff = (int64_t)p_rs1 - p_rs2;
  p_rd = wide_diff >> 1;
})

5
riscv/insns/padd_w.h

@ -0,0 +1,5 @@
// padd_w: plain add, p_rd = p_rs1 + p_rs2, for each element supplied by
// P_RD_RS1_RS2_LOOP(32, 32, 32, ...). No widening or saturation is applied.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  p_rd = p_rs1 + p_rs2;
})

5
riscv/insns/padd_ws.h

@ -0,0 +1,5 @@
// padd_ws: adds the same value, P_FIELD(RS2, 0, 32), to every p_rs1 element
// supplied by P_RD_RS1_LOOP(32, 32, ...).
// Presumably P_FIELD(RS2, 0, 32) is the lowest 32-bit field of rs2 - confirm.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  p_rd = p_rs1 + P_FIELD(RS2, 0, 32);
})

7
riscv/insns/pas_wx.h

@ -0,0 +1,7 @@
// pas_wx: hands two bodies to P_CROSS_LOOP(32, ...): an add and a subtract.
// Which element each body applies to, and how the operands are crossed, is
// decided by P_CROSS_LOOP (not visible here).
require_rv64;
P_CROSS_LOOP(32, {
  p_rd = p_rs1 + p_rs2;   // first body: add
}, {
  p_rd = p_rs1 - p_rs2;   // second body: subtract
})

7
riscv/insns/pasa_wx.h

@ -0,0 +1,7 @@
// pasa_wx: hands two bodies to P_CROSS_LOOP(32, ...): a halving subtract and
// a halving add. Which element each body applies to, and how the operands
// are crossed, is decided by P_CROSS_LOOP (not visible here).
//
// The intermediate is int64_t, matching the sibling signed averaging
// instructions (paas_wx, paadd_w, pasub_w). The previous uint64_t cast gave
// the same low 32 bits but turned the shift into a logical one, which is only
// correct as long as the destination element is exactly 32 bits wide.
require_rv64;
P_CROSS_LOOP(32, {
  // First body: halving subtract with a signed 64-bit intermediate.
  p_rd = ((int64_t)p_rs1 - p_rs2) >> 1;
}, {
  // Second body: halving add with a signed 64-bit intermediate.
  p_rd = ((int64_t)p_rs1 + p_rs2) >> 1;
})

5
riscv/insns/pasub_w.h

@ -0,0 +1,5 @@
// pasub_w: halving subtract, p_rd = (p_rs1 - p_rs2) >> 1, for each element
// supplied by P_RD_RS1_RS2_LOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  // Subtract in 64 bits so the borrow is not lost before the shift.
  const auto wide_diff = (int64_t)p_rs1 - p_rs2;
  p_rd = wide_diff >> 1;
})

5
riscv/insns/pasubu_w.h

@ -0,0 +1,5 @@
// pasubu_w: halving subtract using a uint64_t intermediate, for each element
// supplied by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  // Subtract in 64 bits (modulo 2^64) and then halve.
  const auto wide_diff = (uint64_t)p_rs1 - p_rs2;
  p_rd = wide_diff >> 1;
})

5
riscv/insns/pli_w.h

@ -0,0 +1,5 @@
// pli_w: writes the immediate returned by insn.p_imm10csl(), sign-extended
// from 10 bits, to every element visited by P_RD_LOOP(32, ...).
require_rv64;
P_RD_LOOP(32, {
  const auto imm10 = insn.p_imm10csl();
  // Bit 9 (0x200) acts as the sign bit: set bits 10..31 when it is set.
  if (imm10 & 0x200) {
    p_rd = 0xfffffc00 | imm10;
  } else {
    p_rd = imm10;
  }
})

5
riscv/insns/plui_w.h

@ -0,0 +1,5 @@
// plui_w: writes insn.p_imm10csrw() unchanged to every element visited by
// P_RD_LOOP(32, ...). Any positioning of the immediate within the element is
// done inside p_imm10csrw() (not visible here).
require_rv64;
P_RD_LOOP(32, {
  p_rd = insn.p_imm10csrw();
})

5
riscv/insns/pmax_w.h

@ -0,0 +1,5 @@
// pmax_w: element-wise maximum of p_rs1 and p_rs2 over the elements supplied
// by P_RD_RS1_RS2_LOOP(32, 32, 32, ...). On a tie p_rs2 is selected.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 > p_rs2) {
    p_rd = p_rs1;
  } else {
    p_rd = p_rs2;
  }
})

5
riscv/insns/pmaxu_w.h

@ -0,0 +1,5 @@
// pmaxu_w: element-wise maximum of p_rs1 and p_rs2 over the elements supplied
// by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...) (presumably the unsigned variant).
// On a tie p_rs2 is selected.
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  if (p_rs1 > p_rs2) {
    p_rd = p_rs1;
  } else {
    p_rd = p_rs2;
  }
})

5
riscv/insns/pmin_w.h

@ -0,0 +1,5 @@
// pmin_w: element-wise minimum of p_rs1 and p_rs2 over the elements supplied
// by P_RD_RS1_RS2_LOOP(32, 32, 32, ...). On a tie p_rs2 is selected.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 < p_rs2) {
    p_rd = p_rs1;
  } else {
    p_rd = p_rs2;
  }
})

5
riscv/insns/pminu_w.h

@ -0,0 +1,5 @@
// pminu_w: element-wise minimum of p_rs1 and p_rs2 over the elements supplied
// by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...) (presumably the unsigned variant).
// On a tie p_rs2 is selected.
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  if (p_rs1 < p_rs2) {
    p_rd = p_rs1;
  } else {
    p_rd = p_rs2;
  }
})

5
riscv/insns/pmseq_w.h

@ -0,0 +1,5 @@
// pmseq_w: element-wise equality mask over the elements supplied by
// P_RD_RS1_RS2_LOOP(32, 32, 32, ...): -1 (all ones) when equal, else 0.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 == p_rs2) {
    p_rd = -1;
  } else {
    p_rd = 0;
  }
})

5
riscv/insns/pmslt_w.h

@ -0,0 +1,5 @@
// pmslt_w: element-wise less-than mask over the elements supplied by
// P_RD_RS1_RS2_LOOP(32, 32, 32, ...): -1 (all ones) when p_rs1 < p_rs2,
// else 0.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 < p_rs2) {
    p_rd = -1;
  } else {
    p_rd = 0;
  }
})

5
riscv/insns/pmsltu_w.h

@ -0,0 +1,5 @@
// pmsltu_w: element-wise less-than mask over the elements supplied by
// P_RD_RS1_RS2_ULOOP(32, 32, 32, ...) (presumably the unsigned variant):
// -1 (all ones) when p_rs1 < p_rs2, else 0.
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  if (p_rs1 < p_rs2) {
    p_rd = -1;
  } else {
    p_rd = 0;
  }
})

1
riscv/insns/ppaire_h.h

@ -0,0 +1 @@
// ppaire_h: delegates entirely to P_PACK with width 16 and selectors (0, 0).
// NOTE(review): judging by the sibling ppair*_w files, 0 presumably selects
// the even and 1 the odd element - confirm against the P_PACK definition.
// Unlike the _w variants, there is no require_rv64 gate here.
P_PACK(16, 0, 0);

2
riscv/insns/ppaireo_w.h

@ -0,0 +1,2 @@
// ppaireo_w: delegates entirely to P_PACK with width 32 and selectors (0, 1).
// NOTE(review): the "eo" in the name suggests 0 = even, 1 = odd element -
// confirm against the P_PACK definition.
require_rv64;
P_PACK(32, 0, 1);

2
riscv/insns/ppairo_w.h

@ -0,0 +1,2 @@
// ppairo_w: delegates entirely to P_PACK with width 32 and selectors (1, 1).
// NOTE(review): the "o" in the name suggests 1 = odd element for both
// sources - confirm against the P_PACK definition.
require_rv64;
P_PACK(32, 1, 1);

2
riscv/insns/ppairoe_w.h

@ -0,0 +1,2 @@
// ppairoe_w: delegates entirely to P_PACK with width 32 and selectors (1, 0).
// NOTE(review): the "oe" in the name suggests 1 = odd, 0 = even element -
// confirm against the P_PACK definition.
require_rv64;
P_PACK(32, 1, 0);

6
riscv/insns/predsum_ws.h

@ -0,0 +1,6 @@
// predsum_ws: starts an accumulator from RS2 and adds sext_xlen(p_rs1) for
// every element supplied by P_RS1_LOOP_BASE(32) / P_RS1_PARAMS(32).
// Presumably P_RD_LOOP_END() closes the loop and writes rd_tmp back to rd -
// confirm against the macro definitions.
//
// The stray line-continuation backslash after the rd_tmp declaration has been
// removed: this is not inside a macro definition, so it only spliced the next
// line onto this one and would silently swallow that line if a // comment
// were ever appended here.
require_rv64;
reg_t rd_tmp = RS2;
P_RS1_LOOP_BASE(32)
P_RS1_PARAMS(32)
  rd_tmp += sext_xlen(p_rs1);
P_RD_LOOP_END()

6
riscv/insns/predsumu_ws.h

@ -0,0 +1,6 @@
// predsumu_ws: starts an accumulator from RS2 and adds zext_xlen(p_rs1) for
// every element supplied by P_RS1_LOOP_BASE(32) / P_RS1_UPARAMS(32).
// Presumably P_RD_LOOP_END() closes the loop and writes rd_tmp back to rd -
// confirm against the macro definitions.
//
// The stray line-continuation backslash after the rd_tmp declaration has been
// removed: this is not inside a macro definition, so it only spliced the next
// line onto this one and would silently swallow that line if a // comment
// were ever appended here.
require_rv64;
reg_t rd_tmp = RS2;
P_RS1_LOOP_BASE(32)
P_RS1_UPARAMS(32)
  rd_tmp += zext_xlen(p_rs1);
P_RD_LOOP_END()

7
riscv/insns/psa_wx.h

@ -0,0 +1,7 @@
// psa_wx: hands two bodies to P_CROSS_LOOP(32, ...): a subtract and an add.
// Which element each body applies to, and how the operands are crossed, is
// decided by P_CROSS_LOOP (not visible here).
require_rv64;
P_CROSS_LOOP(32, {
  p_rd = p_rs1 - p_rs2;   // first body: subtract
}, {
  p_rd = p_rs1 + p_rs2;   // second body: add
})

6
riscv/insns/psadd_w.h

@ -0,0 +1,6 @@
// psadd_w: saturating add via sat_add<int32_t, uint32_t> for each element
// supplied by P_RD_RS1_RS2_LOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  // The flag is handed to sat_add but not read afterwards in this body.
  bool saturated = false;
  // The outer parentheses keep the comma in the template argument list from
  // being parsed as a macro-argument separator.
  p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
})

6
riscv/insns/psaddu_w.h

@ -0,0 +1,6 @@
// psaddu_w: saturating add via sat_addu<uint32_t> for each element supplied
// by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  // The flag is handed to sat_addu but not read afterwards in this body.
  bool saturated = false;
  p_rd = (sat_addu<uint32_t>(p_rs1, p_rs2, saturated));
})

9
riscv/insns/psas_wx.h

@ -0,0 +1,9 @@
// psas_wx: hands two bodies to P_CROSS_ULOOP(32, ...): a saturating add and a
// saturating subtract, both through the int32_t instantiations of
// sat_add / sat_sub. Which element each body applies to is decided by the
// loop macro (not visible here).
// NOTE(review): the non-saturating siblings (pas_wx, psa_wx) use
// P_CROSS_LOOP, while this file pairs signed helpers with P_CROSS_ULOOP -
// confirm that is intentional.
require_rv64;
P_CROSS_ULOOP(32, {
  // The flag is handed to sat_add but not read afterwards in this body.
  bool saturated = false;
  // Outer parentheses protect the template comma from the preprocessor.
  p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
}, {
  // The flag is handed to sat_sub but not read afterwards in this body.
  bool saturated = false;
  p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
})

5
riscv/insns/psati_w.h

@ -0,0 +1,5 @@
// psati_w: passes each p_rs1 element supplied by P_RD_RS1_LOOP(32, 32, ...)
// through P_SAT with a first argument of insn.shamtw() + 1.
// Presumably that argument is the saturation width in bits - confirm against
// the P_SAT definition.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  p_rd = P_SAT(insn.shamtw() + 1, p_rs1);
})

5
riscv/insns/psext_w_b.h

@ -0,0 +1,5 @@
// psext_w_b: for each element supplied by P_RD_RS1_LOOP(32, 32, ...), keeps
// the low 8 bits of p_rs1 and sign-extends them to 32 bits.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  const int8_t low_byte = (int8_t)p_rs1;
  p_rd = (int32_t)low_byte;
})

5
riscv/insns/psext_w_h.h

@ -0,0 +1,5 @@
// psext_w_h: for each element supplied by P_RD_RS1_LOOP(32, 32, ...), keeps
// the low 16 bits of p_rs1 and sign-extends them to 32 bits.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  const int16_t low_half = (int16_t)p_rs1;
  p_rd = (int32_t)low_half;
})

5
riscv/insns/psh1add_w.h

@ -0,0 +1,5 @@
// psh1add_w: p_rd = (p_rs1 << 1) + p_rs2 for each element supplied by
// P_RD_RS1_RS2_LOOP(32, 32, 32, ...). No widening or saturation is applied.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  p_rd = (p_rs1 << 1) + p_rs2;
})

5
riscv/insns/psll_ws.h

@ -0,0 +1,5 @@
// psll_ws: left-shifts every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the same amount taken from RS2.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  // Only the low five bits of RS2 are used, so the shift amount is 0..31.
  p_rd = p_rs1 << (RS2 & (32 - 1));
})

5
riscv/insns/pslli_w.h

@ -0,0 +1,5 @@
// pslli_w: left-shifts every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the immediate insn.shamtw().
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  p_rd = p_rs1 << insn.shamtw();
})

5
riscv/insns/psra_ws.h

@ -0,0 +1,5 @@
// psra_ws: right-shifts every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the same amount taken from RS2.
// Presumably p_rs1 is signed under this loop macro, making the shift
// arithmetic - confirm against the macro definition.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  // Only the low five bits of RS2 are used, so the shift amount is 0..31.
  p_rd = p_rs1 >> (RS2 & (32 - 1));
})

5
riscv/insns/psrai_w.h

@ -0,0 +1,5 @@
// psrai_w: right-shifts every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the immediate insn.shamtw().
// Presumably p_rs1 is signed under this loop macro, making the shift
// arithmetic - confirm against the macro definition.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  p_rd = p_rs1 >> insn.shamtw();
})

5
riscv/insns/psrari_w.h

@ -0,0 +1,5 @@
// psrari_w: rounding right shift by the immediate insn.shamtw() for each
// p_rs1 element supplied by P_RD_RS1_LOOP(32, 32, ...).
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  const auto round_shift = insn.shamtw();
  if (round_shift) {
    // Round by adding back the last bit that the shift discards.
    p_rd = (p_rs1 >> round_shift) + ((p_rs1 >> (round_shift - 1)) & 1);
  } else {
    // A shift of zero discards nothing, so the element passes through.
    p_rd = p_rs1;
  }
})

5
riscv/insns/psrl_ws.h

@ -0,0 +1,5 @@
// psrl_ws: right-shifts every p_rs1 element supplied by
// P_RD_RS1_ULOOP(32, 32, ...) by the same amount taken from RS2.
// Presumably p_rs1 is unsigned under the U loop macro, making the shift
// logical - confirm against the macro definition.
require_rv64;
P_RD_RS1_ULOOP(32, 32, {
  // Only the low five bits of RS2 are used, so the shift amount is 0..31.
  p_rd = p_rs1 >> (RS2 & (32 - 1));
})

5
riscv/insns/psrli_w.h

@ -0,0 +1,5 @@
// psrli_w: right-shifts every p_rs1 element supplied by
// P_RD_RS1_ULOOP(32, 32, ...) by the immediate insn.shamtw().
// Presumably p_rs1 is unsigned under the U loop macro, making the shift
// logical - confirm against the macro definition.
require_rv64;
P_RD_RS1_ULOOP(32, 32, {
  p_rd = p_rs1 >> insn.shamtw();
})

9
riscv/insns/pssa_wx.h

@ -0,0 +1,9 @@
// pssa_wx: hands two bodies to P_CROSS_ULOOP(32, ...): a saturating subtract
// and a saturating add, both through the int32_t instantiations of
// sat_sub / sat_add. Which element each body applies to is decided by the
// loop macro (not visible here).
// NOTE(review): the non-saturating siblings (pas_wx, psa_wx) use
// P_CROSS_LOOP, while this file pairs signed helpers with P_CROSS_ULOOP -
// confirm that is intentional.
require_rv64;
P_CROSS_ULOOP(32, {
  // The flag is handed to sat_sub but not read afterwards in this body.
  bool saturated = false;
  // Outer parentheses protect the template comma from the preprocessor.
  p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
}, {
  // The flag is handed to sat_add but not read afterwards in this body.
  bool saturated = false;
  p_rd = (sat_add<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
})

5
riscv/insns/pssh1sadd_w.h

@ -0,0 +1,5 @@
// pssh1sadd_w: saturating (p_rs1 * 2) followed by a saturating add of p_rs2,
// for each element supplied by P_RD_RS1_RS2_LOOP(32, 32, 32, ...).
//
// Both intermediate values are widened to 64 bits before P_SAT sees them.
// Shifting p_rs1 at its own 32-bit width would already have wrapped, so the
// inner saturation could never trigger; psslai_w and pssha_ws widen with
// sext32 for the same reason.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  // Doubling in 64 bits cannot overflow; it is the same value as << 1.
  const int64_t doubled = (int64_t)p_rs1 * 2;
  // Keep the sum in 64 bits too, whatever type P_SAT evaluates to.
  const int64_t doubled_sat = (int64_t)(P_SAT(32, doubled));
  p_rd = P_SAT(32, doubled_sat + p_rs2);
})

13
riscv/insns/pssha_ws.h

@ -0,0 +1,13 @@
// pssha_ws: bidirectional shift of every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the amount in P_FIELD(RS2, 0, 8).
// A non-negative amount shifts left through P_SAT(32, ...); a negative
// amount shifts right by its magnitude.
// Presumably P_FIELD(RS2, 0, 8) yields a signed 8-bit value - confirm.
require_rv64;
const sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(32, 32, {
  if (p_rs1 == 0) {
    p_rd = 0;
  } else if (sshamt >= 32) {
    // Left shift past the element width: clamp according to the sign bit.
    p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff;
  } else if (sshamt <= -32) {
    // Right shift past the element width: only the sign fill remains.
    p_rd = (p_rs1 & 0x80000000) ? 0xffffffff : 0;
  } else if (sshamt >= 0) {
    p_rd = P_SAT(32, sext32(p_rs1) << sshamt);
  } else {
    p_rd = p_rs1 >> -sshamt;
  }
})

13
riscv/insns/psshar_ws.h

@ -0,0 +1,13 @@
// psshar_ws: bidirectional shift of every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the amount in P_FIELD(RS2, 0, 8).
// A non-negative amount shifts left through P_SAT(32, ...); a negative
// amount shifts right by its magnitude with rounding.
// Presumably P_FIELD(RS2, 0, 8) yields a signed 8-bit value - confirm.
require_rv64;
const sreg_t sshamt = P_FIELD(RS2, 0, 8);
P_RD_RS1_LOOP(32, 32, {
  if (p_rs1 == 0 || sshamt <= -32) {
    // A zero input, or a rounded right shift of 32 or more, yields 0.
    p_rd = 0;
  } else if (sshamt >= 32) {
    // Left shift past the element width: clamp according to the sign bit.
    p_rd = (p_rs1 & 0x80000000) ? 0x80000000 : 0x7fffffff;
  } else if (sshamt >= 0) {
    p_rd = P_SAT(32, sext32(p_rs1) << sshamt);
  } else {
    // Round by adding back the last bit that the right shift discards.
    p_rd = (p_rs1 >> -sshamt) + ((p_rs1 >> (-sshamt - 1)) & 1);
  }
})

5
riscv/insns/psslai_w.h

@ -0,0 +1,5 @@
// psslai_w: left-shifts every p_rs1 element supplied by
// P_RD_RS1_LOOP(32, 32, ...) by the immediate insn.shamtw() and passes the
// result through P_SAT(32, ...).
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  // sext32 is applied before the shift so the shifted value is handed to
  // P_SAT at more than 32 bits (sext32 is defined elsewhere).
  p_rd = P_SAT(32, sext32(p_rs1) << insn.shamtw());
})

6
riscv/insns/pssub_w.h

@ -0,0 +1,6 @@
// pssub_w: saturating subtract via sat_sub<int32_t, uint32_t> for each
// element supplied by P_RD_RS1_RS2_LOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  // The flag is handed to sat_sub but not read afterwards in this body.
  bool saturated = false;
  // The outer parentheses keep the comma in the template argument list from
  // being parsed as a macro-argument separator.
  p_rd = (sat_sub<int32_t, uint32_t>(p_rs1, p_rs2, saturated));
})

6
riscv/insns/pssubu_w.h

@ -0,0 +1,6 @@
// pssubu_w: saturating subtract via sat_subu<uint32_t> for each element
// supplied by P_RD_RS1_RS2_ULOOP(32, 32, 32, ...).
require_rv64;
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  // The flag is handed to sat_subu but not read afterwards in this body.
  bool saturated = false;
  p_rd = (sat_subu<uint32_t>(p_rs1, p_rs2, saturated));
})

5
riscv/insns/psub_w.h

@ -0,0 +1,5 @@
// psub_w: plain subtract, p_rd = p_rs1 - p_rs2, for each element supplied by
// P_RD_RS1_RS2_LOOP(32, 32, 32, ...). No widening or saturation is applied.
require_rv64;
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  p_rd = p_rs1 - p_rs2;
})

5
riscv/insns/pusati_w.h

@ -0,0 +1,5 @@
// pusati_w: passes each p_rs1 element supplied by P_RD_RS1_LOOP(32, 32, ...)
// through P_USAT with a first argument of insn.shamtw() + 1.
// Presumably that argument is the saturation width in bits - confirm against
// the P_USAT definition.
require_rv64;
P_RD_RS1_LOOP(32, 32, {
  p_rd = P_USAT(insn.shamtw() + 1, p_rs1);
})

3
riscv/insns/rev.h

@ -0,0 +1,3 @@
// rev: requires the 'P' extension, then reuses the grevi.h instruction body
// for the actual operation (grevi.h is not visible in this change).
// NOTE(review): unlike rev16.h there is no require_rv64 gate here, and this
// body is identical to rev_rv32.h - confirm that is intended.
require_extension('P');
#include "grevi.h"

4
riscv/insns/rev16.h

@ -0,0 +1,4 @@
// rev16: gated by require_rv64 and the 'P' extension, then reuses the
// grevi.h instruction body for the actual operation (grevi.h is not visible
// in this change).
require_rv64;
require_extension('P');
#include "grevi.h"

3
riscv/insns/rev_rv32.h

@ -0,0 +1,3 @@
// rev_rv32: requires the 'P' extension, then reuses the grevi.h instruction
// body for the actual operation (grevi.h is not visible in this change).
// No XLEN check is performed in this file itself.
require_extension('P');
#include "grevi.h"

11
riscv/insns/sha.h

@ -0,0 +1,11 @@
// sha: bidirectional shift of the whole RS1 register by the amount in
// P_FIELD(RS2, 0, 8). A non-negative amount shifts left; a negative amount
// shifts right by its magnitude on the value cast to sreg_t.
// Presumably P_FIELD(RS2, 0, 8) yields a signed 8-bit value - confirm.
require_extension('P');
require_rv64;
const sreg_t sshamt = P_FIELD(RS2, 0, 8);
if (RS1 == 0 || sshamt >= 64) {
  // Zero input, or every bit shifted out to the left.
  WRITE_RD(0);
} else if (sshamt <= -64) {
  // Right shift of 64 or more: only the sign fill remains.
  WRITE_RD((RS1 & 0x8000000000000000) ? 0xffffffffffffffff : 0);
} else {
  WRITE_RD(sshamt >= 0 ? (RS1 << sshamt) : ((sreg_t)RS1 >> -sshamt));
}

11
riscv/insns/shar.h

@ -0,0 +1,11 @@
// shar: bidirectional shift of the whole RS1 register by the amount in
// P_FIELD(RS2, 0, 8). A non-negative amount shifts left; a negative amount
// shifts right by its magnitude with rounding.
// Presumably P_FIELD(RS2, 0, 8) yields a signed 8-bit value - confirm.
require_extension('P');
require_rv64;
const sreg_t sshamt = P_FIELD(RS2, 0, 8);
if (RS1 == 0 || sshamt >= 64 || sshamt <= -64) {
  // Zero input, or a shift of 64 or more in either direction, yields 0.
  WRITE_RD(0);
} else {
  // For right shifts, round by adding back the last bit that is discarded.
  WRITE_RD(sshamt >= 0 ? (RS1 << sshamt) : (((sreg_t)RS1 >> -sshamt) + ((RS1 >> (-sshamt - 1)) & 1)));
}

20
riscv/overlap_list.h

@ -32,3 +32,23 @@ DECLARE_OVERLAP_INSN(sspopchk_x5, EXT_ZICFISS)
DECLARE_OVERLAP_INSN(c_sspush_x1, EXT_ZICFISS) DECLARE_OVERLAP_INSN(c_sspush_x1, EXT_ZICFISS)
DECLARE_OVERLAP_INSN(c_sspopchk_x5, EXT_ZICFISS) DECLARE_OVERLAP_INSN(c_sspopchk_x5, EXT_ZICFISS)
DECLARE_OVERLAP_INSN(c_mop_N, EXT_ZCMOP) DECLARE_OVERLAP_INSN(c_mop_N, EXT_ZCMOP)
// RV64 P-extension instructions that overlap with RV32 P-extension
// instructions (original note: "rv64p overlap rv32p"). Every entry names the
// 'P' extension as the second DECLARE_OVERLAP_INSN argument; what the macro
// does with it is defined by whichever file includes this list.
DECLARE_OVERLAP_INSN(paadd_w, 'P')
DECLARE_OVERLAP_INSN(paaddu_w, 'P')
DECLARE_OVERLAP_INSN(pasub_w, 'P')
DECLARE_OVERLAP_INSN(pasubu_w, 'P')
DECLARE_OVERLAP_INSN(psadd_w, 'P')
DECLARE_OVERLAP_INSN(psaddu_w, 'P')
DECLARE_OVERLAP_INSN(pssh1sadd_w, 'P')
DECLARE_OVERLAP_INSN(pssub_w, 'P')
DECLARE_OVERLAP_INSN(pssubu_w, 'P')
DECLARE_OVERLAP_INSN(psati_w, 'P')
DECLARE_OVERLAP_INSN(pusati_w, 'P')
DECLARE_OVERLAP_INSN(psrari_w, 'P')
DECLARE_OVERLAP_INSN(pssha_ws, 'P')
DECLARE_OVERLAP_INSN(psshar_ws, 'P')
DECLARE_OVERLAP_INSN(psslai_w, 'P')
DECLARE_OVERLAP_INSN(pmseq_w, 'P')
DECLARE_OVERLAP_INSN(pmslt_w, 'P')
DECLARE_OVERLAP_INSN(pmsltu_w, 'P')

53
riscv/riscv.mk.in

@ -1587,6 +1587,59 @@ riscv_insn_ext_p = \
pm2wsuba_h \ pm2wsuba_h \
pm2wsub_hx \ pm2wsub_hx \
pm2wsuba_hx \ pm2wsuba_hx \
pli_w \
plui_w \
paadd_w \
paaddu_w \
padd_w \
padd_ws \
pasub_w \
pasubu_w \
psadd_w \
psaddu_w \
psh1add_w \
pssh1sadd_w \
pssub_w \
pssubu_w \
psub_w \
paas_wx \
pas_wx \
pasa_wx \
psa_wx \
psas_wx \
pssa_wx \
predsum_ws \
predsumu_ws \
psati_w \
psext_w_b \
psext_w_h \
pusati_w \
psll_ws \
pslli_w \
psra_ws \
psrai_w \
psrari_w \
psrl_ws \
psrli_w \
pssha_ws \
psshar_ws \
psslai_w \
sha \
shar \
pmax_w \
pmaxu_w \
pmin_w \
pminu_w \
pmseq_w \
pmslt_w \
pmsltu_w \
ppaire_h \
ppaireo_w \
ppairo_w \
ppairoe_w \
rev \
rev16 \
rev_rv32 \
riscv_insn_list = \ riscv_insn_list = \
$(riscv_insn_ext_i) \ $(riscv_insn_ext_i) \

Loading…
Cancel
Save