Browse Source

rvp: add packed instructions for rv32 - part2

Add the following instructions to rv32

 * packed add/sub register-pair instructions
 * packed cross register-pair instructions
 * packed absolute register-pair instructions
 * packed accumulation register-pair instructions
 * packed sign-extend and saturating instructions
 * packed shift register-pair instructions
 * packed compare register-pair instructions
 * packed pack register-pair instructions
 * packed zip register-pair instructions
 * packed narrowing shift and clip register-pair
 * packed multiply register-pair instructions
 * packed accumulate register-pair instructions
 * packed multiply-add register-pair instructions
pull/2246/head
Jason 4 months ago
committed by Chih-Min Chao
parent
commit
2e2748b2e0
  1. 4
      riscv/decode_macros.h
  2. 3
      riscv/insns/addd.h
  3. 3
      riscv/insns/mqrwacc.h
  4. 3
      riscv/insns/mqwacc.h
  5. 5
      riscv/insns/nclip.h
  6. 5
      riscv/insns/nclipi.h
  7. 5
      riscv/insns/nclipiu.h
  8. 11
      riscv/insns/nclipr.h
  9. 11
      riscv/insns/nclipri.h
  10. 11
      riscv/insns/nclipriu.h
  11. 11
      riscv/insns/nclipru.h
  12. 5
      riscv/insns/nclipu.h
  13. 3
      riscv/insns/nsra.h
  14. 3
      riscv/insns/nsrai.h
  15. 11
      riscv/insns/nsrar.h
  16. 11
      riscv/insns/nsrari.h
  17. 3
      riscv/insns/nsrl.h
  18. 3
      riscv/insns/nsrli.h
  19. 4
      riscv/insns/paadd_db.h
  20. 4
      riscv/insns/paadd_dh.h
  21. 4
      riscv/insns/paadd_dw.h
  22. 4
      riscv/insns/paaddu_db.h
  23. 4
      riscv/insns/paaddu_dh.h
  24. 4
      riscv/insns/paaddu_dw.h
  25. 6
      riscv/insns/paas_dhx.h
  26. 5
      riscv/insns/pabd_db.h
  27. 5
      riscv/insns/pabd_dh.h
  28. 4
      riscv/insns/pabdu_db.h
  29. 4
      riscv/insns/pabdu_dh.h
  30. 4
      riscv/insns/padd_db.h
  31. 4
      riscv/insns/padd_dbs.h
  32. 4
      riscv/insns/padd_dh.h
  33. 4
      riscv/insns/padd_dhs.h
  34. 4
      riscv/insns/padd_dw.h
  35. 4
      riscv/insns/padd_dws.h
  36. 6
      riscv/insns/pas_dhx.h
  37. 6
      riscv/insns/pasa_dhx.h
  38. 4
      riscv/insns/pasub_db.h
  39. 4
      riscv/insns/pasub_dh.h
  40. 4
      riscv/insns/pasub_dw.h
  41. 4
      riscv/insns/pasubu_db.h
  42. 4
      riscv/insns/pasubu_dh.h
  43. 4
      riscv/insns/pasubu_dw.h
  44. 4
      riscv/insns/pm2wadd_h.h
  45. 4
      riscv/insns/pm2wadd_hx.h
  46. 4
      riscv/insns/pm2wadda_h.h
  47. 4
      riscv/insns/pm2wadda_hx.h
  48. 4
      riscv/insns/pm2waddasu_h.h
  49. 4
      riscv/insns/pm2waddau_h.h
  50. 4
      riscv/insns/pm2waddsu_h.h
  51. 4
      riscv/insns/pm2waddu_h.h
  52. 7
      riscv/insns/pm2wsub_h.h
  53. 7
      riscv/insns/pm2wsub_hx.h
  54. 7
      riscv/insns/pm2wsuba_h.h
  55. 7
      riscv/insns/pm2wsuba_hx.h
  56. 4
      riscv/insns/pmax_db.h
  57. 4
      riscv/insns/pmax_dh.h
  58. 4
      riscv/insns/pmax_dw.h
  59. 4
      riscv/insns/pmaxu_db.h
  60. 4
      riscv/insns/pmaxu_dh.h
  61. 4
      riscv/insns/pmaxu_dw.h
  62. 4
      riscv/insns/pmin_db.h
  63. 4
      riscv/insns/pmin_dh.h
  64. 4
      riscv/insns/pmin_dw.h
  65. 4
      riscv/insns/pminu_db.h
  66. 4
      riscv/insns/pminu_dh.h
  67. 4
      riscv/insns/pminu_dw.h
  68. 4
      riscv/insns/pmqrwacc_h.h
  69. 4
      riscv/insns/pmqwacc_h.h
  70. 4
      riscv/insns/pmseq_db.h
  71. 4
      riscv/insns/pmseq_dh.h
  72. 4
      riscv/insns/pmseq_dw.h
  73. 4
      riscv/insns/pmslt_db.h
  74. 4
      riscv/insns/pmslt_dh.h
  75. 4
      riscv/insns/pmslt_dw.h
  76. 4
      riscv/insns/pmsltu_db.h
  77. 4
      riscv/insns/pmsltu_dh.h
  78. 4
      riscv/insns/pmsltu_dw.h
  79. 4
      riscv/insns/pnclip_bs.h
  80. 4
      riscv/insns/pnclip_hs.h
  81. 4
      riscv/insns/pnclipi_b.h
  82. 4
      riscv/insns/pnclipi_h.h
  83. 4
      riscv/insns/pnclipiu_b.h
  84. 4
      riscv/insns/pnclipiu_h.h
  85. 13
      riscv/insns/pnclipr_bs.h
  86. 13
      riscv/insns/pnclipr_hs.h
  87. 13
      riscv/insns/pnclipri_b.h
  88. 13
      riscv/insns/pnclipri_h.h
  89. 13
      riscv/insns/pnclipriu_b.h
  90. 13
      riscv/insns/pnclipriu_h.h
  91. 13
      riscv/insns/pnclipru_bs.h
  92. 13
      riscv/insns/pnclipru_hs.h
  93. 4
      riscv/insns/pnclipu_bs.h
  94. 4
      riscv/insns/pnclipu_hs.h
  95. 4
      riscv/insns/pnsra_bs.h
  96. 4
      riscv/insns/pnsra_hs.h
  97. 4
      riscv/insns/pnsrai_b.h
  98. 4
      riscv/insns/pnsrai_h.h
  99. 12
      riscv/insns/pnsrar_bs.h
  100. 12
      riscv/insns/pnsrar_hs.h

4
riscv/decode_macros.h

@ -236,8 +236,8 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
// Zero-extend the low `pos` bits of x: shift them up to bit 63, then back down as reg_t.
#define zext(x, pos) (((reg_t)(x) << (64 - (pos))) >> (64 - (pos)))
// Extend from the current register width.
#define sext_xlen(x) sext(x, xlen)
#define zext_xlen(x) zext(x, xlen)
// NOTE(review): this diff view lost its +/- markers; the next two lines are
// presumably the REMOVED definitions. With xlen == 64 they pass pos = 128, which
// makes the `64 - (pos)` shift count in zext negative.
#define sext_xlen_pair(x) sext(x, xlen * 2)
#define zext_xlen_pair(x) zext(x, xlen * 2)
// Presumably the ADDED replacements: extend from 64 bits only when xlen == 32,
// otherwise just cast to sreg_t / reg_t.
#define sext_xlen_pair(x) (xlen == 32 ? sext(x, 64) : (sreg_t)(x))
#define zext_xlen_pair(x) (xlen == 32 ? zext(x, 64) : (reg_t)(x))
#define set_pc(x) \
do { if (unlikely((x) & ~p->pc_alignment_mask())) \

3
riscv/insns/addd.h

@ -0,0 +1,3 @@
// addd: writes P_RS1_PAIR + P_RS2_PAIR to the rd pair through WRITE_P_RD_PAIR.
// Gated on the 'P' extension and RV32.
require_extension('P');
require_rv32;
WRITE_P_RD_PAIR(P_RS1_PAIR + P_RS2_PAIR);

3
riscv/insns/mqrwacc.h

@ -0,0 +1,3 @@
// mqrwacc: rd pair += (RS1 * RS2 + 0x40000000) >> 31, with the product formed in sreg_t.
// 0x40000000 is half of 1 << 31, i.e. a rounding bias applied before the shift.
require_extension('P');
require_rv32;
WRITE_P_RD_PAIR(P_RD_PAIR + (((sreg_t)RS1*(sreg_t)RS2 + 0x40000000) >> 31));

3
riscv/insns/mqwacc.h

@ -0,0 +1,3 @@
// mqwacc: rd pair += (RS1 * RS2) >> 31, with the product formed in sreg_t.
// Same as the rounding variant but without the 0x40000000 bias.
require_extension('P');
require_rv32;
WRITE_P_RD_PAIR(P_RD_PAIR + (((sreg_t)RS1*(sreg_t)RS2) >> 31));

5
riscv/insns/nclip.h

@ -0,0 +1,5 @@
require_extension('P');
require_rv32;
sreg_t tmp = (sreg_t)P_RS1_PAIR >> (RS2 & 0x3f);
int32_t result = P_SAT(32, tmp);
WRITE_RD(result);

5
riscv/insns/nclipi.h

@ -0,0 +1,5 @@
require_extension('P');
require_rv32;
sreg_t tmp = (sreg_t)P_RS1_PAIR >> insn.shamtd();
int32_t result = P_SAT(32, tmp);
WRITE_RD(result);

5
riscv/insns/nclipiu.h

@ -0,0 +1,5 @@
// nclipiu: logical right shift of the rs1 pair by the immediate insn.shamtd(),
// then P_USAT_FULL to 32 bits.
require_extension('P');
require_rv32;
reg_t tmp = (reg_t)P_RS1_PAIR >> insn.shamtd();
// NOTE(review): when shamtd() == 0 and bit 63 of the pair is set, (sreg_t)tmp is
// negative; confirm P_USAT_FULL saturates that case to the maximum, not to 0.
uint32_t result = P_USAT_FULL(32, (sreg_t)tmp);
// A uint32_t >= 0x80000000 would otherwise be written zero-extended; sext32 stores
// it in the sign-extended form used for 32-bit register values.
WRITE_RD(sext32(result));

11
riscv/insns/nclipr.h

@ -0,0 +1,11 @@
// nclipr: rounding arithmetic right shift of the rs1 pair by RS2[5:0], then P_SAT
// to 32 bits. Rounding adds the last bit shifted out; a zero shift skips it.
require_extension('P');
require_rv32;
sreg_t pair_val = (sreg_t)P_RS1_PAIR;
uint32_t shift_amt = RS2 & 0x3f;
sreg_t rounded = pair_val;
if (shift_amt != 0) {
  rounded = (pair_val >> shift_amt) + ((pair_val >> (shift_amt - 1)) & 1);
}
sreg_t clipped = P_SAT(32, rounded);
WRITE_RD(clipped);

11
riscv/insns/nclipri.h

@ -0,0 +1,11 @@
// nclipri: rounding arithmetic right shift of the rs1 pair by the immediate
// insn.shamtd(), then P_SAT to 32 bits. Rounding adds the last bit shifted out.
require_extension('P');
require_rv32;
sreg_t pair_val = (sreg_t)P_RS1_PAIR;
uint32_t shift_amt = insn.shamtd();
sreg_t rounded = pair_val;
if (shift_amt != 0) {
  rounded = (pair_val >> shift_amt) + ((pair_val >> (shift_amt - 1)) & 1);
}
sreg_t clipped = P_SAT(32, rounded);
WRITE_RD(clipped);

11
riscv/insns/nclipriu.h

@ -0,0 +1,11 @@
// nclipriu: rounding logical right shift of the rs1 pair by the immediate
// insn.shamtd(), then P_USAT_FULL to 32 bits.
require_extension('P');
require_rv32;
reg_t val = (reg_t)P_RS1_PAIR;
uint32_t shamt = insn.shamtd();
reg_t rounded = val;
if (shamt != 0) {
  // round by adding the last bit shifted out
  rounded = (val >> shamt) + ((val >> (shamt - 1)) & 1);
}
// NOTE(review): (sreg_t)rounded is negative when bit 63 is set (shamt == 0, or an
// all-ones pair with shamt == 1); confirm P_USAT_FULL saturates that to the maximum.
reg_t result = P_USAT_FULL(32, (sreg_t)rounded);
// Results >= 0x80000000 would otherwise be written zero-extended; sext32 stores
// them in the sign-extended form used for 32-bit register values.
WRITE_RD(sext32(result));

11
riscv/insns/nclipru.h

@ -0,0 +1,11 @@
// nclipru: rounding logical right shift of the rs1 pair by RS2[5:0], then
// P_USAT_FULL to 32 bits.
require_extension('P');
require_rv32;
reg_t val = (reg_t)P_RS1_PAIR;
uint32_t shamt = RS2 & 0x3f;
reg_t rounded = val;
if (shamt != 0) {
  // round by adding the last bit shifted out
  rounded = (val >> shamt) + ((val >> (shamt - 1)) & 1);
}
// NOTE(review): (sreg_t)rounded is negative when bit 63 is set (shamt == 0, or an
// all-ones pair with shamt == 1); confirm P_USAT_FULL saturates that to the maximum.
reg_t result = P_USAT_FULL(32, (sreg_t)rounded);
// Results >= 0x80000000 would otherwise be written zero-extended; sext32 stores
// them in the sign-extended form used for 32-bit register values.
WRITE_RD(sext32(result));

5
riscv/insns/nclipu.h

@ -0,0 +1,5 @@
// nclipu: logical right shift of the rs1 pair by RS2[5:0], then P_USAT_FULL to 32 bits.
require_extension('P');
require_rv32;
reg_t tmp = (reg_t)P_RS1_PAIR >> (RS2 & 0x3f);
// NOTE(review): when the shift is 0 and bit 63 of the pair is set, (sreg_t)tmp is
// negative; confirm P_USAT_FULL saturates that case to the maximum, not to 0.
uint32_t result = P_USAT_FULL(32, (sreg_t)tmp);
// A uint32_t >= 0x80000000 would otherwise be written zero-extended; sext32 stores
// it in the sign-extended form used for 32-bit register values.
WRITE_RD(sext32(result));

3
riscv/insns/nsra.h

@ -0,0 +1,3 @@
// nsra: narrowing arithmetic right shift of the rs1 pair by RS2[5:0].
// Only the low 32 bits of the shifted pair are the result, so narrow with sext32
// instead of handing the whole 64-bit value to WRITE_RD.
require_extension('P');
require_rv32;
WRITE_RD(sext32((sreg_t)P_RS1_PAIR >> (RS2 & 0x3f)));

3
riscv/insns/nsrai.h

@ -0,0 +1,3 @@
// nsrai: narrowing arithmetic right shift of the rs1 pair by the immediate insn.shamtd().
// Only the low 32 bits of the shifted pair are the result, so narrow with sext32
// instead of handing the whole 64-bit value to WRITE_RD.
require_extension('P');
require_rv32;
WRITE_RD(sext32((sreg_t)P_RS1_PAIR >> insn.shamtd()));

11
riscv/insns/nsrar.h

@ -0,0 +1,11 @@
// nsrar: narrowing rounding arithmetic right shift of the rs1 pair by RS2[5:0].
require_extension('P');
require_rv32;
sreg_t val = (sreg_t)P_RS1_PAIR;
uint32_t shamt = RS2 & 0x3f;
sreg_t result = val;
if (shamt != 0) {
  // round by adding the last bit shifted out
  result = (val >> shamt) + ((val >> (shamt - 1)) & 1);
}
// Only the low 32 bits are the result; sext32 narrows them rather than writing
// the whole 64-bit intermediate to rd.
WRITE_RD(sext32(result));

11
riscv/insns/nsrari.h

@ -0,0 +1,11 @@
// nsrari: narrowing rounding arithmetic right shift of the rs1 pair by the
// immediate insn.shamtd().
require_extension('P');
require_rv32;
sreg_t val = (sreg_t)P_RS1_PAIR;
uint32_t shamt = insn.shamtd();
sreg_t result = val;
if (shamt != 0) {
  // round by adding the last bit shifted out
  result = (val >> shamt) + ((val >> (shamt - 1)) & 1);
}
// Only the low 32 bits are the result; sext32 narrows them rather than writing
// the whole 64-bit intermediate to rd.
WRITE_RD(sext32(result));

3
riscv/insns/nsrl.h

@ -0,0 +1,3 @@
// nsrl: narrowing right shift of the rs1 pair by RS2[5:0] (P_RS1_PAIR is shifted
// as-is, without a signed cast).
// Only the low 32 bits of the shifted pair are the result, so narrow with sext32
// instead of handing the whole 64-bit value to WRITE_RD.
require_extension('P');
require_rv32;
WRITE_RD(sext32(P_RS1_PAIR >> (RS2 & 0x3f)));

3
riscv/insns/nsrli.h

@ -0,0 +1,3 @@
// nsrli: narrowing right shift of the rs1 pair by the immediate insn.shamtd()
// (P_RS1_PAIR is shifted as-is, without a signed cast).
// Only the low 32 bits of the shifted pair are the result, so narrow with sext32
// instead of handing the whole 64-bit value to WRITE_RD.
require_extension('P');
require_rv32;
WRITE_RD(sext32(P_RS1_PAIR >> insn.shamtd()));

4
riscv/insns/paadd_db.h

@ -0,0 +1,4 @@
// paadd_db: per element of P_RD_RS1_RS2_DW_LOOP(8,8,8): p_rd = (p_rs1 + p_rs2) >> 1.
// NOTE(review): lane width/signedness are set by the loop macro (not shown) — confirm.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8,8,8, {
p_rd = (p_rs1 + p_rs2) >> 1;
})

4
riscv/insns/paadd_dh.h

@ -0,0 +1,4 @@
// paadd_dh: per element of P_RD_RS1_RS2_DW_LOOP(16,16,16): p_rd = (p_rs1 + p_rs2) >> 1.
// NOTE(review): lane width/signedness are set by the loop macro (not shown) — confirm.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16,16,16, {
p_rd = (p_rs1 + p_rs2) >> 1;
})

4
riscv/insns/paadd_dw.h

@ -0,0 +1,4 @@
// paadd_dw: per element of P_RD_RS1_RS2_DW_LOOP(32,32,32): halved sum of p_rs1 and p_rs2.
// Operands are cast to sreg_t first so the sum is formed in the wider type.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32,32,32, {
p_rd = ((sreg_t)p_rs1 + (sreg_t)p_rs2) >> 1;
})

4
riscv/insns/paaddu_db.h

@ -0,0 +1,4 @@
// paaddu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8,8,8): p_rd = (p_rs1 + p_rs2) >> 1.
// NOTE(review): ULOOP presumably makes the elements unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8,8,8, {
p_rd = (p_rs1 + p_rs2) >> 1;
})

4
riscv/insns/paaddu_dh.h

@ -0,0 +1,4 @@
// paaddu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16,16,16): p_rd = (p_rs1 + p_rs2) >> 1.
// NOTE(review): ULOOP presumably makes the elements unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16,16,16, {
p_rd = (p_rs1 + p_rs2) >> 1;
})

4
riscv/insns/paaddu_dw.h

@ -0,0 +1,4 @@
// paaddu_dw: per element of P_RD_RS1_RS2_DW_ULOOP(32,32,32): halved sum of p_rs1 and p_rs2.
// Operands are cast to reg_t first so the sum is formed in the wider unsigned type.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(32,32,32, {
p_rd = ((reg_t)p_rs1 + (reg_t)p_rs2) >> 1;
})

6
riscv/insns/paas_dhx.h

@ -0,0 +1,6 @@
// paas_dhx: P_CROSS_DW_LOOP(16, A, B) with A = halved sum, B = halved difference.
// NOTE(review): which lanes run A vs B, and how operands are crossed, is decided by
// the macro (not shown) — confirm.
require_rv32;
P_CROSS_DW_LOOP(16, {
p_rd = (p_rs1 + p_rs2) >> 1;
}, {
p_rd = (p_rs1 - p_rs2) >> 1;
})

5
riscv/insns/pabd_db.h

@ -0,0 +1,5 @@
// pabd_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): absolute difference.
// The larger-minus-smaller subtraction is done on uint8_t casts and the result is
// cast back to int8_t.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = (p_rs1 < p_rs2) ? (int8_t)((uint8_t)p_rs2 - (uint8_t)p_rs1)
: (int8_t)((uint8_t)p_rs1 - (uint8_t)p_rs2);
})

5
riscv/insns/pabd_dh.h

@ -0,0 +1,5 @@
// pabd_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): absolute difference.
// The larger-minus-smaller subtraction is done on uint16_t casts and the result is
// cast back to int16_t.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? (int16_t)((uint16_t)p_rs2 - (uint16_t)p_rs1)
: (int16_t)((uint16_t)p_rs1 - (uint16_t)p_rs2);
})

4
riscv/insns/pabdu_db.h

@ -0,0 +1,4 @@
// pabdu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8, 8, 8): larger operand minus smaller.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, {
p_rd = (p_rs1 < p_rs2) ? p_rs2 - p_rs1 : p_rs1 - p_rs2;
})

4
riscv/insns/pabdu_dh.h

@ -0,0 +1,4 @@
// pabdu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16, 16, 16): larger operand minus smaller.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? p_rs2 - p_rs1 : p_rs1 - p_rs2;
})

4
riscv/insns/padd_db.h

@ -0,0 +1,4 @@
// padd_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): p_rd = p_rs1 + p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = p_rs1 + p_rs2;
})

4
riscv/insns/padd_dbs.h

@ -0,0 +1,4 @@
// padd_dbs: per element of P_RD_RS1_DW_LOOP(8, 8): adds the same P_FIELD(RS2, 0, 8)
// value to every p_rs1 element.
require_rv32;
P_RD_RS1_DW_LOOP(8, 8, {
p_rd = p_rs1 + P_FIELD(RS2, 0, 8);
})

4
riscv/insns/padd_dh.h

@ -0,0 +1,4 @@
// padd_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): p_rd = p_rs1 + p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = p_rs1 + p_rs2;
})

4
riscv/insns/padd_dhs.h

@ -0,0 +1,4 @@
// padd_dhs: per element of P_RD_RS1_DW_LOOP(16, 16): adds the same P_FIELD(RS2, 0, 16)
// value to every p_rs1 element.
require_rv32;
P_RD_RS1_DW_LOOP(16, 16, {
p_rd = p_rs1 + P_FIELD(RS2, 0, 16);
})

4
riscv/insns/padd_dw.h

@ -0,0 +1,4 @@
// padd_dw: per element of P_RD_RS1_RS2_DW_LOOP(32, 32, 32): p_rd = p_rs1 + p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, {
p_rd = p_rs1 + p_rs2;
})

4
riscv/insns/padd_dws.h

@ -0,0 +1,4 @@
// padd_dws: per element of P_RD_RS1_DW_LOOP(32, 32): adds the same P_FIELD(RS2, 0, 32)
// value to every p_rs1 element.
require_rv32;
P_RD_RS1_DW_LOOP(32, 32, {
p_rd = p_rs1 + P_FIELD(RS2, 0, 32);
})

6
riscv/insns/pas_dhx.h

@ -0,0 +1,6 @@
// pas_dhx: P_CROSS_DW_LOOP(16, A, B) with A = sum, B = difference.
// NOTE(review): which lanes run A vs B, and how operands are crossed, is decided by
// the macro (not shown) — confirm.
require_rv32;
P_CROSS_DW_LOOP(16, {
p_rd = p_rs1 + p_rs2;
}, {
p_rd = p_rs1 - p_rs2;
})

6
riscv/insns/pasa_dhx.h

@ -0,0 +1,6 @@
// pasa_dhx: P_CROSS_DW_LOOP(16, A, B) with A = halved difference, B = halved sum.
// NOTE(review): which lanes run A vs B, and how operands are crossed, is decided by
// the macro (not shown) — confirm.
require_rv32;
P_CROSS_DW_LOOP(16, {
p_rd = (p_rs1 - p_rs2) >> 1;
}, {
p_rd = (p_rs1 + p_rs2) >> 1;
})

4
riscv/insns/pasub_db.h

@ -0,0 +1,4 @@
// pasub_db: per element of P_RD_RS1_RS2_DW_LOOP(8,8,8): p_rd = (p_rs1 - p_rs2) >> 1.
// NOTE(review): lane width/signedness are set by the loop macro (not shown) — confirm.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8,8,8, {
p_rd = (p_rs1 - p_rs2) >> 1;
})

4
riscv/insns/pasub_dh.h

@ -0,0 +1,4 @@
// pasub_dh: per element of P_RD_RS1_RS2_DW_LOOP(16,16,16): p_rd = (p_rs1 - p_rs2) >> 1.
// NOTE(review): lane width/signedness are set by the loop macro (not shown) — confirm.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16,16,16, {
p_rd = (p_rs1 - p_rs2) >> 1;
})

4
riscv/insns/pasub_dw.h

@ -0,0 +1,4 @@
// pasub_dw: per element of P_RD_RS1_RS2_DW_LOOP(32,32,32): halved difference p_rs1 - p_rs2.
// Operands are cast to sreg_t first so the difference is formed in the wider type.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32,32,32, {
p_rd = ((sreg_t)p_rs1 - (sreg_t)p_rs2) >> 1;
})

4
riscv/insns/pasubu_db.h

@ -0,0 +1,4 @@
// pasubu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8,8,8): p_rd = (p_rs1 - p_rs2) >> 1.
// NOTE(review): ULOOP presumably makes the elements unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8,8,8, {
p_rd = (p_rs1 - p_rs2) >> 1;
})

4
riscv/insns/pasubu_dh.h

@ -0,0 +1,4 @@
// pasubu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16,16,16): p_rd = (p_rs1 - p_rs2) >> 1.
// NOTE(review): ULOOP presumably makes the elements unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16,16,16, {
p_rd = (p_rs1 - p_rs2) >> 1;
})

4
riscv/insns/pasubu_dw.h

@ -0,0 +1,4 @@
// pasubu_dw: per element of P_RD_RS1_RS2_DW_ULOOP(32,32,32): halved difference p_rs1 - p_rs2.
// Operands are cast to reg_t first so the difference is formed in the wider unsigned type.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(32,32,32, {
p_rd = ((reg_t)p_rs1 - (reg_t)p_rs2) >> 1;
})

4
riscv/insns/pm2wadd_h.h

@ -0,0 +1,4 @@
// pm2wadd_h: P_WIDEN_REDUCTION_LOOP(32, 16, false, false): p_res accumulates the
// product of each p_rs1/p_rs2 element pair; both operands go through sext32 first.
// NOTE(review): the two boolean macro arguments are not explained here — confirm.
require_rv32;
P_WIDEN_REDUCTION_LOOP(32, 16, false, false, {
p_res += sext32(p_rs1) * sext32(p_rs2);
})

4
riscv/insns/pm2wadd_hx.h

@ -0,0 +1,4 @@
// pm2wadd_hx: P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false): p_res accumulates
// p_rs1 * p_rs2 per iteration.
// NOTE(review): operand crossing and the boolean arguments live in the macro — confirm.
require_rv32;
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false, {
p_res += p_rs1 * p_rs2;
})

4
riscv/insns/pm2wadda_h.h

@ -0,0 +1,4 @@
// pm2wadda_h: P_WIDEN_REDUCTION_LOOP(32, 16, true, false): p_res accumulates
// p_rs1 * p_rs2 per iteration.
// NOTE(review): the first boolean (true here, false in pm2wadd_h) presumably selects
// accumulation onto the existing rd — confirm in the macro.
require_rv32;
P_WIDEN_REDUCTION_LOOP(32, 16, true, false, {
p_res += p_rs1 * p_rs2;
})

4
riscv/insns/pm2wadda_hx.h

@ -0,0 +1,4 @@
// pm2wadda_hx: P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false): p_res accumulates
// p_rs1 * p_rs2 per iteration.
// NOTE(review): operand crossing and the boolean arguments live in the macro — confirm.
require_rv32;
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false, {
p_res += p_rs1 * p_rs2;
})

4
riscv/insns/pm2waddasu_h.h

@ -0,0 +1,4 @@
// pm2waddasu_h: P_WIDEN_REDUCTION_SULOOP(32, 16, true, false): p_res accumulates
// p_rs1 * p_rs2 per iteration.
// NOTE(review): SULOOP presumably pairs a signed p_rs1 with an unsigned p_rs2 — confirm.
require_rv32;
P_WIDEN_REDUCTION_SULOOP(32, 16, true, false, {
p_res += p_rs1 * p_rs2;
})

4
riscv/insns/pm2waddau_h.h

@ -0,0 +1,4 @@
// pm2waddau_h: P_WIDEN_REDUCTION_ULOOP(32, 16, true, false): p_res accumulates the
// product of both operands after casting each to uint32_t.
require_rv32;
P_WIDEN_REDUCTION_ULOOP(32, 16, true, false, {
p_res += (uint32_t)p_rs1 * (uint32_t)p_rs2;
})

4
riscv/insns/pm2waddsu_h.h

@ -0,0 +1,4 @@
// pm2waddsu_h: P_WIDEN_REDUCTION_SULOOP(32, 16, false, false): p_res accumulates
// p_rs1 * p_rs2 per iteration.
// NOTE(review): SULOOP presumably pairs a signed p_rs1 with an unsigned p_rs2 — confirm.
require_rv32;
P_WIDEN_REDUCTION_SULOOP(32, 16, false, false, {
p_res += p_rs1 * p_rs2;
})

4
riscv/insns/pm2waddu_h.h

@ -0,0 +1,4 @@
// pm2waddu_h: P_WIDEN_REDUCTION_ULOOP(32, 16, false, false): p_res accumulates the
// product of both operands after casting each to uint32_t.
require_rv32;
P_WIDEN_REDUCTION_ULOOP(32, 16, false, false, {
p_res += (uint32_t)p_rs1 * (uint32_t)p_rs2;
})

7
riscv/insns/pm2wsub_h.h

@ -0,0 +1,7 @@
// pm2wsub_h: reduction over P_WIDEN_REDUCTION_LOOP(32, 16, false, false) in which
// products at even j are subtracted from p_res and products at odd j are added.
require_rv32;
P_WIDEN_REDUCTION_LOOP(32, 16, false, false, {
  if ((j & 1) == 0) {
    p_res -= p_rs1 * p_rs2;
  } else {
    p_res += p_rs1 * p_rs2;
  }
})

7
riscv/insns/pm2wsub_hx.h

@ -0,0 +1,7 @@
// pm2wsub_hx: reduction over P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false) in
// which products at even j are subtracted from p_res and products at odd j are added.
require_rv32;
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false, {
  if ((j & 1) == 0) {
    p_res -= p_rs1 * p_rs2;
  } else {
    p_res += p_rs1 * p_rs2;
  }
})

7
riscv/insns/pm2wsuba_h.h

@ -0,0 +1,7 @@
// pm2wsuba_h: reduction over P_WIDEN_REDUCTION_LOOP(32, 16, true, false) in which
// products at even j are subtracted from p_res and products at odd j are added.
require_rv32;
P_WIDEN_REDUCTION_LOOP(32, 16, true, false, {
  if ((j & 1) == 0) {
    p_res -= p_rs1 * p_rs2;
  } else {
    p_res += p_rs1 * p_rs2;
  }
})

7
riscv/insns/pm2wsuba_hx.h

@ -0,0 +1,7 @@
// pm2wsuba_hx: reduction over P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false) in
// which products at even j are subtracted from p_res and products at odd j are added.
require_rv32;
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false, {
  if ((j & 1) == 0) {
    p_res -= p_rs1 * p_rs2;
  } else {
    p_res += p_rs1 * p_rs2;
  }
})

4
riscv/insns/pmax_db.h

@ -0,0 +1,4 @@
// pmax_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): p_rd = larger of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmax_dh.h

@ -0,0 +1,4 @@
// pmax_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): p_rd = larger of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmax_dw.h

@ -0,0 +1,4 @@
// pmax_dw: per element of P_RD_RS1_RS2_DW_LOOP(32, 32, 32): p_rd = larger of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmaxu_db.h

@ -0,0 +1,4 @@
// pmaxu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8, 8, 8): p_rd = larger of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmaxu_dh.h

@ -0,0 +1,4 @@
// pmaxu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16, 16, 16): p_rd = larger of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmaxu_dw.h

@ -0,0 +1,4 @@
// pmaxu_dw: per element of P_RD_RS1_RS2_DW_ULOOP(32, 32, 32): p_rd = larger of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, {
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmin_db.h

@ -0,0 +1,4 @@
// pmin_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): p_rd = smaller of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmin_dh.h

@ -0,0 +1,4 @@
// pmin_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): p_rd = smaller of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmin_dw.h

@ -0,0 +1,4 @@
// pmin_dw: per element of P_RD_RS1_RS2_DW_LOOP(32, 32, 32): p_rd = smaller of p_rs1, p_rs2.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pminu_db.h

@ -0,0 +1,4 @@
// pminu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8, 8, 8): p_rd = smaller of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pminu_dh.h

@ -0,0 +1,4 @@
// pminu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16, 16, 16): p_rd = smaller of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pminu_dw.h

@ -0,0 +1,4 @@
// pminu_dw: per element of P_RD_RS1_RS2_DW_ULOOP(32, 32, 32): p_rd = smaller of p_rs1, p_rs2.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, {
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2;
})

4
riscv/insns/pmqrwacc_h.h

@ -0,0 +1,4 @@
// pmqrwacc_h: per element of P_WIDEN_RD_RS1_RS2_LOOP(16, 16):
// p_rd += (p_rs1 * p_rs2 + 0x4000) >> 15. 0x4000 is half of 1 << 15, i.e. a
// rounding bias applied before the shift.
require_rv32;
P_WIDEN_RD_RS1_RS2_LOOP(16, 16, {
p_rd += (p_rs1 * p_rs2 + 0x4000) >> 15;
})

4
riscv/insns/pmqwacc_h.h

@ -0,0 +1,4 @@
// pmqwacc_h: per element of P_WIDEN_RD_RS1_RS2_LOOP(16, 16):
// p_rd += (p_rs1 * p_rs2) >> 15 (no rounding bias).
require_rv32;
P_WIDEN_RD_RS1_RS2_LOOP(16, 16, {
p_rd += (p_rs1 * p_rs2) >> 15;
})

4
riscv/insns/pmseq_db.h

@ -0,0 +1,4 @@
// pmseq_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): p_rd = -1 (all ones) when
// p_rs1 == p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = (p_rs1 == p_rs2) ? -1 : 0;
})

4
riscv/insns/pmseq_dh.h

@ -0,0 +1,4 @@
// pmseq_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): p_rd = -1 (all ones) when
// p_rs1 == p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = (p_rs1 == p_rs2) ? -1 : 0;
})

4
riscv/insns/pmseq_dw.h

@ -0,0 +1,4 @@
// pmseq_dw: per element of P_RD_RS1_RS2_DW_LOOP(32, 32, 32): p_rd = -1 (all ones) when
// p_rs1 == p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, {
p_rd = (p_rs1 == p_rs2) ? -1 : 0;
})

4
riscv/insns/pmslt_db.h

@ -0,0 +1,4 @@
// pmslt_db: per element of P_RD_RS1_RS2_DW_LOOP(8, 8, 8): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pmslt_dh.h

@ -0,0 +1,4 @@
// pmslt_dh: per element of P_RD_RS1_RS2_DW_LOOP(16, 16, 16): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pmslt_dw.h

@ -0,0 +1,4 @@
// pmslt_dw: per element of P_RD_RS1_RS2_DW_LOOP(32, 32, 32): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
require_rv32;
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pmsltu_db.h

@ -0,0 +1,4 @@
// pmsltu_db: per element of P_RD_RS1_RS2_DW_ULOOP(8, 8, 8): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(8 ,8 ,8, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pmsltu_dh.h

@ -0,0 +1,4 @@
// pmsltu_dh: per element of P_RD_RS1_RS2_DW_ULOOP(16, 16, 16): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pmsltu_dw.h

@ -0,0 +1,4 @@
// pmsltu_dw: per element of P_RD_RS1_RS2_DW_ULOOP(32, 32, 32): p_rd = -1 (all ones) when
// p_rs1 < p_rs2, else 0.
// NOTE(review): ULOOP presumably makes the comparison unsigned — confirm in the macro.
require_rv32;
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, {
p_rd = (p_rs1 < p_rs2) ? -1 : 0;
})

4
riscv/insns/pnclip_bs.h

@ -0,0 +1,4 @@
// pnclip_bs: per element of P_NARROW_RD_RS1_LOOP(8, 16): shift p_rs1 right by the low
// 4 bits of P_FIELD(RS2, 0, 8), then P_SAT to 8 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
p_rd = P_SAT(8, p_rs1 >> (P_FIELD(RS2, 0, 8) & 0xF));
})

4
riscv/insns/pnclip_hs.h

@ -0,0 +1,4 @@
// pnclip_hs: per element of P_NARROW_RD_RS1_LOOP(16, 32): shift p_rs1 right by the low
// 5 bits of P_FIELD(RS2, 0, 16), then P_SAT to 16 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
p_rd = P_SAT(16, p_rs1 >> (P_FIELD(RS2, 0, 16) & 0X1F));
})

4
riscv/insns/pnclipi_b.h

@ -0,0 +1,4 @@
// pnclipi_b: per element of P_NARROW_RD_RS1_LOOP(8, 16): shift p_rs1 right by the
// immediate insn.shamth(), then P_SAT to 8 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
p_rd = P_SAT(8, p_rs1 >> insn.shamth());
})

4
riscv/insns/pnclipi_h.h

@ -0,0 +1,4 @@
// pnclipi_h: per element of P_NARROW_RD_RS1_LOOP(16, 32): shift p_rs1 right by the
// immediate insn.shamtw(), then P_SAT to 16 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
p_rd = P_SAT(16, p_rs1 >> insn.shamtw());
})

4
riscv/insns/pnclipiu_b.h

@ -0,0 +1,4 @@
// pnclipiu_b: per element of P_NARROW_RD_RS1_ULOOP(8, 16): shift p_rs1 right by the
// immediate insn.shamth(), then saturate to 8 bits with P_USAT.
// NOTE(review): the register-shift sibling (pnclipu_bs) and the rounding variants use
// P_USAT_FULL(8, (sreg_t)...) instead of P_USAT — confirm which macro is intended here.
require_rv32;
P_NARROW_RD_RS1_ULOOP(8, 16, {
p_rd = P_USAT(8, p_rs1 >> insn.shamth());
})

4
riscv/insns/pnclipiu_h.h

@ -0,0 +1,4 @@
// pnclipiu_h: per element of P_NARROW_RD_RS1_ULOOP(16, 32): shift p_rs1 right by the
// immediate insn.shamtw(), then saturate to 16 bits with P_USAT.
// NOTE(review): the register-shift sibling (pnclipu_hs) and the rounding variants use
// P_USAT_FULL(16, (sreg_t)...) instead of P_USAT — confirm which macro is intended here.
require_rv32;
P_NARROW_RD_RS1_ULOOP(16, 32, {
p_rd = P_USAT(16, (p_rs1 >> insn.shamtw()));
})

13
riscv/insns/pnclipr_bs.h

@ -0,0 +1,13 @@
// pnclipr_bs: per element of P_NARROW_RD_RS1_LOOP(8, 16): rounding right shift of p_rs1
// by the low 4 bits of P_FIELD(RS2, 0, 8), then P_SAT to 8 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
uint16_t shamt = P_FIELD(RS2, 0, 8) & 0xF;
sreg_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
sreg_t shifted = (sreg_t)p_rs1 >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_SAT(8, result);
})

13
riscv/insns/pnclipr_hs.h

@ -0,0 +1,13 @@
// pnclipr_hs: per element of P_NARROW_RD_RS1_LOOP(16, 32): rounding right shift of p_rs1
// by the low 5 bits of P_FIELD(RS2, 0, 16), then P_SAT to 16 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
uint32_t shamt = P_FIELD(RS2, 0, 16) & 0x1F;
sreg_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
sreg_t shifted = (sreg_t)p_rs1 >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_SAT(16, result);
})

13
riscv/insns/pnclipri_b.h

@ -0,0 +1,13 @@
// pnclipri_b: per element of P_NARROW_RD_RS1_LOOP(8, 16): rounding right shift of p_rs1
// by the immediate insn.shamth(), then P_SAT to 8 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
uint16_t shamt = insn.shamth();
sreg_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
sreg_t shifted = (sreg_t)p_rs1 >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_SAT(8, result);
})

13
riscv/insns/pnclipri_h.h

@ -0,0 +1,13 @@
// pnclipri_h: per element of P_NARROW_RD_RS1_LOOP(16, 32): rounding right shift of p_rs1
// by the immediate insn.shamtw(), then P_SAT to 16 bits.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
uint32_t shamt = insn.shamtw();
sreg_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
sreg_t shifted = (sreg_t)p_rs1 >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_SAT(16, result);
})

13
riscv/insns/pnclipriu_b.h

@ -0,0 +1,13 @@
// pnclipriu_b: per element of P_NARROW_RD_RS1_ULOOP(8, 16): rounding right shift of
// p_rs1 by the immediate insn.shamth(), then P_USAT_FULL to 8 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(8, 16, {
uint32_t shamt = insn.shamth();
uint32_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
uint32_t shifted = p_rs1 >> shamt;
// round bit = last bit shifted out
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_USAT_FULL(8, (sreg_t)result);
})

13
riscv/insns/pnclipriu_h.h

@ -0,0 +1,13 @@
// pnclipriu_h: per element of P_NARROW_RD_RS1_ULOOP(16, 32): rounding right shift of
// p_rs1 by the immediate insn.shamtw(), then P_USAT_FULL to 16 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(16, 32, {
uint32_t shamt = insn.shamtw();
uint32_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
uint32_t shifted = p_rs1 >> shamt;
// round bit = last bit shifted out
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_USAT_FULL(16, (sreg_t)result);
})

13
riscv/insns/pnclipru_bs.h

@ -0,0 +1,13 @@
// pnclipru_bs: per element of P_NARROW_RD_RS1_ULOOP(8, 16): rounding right shift of
// p_rs1 by the low 4 bits of P_UFIELD(RS2, 0, 8), then P_USAT_FULL to 8 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(8, 16, {
uint32_t shamt = P_UFIELD(RS2, 0, 8) & 0xF;
uint32_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
uint32_t shifted = p_rs1 >> shamt;
// round bit = last bit shifted out
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_USAT_FULL(8, (sreg_t)result);
})

13
riscv/insns/pnclipru_hs.h

@ -0,0 +1,13 @@
// pnclipru_hs: per element of P_NARROW_RD_RS1_ULOOP(16, 32): rounding right shift of
// p_rs1 by the low 5 bits of P_UFIELD(RS2, 0, 16), then P_USAT_FULL to 16 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(16, 32, {
uint32_t shamt = P_UFIELD(RS2, 0, 16) & 0x1F;
uint32_t result;
if (shamt == 0) {
// nothing is shifted out, so there is no round bit
result = p_rs1;
} else {
uint32_t shifted = p_rs1 >> shamt;
// round bit = last bit shifted out
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1;
result = shifted + roundbit;
}
p_rd = P_USAT_FULL(16, (sreg_t)result);
})

4
riscv/insns/pnclipu_bs.h

@ -0,0 +1,4 @@
// pnclipu_bs: per element of P_NARROW_RD_RS1_ULOOP(8, 16): shift p_rs1 right by the low
// 4 bits of P_UFIELD(RS2, 0, 8), then P_USAT_FULL to 8 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(8, 16, {
p_rd = P_USAT_FULL(8, (sreg_t)(p_rs1 >> (P_UFIELD(RS2, 0, 8) & 0xF)));
})

4
riscv/insns/pnclipu_hs.h

@ -0,0 +1,4 @@
// pnclipu_hs: per element of P_NARROW_RD_RS1_ULOOP(16, 32): shift p_rs1 right by the low
// 5 bits of P_UFIELD(RS2, 0, 16), then P_USAT_FULL to 16 bits.
require_rv32;
P_NARROW_RD_RS1_ULOOP(16, 32, {
p_rd = P_USAT_FULL(16, (sreg_t)(p_rs1 >> (P_UFIELD(RS2, 0, 16) & 0X1F)));
})

4
riscv/insns/pnsra_bs.h

@ -0,0 +1,4 @@
// pnsra_bs: per element of P_NARROW_RD_RS1_LOOP(8, 16): p_rd = p_rs1 shifted right by
// the low 4 bits of P_FIELD(RS2, 0, 8); no saturation is applied.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
p_rd = p_rs1 >> (P_FIELD(RS2, 0, 8) & 0xF);
})

4
riscv/insns/pnsra_hs.h

@ -0,0 +1,4 @@
// pnsra_hs: per element of P_NARROW_RD_RS1_LOOP(16, 32): p_rd = p_rs1 shifted right by
// the low 5 bits of P_FIELD(RS2, 0, 16); no saturation is applied.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
p_rd = p_rs1 >> (P_FIELD(RS2, 0, 16) & 0x1F);
})

4
riscv/insns/pnsrai_b.h

@ -0,0 +1,4 @@
// pnsrai_b: per element of P_NARROW_RD_RS1_LOOP(8, 16): p_rd = p_rs1 shifted right by
// the immediate insn.shamth(); no saturation is applied.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
p_rd = p_rs1 >> insn.shamth();
})

4
riscv/insns/pnsrai_h.h

@ -0,0 +1,4 @@
// pnsrai_h: per element of P_NARROW_RD_RS1_LOOP(16, 32): p_rd = p_rs1 shifted right by
// the immediate insn.shamtw(); no saturation is applied.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
p_rd = p_rs1 >> insn.shamtw();
})

12
riscv/insns/pnsrar_bs.h

@ -0,0 +1,12 @@
// pnsrar_bs: per element of P_NARROW_RD_RS1_LOOP(8, 16): rounding right shift of p_rs1
// by the low 4 bits of P_FIELD(RS2, 0, 8); the result is masked to 8 bits, not saturated.
require_rv32;
P_NARROW_RD_RS1_LOOP(8, 16, {
uint32_t shamt = P_FIELD(RS2, 0, 8) & 0xF;
if (shamt != 0) {
sreg_t val = (sreg_t)p_rs1;
sreg_t shifted = val >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = (val >> (shamt - 1)) & 1;
p_rd = (shifted + roundbit) & 0xFF;
} else {
// zero shift: no rounding, just keep the low byte
p_rd = p_rs1 & 0xFF;
}
})

12
riscv/insns/pnsrar_hs.h

@ -0,0 +1,12 @@
// pnsrar_hs: per element of P_NARROW_RD_RS1_LOOP(16, 32): rounding right shift of p_rs1
// by the low 5 bits of P_FIELD(RS2, 0, 16); the result is masked to 16 bits, not saturated.
require_rv32;
P_NARROW_RD_RS1_LOOP(16, 32, {
uint32_t shamt = P_FIELD(RS2, 0, 16) & 0x1F;
if (shamt != 0) {
sreg_t val = (sreg_t)p_rs1;
sreg_t shifted = val >> shamt;
// round bit = last bit shifted out
sreg_t roundbit = (val >> (shamt - 1)) & 1;
p_rd = (shifted + roundbit) & 0xFFFF;
} else {
// zero shift: no rounding, just keep the low halfword
p_rd = p_rs1 & 0xFFFF;
}
})

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save