Browse Source

rvp: add packed instructions for rv64 - part2

Add the following categories

  * packed zip and unzip instructions
  * packed basic arithmetic and data-move
  * packed multiply instructions (_w suffix only)
  * packed accumulate instructions (_w suffix)
  * packed multiply-add instructions (_w/_h suffix)
  * packed sati/usati/srari for rv64

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
pull/2246/head
Jason 4 months ago
committed by Chih-Min Chao
parent
commit
d4a87e9caa
  1. 5
      riscv/insns/absw.h
  2. 11
      riscv/insns/clsw.h
  3. 3
      riscv/insns/macc_w00.h
  4. 3
      riscv/insns/macc_w01.h
  5. 3
      riscv/insns/macc_w11.h
  6. 3
      riscv/insns/maccsu_w00.h
  7. 3
      riscv/insns/maccsu_w11.h
  8. 3
      riscv/insns/maccu_w00.h
  9. 3
      riscv/insns/maccu_w01.h
  10. 3
      riscv/insns/maccu_w11.h
  11. 3
      riscv/insns/mqacc_w00.h
  12. 3
      riscv/insns/mqacc_w01.h
  13. 3
      riscv/insns/mqacc_w11.h
  14. 3
      riscv/insns/mqracc_w00.h
  15. 3
      riscv/insns/mqracc_w01.h
  16. 3
      riscv/insns/mqracc_w11.h
  17. 3
      riscv/insns/mul_w00.h
  18. 3
      riscv/insns/mul_w01.h
  19. 3
      riscv/insns/mul_w11.h
  20. 3
      riscv/insns/mulsu_w00.h
  21. 3
      riscv/insns/mulsu_w11.h
  22. 3
      riscv/insns/mulu_w00.h
  23. 3
      riscv/insns/mulu_w01.h
  24. 3
      riscv/insns/mulu_w11.h
  25. 5
      riscv/insns/pm2add_w.h
  26. 5
      riscv/insns/pm2add_wx.h
  27. 5
      riscv/insns/pm2adda_w.h
  28. 5
      riscv/insns/pm2adda_wx.h
  29. 5
      riscv/insns/pm2addasu_w.h
  30. 5
      riscv/insns/pm2addau_w.h
  31. 5
      riscv/insns/pm2addsu_w.h
  32. 5
      riscv/insns/pm2addu_w.h
  33. 8
      riscv/insns/pm2sub_w.h
  34. 8
      riscv/insns/pm2sub_wx.h
  35. 8
      riscv/insns/pm2suba_w.h
  36. 8
      riscv/insns/pm2suba_wx.h
  37. 5
      riscv/insns/pm4add_h.h
  38. 5
      riscv/insns/pm4adda_h.h
  39. 5
      riscv/insns/pm4addasu_h.h
  40. 5
      riscv/insns/pm4addau_h.h
  41. 5
      riscv/insns/pm4addsu_h.h
  42. 5
      riscv/insns/pm4addu_h.h
  43. 5
      riscv/insns/pmacc_w_h00.h
  44. 5
      riscv/insns/pmacc_w_h01.h
  45. 5
      riscv/insns/pmacc_w_h11.h
  46. 5
      riscv/insns/pmaccsu_w_h00.h
  47. 5
      riscv/insns/pmaccsu_w_h11.h
  48. 5
      riscv/insns/pmaccu_w_h00.h
  49. 5
      riscv/insns/pmaccu_w_h01.h
  50. 5
      riscv/insns/pmaccu_w_h11.h
  51. 6
      riscv/insns/pmhacc_w.h
  52. 6
      riscv/insns/pmhacc_w_h0.h
  53. 6
      riscv/insns/pmhacc_w_h1.h
  54. 6
      riscv/insns/pmhaccsu_w.h
  55. 6
      riscv/insns/pmhaccsu_w_h0.h
  56. 6
      riscv/insns/pmhaccsu_w_h1.h
  57. 6
      riscv/insns/pmhaccu_w.h
  58. 7
      riscv/insns/pmhracc_w.h
  59. 7
      riscv/insns/pmhraccsu_w.h
  60. 7
      riscv/insns/pmhraccu_w.h
  61. 5
      riscv/insns/pmq2add_w.h
  62. 5
      riscv/insns/pmq2adda_w.h
  63. 5
      riscv/insns/pmqacc_w_h00.h
  64. 5
      riscv/insns/pmqacc_w_h01.h
  65. 5
      riscv/insns/pmqacc_w_h11.h
  66. 5
      riscv/insns/pmqr2add_w.h
  67. 5
      riscv/insns/pmqr2adda_w.h
  68. 5
      riscv/insns/pmqracc_w_h00.h
  69. 5
      riscv/insns/pmqracc_w_h01.h
  70. 5
      riscv/insns/pmqracc_w_h11.h
  71. 5
      riscv/insns/pmul_w_h00.h
  72. 5
      riscv/insns/pmul_w_h01.h
  73. 5
      riscv/insns/pmul_w_h11.h
  74. 6
      riscv/insns/pmulh_w.h
  75. 6
      riscv/insns/pmulh_w_h0.h
  76. 6
      riscv/insns/pmulh_w_h1.h
  77. 6
      riscv/insns/pmulhr_w.h
  78. 6
      riscv/insns/pmulhrsu_w.h
  79. 6
      riscv/insns/pmulhru_w.h
  80. 6
      riscv/insns/pmulhsu_w.h
  81. 6
      riscv/insns/pmulhsu_w_h0.h
  82. 6
      riscv/insns/pmulhsu_w_h1.h
  83. 6
      riscv/insns/pmulhu_w.h
  84. 9
      riscv/insns/pmulq_w.h
  85. 9
      riscv/insns/pmulqr_w.h
  86. 5
      riscv/insns/pmulsu_w_h00.h
  87. 5
      riscv/insns/pmulsu_w_h11.h
  88. 5
      riscv/insns/pmulu_w_h00.h
  89. 5
      riscv/insns/pmulu_w_h01.h
  90. 5
      riscv/insns/pmulu_w_h11.h
  91. 3
      riscv/insns/sati.h
  92. 3
      riscv/insns/srari.h
  93. 1
      riscv/insns/unzip16hp.h
  94. 1
      riscv/insns/unzip16p.h
  95. 1
      riscv/insns/unzip8hp.h
  96. 1
      riscv/insns/unzip8p.h
  97. 3
      riscv/insns/usati.h
  98. 6
      riscv/insns/zip16hp.h
  99. 6
      riscv/insns/zip16p.h
  100. 5
      riscv/insns/zip8hp.h

5
riscv/insns/absw.h

@ -0,0 +1,5 @@
require_rv64;
require_extension('P');
reg_t s1_w = sext32(RS1);
reg_t result = (int32_t(s1_w) < 0) ? -s1_w : s1_w;
WRITE_RD(sext_xlen(result));

11
riscv/insns/clsw.h

@ -0,0 +1,11 @@
require_rv64;
require_extension('P');
reg_t x = 32 - 1;
reg_t msb = (RS1 >> (32 - 1)) & 1;
for (int i = 0; i < 32 - 1; i++) {
if (msb != ((RS1 >> (32 - i - 2)) & 1)) {
x = i;
break;
}
}
WRITE_RD(sext_xlen(x));

3
riscv/insns/macc_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + sext32(RS1) * sext32(RS2));

3
riscv/insns/macc_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + sext32(RS1) * sext32(RS2 >> 32));

3
riscv/insns/macc_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + sext32(RS1 >> 32) * sext32(RS2 >> 32));

3
riscv/insns/maccsu_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + sext32(RS1) * zext32(RS2));

3
riscv/insns/maccsu_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + sext32(RS1 >> 32) * zext32(RS2 >> 32));

3
riscv/insns/maccu_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + zext32(RS1) * zext32(RS2));

3
riscv/insns/maccu_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + zext32(RS1) * zext32(RS2 >> 32));

3
riscv/insns/maccu_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + zext32(RS1 >> 32) * zext32(RS2 >> 32));

3
riscv/insns/mqacc_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 0, 32) * P_FIELD(RS2, 0, 32)) >> 31));

3
riscv/insns/mqacc_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 0, 32) * P_FIELD(RS2, 1, 32)) >> 31));

3
riscv/insns/mqacc_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 1, 32) * P_FIELD(RS2, 1, 32)) >> 31));

3
riscv/insns/mqracc_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 0, 32) * P_FIELD(RS2, 0, 32) + 0x40000000) >> 31));

3
riscv/insns/mqracc_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 0, 32) * P_FIELD(RS2, 1, 32) + 0x40000000) >> 31));

3
riscv/insns/mqracc_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(RD + (((sreg_t)P_FIELD(RS1, 1, 32) * P_FIELD(RS2, 1, 32) + 0x40000000) >> 31));

3
riscv/insns/mul_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(sext32(RS1) * sext32(RS2));

3
riscv/insns/mul_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(sext32(RS1) * sext32(RS2 >> 32));

3
riscv/insns/mul_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(sext32(RS1 >> 32) * sext32(RS2 >> 32));

3
riscv/insns/mulsu_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(sext32(RS1) * zext32(RS2));

3
riscv/insns/mulsu_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(sext32(RS1 >> 32) * zext32(RS2 >> 32));

3
riscv/insns/mulu_w00.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(zext32(RS1) * zext32(RS2));

3
riscv/insns/mulu_w01.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(zext32(RS1) * zext32(RS2 >> 32));

3
riscv/insns/mulu_w11.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(zext32(RS1 >> 32) * zext32(RS2 >> 32));

5
riscv/insns/pm2add_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2add_w: every P_REDUCTION_LOOP step adds p_rs1 * p_rs2 (p_rs1 widened to
// sreg_t first) into the running p_res.
P_REDUCTION_LOOP(64, 32, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2add_wx.h

@ -0,0 +1,5 @@
require_rv64;
// pm2add_wx: same accumulation as pm2add_w but driven by
// P_REDUCTION_CROSS_LOOP.
// NOTE(review): the CROSS loop presumably pairs opposite elements of rs1 and
// rs2 — confirm against the macro.
P_REDUCTION_CROSS_LOOP(64, 32, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2adda_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2adda_w: every loop step adds p_rs1 * p_rs2 (p_rs1 widened to sreg_t)
// into p_res.
// NOTE(review): the third macro argument is true here, presumably making the
// loop accumulate on top of the old rd value — confirm.
P_REDUCTION_LOOP(64, 32, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2adda_wx.h

@ -0,0 +1,5 @@
require_rv64;
// pm2adda_wx: P_REDUCTION_CROSS_LOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) into p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_CROSS_LOOP(64, 32, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2addasu_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2addasu_w: P_REDUCTION_SULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) into p_res.
// NOTE(review): SULOOP presumably supplies signed p_rs1 and unsigned p_rs2,
// and the true flag presumably accumulates into rd — confirm.
P_REDUCTION_SULOOP(64, 32, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2addau_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2addau_w: P_REDUCTION_ULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to reg_t) into p_res.
// NOTE(review): the true flag presumably accumulates into rd — confirm.
P_REDUCTION_ULOOP(64, 32, true, false, {
  const auto term = (reg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2addsu_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2addsu_w: P_REDUCTION_SULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) into p_res.
P_REDUCTION_SULOOP(64, 32, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm2addu_w.h

@ -0,0 +1,5 @@
require_rv64;
// pm2addu_w: P_REDUCTION_ULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to reg_t) into p_res.
P_REDUCTION_ULOOP(64, 32, false, false, {
  const auto term = (reg_t)p_rs1 * p_rs2;
  p_res += term;
})

8
riscv/insns/pm2sub_w.h

@ -0,0 +1,8 @@
require_rv64;
// pm2sub_w: the product p_rs1 * p_rs2 (p_rs1 widened to sreg_t) is added to
// p_res when the loop variable j is even and subtracted when j is odd.
P_REDUCTION_LOOP(64, 32, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += term;
  } else {
    p_res -= term;
  }
})

8
riscv/insns/pm2sub_wx.h

@ -0,0 +1,8 @@
require_rv64;
// pm2sub_wx: P_REDUCTION_CROSS_LOOP variant; the product p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) is added for even j and subtracted for odd j.
P_REDUCTION_CROSS_LOOP(64, 32, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += term;
  } else {
    p_res -= term;
  }
})

8
riscv/insns/pm2suba_w.h

@ -0,0 +1,8 @@
require_rv64;
// pm2suba_w: the product p_rs1 * p_rs2 (p_rs1 widened to sreg_t) is added to
// p_res for even j and subtracted for odd j.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_LOOP(64, 32, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += term;
  } else {
    p_res -= term;
  }
})

8
riscv/insns/pm2suba_wx.h

@ -0,0 +1,8 @@
require_rv64;
// pm2suba_wx: P_REDUCTION_CROSS_LOOP variant; the product p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) is added for even j and subtracted for odd j.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_CROSS_LOOP(64, 32, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += term;
  } else {
    p_res -= term;
  }
})

5
riscv/insns/pm4add_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4add_h: every P_REDUCTION_LOOP step (second argument 16) adds
// p_rs1 * p_rs2, with p_rs1 widened to sreg_t, into p_res.
P_REDUCTION_LOOP(64, 16, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm4adda_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4adda_h: every loop step adds p_rs1 * p_rs2 (p_rs1 widened to sreg_t)
// into p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_LOOP(64, 16, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm4addasu_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4addasu_h: P_REDUCTION_SULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) into p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_SULOOP(64, 16, true, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm4addau_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4addau_h: P_REDUCTION_ULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to reg_t) into p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_ULOOP(64, 16, true, false, {
  const auto term = (reg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm4addsu_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4addsu_h: P_REDUCTION_SULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to sreg_t) into p_res.
P_REDUCTION_SULOOP(64, 16, false, false, {
  const auto term = (sreg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pm4addu_h.h

@ -0,0 +1,5 @@
require_rv64;
// pm4addu_h: P_REDUCTION_ULOOP variant; every step adds p_rs1 * p_rs2
// (p_rs1 widened to reg_t) into p_res.
P_REDUCTION_ULOOP(64, 16, false, false, {
  const auto term = (reg_t)p_rs1 * p_rs2;
  p_res += term;
})

5
riscv/insns/pmacc_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmacc_w_h00: per loop step, p_rd += sext32(p_rs1) * sext32(p_rs2).
// NOTE(review): the EE loop presumably feeds the even halfword of both
// sources — confirm against the macro.
P_RD_RS1_RS2_EE_LOOP(32, 16, 16, {
  const auto term = sext32(p_rs1) * sext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmacc_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmacc_w_h01: per loop step, p_rd += sext32(p_rs1) * sext32(p_rs2).
// NOTE(review): the EO loop presumably feeds the even halfword of rs1 and the
// odd halfword of rs2 — confirm against the macro.
P_RD_RS1_RS2_EO_LOOP(32, 16, 16, {
  const auto term = sext32(p_rs1) * sext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmacc_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmacc_w_h11: per loop step, p_rd += sext32(p_rs1) * sext32(p_rs2).
// NOTE(review): the OO loop presumably feeds the odd halfword of both
// sources — confirm against the macro.
P_RD_RS1_RS2_OO_LOOP(32, 16, 16, {
  const auto term = sext32(p_rs1) * sext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmaccsu_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmaccsu_w_h00: per loop step, p_rd += sext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EE_SULOOP(32, 16, 16, {
  const auto term = sext32(p_rs1) * zext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmaccsu_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmaccsu_w_h11: per loop step, p_rd += sext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_OO_SULOOP(32, 16, 16, {
  const auto term = sext32(p_rs1) * zext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmaccu_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmaccu_w_h00: per loop step, p_rd += zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EE_ULOOP(32, 16, 16, {
  const auto term = zext32(p_rs1) * zext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmaccu_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmaccu_w_h01: per loop step, p_rd += zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EO_ULOOP(32, 16, 16, {
  const auto term = zext32(p_rs1) * zext32(p_rs2);
  p_rd += term;
})

5
riscv/insns/pmaccu_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmaccu_w_h11: per loop step, p_rd += zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_OO_ULOOP(32, 16, 16, {
  const auto term = zext32(p_rs1) * zext32(p_rs2);
  p_rd += term;
})

6
riscv/insns/pmhacc_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmhacc_w: per loop step, form the sreg_t product of the sext() views of
// p_rs1 and p_rs2 and add the product shifted right by 32 to p_rd.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhacc_w_h0.h

@ -0,0 +1,6 @@
require_rv64;
// pmhacc_w_h0: per loop step, form the sreg_t product of the sext() views of
// p_rs1 and p_rs2 and add the product shifted right by 32 to p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_E_LOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhacc_w_h1.h

@ -0,0 +1,6 @@
require_rv64;
// pmhacc_w_h1: per loop step, form the sreg_t product of the sext() views of
// p_rs1 and p_rs2 and add the product shifted right by 32 to p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_O_LOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhaccsu_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmhaccsu_w: per loop step, multiply sext(p_rs1) by zext(p_rs2), store the
// product as sreg_t and add it, shifted right by 32, to p_rd.
P_RD_RS1_RS2_SULOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhaccsu_w_h0.h

@ -0,0 +1,6 @@
require_rv64;
// pmhaccsu_w_h0: per loop step, multiply sext(p_rs1) by zext(p_rs2), store
// the product as sreg_t and add it, shifted right by 32, to p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_E_SULOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhaccsu_w_h1.h

@ -0,0 +1,6 @@
require_rv64;
// pmhaccsu_w_h1: per loop step, multiply sext(p_rs1) by zext(p_rs2), store
// the product as sreg_t and add it, shifted right by 32, to p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_O_SULOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd += high;
})

6
riscv/insns/pmhaccu_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmhaccu_w: per loop step, multiply the zext() views of p_rs1 and p_rs2 as
// reg_t and add the product shifted right by 32 to p_rd.
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  const reg_t wide = zext(p_rs1, 64) * zext(p_rs2, 64);
  const reg_t high = wide >> 32;
  p_rd += high;
})

7
riscv/insns/pmhracc_w.h

@ -0,0 +1,7 @@
require_rv64;
// pmhracc_w: rounding form of the high-half multiply-accumulate. The product
// is shifted right by 31, incremented, then shifted right once more (i.e. the
// bit just below the kept part is used to round), truncated to int32_t and
// added to p_rd.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t with_guard_bit = wide >> 31;
  const int32_t rounded = (with_guard_bit + 1) >> 1;
  p_rd += rounded;
})

7
riscv/insns/pmhraccsu_w.h

@ -0,0 +1,7 @@
require_rv64;
// pmhraccsu_w: rounding high-half multiply-accumulate of sext(p_rs1) and
// zext(p_rs2). The product is shifted right by 31, incremented, shifted right
// once more, truncated to int32_t and added to p_rd.
P_RD_RS1_RS2_SULOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t with_guard_bit = wide >> 31;
  const int32_t rounded = (with_guard_bit + 1) >> 1;
  p_rd += rounded;
})

7
riscv/insns/pmhraccu_w.h

@ -0,0 +1,7 @@
require_rv64;
// pmhraccu_w: rounding high-half multiply-accumulate of the zext() views of
// p_rs1 and p_rs2. The reg_t product is shifted right by 31, incremented,
// shifted right once more, truncated to uint32_t and added to p_rd.
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  const reg_t wide = zext(p_rs1, 64) * zext(p_rs2, 64);
  const reg_t with_guard_bit = wide >> 31;
  const uint32_t rounded = (with_guard_bit + 1) >> 1;
  p_rd += rounded;
})

5
riscv/insns/pmq2add_w.h

@ -0,0 +1,5 @@
require_rv64;
// pmq2add_w: per loop step, p_rs1 * p_rs2 (p_rs1 widened to sreg_t) is
// shifted right by 31 and added to p_res.
P_REDUCTION_LOOP(64, 32, false, false, {
  const auto wide = (sreg_t)p_rs1 * p_rs2;
  p_res += wide >> 31;
})

5
riscv/insns/pmq2adda_w.h

@ -0,0 +1,5 @@
require_rv64;
// pmq2adda_w: per loop step, p_rs1 * p_rs2 (p_rs1 widened to sreg_t) is
// shifted right by 31 and added to p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_LOOP(64, 32, true, false, {
  const auto wide = (sreg_t)p_rs1 * p_rs2;
  p_res += wide >> 31;
})

5
riscv/insns/pmqacc_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmqacc_w_h00: per loop step, p_rd += (p_rs1 * p_rs2) >> 15.
P_RD_RS1_RS2_EE_LOOP(32, 16, 16, {
  const auto wide = p_rs1 * p_rs2;
  p_rd += wide >> 15;
})

5
riscv/insns/pmqacc_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmqacc_w_h01: per loop step, p_rd += (p_rs1 * p_rs2) >> 15.
P_RD_RS1_RS2_EO_LOOP(32, 16, 16, {
  const auto wide = p_rs1 * p_rs2;
  p_rd += wide >> 15;
})

5
riscv/insns/pmqacc_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmqacc_w_h11: per loop step, p_rd += (p_rs1 * p_rs2) >> 15.
P_RD_RS1_RS2_OO_LOOP(32, 16, 16, {
  const auto wide = p_rs1 * p_rs2;
  p_rd += wide >> 15;
})

5
riscv/insns/pmqr2add_w.h

@ -0,0 +1,5 @@
require_rv64;
// pmqr2add_w: rounding form — 0x40000000 (half of 1 << 31) is added to the
// sreg_t product before the >> 31, and the result is added to p_res.
P_REDUCTION_LOOP(64, 32, false, false, {
  const auto biased = (sreg_t)p_rs1 * p_rs2 + 0x40000000;
  p_res += biased >> 31;
})

5
riscv/insns/pmqr2adda_w.h

@ -0,0 +1,5 @@
require_rv64;
// pmqr2adda_w: rounding form — 0x40000000 (half of 1 << 31) is added to the
// sreg_t product before the >> 31, and the result is added to p_res.
// NOTE(review): third argument true presumably means "accumulate into rd" —
// confirm.
P_REDUCTION_LOOP(64, 32, true, false, {
  const auto biased = (sreg_t)p_rs1 * p_rs2 + 0x40000000;
  p_res += biased >> 31;
})

5
riscv/insns/pmqracc_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmqracc_w_h00: rounding form — 0x4000 (half of 1 << 15) is added to
// p_rs1 * p_rs2 before the >> 15, and the result is added to p_rd.
P_RD_RS1_RS2_EE_LOOP(32, 16, 16, {
  const auto biased = p_rs1 * p_rs2 + 0x4000;
  p_rd += biased >> 15;
})

5
riscv/insns/pmqracc_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmqracc_w_h01: rounding form — 0x4000 (half of 1 << 15) is added to
// p_rs1 * p_rs2 before the >> 15, and the result is added to p_rd.
P_RD_RS1_RS2_EO_LOOP(32, 16, 16, {
  const auto biased = p_rs1 * p_rs2 + 0x4000;
  p_rd += biased >> 15;
})

5
riscv/insns/pmqracc_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmqracc_w_h11: rounding form — 0x4000 (half of 1 << 15) is added to
// p_rs1 * p_rs2 before the >> 15, and the result is added to p_rd.
P_RD_RS1_RS2_OO_LOOP(32, 16, 16, {
  const auto biased = p_rs1 * p_rs2 + 0x4000;
  p_rd += biased >> 15;
})

5
riscv/insns/pmul_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmul_w_h00: per loop step, p_rd = sext32(p_rs1) * sext32(p_rs2).
P_RD_RS1_RS2_EE_LOOP(32, 16, 16, {
  const auto lhs = sext32(p_rs1);
  const auto rhs = sext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmul_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmul_w_h01: per loop step, p_rd = sext32(p_rs1) * sext32(p_rs2).
P_RD_RS1_RS2_EO_LOOP(32, 16, 16, {
  const auto lhs = sext32(p_rs1);
  const auto rhs = sext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmul_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmul_w_h11: per loop step, p_rd = sext32(p_rs1) * sext32(p_rs2).
P_RD_RS1_RS2_OO_LOOP(32, 16, 16, {
  const auto lhs = sext32(p_rs1);
  const auto rhs = sext32(p_rs2);
  p_rd = lhs * rhs;
})

6
riscv/insns/pmulh_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulh_w: per loop step, multiply the sext() views of p_rs1 and p_rs2 in
// int64_t and store the product shifted right by 32 in p_rd.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  const int64_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const int64_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulh_w_h0.h

@ -0,0 +1,6 @@
require_rv64;
// pmulh_w_h0: per loop step, multiply the sext() views of p_rs1 and p_rs2 in
// sreg_t and store the product shifted right by 32 in p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_E_LOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulh_w_h1.h

@ -0,0 +1,6 @@
require_rv64;
// pmulh_w_h1: per loop step, multiply the sext() views of p_rs1 and p_rs2 in
// sreg_t and store the product shifted right by 32 in p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_O_LOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulhr_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhr_w: rounding high-half multiply. The sreg_t product is shifted right
// by 31, incremented, then shifted right once more before being stored.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * sext(p_rs2, 64);
  const sreg_t rounded = ((wide >> 31) + 1) >> 1;
  p_rd = rounded;
})

6
riscv/insns/pmulhrsu_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhrsu_w: rounding high-half multiply of sext(p_rs1) and zext(p_rs2).
// The sreg_t product is shifted right by 31, incremented, then shifted right
// once more before being stored.
P_RD_RS1_RS2_SULOOP(32, 32, 32, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t rounded = ((wide >> 31) + 1) >> 1;
  p_rd = rounded;
})

6
riscv/insns/pmulhru_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhru_w: rounding high-half multiply of the zext() views of p_rs1 and
// p_rs2. The reg_t product is shifted right by 31, incremented, then shifted
// right once more before being stored.
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  const reg_t wide = zext(p_rs1, 64) * zext(p_rs2, 64);
  const reg_t rounded = ((wide >> 31) + 1) >> 1;
  p_rd = rounded;
})

6
riscv/insns/pmulhsu_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhsu_w: per loop step, multiply sext(p_rs1) by zext(p_rs2), hold the
// product as int64_t and store it shifted right by 32 in p_rd.
P_RD_RS1_RS2_SULOOP(32, 32, 32, {
  const int64_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const int64_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulhsu_w_h0.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhsu_w_h0: per loop step, multiply sext(p_rs1) by zext(p_rs2), hold the
// product as sreg_t and store it shifted right by 32 in p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_E_SULOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulhsu_w_h1.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhsu_w_h1: per loop step, multiply sext(p_rs1) by zext(p_rs2), hold the
// product as sreg_t and store it shifted right by 32 in p_rd.
// NOTE(review): the loop is declared (32, 32, 16), i.e. presumably a 16-bit
// rs2 element, yet the shift is 32 — confirm the shift amount against the spec.
P_RD_RS1_RS2_O_SULOOP(32, 32, 16, {
  const sreg_t wide = sext(p_rs1, 64) * zext(p_rs2, 64);
  const sreg_t high = wide >> 32;
  p_rd = high;
})

6
riscv/insns/pmulhu_w.h

@ -0,0 +1,6 @@
require_rv64;
// pmulhu_w: per loop step, multiply the zext() views of p_rs1 and p_rs2 in
// uint64_t and store the product shifted right by 32 in p_rd.
P_RD_RS1_RS2_ULOOP(32, 32, 32, {
  const uint64_t wide = zext(p_rs1, 64) * zext(p_rs2, 64);
  const uint64_t high = wide >> 32;
  p_rd = high;
})

9
riscv/insns/pmulq_w.h

@ -0,0 +1,9 @@
require_rv64;
// pmulq_w: per loop step, p_rd = (p_rs1 * p_rs2) >> 31 computed in int64_t.
// The single input pair whose result would not fit — both operands equal to
// INT32_MIN — is clamped to INT32_MAX instead.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 == INT32_MIN && p_rs2 == INT32_MIN) {
    p_rd = INT32_MAX;
  } else {
    const int64_t wide = (int64_t)p_rs1 * (int64_t)p_rs2;
    p_rd = wide >> 31;
  }
})

9
riscv/insns/pmulqr_w.h

@ -0,0 +1,9 @@
require_rv64;
// pmulqr_w: rounding form of pmulq_w. The int64_t product is shifted right by
// 30, incremented, then shifted right once more. When both operands equal
// INT32_MIN the result is clamped to INT32_MAX instead.
P_RD_RS1_RS2_LOOP(32, 32, 32, {
  if (p_rs1 == INT32_MIN && p_rs2 == INT32_MIN) {
    p_rd = INT32_MAX;
  } else {
    const int64_t wide = (int64_t)p_rs1 * (int64_t)p_rs2;
    p_rd = ((wide >> 30) + 1) >> 1;
  }
})

5
riscv/insns/pmulsu_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmulsu_w_h00: per loop step, p_rd = sext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EE_SULOOP(32, 16, 16, {
  const auto lhs = sext32(p_rs1);
  const auto rhs = zext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmulsu_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmulsu_w_h11: per loop step, p_rd = sext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_OO_SULOOP(32, 16, 16, {
  const auto lhs = sext32(p_rs1);
  const auto rhs = zext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmulu_w_h00.h

@ -0,0 +1,5 @@
require_rv64;
// pmulu_w_h00: per loop step, p_rd = zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EE_ULOOP(32, 16, 16, {
  const auto lhs = zext32(p_rs1);
  const auto rhs = zext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmulu_w_h01.h

@ -0,0 +1,5 @@
require_rv64;
// pmulu_w_h01: per loop step, p_rd = zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_EO_ULOOP(32, 16, 16, {
  const auto lhs = zext32(p_rs1);
  const auto rhs = zext32(p_rs2);
  p_rd = lhs * rhs;
})

5
riscv/insns/pmulu_w_h11.h

@ -0,0 +1,5 @@
require_rv64;
// pmulu_w_h11: per loop step, p_rd = zext32(p_rs1) * zext32(p_rs2).
P_RD_RS1_RS2_OO_ULOOP(32, 16, 16, {
  const auto lhs = zext32(p_rs1);
  const auto rhs = zext32(p_rs2);
  p_rd = lhs * rhs;
})

3
riscv/insns/sati.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(P_SAT(insn.shamtd() + 1, (sreg_t)RS1));

3
riscv/insns/srari.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(insn.shamtd() ? ((sext_xlen(RS1) >> insn.shamtd()) + ((sext_xlen(RS1) >> (insn.shamtd() - 1)) & 1)) : RS1);

1
riscv/insns/unzip16hp.h

@ -0,0 +1 @@
// unzip16hp: implemented entirely by the P_UNZIP helper macro, invoked with
// the arguments (16, 1).
// NOTE(review): presumably 16 is the element width in bits and 1 selects the
// "hp" variant (unzip16p passes 0) — confirm against the macro definition.
P_UNZIP(16, 1)

1
riscv/insns/unzip16p.h

@ -0,0 +1 @@
// unzip16p: implemented entirely by the P_UNZIP helper macro, invoked with
// the arguments (16, 0).
// NOTE(review): presumably 16 is the element width in bits and 0 selects the
// plain "p" variant (unzip16hp passes 1) — confirm against the macro definition.
P_UNZIP(16, 0)

1
riscv/insns/unzip8hp.h

@ -0,0 +1 @@
// unzip8hp: implemented entirely by the P_UNZIP helper macro, invoked with
// the arguments (8, 1).
// NOTE(review): presumably 8 is the element width in bits and 1 selects the
// "hp" variant (unzip8p passes 0) — confirm against the macro definition.
P_UNZIP(8, 1)

1
riscv/insns/unzip8p.h

@ -0,0 +1 @@
// unzip8p: implemented entirely by the P_UNZIP helper macro, invoked with
// the arguments (8, 0).
// NOTE(review): presumably 8 is the element width in bits and 0 selects the
// plain "p" variant (unzip8hp passes 1) — confirm against the macro definition.
P_UNZIP(8, 0)

3
riscv/insns/usati.h

@ -0,0 +1,3 @@
require_extension('P');
require_rv64;
WRITE_RD(P_USAT(insn.shamtd() + 1, (sreg_t)RS1));

6
riscv/insns/zip16hp.h

@ -0,0 +1,6 @@
require_extension('P');
require_rv64;
// zip16hp: destination elements alternate between the two sources — odd loop
// index i takes p_rs2, even i takes p_rs1.
// NOTE(review): the fourth macro argument (1 here, 0 in zip16p) presumably
// selects which half of the sources is interleaved — confirm.
P_RD_RS1_RS2_ZIP_LOOP(16, 16, 16, 1, {
  if (i % 2) {
    p_rd = p_rs2;
  } else {
    p_rd = p_rs1;
  }
})

6
riscv/insns/zip16p.h

@ -0,0 +1,6 @@
require_extension('P');
require_rv64;
// zip16p: destination elements alternate between the two sources — odd loop
// index i takes p_rs2, even i takes p_rs1.
// NOTE(review): the fourth macro argument (0 here, 1 in zip16hp) presumably
// selects which half of the sources is interleaved — confirm.
P_RD_RS1_RS2_ZIP_LOOP(16, 16, 16, 0, {
  if (i % 2) {
    p_rd = p_rs2;
  } else {
    p_rd = p_rs1;
  }
})

5
riscv/insns/zip8hp.h

@ -0,0 +1,5 @@
require_extension('P');
P_RD_RS1_RS2_ZIP_LOOP(8, 8, 8, 1, {
p_rd = i % 2 ? p_rs2 : p_rs1;
}
)

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save