Browse Source
Add the following categories: * packed add/sub/pli instructions * packed cross instructions * packed absolute instructions * packed accumulation instructions * packed sign-extend and saturating instructions * packed shift instructions * packed compare instructions * packed pack instructions * packed basic arithmetic and data-move * packed multiply instructions * packed accumulate instructions * packed multiply-add instructions * RD-only element-wise register-pair
pull/2246/head
committed by
Chih-Min Chao
212 changed files with 1516 additions and 2 deletions
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((RS1 + RS2)>>1); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((0ULL + (uint32_t)RS1 + (uint32_t)RS2)>>1); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
reg_t s1 = RS1; |
||||
|
reg_t result = (int64_t(s1) < 0) ? -s1 : s1; |
||||
|
WRITE_RD(sext_xlen(result)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((RS1 - RS2)>>1); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((0ULL + (uint32_t)RS1 - (uint32_t)RS2)>>1); |
||||
@ -0,0 +1,10 @@ |
|||||
|
require_extension('P'); |
||||
|
reg_t x = xlen - 1; |
||||
|
reg_t msb = (RS1 >> (xlen - 1)) & 1; |
||||
|
for (int i = 0; i < xlen - 1; i++) { |
||||
|
if (msb != ((RS1 >> (xlen - i - 2)) & 1)) { |
||||
|
x = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
WRITE_RD(sext_xlen(x)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + sext(RS1, 16) * sext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + sext(RS1, 16) * sext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + sext(RS1 >> 16, 16) * sext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + sext(RS1, 16) * zext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + sext(RS1 >> 16, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + zext(RS1, 16) * zext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + zext(RS1, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + zext(RS1 >> 16, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,2 @@ |
|||||
|
require_extension('P'); |
||||
|
WRITE_RD((RS2 & RD) | (RS1 & ~RD)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * sext32(RS2); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * sext32(P_FIELD(RS2, 0, 16)); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * sext32(P_FIELD(RS2, 1, 16)); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * reg_t((uint32_t)RS2); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * (uint32_t)P_FIELD(RS2, 0, 16); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * (uint32_t)P_FIELD(RS2, 1, 16); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
uint64_t mres = reg_t((uint32_t)RS1) * reg_t((uint32_t)RS2); |
||||
|
WRITE_RD(RD + (mres >> 32)); |
||||
@ -0,0 +1,5 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * sext32(RS2); |
||||
|
int32_t round = ((mres >> 31) + 1) >> 1; |
||||
|
WRITE_RD(RD + round); |
||||
@ -0,0 +1,5 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext(RS1,64) * reg_t((uint32_t)RS2); |
||||
|
int32_t round = ((mres >> 31) + 1) >> 1; |
||||
|
WRITE_RD(RD + round); |
||||
@ -0,0 +1,5 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
reg_t mres = reg_t((uint32_t)RS1) * reg_t((uint32_t)RS2); |
||||
|
uint32_t round = ((mres >> 31) + 1) >> 1; |
||||
|
WRITE_RD(RD + round); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 0, 16) * P_FIELD(RS2, 0, 16)) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 0, 16) * P_FIELD(RS2, 1, 16)) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 1, 16) * P_FIELD(RS2, 1, 16)) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 0, 16) * P_FIELD(RS2, 0, 16) + 0x4000) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 0, 16) * P_FIELD(RS2, 1, 16) + 0x4000) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RD + (((int32_t)P_FIELD(RS1, 1, 16) * P_FIELD(RS2, 1, 16) + 0x4000) >> 15)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(RS1 == RS2 ? -1 : 0); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((int32_t)RS1 < (int32_t)RS2 ? -1 : 0); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((uint32_t)RS1 < (uint32_t)RS2 ? -1 : 0); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(sext(RS1, 16) * sext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(sext(RS1, 16) * sext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(sext(RS1 >> 16, 16) * sext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
int64_t mres = sext(RS1,64) * sext(P_FIELD(RS2, 0, 16),64); |
||||
|
WRITE_RD(mres>>32); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
int64_t mres = sext(RS1,64) * sext(P_FIELD(RS2, 1, 16),64); |
||||
|
WRITE_RD(mres>>32); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
int64_t mres = sext(RS1,64) * sext(RS2,64); |
||||
|
WRITE_RD(((mres >> 31) + 1) >> 1); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext(RS1,64) * reg_t((uint32_t)RS2); |
||||
|
WRITE_RD(((mres >> 31) + 1) >> 1); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = reg_t((uint32_t)RS1) * reg_t((uint32_t)RS2); |
||||
|
WRITE_RD(((mres >> 31) + 1) >> 1); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * (uint32_t)P_FIELD(RS2, 0, 16); |
||||
|
WRITE_RD(mres >> 32); |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t mres = sext32(RS1) * (uint32_t)P_FIELD(RS2, 1, 16); |
||||
|
WRITE_RD(mres >> 32); |
||||
@ -0,0 +1,7 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) { |
||||
|
WRITE_RD((RS1 * RS2) >> 31); |
||||
|
} else { |
||||
|
WRITE_RD(INT32_MAX); |
||||
|
} |
||||
@ -0,0 +1,7 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
if ((RS1 != (reg_t)INT32_MIN) || (RS2 != (reg_t)INT32_MIN)) { |
||||
|
WRITE_RD((((RS1 * RS2) >> 30) + 1) >> 1); |
||||
|
} else { |
||||
|
WRITE_RD(INT32_MAX); |
||||
|
} |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(sext(RS1, 16) * zext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(sext(RS1 >> 16, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(zext(RS1, 16) * zext(RS2, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(zext(RS1, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(zext(RS1 >> 16, 16) * zext(RS2 >> 16, 16)); |
||||
@ -0,0 +1,2 @@ |
|||||
|
require_extension('P'); |
||||
|
WRITE_RD((RS1 & RS2) | (RD & ~RS2)); |
||||
@ -0,0 +1,2 @@ |
|||||
|
require_extension('P'); |
||||
|
WRITE_RD((RD & RS2) | (RS1 & ~RS2)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(8,8,8) (macro defined elsewhere; presumably
// run once per 8-bit element): p_rd = (p_rs1 + p_rs2) >> 1.
P_RD_RS1_RS2_LOOP(8,8,8, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(16,16,16) (macro defined elsewhere;
// presumably run once per 16-bit element): p_rd = (p_rs1 + p_rs2) >> 1.
P_RD_RS1_RS2_LOOP(16,16,16, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_ULOOP(8,8,8) (macro defined elsewhere; presumably
// the unsigned-element variant): p_rd = (p_rs1 + p_rs2) >> 1.
P_RD_RS1_RS2_ULOOP(8,8,8, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_ULOOP(16,16,16) (macro defined elsewhere;
// presumably the unsigned-element variant): p_rd = (p_rs1 + p_rs2) >> 1.
P_RD_RS1_RS2_ULOOP(16,16,16, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
})
||||
@ -0,0 +1,5 @@ |
|||||
|
// Two bodies handed to P_CROSS_LOOP(16): the first computes a halved sum, the
// second a halved difference. Which element each body applies to, and how
// p_rs1/p_rs2 are paired, is decided by the macro (not visible here).
P_CROSS_LOOP(16, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
}, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
})
||||
@ -0,0 +1,4 @@ |
|||||
|
// Per-element absolute difference: the smaller operand is subtracted from the
// larger one using uint8_t casts, and the result is cast back to int8_t.
P_RD_RS1_RS2_LOOP(8, 8, 8, {
  if (p_rs1 < p_rs2) {
    p_rd = (int8_t)((uint8_t)p_rs2 - (uint8_t)p_rs1);
  } else {
    p_rd = (int8_t)((uint8_t)p_rs1 - (uint8_t)p_rs2);
  }
})
||||
@ -0,0 +1,4 @@ |
|||||
|
// Per-element absolute difference: the smaller operand is subtracted from the
// larger one using uint16_t casts, and the result is cast back to int16_t.
P_RD_RS1_RS2_LOOP(16, 16, 16, {
  if (p_rs1 < p_rs2) {
    p_rd = (int16_t)((uint16_t)p_rs2 - (uint16_t)p_rs1);
  } else {
    p_rd = (int16_t)((uint16_t)p_rs1 - (uint16_t)p_rs2);
  }
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Adds |p_rs1 - p_rs2| to the running result p_res on each step of
// P_REDUCTION_ULOOP(64, 8, true, false). The meaning of the two boolean
// arguments is defined by the macro and is not visible here.
P_REDUCTION_ULOOP(64, 8, true, false, {
  if (p_rs1 > p_rs2) {
    p_res += p_rs1 - p_rs2;
  } else {
    p_res += p_rs2 - p_rs1;
  }
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Adds |p_rs1 - p_rs2| to the running result p_res on each step of
// P_REDUCTION_ULOOP(64, 8, false, false). The meaning of the two boolean
// arguments is defined by the macro and is not visible here.
P_REDUCTION_ULOOP(64, 8, false, false, {
  if (p_rs1 > p_rs2) {
    p_res += p_rs1 - p_rs2;
  } else {
    p_res += p_rs2 - p_rs1;
  }
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Per-element absolute difference in the ULOOP variant: subtract the smaller
// operand from the larger one.
P_RD_RS1_RS2_ULOOP(8, 8, 8, {
  if (p_rs1 < p_rs2) {
    p_rd = p_rs2 - p_rs1;
  } else {
    p_rd = p_rs1 - p_rs2;
  }
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Per-element absolute difference in the ULOOP variant: subtract the smaller
// operand from the larger one.
P_RD_RS1_RS2_ULOOP(16, 16, 16, {
  if (p_rs1 < p_rs2) {
    p_rd = p_rs2 - p_rs1;
  } else {
    p_rd = p_rs1 - p_rs2;
  }
})
||||
@ -1,6 +1,7 @@ |
|||||
// RV32Zbb contains zext.h but not general pack
|
// RV32Zbb contains zext.h but not general pack
|
||||
require(((xlen == 32) && (insn.rs2() == 0) && p->extension_enabled(EXT_ZBB)) |
require(((xlen == 32) && (insn.rs2() == 0) && p->extension_enabled(EXT_ZBB)) |
||||
|| p->extension_enabled(EXT_ZBKB)); |
|| p->extension_enabled(EXT_ZBKB) |
||||
|
|| p->extension_enabled('P')); |
||||
reg_t lo = zext_xlen(RS1 << (xlen/2)) >> (xlen/2); |
reg_t lo = zext_xlen(RS1 << (xlen/2)) >> (xlen/2); |
||||
reg_t hi = zext_xlen(RS2 << (xlen/2)); |
reg_t hi = zext_xlen(RS2 << (xlen/2)); |
||||
WRITE_RD(sext_xlen(lo | hi)); |
WRITE_RD(sext_xlen(lo | hi)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(8,8,8): p_rd = p_rs1 + p_rs2.
P_RD_RS1_RS2_LOOP(8,8,8, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_LOOP(8, 8): every p_rs1 element is increased by the
// same addend, P_FIELD(RS2, 0, 8).
P_RD_RS1_LOOP(8, 8, {
  const auto addend = P_FIELD(RS2, 0, 8);
  p_rd = p_rs1 + addend;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(16, 16, 16): p_rd = p_rs1 + p_rs2.
P_RD_RS1_RS2_LOOP(16, 16, 16, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_LOOP(16, 16): every p_rs1 element is increased by
// the same addend, P_FIELD(RS2, 0, 16).
P_RD_RS1_LOOP(16, 16, {
  const auto addend = P_FIELD(RS2, 0, 16);
  p_rd = p_rs1 + addend;
})
||||
@ -0,0 +1,5 @@ |
|||||
|
// Two bodies handed to P_CROSS_LOOP(16): the first adds, the second subtracts.
// Which element each body applies to, and how p_rs1/p_rs2 are paired, is
// decided by the macro (not visible here).
P_CROSS_LOOP(16, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum;
}, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff;
})
||||
@ -0,0 +1,5 @@ |
|||||
|
// Two bodies handed to P_CROSS_LOOP(16): the first computes a halved
// difference, the second a halved sum. Element selection and operand pairing
// are decided by the macro (not visible here).
P_CROSS_LOOP(16, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
}, {
  const auto elem_sum = p_rs1 + p_rs2;
  p_rd = elem_sum >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(8,8,8): p_rd = (p_rs1 - p_rs2) >> 1.
P_RD_RS1_RS2_LOOP(8,8,8, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_LOOP(16,16,16): p_rd = (p_rs1 - p_rs2) >> 1.
P_RD_RS1_RS2_LOOP(16,16,16, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_ULOOP(8,8,8): p_rd = (p_rs1 - p_rs2) >> 1.
P_RD_RS1_RS2_ULOOP(8,8,8, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_RS1_RS2_ULOOP(16,16,16): p_rd = (p_rs1 - p_rs2) >> 1.
P_RD_RS1_RS2_ULOOP(16,16,16, {
  const auto elem_diff = p_rs1 - p_rs2;
  p_rd = elem_diff >> 1;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_LOOP(8): each destination element receives the value
// returned by insn.p_imm8().
P_RD_LOOP(8, {
  const auto imm = insn.p_imm8();
  p_rd = imm;
})
||||
@ -0,0 +1,4 @@ |
|||||
|
require_rv32; |
||||
|
P_RD_DW_LOOP(8, { |
||||
|
p_rd = insn.p_imm8(); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
require_rv32; |
||||
|
P_RD_DW_LOOP(16, { |
||||
|
p_rd = (insn.p_imm10csl() & 0x200) ? (0xfc00 | insn.p_imm10csl()) : insn.p_imm10csl(); |
||||
|
}) |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each 16-bit destination element receives insn.p_imm10csl() with bit 9
// (0x200) replicated into bits 15..10 (0xfc00), i.e. a 10-to-16-bit sign
// extension.
P_RD_LOOP(16, {
  const auto imm = insn.p_imm10csl();
  if (imm & 0x200) {
    p_rd = 0xfc00 | imm;
  } else {
    p_rd = imm;
  }
})
||||
@ -0,0 +1,4 @@ |
|||||
|
require_rv32; |
||||
|
P_RD_DW_LOOP(16, { |
||||
|
p_rd = insn.p_imm10csr(); |
||||
|
}) |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Body handed to P_RD_LOOP(16); each destination element receives the value
// returned by insn.p_imm10csr().
P_RD_LOOP(16, {
  const auto imm = insn.p_imm10csr();
  p_rd = imm;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_LOOP(32, 16, false, false) adds p_rs1 * p_rs2 to the
// running result p_res. The boolean arguments are interpreted by the macro.
P_REDUCTION_LOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_CROSS_LOOP(32, 16, false, false) adds p_rs1 * p_rs2
// to the running result p_res. Operand pairing and the boolean arguments are
// handled by the macro.
P_REDUCTION_CROSS_LOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_LOOP(32, 16, true, false) adds p_rs1 * p_rs2 to the
// running result p_res. The boolean arguments are interpreted by the macro.
P_REDUCTION_LOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_CROSS_LOOP(32, 16, true, false) adds p_rs1 * p_rs2
// to the running result p_res. Operand pairing and the boolean arguments are
// handled by the macro.
P_REDUCTION_CROSS_LOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_SULOOP(32, 16, true, false) adds p_rs1 * p_rs2 to
// the running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_SULOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_ULOOP(32, 16, true, false) adds p_rs1 * p_rs2 to the
// running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_ULOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_SULOOP(32, 16, false, false) adds p_rs1 * p_rs2 to
// the running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_SULOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_ULOOP(32, 16, false, false) adds p_rs1 * p_rs2 to
// the running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_ULOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_LOOP(32, 16, false, true) adds p_rs1 * p_rs2 to the
// running result p_res. The boolean arguments are interpreted by the macro.
P_REDUCTION_LOOP(32, 16, false, true, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_CROSS_LOOP(32, 16, false, true) adds p_rs1 * p_rs2
// to the running result p_res. Operand pairing and the boolean arguments are
// handled by the macro.
P_REDUCTION_CROSS_LOOP(32, 16, false, true, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,6 @@ |
|||||
|
// Alternating reduction: the product is subtracted from p_res when the low bit
// of j is set and added otherwise. j is supplied by P_REDUCTION_LOOP
// (presumably the element index - confirm against the macro).
P_REDUCTION_LOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += elem_product;
  } else {
    p_res -= elem_product;
  }
})
||||
@ -0,0 +1,6 @@ |
|||||
|
// Alternating reduction: the product is subtracted from p_res when the low bit
// of j is set and added otherwise. j is supplied by P_REDUCTION_CROSS_LOOP
// (presumably the element index - confirm against the macro).
P_REDUCTION_CROSS_LOOP(32, 16, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += elem_product;
  } else {
    p_res -= elem_product;
  }
})
||||
@ -0,0 +1,6 @@ |
|||||
|
// Alternating reduction: the product is subtracted from p_res when the low bit
// of j is set and added otherwise. j is supplied by P_REDUCTION_LOOP
// (presumably the element index - confirm against the macro).
P_REDUCTION_LOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += elem_product;
  } else {
    p_res -= elem_product;
  }
})
||||
@ -0,0 +1,6 @@ |
|||||
|
// Alternating reduction: the product is subtracted from p_res when the low bit
// of j is set and added otherwise. j is supplied by P_REDUCTION_CROSS_LOOP
// (presumably the element index - confirm against the macro).
P_REDUCTION_CROSS_LOOP(32, 16, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  if ((j & 1) == 0) {
    p_res += elem_product;
  } else {
    p_res -= elem_product;
  }
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_LOOP(32, 8, false, false) adds p_rs1 * p_rs2 to the
// running result p_res. The boolean arguments are interpreted by the macro.
P_REDUCTION_LOOP(32, 8, false, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_LOOP(32, 8, true, false) adds p_rs1 * p_rs2 to the
// running result p_res. The boolean arguments are interpreted by the macro.
P_REDUCTION_LOOP(32, 8, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_SULOOP(32, 8, true, false) adds p_rs1 * p_rs2 to the
// running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_SULOOP(32, 8, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
@ -0,0 +1,3 @@ |
|||||
|
// Each step of P_REDUCTION_ULOOP(32, 8, true, false) adds p_rs1 * p_rs2 to the
// running result p_res. Operand signedness and the boolean arguments are
// handled by the macro.
P_REDUCTION_ULOOP(32, 8, true, false, {
  const auto elem_product = p_rs1 * p_rs2;
  p_res += elem_product;
})
||||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue