Browse Source
Add the following instructions to rv32: * packed add/sub register-pair instructions * packed cross register-pair instructions * packed absolute register-pair instructions * packed accumulation register-pair instructions * packed sign-extend and saturating * packed shift register-pair instructions * packed compare register-pair instructions * packed pack register-pair instructions * packed zip register-pair instructions * packed narrowing shift and clip register-pair * packed multiply register-pair instructions * packed accumulate register-pair instructions * packed multiply-add register-pair instructions (pull/2246/head)
committed by
Chih-Min Chao
231 changed files with 1797 additions and 15 deletions
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Writes the sum P_RS1_PAIR + P_RS2_PAIR through WRITE_P_RD_PAIR.
// NOTE(review): the *_PAIR macros are defined elsewhere; presumably 64-bit register pairs — confirm.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_P_RD_PAIR(P_RS1_PAIR + P_RS2_PAIR); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Accumulates into P_RD_PAIR the product (sreg_t)RS1 * (sreg_t)RS2 shifted right by 31.
// 0x40000000 is half of 1 << 31, added before the shift so the result is rounded rather than truncated.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_P_RD_PAIR(P_RD_PAIR + (((sreg_t)RS1*(sreg_t)RS2 + 0x40000000) >> 31)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Accumulates into P_RD_PAIR the product (sreg_t)RS1 * (sreg_t)RS2 shifted right by 31.
// No rounding constant is added, so the low 31 bits of the product are simply dropped.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_P_RD_PAIR(P_RD_PAIR + (((sreg_t)RS1*(sreg_t)RS2) >> 31)); |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Shifts P_RS1_PAIR right as sreg_t by the low 6 bits of RS2, passes the value
// through P_SAT(32, ...), and writes the int32_t result with WRITE_RD.
// NOTE(review): P_SAT is defined elsewhere; presumably clamps to the signed 32-bit range — confirm.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t tmp = (sreg_t)P_RS1_PAIR >> (RS2 & 0x3f); |
||||
|
int32_t result = P_SAT(32, tmp); |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Shifts P_RS1_PAIR right as sreg_t by insn.shamtd(), passes the value
// through P_SAT(32, ...), and writes the int32_t result with WRITE_RD.
// NOTE(review): P_SAT is defined elsewhere; presumably clamps to the signed 32-bit range — confirm.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t tmp = (sreg_t)P_RS1_PAIR >> insn.shamtd(); |
||||
|
int32_t result = P_SAT(32, tmp); |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Shifts P_RS1_PAIR right as reg_t by insn.shamtd(), passes the value cast to
// sreg_t through P_USAT_FULL(32, ...), and writes the uint32_t result with WRITE_RD.
// NOTE(review): if reg_t is 64-bit, a zero shift with the top bit set makes the
// (sreg_t) cast negative; confirm P_USAT_FULL (defined elsewhere) treats that as intended.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
reg_t tmp = (reg_t)P_RS1_PAIR >> insn.shamtd(); |
||||
|
uint32_t result = P_USAT_FULL(32, (sreg_t)tmp); |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as sreg_t) by the low 6 bits of RS2,
// followed by P_SAT(32, ...); the result is written with WRITE_RD.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_SAT is defined elsewhere; presumably clamps to the signed 32-bit range — confirm.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t val = (sreg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = RS2 & 0x3f; |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = P_SAT(32, val); |
||||
|
} else { |
||||
|
result = P_SAT(32, (val >> shamt) + ((val >> (shamt - 1)) & 1)); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as sreg_t) by insn.shamtd(),
// followed by P_SAT(32, ...); the result is written with WRITE_RD.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_SAT is defined elsewhere; presumably clamps to the signed 32-bit range — confirm.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t val = (sreg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = insn.shamtd(); |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = P_SAT(32, val); |
||||
|
} else { |
||||
|
result = P_SAT(32, (val >> shamt) + ((val >> (shamt - 1)) & 1)); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as reg_t) by insn.shamtd(); the value is
// cast to sreg_t, passed through P_USAT_FULL(32, ...), and written with WRITE_RD.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_USAT_FULL is defined elsewhere; confirm how it treats a value
// that becomes negative after the (sreg_t) cast in the zero-shift case.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
reg_t val = (reg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = insn.shamtd(); |
||||
|
reg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = P_USAT_FULL(32, (sreg_t)val); |
||||
|
} else { |
||||
|
result = P_USAT_FULL(32, (sreg_t)((val >> shamt) + ((val >> (shamt - 1)) & 1))); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as reg_t) by the low 6 bits of RS2; the value
// is cast to sreg_t, passed through P_USAT_FULL(32, ...), and written with WRITE_RD.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_USAT_FULL is defined elsewhere; confirm how it treats a value
// that becomes negative after the (sreg_t) cast in the zero-shift case.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
reg_t val = (reg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = RS2 & 0x3f; |
||||
|
reg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = P_USAT_FULL(32, (sreg_t)val); |
||||
|
} else { |
||||
|
result = P_USAT_FULL(32, (sreg_t)((val >> shamt) + ((val >> (shamt - 1)) & 1))); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Shifts P_RS1_PAIR right as reg_t by the low 6 bits of RS2, passes the value cast
// to sreg_t through P_USAT_FULL(32, ...), and writes the uint32_t result with WRITE_RD.
// NOTE(review): if reg_t is 64-bit, a zero shift with the top bit set makes the
// (sreg_t) cast negative; confirm P_USAT_FULL (defined elsewhere) treats that as intended.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
reg_t tmp = (reg_t)P_RS1_PAIR >> (RS2 & 0x3f); |
||||
|
uint32_t result = P_USAT_FULL(32, (sreg_t)tmp); |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Writes P_RS1_PAIR, shifted right as sreg_t by the low 6 bits of RS2, with WRITE_RD.
// No saturation or rounding is applied here.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((sreg_t)P_RS1_PAIR >> (RS2 & 0x3f)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Writes P_RS1_PAIR, shifted right as sreg_t by insn.shamtd(), with WRITE_RD.
// No saturation or rounding is applied here.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD((sreg_t)P_RS1_PAIR >> insn.shamtd()); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as sreg_t) by the low 6 bits of RS2,
// written with WRITE_RD without saturation.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t val = (sreg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = RS2 & 0x3f; |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = val; |
||||
|
} else { |
||||
|
result = (val >> shamt) + ((val >> (shamt - 1)) & 1); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Rounding right shift of P_RS1_PAIR (as sreg_t) by insn.shamtd(),
// written with WRITE_RD without saturation.
// The shamt == 0 branch exists because the rounding term reads bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
sreg_t val = (sreg_t)P_RS1_PAIR; |
||||
|
uint32_t shamt = insn.shamtd(); |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = val; |
||||
|
} else { |
||||
|
result = (val >> shamt) + ((val >> (shamt - 1)) & 1); |
||||
|
} |
||||
|
WRITE_RD(result); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Writes P_RS1_PAIR shifted right by the low 6 bits of RS2 with WRITE_RD.
// NOTE(review): there is no cast here, so the shift follows P_RS1_PAIR's own type
// (presumably unsigned, giving a logical shift) — confirm against the macro definition.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(P_RS1_PAIR >> (RS2 & 0x3f)); |
||||
@ -0,0 +1,3 @@ |
|||||
|
// Gated by require_extension('P') and require_rv32.
// Writes P_RS1_PAIR shifted right by insn.shamtd() with WRITE_RD.
// NOTE(review): there is no cast here, so the shift follows P_RS1_PAIR's own type
// (presumably unsigned, giving a logical shift) — confirm against the macro definition.
require_extension('P'); |
||||
|
require_rv32; |
||||
|
WRITE_RD(P_RS1_PAIR >> insn.shamtd()); |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 + p_rs2) >> 1, the sum halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8,8,8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8,8,8, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 + p_rs2) >> 1, the sum halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16,16,16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16,16,16, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: halves the sum of p_rs1 and p_rs2 with a right shift.
// Both operands are cast to sreg_t first, so the addition is not carried out in the lane type.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32,32,32, ...) is defined elsewhere; presumably runs the body
// per signed 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32,32,32, { |
||||
|
p_rd = ((sreg_t)p_rs1 + (sreg_t)p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 + p_rs2) >> 1, the sum halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8,8,8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8,8,8, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 + p_rs2) >> 1, the sum halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16,16,16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16,16,16, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: halves the sum of p_rs1 and p_rs2 with a right shift.
// Both operands are cast to reg_t first, so the addition is not carried out in the lane type.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(32,32,32, ...) is defined elsewhere; presumably runs the body
// per unsigned 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(32,32,32, { |
||||
|
p_rd = ((reg_t)p_rs1 + (reg_t)p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,6 @@ |
|||||
|
// Gated by require_rv32. Two element bodies are passed to P_CROSS_DW_LOOP(16, ...):
// the first halves the sum p_rs1 + p_rs2, the second halves the difference p_rs1 - p_rs2.
// NOTE(review): P_CROSS_DW_LOOP is defined elsewhere; which lanes receive which body and how
// the operands are crossed is not visible here — confirm.
require_rv32; |
||||
|
P_CROSS_DW_LOOP(16, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_rv32. Element body: absolute difference, always larger minus smaller.
// The operands are cast to uint8_t for the subtraction and the result back to int8_t,
// so only the low 8 bits of the difference are kept.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? (int8_t)((uint8_t)p_rs2 - (uint8_t)p_rs1) |
||||
|
: (int8_t)((uint8_t)p_rs1 - (uint8_t)p_rs2); |
||||
|
}) |
||||
@ -0,0 +1,5 @@ |
|||||
|
// Gated by require_rv32. Element body: absolute difference, always larger minus smaller.
// The operands are cast to uint16_t for the subtraction and the result back to int16_t,
// so only the low 16 bits of the difference are kept.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? (int16_t)((uint16_t)p_rs2 - (uint16_t)p_rs1) |
||||
|
: (int16_t)((uint16_t)p_rs1 - (uint16_t)p_rs2); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: absolute difference, always larger minus smaller.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs2 - p_rs1 : p_rs1 - p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: absolute difference, always larger minus smaller.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs2 - p_rs1 : p_rs1 - p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 + p_rs2 (plain add, no saturation in the body).
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = p_rs1 + p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: adds the same value, P_FIELD(RS2, 0, 8), to every p_rs1.
// NOTE(review): P_RD_RS1_DW_LOOP and P_FIELD are defined elsewhere; presumably the loop walks
// 8-bit lanes of a register pair and P_FIELD extracts 8-bit element 0 of RS2 — confirm.
require_rv32; |
||||
|
P_RD_RS1_DW_LOOP(8, 8, { |
||||
|
p_rd = p_rs1 + P_FIELD(RS2, 0, 8); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 + p_rs2 (plain add, no saturation in the body).
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = p_rs1 + p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: adds the same value, P_FIELD(RS2, 0, 16), to every p_rs1.
// NOTE(review): P_RD_RS1_DW_LOOP and P_FIELD are defined elsewhere; presumably the loop walks
// 16-bit lanes of a register pair and P_FIELD extracts 16-bit element 0 of RS2 — confirm.
require_rv32; |
||||
|
P_RD_RS1_DW_LOOP(16, 16, { |
||||
|
p_rd = p_rs1 + P_FIELD(RS2, 0, 16); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 + p_rs2 (plain add, no saturation in the body).
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, { |
||||
|
p_rd = p_rs1 + p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: adds the same value, P_FIELD(RS2, 0, 32), to every p_rs1.
// NOTE(review): P_RD_RS1_DW_LOOP and P_FIELD are defined elsewhere; presumably the loop walks
// 32-bit lanes of a register pair and P_FIELD extracts 32-bit element 0 of RS2 — confirm.
require_rv32; |
||||
|
P_RD_RS1_DW_LOOP(32, 32, { |
||||
|
p_rd = p_rs1 + P_FIELD(RS2, 0, 32); |
||||
|
}) |
||||
@ -0,0 +1,6 @@ |
|||||
|
// Gated by require_rv32. Two element bodies are passed to P_CROSS_DW_LOOP(16, ...):
// the first computes p_rs1 + p_rs2, the second computes p_rs1 - p_rs2.
// NOTE(review): P_CROSS_DW_LOOP is defined elsewhere; which lanes receive which body and how
// the operands are crossed is not visible here — confirm.
require_rv32; |
||||
|
P_CROSS_DW_LOOP(16, { |
||||
|
p_rd = p_rs1 + p_rs2; |
||||
|
}, { |
||||
|
p_rd = p_rs1 - p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,6 @@ |
|||||
|
// Gated by require_rv32. Two element bodies are passed to P_CROSS_DW_LOOP(16, ...):
// the first halves the difference p_rs1 - p_rs2, the second halves the sum p_rs1 + p_rs2.
// NOTE(review): P_CROSS_DW_LOOP is defined elsewhere; which lanes receive which body and how
// the operands are crossed is not visible here — confirm.
require_rv32; |
||||
|
P_CROSS_DW_LOOP(16, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}, { |
||||
|
p_rd = (p_rs1 + p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 - p_rs2) >> 1, the difference halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8,8,8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8,8,8, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 - p_rs2) >> 1, the difference halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16,16,16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16,16,16, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: halves the difference p_rs1 - p_rs2 with a right shift.
// Both operands are cast to sreg_t first, so the subtraction is not carried out in the lane type.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32,32,32, ...) is defined elsewhere; presumably runs the body
// per signed 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32,32,32, { |
||||
|
p_rd = ((sreg_t)p_rs1 - (sreg_t)p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 - p_rs2) >> 1, the difference halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8,8,8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8,8,8, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = (p_rs1 - p_rs2) >> 1, the difference halved by a right shift.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16,16,16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16,16,16, { |
||||
|
p_rd = (p_rs1 - p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: halves the difference p_rs1 - p_rs2 with a right shift.
// Both operands are cast to reg_t first, so the subtraction is not carried out in the lane type.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(32,32,32, ...) is defined elsewhere; presumably runs the body
// per unsigned 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(32,32,32, { |
||||
|
p_rd = ((reg_t)p_rs1 - (reg_t)p_rs2) >> 1; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds sext32(p_rs1) * sext32(p_rs2) to p_res.
// NOTE(review): P_WIDEN_REDUCTION_LOOP and sext32 are defined elsewhere; the meaning of the
// (32, 16, false, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_LOOP(32, 16, false, false, { |
||||
|
p_res += sext32(p_rs1) * sext32(p_rs2); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds the product p_rs1 * p_rs2 to p_res.
// NOTE(review): P_WIDEN_REDUCTION_CROSS_LOOP is defined elsewhere; how operands are crossed and
// the meaning of the (32, 16, false, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false, { |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds the product p_rs1 * p_rs2 to p_res.
// NOTE(review): P_WIDEN_REDUCTION_LOOP is defined elsewhere; the meaning of the
// (32, 16, true, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_LOOP(32, 16, true, false, { |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds the product p_rs1 * p_rs2 to p_res.
// NOTE(review): P_WIDEN_REDUCTION_CROSS_LOOP is defined elsewhere; how operands are crossed and
// the meaning of the (32, 16, true, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false, { |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds the product p_rs1 * p_rs2 to p_res.
// NOTE(review): P_WIDEN_REDUCTION_SULOOP is defined elsewhere; presumably a signed-by-unsigned
// variant, and the meaning of the (32, 16, true, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_SULOOP(32, 16, true, false, { |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds (uint32_t)p_rs1 * (uint32_t)p_rs2 to p_res;
// the casts make the multiplication an unsigned 32-bit one.
// NOTE(review): P_WIDEN_REDUCTION_ULOOP is defined elsewhere; the meaning of the
// (32, 16, true, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_ULOOP(32, 16, true, false, { |
||||
|
p_res += (uint32_t)p_rs1 * (uint32_t)p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds the product p_rs1 * p_rs2 to p_res.
// NOTE(review): P_WIDEN_REDUCTION_SULOOP is defined elsewhere; presumably a signed-by-unsigned
// variant, and the meaning of the (32, 16, false, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_SULOOP(32, 16, false, false, { |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Reduction body: adds (uint32_t)p_rs1 * (uint32_t)p_rs2 to p_res;
// the casts make the multiplication an unsigned 32-bit one.
// NOTE(review): P_WIDEN_REDUCTION_ULOOP is defined elsewhere; the meaning of the
// (32, 16, false, false) arguments is not visible here — confirm.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_ULOOP(32, 16, false, false, { |
||||
|
p_res += (uint32_t)p_rs1 * (uint32_t)p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,7 @@ |
|||||
|
// Gated by require_rv32. Reduction body: the product p_rs1 * p_rs2 is added to p_res when
// j is odd and subtracted when j is even.
// NOTE(review): j is not declared here; presumably the element index supplied by
// P_WIDEN_REDUCTION_LOOP (defined elsewhere) — confirm, along with the (32, 16, false, false) arguments.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_LOOP(32, 16, false, false, { |
||||
|
if (j & 1) |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
else |
||||
|
p_res -= p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,7 @@ |
|||||
|
// Gated by require_rv32. Reduction body: the product p_rs1 * p_rs2 is added to p_res when
// j is odd and subtracted when j is even.
// NOTE(review): j is not declared here; presumably the element index supplied by
// P_WIDEN_REDUCTION_CROSS_LOOP (defined elsewhere) — confirm, along with the operand crossing
// and the (32, 16, false, false) arguments.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, false, false, { |
||||
|
if (j & 1) |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
else |
||||
|
p_res -= p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,7 @@ |
|||||
|
// Gated by require_rv32. Reduction body: the product p_rs1 * p_rs2 is added to p_res when
// j is odd and subtracted when j is even.
// NOTE(review): j is not declared here; presumably the element index supplied by
// P_WIDEN_REDUCTION_LOOP (defined elsewhere) — confirm, along with the (32, 16, true, false) arguments.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_LOOP(32, 16, true, false, { |
||||
|
if (j & 1) |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
else |
||||
|
p_res -= p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,7 @@ |
|||||
|
// Gated by require_rv32. Reduction body: the product p_rs1 * p_rs2 is added to p_res when
// j is odd and subtracted when j is even.
// NOTE(review): j is not declared here; presumably the element index supplied by
// P_WIDEN_REDUCTION_CROSS_LOOP (defined elsewhere) — confirm, along with the operand crossing
// and the (32, 16, true, false) arguments.
require_rv32; |
||||
|
P_WIDEN_REDUCTION_CROSS_LOOP(32, 16, true, false, { |
||||
|
if (j & 1) |
||||
|
p_res += p_rs1 * p_rs2; |
||||
|
else |
||||
|
p_res -= p_rs1 * p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per signed 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the larger of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per unsigned 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 > p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per signed 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd receives the smaller of p_rs1 and p_rs2.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per unsigned 32-bit lane of a register pair and checks the 'P' extension — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? p_rs1 : p_rs2; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: accumulates into p_rd the product p_rs1 * p_rs2 shifted right by 15.
// 0x4000 is half of 1 << 15, added before the shift so the result is rounded rather than truncated.
// NOTE(review): P_WIDEN_RD_RS1_RS2_LOOP(16, 16, ...) is defined elsewhere; element and
// destination widths are presumed from the name and arguments — confirm.
require_rv32; |
||||
|
P_WIDEN_RD_RS1_RS2_LOOP(16, 16, { |
||||
|
p_rd += (p_rs1 * p_rs2 + 0x4000) >> 15; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: accumulates into p_rd the product p_rs1 * p_rs2 shifted right by 15.
// No rounding constant is added, so the low 15 bits of the product are simply dropped.
// NOTE(review): P_WIDEN_RD_RS1_RS2_LOOP(16, 16, ...) is defined elsewhere; element and
// destination widths are presumed from the name and arguments — confirm.
require_rv32; |
||||
|
P_WIDEN_RD_RS1_RS2_LOOP(16, 16, { |
||||
|
p_rd += (p_rs1 * p_rs2) >> 15; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 equals p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per 8-bit lane of a register pair, so -1 becomes an all-ones lane mask — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 == p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 equals p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per 16-bit lane of a register pair, so -1 becomes an all-ones lane mask — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 == p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 equals p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per 32-bit lane of a register pair, so -1 becomes an all-ones lane mask — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 == p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(8, 8, 8, ...) is defined elsewhere; presumably runs the body
// per signed 8-bit lane of a register pair, so the compare is signed — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(8, 8, 8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per signed 16-bit lane of a register pair, so the compare is signed — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_LOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per signed 32-bit lane of a register pair, so the compare is signed — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_LOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(8 ,8 ,8, ...) is defined elsewhere; presumably runs the body
// per unsigned 8-bit lane of a register pair, so the compare is unsigned — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(8 ,8 ,8, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, ...) is defined elsewhere; presumably runs the body
// per unsigned 16-bit lane of a register pair, so the compare is unsigned — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(16, 16, 16, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd is -1 when p_rs1 is less than p_rs2, otherwise 0.
// NOTE(review): P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, ...) is defined elsewhere; presumably runs the body
// per unsigned 32-bit lane of a register pair, so the compare is unsigned — confirm.
require_rv32; |
||||
|
P_RD_RS1_RS2_DW_ULOOP(32, 32, 32, { |
||||
|
p_rd = (p_rs1 < p_rs2) ? -1 : 0; |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by the low 4 bits of P_FIELD(RS2, 0, 8)
// and assigns P_SAT(8, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...), P_FIELD and P_SAT are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
p_rd = P_SAT(8, p_rs1 >> (P_FIELD(RS2, 0, 8) & 0xF)); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by the low 5 bits of P_FIELD(RS2, 0, 16)
// and assigns P_SAT(16, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...), P_FIELD and P_SAT are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
p_rd = P_SAT(16, p_rs1 >> (P_FIELD(RS2, 0, 16) & 0X1F)); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by insn.shamth() and assigns
// P_SAT(8, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...) and P_SAT are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
p_rd = P_SAT(8, p_rs1 >> insn.shamth()); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by insn.shamtw() and assigns
// P_SAT(16, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...) and P_SAT are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
p_rd = P_SAT(16, p_rs1 >> insn.shamtw()); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by insn.shamth() and assigns
// P_USAT(8, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(8, 16, ...) and P_USAT are defined elsewhere; presumably
// 16-bit unsigned source lanes are narrowed to 8-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(8, 16, { |
||||
|
p_rd = P_USAT(8, p_rs1 >> insn.shamth()); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by insn.shamtw() and assigns
// P_USAT(16, ...) of the result to p_rd.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(16, 32, ...) and P_USAT are defined elsewhere; presumably
// 32-bit unsigned source lanes are narrowed to 16-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(16, 32, { |
||||
|
p_rd = P_USAT(16, (p_rs1 >> insn.shamtw())); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 4 bits of
// P_FIELD(RS2, 0, 8), then p_rd = P_SAT(8, result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...), P_FIELD and P_SAT are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
uint16_t shamt = P_FIELD(RS2, 0, 8) & 0xF; |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
sreg_t shifted = (sreg_t)p_rs1 >> shamt; |
||||
|
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_SAT(8, result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 5 bits of
// P_FIELD(RS2, 0, 16), then p_rd = P_SAT(16, result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...), P_FIELD and P_SAT are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
uint32_t shamt = P_FIELD(RS2, 0, 16) & 0x1F; |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
sreg_t shifted = (sreg_t)p_rs1 >> shamt; |
||||
|
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_SAT(16, result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by insn.shamth(),
// then p_rd = P_SAT(8, result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...) and P_SAT are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
uint16_t shamt = insn.shamth(); |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
sreg_t shifted = (sreg_t)p_rs1 >> shamt; |
||||
|
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_SAT(8, result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by insn.shamtw(),
// then p_rd = P_SAT(16, result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...) and P_SAT are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results with signed saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
uint32_t shamt = insn.shamtw(); |
||||
|
sreg_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
sreg_t shifted = (sreg_t)p_rs1 >> shamt; |
||||
|
sreg_t roundbit = ((sreg_t)p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_SAT(16, result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by insn.shamth(), computed
// in uint32_t, then p_rd = P_USAT_FULL(8, (sreg_t)result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(8, 16, ...) and P_USAT_FULL are defined elsewhere; presumably
// 16-bit unsigned source lanes are narrowed to 8-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(8, 16, { |
||||
|
uint32_t shamt = insn.shamth(); |
||||
|
uint32_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
uint32_t shifted = p_rs1 >> shamt; |
||||
|
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_USAT_FULL(8, (sreg_t)result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by insn.shamtw(), computed
// in uint32_t, then p_rd = P_USAT_FULL(16, (sreg_t)result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(16, 32, ...) and P_USAT_FULL are defined elsewhere; presumably
// 32-bit unsigned source lanes are narrowed to 16-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(16, 32, { |
||||
|
uint32_t shamt = insn.shamtw(); |
||||
|
uint32_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
uint32_t shifted = p_rs1 >> shamt; |
||||
|
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_USAT_FULL(16, (sreg_t)result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 4 bits of
// P_UFIELD(RS2, 0, 8), computed in uint32_t, then p_rd = P_USAT_FULL(8, (sreg_t)result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(8, 16, ...), P_UFIELD and P_USAT_FULL are defined elsewhere;
// presumably 16-bit unsigned source lanes are narrowed to 8-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(8, 16, { |
||||
|
uint32_t shamt = P_UFIELD(RS2, 0, 8) & 0xF; |
||||
|
uint32_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
uint32_t shifted = p_rs1 >> shamt; |
||||
|
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_USAT_FULL(8, (sreg_t)result); |
||||
|
}) |
||||
@ -0,0 +1,13 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 5 bits of
// P_UFIELD(RS2, 0, 16), computed in uint32_t, then p_rd = P_USAT_FULL(16, (sreg_t)result).
// The shamt == 0 branch exists because the round bit is bit (shamt - 1) of p_rs1,
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(16, 32, ...), P_UFIELD and P_USAT_FULL are defined elsewhere;
// presumably 32-bit unsigned source lanes are narrowed to 16-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(16, 32, { |
||||
|
uint32_t shamt = P_UFIELD(RS2, 0, 16) & 0x1F; |
||||
|
uint32_t result; |
||||
|
if (shamt == 0) { |
||||
|
result = p_rs1; |
||||
|
} else { |
||||
|
uint32_t shifted = p_rs1 >> shamt; |
||||
|
uint32_t roundbit = (p_rs1 >> (shamt - 1)) & 1; |
||||
|
result = shifted + roundbit; |
||||
|
} |
||||
|
p_rd = P_USAT_FULL(16, (sreg_t)result); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by the low 4 bits of P_UFIELD(RS2, 0, 8),
// casts the result to sreg_t and assigns P_USAT_FULL(8, ...) of it to p_rd.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(8, 16, ...), P_UFIELD and P_USAT_FULL are defined elsewhere;
// presumably 16-bit unsigned source lanes are narrowed to 8-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(8, 16, { |
||||
|
p_rd = P_USAT_FULL(8, (sreg_t)(p_rs1 >> (P_UFIELD(RS2, 0, 8) & 0xF))); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: shifts p_rs1 right by the low 5 bits of P_UFIELD(RS2, 0, 16),
// casts the result to sreg_t and assigns P_USAT_FULL(16, ...) of it to p_rd.
// NOTE(review): P_NARROW_RD_RS1_ULOOP(16, 32, ...), P_UFIELD and P_USAT_FULL are defined elsewhere;
// presumably 32-bit unsigned source lanes are narrowed to 16-bit results with unsigned saturation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_ULOOP(16, 32, { |
||||
|
p_rd = P_USAT_FULL(16, (sreg_t)(p_rs1 >> (P_UFIELD(RS2, 0, 16) & 0X1F))); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 shifted right by the low 4 bits of
// P_FIELD(RS2, 0, 8); no saturation or rounding in the body.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...) and P_FIELD are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results by truncation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
p_rd = p_rs1 >> (P_FIELD(RS2, 0, 8) & 0xF); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 shifted right by the low 5 bits of
// P_FIELD(RS2, 0, 16); no saturation or rounding in the body.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...) and P_FIELD are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results by truncation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
p_rd = p_rs1 >> (P_FIELD(RS2, 0, 16) & 0x1F); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 shifted right by insn.shamth();
// no saturation or rounding in the body.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...) is defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results by truncation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
p_rd = p_rs1 >> insn.shamth(); |
||||
|
}) |
||||
@ -0,0 +1,4 @@ |
|||||
|
// Gated by require_rv32. Element body: p_rd = p_rs1 shifted right by insn.shamtw();
// no saturation or rounding in the body.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...) is defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results by truncation — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
p_rd = p_rs1 >> insn.shamtw(); |
||||
|
}) |
||||
@ -0,0 +1,12 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 4 bits of
// P_FIELD(RS2, 0, 8); the result is masked with 0xFF instead of being saturated.
// The zero-shift case is handled separately because the round bit is bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(8, 16, ...) and P_FIELD are defined elsewhere; presumably
// 16-bit signed source lanes are narrowed to 8-bit results — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(8, 16, { |
||||
|
uint32_t shamt = P_FIELD(RS2, 0, 8) & 0xF; |
||||
|
if (shamt != 0) { |
||||
|
sreg_t val = (sreg_t)p_rs1; |
||||
|
sreg_t shifted = val >> shamt; |
||||
|
sreg_t roundbit = (val >> (shamt - 1)) & 1; |
||||
|
p_rd = (shifted + roundbit) & 0xFF; |
||||
|
} else { |
||||
|
p_rd = p_rs1 & 0xFF; |
||||
|
} |
||||
|
}) |
||||
@ -0,0 +1,12 @@ |
|||||
|
// Gated by require_rv32. Element body: rounding right shift of p_rs1 by the low 5 bits of
// P_FIELD(RS2, 0, 16); the result is masked with 0xFFFF instead of being saturated.
// The zero-shift case is handled separately because the round bit is bit (shamt - 1),
// the last bit shifted out, which does not exist for a zero shift.
// NOTE(review): P_NARROW_RD_RS1_LOOP(16, 32, ...) and P_FIELD are defined elsewhere; presumably
// 32-bit signed source lanes are narrowed to 16-bit results — confirm.
require_rv32; |
||||
|
P_NARROW_RD_RS1_LOOP(16, 32, { |
||||
|
uint32_t shamt = P_FIELD(RS2, 0, 16) & 0x1F; |
||||
|
if (shamt != 0) { |
||||
|
sreg_t val = (sreg_t)p_rs1; |
||||
|
sreg_t shifted = val >> shamt; |
||||
|
sreg_t roundbit = (val >> (shamt - 1)) & 1; |
||||
|
p_rd = (shifted + roundbit) & 0xFFFF; |
||||
|
} else { |
||||
|
p_rd = p_rs1 & 0xFFFF; |
||||
|
} |
||||
|
}) |
||||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue