Browse Source
This reverts commit 1a5b2d9dda,
which is buggy (the vs1 argument is being sign-extended, whereas vmulhsu must treat it as unsigned).
pull/870/head
2 changed files with 72 additions and 6 deletions
@ -1,5 +1,38 @@ |
|||||
// vmulhsu.vv vd, vs2, vs1
|
// vmulhsu.vv vd, vs2, vs1
|
||||
VI_VV_LOOP |
VI_CHECK_SSS(true); |
||||
({ |
VI_LOOP_BASE |
||||
|
switch(sew) { |
||||
|
case e8: { |
||||
|
auto &vd = P.VU.elt<int8_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int8_t>(rs2_num, i); |
||||
|
auto vs1 = P.VU.elt<uint8_t>(rs1_num, i); |
||||
|
|
||||
|
vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
case e16: { |
||||
|
auto &vd = P.VU.elt<int16_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int16_t>(rs2_num, i); |
||||
|
auto vs1 = P.VU.elt<uint16_t>(rs1_num, i); |
||||
|
|
||||
|
vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
case e32: { |
||||
|
auto &vd = P.VU.elt<int32_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int32_t>(rs2_num, i); |
||||
|
auto vs1 = P.VU.elt<uint32_t>(rs1_num, i); |
||||
|
|
||||
|
vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
default: { |
||||
|
auto &vd = P.VU.elt<int64_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int64_t>(rs2_num, i); |
||||
|
auto vs1 = P.VU.elt<uint64_t>(rs1_num, i); |
||||
|
|
||||
vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; |
vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; |
||||
}) |
break; |
||||
|
} |
||||
|
} |
||||
|
VI_LOOP_END |
||||
|
|||||
@ -1,5 +1,38 @@ |
|||||
// vmulhsu.vx vd, vs2, rs1
|
// vmulhsu.vx vd, vs2, rs1
|
||||
VI_VX_LOOP |
VI_CHECK_SSS(false); |
||||
({ |
VI_LOOP_BASE |
||||
|
switch(sew) { |
||||
|
case e8: { |
||||
|
auto &vd = P.VU.elt<int8_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int8_t>(rs2_num, i); |
||||
|
uint8_t rs1 = RS1; |
||||
|
|
||||
|
vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
case e16: { |
||||
|
auto &vd = P.VU.elt<int16_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int16_t>(rs2_num, i); |
||||
|
uint16_t rs1 = RS1; |
||||
|
|
||||
|
vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
case e32: { |
||||
|
auto &vd = P.VU.elt<int32_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int32_t>(rs2_num, i); |
||||
|
uint32_t rs1 = RS1; |
||||
|
|
||||
|
vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew; |
||||
|
break; |
||||
|
} |
||||
|
default: { |
||||
|
auto &vd = P.VU.elt<int64_t>(rd_num, i, true); |
||||
|
auto vs2 = P.VU.elt<int64_t>(rs2_num, i); |
||||
|
uint64_t rs1 = RS1; |
||||
|
|
||||
vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; |
vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; |
||||
}) |
break; |
||||
|
} |
||||
|
} |
||||
|
VI_LOOP_END |
||||
|
|||||
Loading…
Reference in new issue