Browse Source
This reverts commit 1a5b2d9dda,
which is buggy (the vs1 argument is being sign-extended).
pull/870/head
2 changed files with 72 additions and 6 deletions
@@ -1,5 +1,38 @@
|||
// vmulhsu.vv vd, vs2, vs1
|
|||
VI_VV_LOOP |
|||
({ |
|||
VI_CHECK_SSS(true); |
|||
VI_LOOP_BASE |
|||
switch(sew) { |
|||
case e8: { |
|||
auto &vd = P.VU.elt<int8_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int8_t>(rs2_num, i); |
|||
auto vs1 = P.VU.elt<uint8_t>(rs1_num, i); |
|||
|
|||
vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew; |
|||
break; |
|||
} |
|||
case e16: { |
|||
auto &vd = P.VU.elt<int16_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int16_t>(rs2_num, i); |
|||
auto vs1 = P.VU.elt<uint16_t>(rs1_num, i); |
|||
|
|||
vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew; |
|||
break; |
|||
} |
|||
case e32: { |
|||
auto &vd = P.VU.elt<int32_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int32_t>(rs2_num, i); |
|||
auto vs1 = P.VU.elt<uint32_t>(rs1_num, i); |
|||
|
|||
vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew; |
|||
break; |
|||
} |
|||
default: { |
|||
auto &vd = P.VU.elt<int64_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int64_t>(rs2_num, i); |
|||
auto vs1 = P.VU.elt<uint64_t>(rs1_num, i); |
|||
|
|||
vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; |
|||
}) |
|||
break; |
|||
} |
|||
} |
|||
VI_LOOP_END |
|||
|
|||
@@ -1,5 +1,38 @@
|||
// vmulhsu.vx vd, vs2, rs1
|
|||
VI_VX_LOOP |
|||
({ |
|||
VI_CHECK_SSS(false); |
|||
VI_LOOP_BASE |
|||
switch(sew) { |
|||
case e8: { |
|||
auto &vd = P.VU.elt<int8_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int8_t>(rs2_num, i); |
|||
uint8_t rs1 = RS1; |
|||
|
|||
vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew; |
|||
break; |
|||
} |
|||
case e16: { |
|||
auto &vd = P.VU.elt<int16_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int16_t>(rs2_num, i); |
|||
uint16_t rs1 = RS1; |
|||
|
|||
vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew; |
|||
break; |
|||
} |
|||
case e32: { |
|||
auto &vd = P.VU.elt<int32_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int32_t>(rs2_num, i); |
|||
uint32_t rs1 = RS1; |
|||
|
|||
vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew; |
|||
break; |
|||
} |
|||
default: { |
|||
auto &vd = P.VU.elt<int64_t>(rd_num, i, true); |
|||
auto vs2 = P.VU.elt<int64_t>(rs2_num, i); |
|||
uint64_t rs1 = RS1; |
|||
|
|||
vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; |
|||
}) |
|||
break; |
|||
} |
|||
} |
|||
VI_LOOP_END |
|||
|
|||
Loading…
Reference in new issue