@ -1943,56 +1943,59 @@ SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
/*
 * Masked dot product of the four single-precision lanes of d and s.
 *
 * mask bits 4..7: lane i contributes d[i] * s[i] to the sum when bit (4 + i)
 *                 is set; otherwise that term is float32_zero.
 * mask bits 0..3: destination lane i receives the sum when bit i is set,
 *                 otherwise float32_zero.
 *
 * Every softfloat operation is handed &env->sse_status, so each product and
 * each sum is computed exactly once: repeating them (e.g. through a second,
 * left-to-right running total) would push extra, differently rounded
 * operations through the status word for a value that is never used.
 */
void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
{
    float32 prod1, prod2, temp2, temp3, temp4;

    /*
     * We must evaluate (A+B)+(C+D), not ((A+B)+C)+D
     * to correctly round the intermediate results
     */

    /* Low pair: lanes 0 and 1. */
    if (mask & (1 << 4)) {
        prod1 = float32_mul(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status);
    } else {
        prod1 = float32_zero;
    }
    if (mask & (1 << 5)) {
        prod2 = float32_mul(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status);
    } else {
        prod2 = float32_zero;
    }
    temp2 = float32_add(prod1, prod2, &env->sse_status);

    /* High pair: lanes 2 and 3 (prod1/prod2 are reused as scratch). */
    if (mask & (1 << 6)) {
        prod1 = float32_mul(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status);
    } else {
        prod1 = float32_zero;
    }
    if (mask & (1 << 7)) {
        prod2 = float32_mul(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status);
    } else {
        prod2 = float32_zero;
    }
    temp3 = float32_add(prod1, prod2, &env->sse_status);

    /* Combine the two partial sums. */
    temp4 = float32_add(temp2, temp3, &env->sse_status);

    /*
     * All source lanes have been read by now, so writing d is safe even
     * if d and s refer to the same register.
     */
    d->ZMM_S(0) = (mask & (1 << 0)) ? temp4 : float32_zero;
    d->ZMM_S(1) = (mask & (1 << 1)) ? temp4 : float32_zero;
    d->ZMM_S(2) = (mask & (1 << 2)) ? temp4 : float32_zero;
    d->ZMM_S(3) = (mask & (1 << 3)) ? temp4 : float32_zero;
}
/*
 * Masked dot product of the two double-precision lanes of d and s.
 *
 * mask bits 4..5: lane i contributes d[i] * s[i] to the sum when bit (4 + i)
 *                 is set; otherwise that term is float64_zero.
 * mask bits 0..1: destination lane i receives the sum when bit i is set,
 *                 otherwise float64_zero.
 *
 * Every softfloat operation is handed &env->sse_status, so each product and
 * the single sum are computed exactly once; no separate running accumulator
 * is kept.
 */
void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
{
    float64 prod1, prod2, temp2;

    if (mask & (1 << 4)) {
        prod1 = float64_mul(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status);
    } else {
        prod1 = float64_zero;
    }
    if (mask & (1 << 5)) {
        prod2 = float64_mul(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status);
    } else {
        prod2 = float64_zero;
    }
    temp2 = float64_add(prod1, prod2, &env->sse_status);

    /*
     * Both source lanes have been read by now, so writing d is safe even
     * if d and s refer to the same register.
     */
    d->ZMM_D(0) = (mask & (1 << 0)) ? temp2 : float64_zero;
    d->ZMM_D(1) = (mask & (1 << 1)) ? temp2 : float64_zero;
}
void glue ( helper_mpsadbw , SUFFIX ) ( CPUX86State * env , Reg * d , Reg * s ,