Browse Source

target/i386/tcg: commonize code to compute SF/ZF/PF

PF/ZF/SF are computed the same way for almost all CC_OP values (depending
only on the operand size in the case of ZF and SF).  The only exception is
PF for CC_OP_BLSI* and CC_OP_BMILG*; but AMD documents that PF should
be computed normally (rather than being left undefined), so this change
is effectively a bug fix.

Put the common code at the end of helper_cc_compute_all, shaving
another kB from its text.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
pull/316/head
Paolo Bonzini 8 months ago
parent
commit
e12799fc61
  1. 4
      target/i386/cpu.h
  2. 192
      target/i386/tcg/cc_helper.c
  3. 112
      target/i386/tcg/cc_helper_template.h.inc

4
target/i386/cpu.h

@ -1545,12 +1545,12 @@ typedef enum {
CC_OP_SARL,
CC_OP_SARQ,
CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
CC_OP_BMILGB, /* P,Z,S via CC_DST, C = SRC==0; A=O=0 */
CC_OP_BMILGW,
CC_OP_BMILGL,
CC_OP_BMILGQ,
CC_OP_BLSIB, /* Z,S via CC_DST, C = SRC!=0; O=0; P,A undefined */
CC_OP_BLSIB, /* P,Z,S via CC_DST, C = SRC!=0; A=O=0 */
CC_OP_BLSIW,
CC_OP_BLSIL,
CC_OP_BLSIQ,

192
target/i386/tcg/cc_helper.c

@ -73,9 +73,25 @@ target_ulong helper_cc_compute_nz(target_ulong dst, target_ulong src1,
}
}
/* NOTE: we compute the flags like the P4. On older CPUs, only OF and
CF are modified and it is slower to do that. Note as well that we
don't truncate SRC1 for computing carry to DATA_TYPE. */
/*
 * Compute the AF/CF/OF portion of EFLAGS for the MUL/IMUL CC_OP values.
 * src1 holds the value whose non-zeroness decides CF (presumably the high
 * half of the widening product — TODO confirm against the MUL translation
 * code that sets CC_SRC).  PF/ZF/SF are computed separately by the common
 * tail of helper_cc_compute_all.
 * NOTE(review): `cf = (src1 != 0)` is used directly as the CC_C bit, which
 * assumes CC_C == 1 — verify against the flag masks in cpu.h.
 */
static inline uint32_t compute_aco_mul(target_long src1)
{
uint32_t cf, af, of;
cf = (src1 != 0);
af = 0; /* undefined */
of = cf * CC_O; /* OF mirrors CF for the multiply ops */
return cf + af + of;
}
target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
target_ulong src2, int op)
{
uint32_t flags = 0;
int shift = 0;
switch (op) {
default: /* should never happen */
return 0;
@ -88,124 +104,188 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
/* dst is either all zeros (--Z-P-) or all ones (-S-APC) */
return (dst & (CC_Z|CC_A|CC_C|CC_S)) ^ (CC_P | CC_Z);
case CC_OP_ADCX:
return compute_all_adcx(dst, src1, src2);
case CC_OP_ADOX:
return compute_all_adox(dst, src1, src2);
case CC_OP_ADCOX:
return compute_all_adcox(dst, src1, src2);
case CC_OP_MULB:
return compute_all_mulb(dst, src1);
flags = compute_aco_mul(src1);
goto psz_b;
case CC_OP_MULW:
return compute_all_mulw(dst, src1);
flags = compute_aco_mul(src1);
goto psz_w;
case CC_OP_MULL:
return compute_all_mull(dst, src1);
flags = compute_aco_mul(src1);
goto psz_l;
case CC_OP_ADDB:
return compute_all_addb(dst, src1);
flags = compute_aco_addb(dst, src1);
goto psz_b;
case CC_OP_ADDW:
return compute_all_addw(dst, src1);
flags = compute_aco_addw(dst, src1);
goto psz_w;
case CC_OP_ADDL:
return compute_all_addl(dst, src1);
flags = compute_aco_addl(dst, src1);
goto psz_l;
case CC_OP_ADCB:
return compute_all_adcb(dst, src1, src2);
flags = compute_aco_adcb(dst, src1, src2);
goto psz_b;
case CC_OP_ADCW:
return compute_all_adcw(dst, src1, src2);
flags = compute_aco_adcw(dst, src1, src2);
goto psz_w;
case CC_OP_ADCL:
return compute_all_adcl(dst, src1, src2);
flags = compute_aco_adcl(dst, src1, src2);
goto psz_l;
case CC_OP_SUBB:
return compute_all_subb(dst, src1);
flags = compute_aco_subb(dst, src1);
goto psz_b;
case CC_OP_SUBW:
return compute_all_subw(dst, src1);
flags = compute_aco_subw(dst, src1);
goto psz_w;
case CC_OP_SUBL:
return compute_all_subl(dst, src1);
flags = compute_aco_subl(dst, src1);
goto psz_l;
case CC_OP_SBBB:
return compute_all_sbbb(dst, src1, src2);
flags = compute_aco_sbbb(dst, src1, src2);
goto psz_b;
case CC_OP_SBBW:
return compute_all_sbbw(dst, src1, src2);
flags = compute_aco_sbbw(dst, src1, src2);
goto psz_w;
case CC_OP_SBBL:
return compute_all_sbbl(dst, src1, src2);
flags = compute_aco_sbbl(dst, src1, src2);
goto psz_l;
case CC_OP_LOGICB:
return compute_all_logicb(dst, src1);
flags = 0;
goto psz_b;
case CC_OP_LOGICW:
return compute_all_logicw(dst, src1);
flags = 0;
goto psz_w;
case CC_OP_LOGICL:
return compute_all_logicl(dst, src1);
flags = 0;
goto psz_l;
case CC_OP_INCB:
return compute_all_incb(dst, src1);
flags = compute_aco_incb(dst, src1);
goto psz_b;
case CC_OP_INCW:
return compute_all_incw(dst, src1);
flags = compute_aco_incw(dst, src1);
goto psz_w;
case CC_OP_INCL:
return compute_all_incl(dst, src1);
flags = compute_aco_incl(dst, src1);
goto psz_l;
case CC_OP_DECB:
return compute_all_decb(dst, src1);
flags = compute_aco_decb(dst, src1);
goto psz_b;
case CC_OP_DECW:
return compute_all_decw(dst, src1);
flags = compute_aco_decw(dst, src1);
goto psz_w;
case CC_OP_DECL:
return compute_all_decl(dst, src1);
flags = compute_aco_decl(dst, src1);
goto psz_l;
case CC_OP_SHLB:
return compute_all_shlb(dst, src1);
flags = compute_aco_shlb(dst, src1);
goto psz_b;
case CC_OP_SHLW:
return compute_all_shlw(dst, src1);
flags = compute_aco_shlw(dst, src1);
goto psz_w;
case CC_OP_SHLL:
return compute_all_shll(dst, src1);
flags = compute_aco_shll(dst, src1);
goto psz_l;
case CC_OP_SARB:
return compute_all_sarb(dst, src1);
flags = compute_aco_sarb(dst, src1);
goto psz_b;
case CC_OP_SARW:
return compute_all_sarw(dst, src1);
flags = compute_aco_sarw(dst, src1);
goto psz_w;
case CC_OP_SARL:
return compute_all_sarl(dst, src1);
flags = compute_aco_sarl(dst, src1);
goto psz_l;
case CC_OP_BMILGB:
return compute_all_bmilgb(dst, src1);
flags = compute_aco_bmilgb(dst, src1);
goto psz_b;
case CC_OP_BMILGW:
return compute_all_bmilgw(dst, src1);
flags = compute_aco_bmilgw(dst, src1);
goto psz_w;
case CC_OP_BMILGL:
return compute_all_bmilgl(dst, src1);
flags = compute_aco_bmilgl(dst, src1);
goto psz_l;
case CC_OP_BLSIB:
return compute_all_blsib(dst, src1);
flags = compute_aco_blsib(dst, src1);
goto psz_b;
case CC_OP_BLSIW:
return compute_all_blsiw(dst, src1);
flags = compute_aco_blsiw(dst, src1);
goto psz_w;
case CC_OP_BLSIL:
return compute_all_blsil(dst, src1);
case CC_OP_ADCX:
return compute_all_adcx(dst, src1, src2);
case CC_OP_ADOX:
return compute_all_adox(dst, src1, src2);
case CC_OP_ADCOX:
return compute_all_adcox(dst, src1, src2);
flags = compute_aco_blsil(dst, src1);
goto psz_l;
#ifdef TARGET_X86_64
case CC_OP_MULQ:
return compute_all_mulq(dst, src1);
flags = compute_aco_mul(src1);
goto psz_q;
case CC_OP_ADDQ:
return compute_all_addq(dst, src1);
flags = compute_aco_addq(dst, src1);
goto psz_q;
case CC_OP_ADCQ:
return compute_all_adcq(dst, src1, src2);
flags = compute_aco_adcq(dst, src1, src2);
goto psz_q;
case CC_OP_SUBQ:
return compute_all_subq(dst, src1);
flags = compute_aco_subq(dst, src1);
goto psz_q;
case CC_OP_SBBQ:
return compute_all_sbbq(dst, src1, src2);
case CC_OP_LOGICQ:
return compute_all_logicq(dst, src1);
flags = compute_aco_sbbq(dst, src1, src2);
goto psz_q;
case CC_OP_INCQ:
return compute_all_incq(dst, src1);
flags = compute_aco_incq(dst, src1);
goto psz_q;
case CC_OP_DECQ:
return compute_all_decq(dst, src1);
flags = compute_aco_decq(dst, src1);
goto psz_q;
case CC_OP_LOGICQ:
flags = 0;
goto psz_q;
case CC_OP_SHLQ:
return compute_all_shlq(dst, src1);
flags = compute_aco_shlq(dst, src1);
goto psz_q;
case CC_OP_SARQ:
return compute_all_sarq(dst, src1);
flags = compute_aco_sarq(dst, src1);
goto psz_q;
case CC_OP_BMILGQ:
return compute_all_bmilgq(dst, src1);
flags = compute_aco_bmilgq(dst, src1);
goto psz_q;
case CC_OP_BLSIQ:
return compute_all_blsiq(dst, src1);
flags = compute_aco_blsiq(dst, src1);
goto psz_q;
#endif
}
psz_b:
shift += 8;
psz_w:
shift += 16;
psz_l:
#ifdef TARGET_X86_64
shift += 32;
psz_q:
#endif
flags += compute_pf(dst);
dst <<= shift;
flags += dst == 0 ? CC_Z : 0;
flags += (target_long)dst < 0 ? CC_S : 0;
return flags;
}
uint32_t cpu_cc_compute_all(CPUX86State *env)

112
target/i386/tcg/cc_helper_template.h.inc

@ -1,5 +1,5 @@
/*
* x86 condition code helpers
* x86 condition code helpers for AF/CF/OF
*
* Copyright (c) 2008 Fabrice Bellard
*
@ -44,14 +44,9 @@
/* dynamic flags computation */
static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries)
static uint32_t glue(compute_aco_cout, SUFFIX)(DATA_TYPE carries)
{
uint32_t af_cf, pf, zf, sf, of;
/* PF, ZF, SF computed from result. */
pf = compute_pf(dst);
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
uint32_t af_cf, of;
/*
* AF, CF, OF computed from carry out vector. To compute AF and CF, rotate it
@ -62,14 +57,14 @@ static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries)
*/
af_cf = ((carries << 1) | (carries >> (DATA_BITS - 1))) & (CC_A | CC_C);
of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O;
return pf + zf + sf + af_cf + of;
return af_cf + of;
}
static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
DATA_TYPE src2 = dst - src1;
DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
return glue(compute_all_cout, SUFFIX)(dst, carries);
return glue(compute_aco_cout, SUFFIX)(carries);
}
static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
@ -77,12 +72,12 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
return dst < src1;
}
static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
static uint32_t glue(compute_aco_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
DATA_TYPE src3)
{
DATA_TYPE src2 = dst - src1 - src3;
DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
return glue(compute_all_cout, SUFFIX)(dst, carries);
return glue(compute_aco_cout, SUFFIX)(carries);
}
static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
@ -97,11 +92,11 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
#endif
}
static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
static uint32_t glue(compute_aco_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
DATA_TYPE src1 = dst + src2;
DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
return glue(compute_all_cout, SUFFIX)(dst, carries);
return glue(compute_aco_cout, SUFFIX)(carries);
}
static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@ -111,12 +106,12 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
return src1 < src2;
}
static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
static uint32_t glue(compute_aco_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
DATA_TYPE src3)
{
DATA_TYPE src1 = dst + src2 + src3;
DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
return glue(compute_all_cout, SUFFIX)(dst, carries);
return glue(compute_aco_cout, SUFFIX)(carries);
}
static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
@ -134,57 +129,35 @@ static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
#endif
}
static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
cf = 0;
pf = compute_pf(dst);
af = 0;
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = 0;
return cf + pf + af + zf + sf + of;
}
static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = src1;
pf = compute_pf(dst);
af = (dst ^ (dst - 1)) & CC_A; /* bits 0..3 are all clear */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = (dst == SIGN_MASK) * CC_O;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = src1;
pf = compute_pf(dst);
af = (dst ^ (dst + 1)) & CC_A; /* bits 0..3 are all set */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = (dst == SIGN_MASK - 1) * CC_O;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static uint32_t glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = (src1 >> (DATA_BITS - 1)) & CC_C;
pf = compute_pf(dst);
af = 0; /* undefined */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
/* of is defined iff shift count == 1 */
of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
@ -192,47 +165,25 @@ static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
return (src1 >> (DATA_BITS - 1)) & CC_C;
}
static uint32_t glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = src1 & 1;
pf = compute_pf(dst);
af = 0; /* undefined */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
/* of is defined iff shift count == 1 */
of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
return cf + pf + af + zf + sf + of;
}
/* NOTE: we compute the flags like the P4. On older CPUs, only OF and
CF are modified and it is slower to do that. Note as well that we
don't truncate SRC1 for computing carry to DATA_TYPE. */
static uint32_t glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1)
{
uint32_t cf, pf, af, zf, sf, of;
cf = (src1 != 0);
pf = compute_pf(dst);
af = 0; /* undefined */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = cf * CC_O;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static uint32_t glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_aco_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = (src1 == 0);
pf = 0; /* undefined */
af = 0; /* undefined */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = 0;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
@ -240,17 +191,14 @@ static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
return src1 == 0;
}
static int glue(compute_all_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static int glue(compute_aco_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
uint32_t cf, af, of;
cf = (src1 != 0);
pf = 0; /* undefined */
af = 0; /* undefined */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = 0;
return cf + pf + af + zf + sf + of;
return cf + af + of;
}
static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)

Loading…
Cancel
Save