Browse Source

math: fix x86 asin, atan, exp, log1p to raise underflow

underflow is raised by an inexact subnormal float store,
since subnormal operations are slow, check the underflow
flag and skip the store if it's already raised
rs-1.0
Szabolcs Nagy 13 years ago
parent
commit
31c5fb80b9
  1. 23
      src/math/i386/asin.s
  2. 10
      src/math/i386/atan.s
  3. 12
      src/math/i386/atanf.s
  4. 37
      src/math/i386/exp.s
  5. 9
      src/math/i386/log1p.s
  6. 10
      src/math/i386/log1pf.s

23
src/math/i386/asin.s

@ -2,7 +2,18 @@
.type asinf,@function
asinf:
flds 4(%esp)
jmp 1f
mov 4(%esp),%eax
add %eax,%eax
cmp $0x01000000,%eax
jae 1f
# subnormal x, return x with underflow
fnstsw %ax
and $16,%ax
jnz 2f
fld %st(0)
fmul %st(1)
fstps 4(%esp)
2: ret
.global asinl
.type asinl,@function
@ -14,6 +25,16 @@ asinl:
.type asin,@function
asin:
fldl 4(%esp)
mov 8(%esp),%eax
add %eax,%eax
cmp $0x00200000,%eax
jae 1f
# subnormal x, return x with underflow
fnstsw %ax
and $16,%ax
jnz 2f
fsts 4(%esp)
2: ret
1: fld %st(0)
fld1
fsub %st(0),%st(1)

10
src/math/i386/atan.s

@ -2,6 +2,16 @@
.type atan,@function
atan:
fldl 4(%esp)
mov 8(%esp),%eax
add %eax,%eax
cmp $0x00200000,%eax
jb 1f
fld1
fpatan
ret
# subnormal x, return x with underflow
1: fnstsw %ax
and $16,%ax
jnz 2f
fsts 4(%esp)
2: ret

12
src/math/i386/atanf.s

@ -2,6 +2,18 @@
.type atanf,@function
atanf:
flds 4(%esp)
mov 4(%esp),%eax
add %eax,%eax
cmp $0x01000000,%eax
jb 1f
fld1
fpatan
ret
# subnormal x, return x with underflow
1: fnstsw %ax
and $16,%ax
jnz 2f
fld %st(0)
fmul %st(1)
fstps 4(%esp)
2: ret

37
src/math/i386/exp.s

@ -2,7 +2,18 @@
.type expm1f,@function
expm1f:
flds 4(%esp)
jmp 1f
mov 4(%esp),%eax
add %eax,%eax
cmp $0x01000000,%eax
jae 1f
# subnormal x, return x with underflow
fnstsw %ax
and $16,%ax
jnz 2f
fld %st(0)
fmul %st(1)
fstps 4(%esp)
2: ret
.global expm1l
.type expm1l,@function
@ -14,10 +25,32 @@ expm1l:
.type expm1,@function
expm1:
fldl 4(%esp)
mov 8(%esp),%eax
add %eax,%eax
cmp $0x00200000,%eax
jae 1f
# subnormal x, return x with underflow
fnstsw %ax
and $16,%ax
jnz 2f
fsts 4(%esp)
2: ret
1: fldl2e
fmulp
mov $0xc2820000,%eax
push %eax
flds (%esp)
pop %eax
fucomp %st(1)
fnstsw %ax
sahf
fld1
fld %st(1)
jb 1f
# x*log2e < -65, return -1 without underflow
fstp %st(1)
fchs
ret
1: fld %st(1)
fabs
fucom %st(1)
fnstsw %ax

9
src/math/i386/log1p.s

@ -7,9 +7,18 @@ log1p:
fldl 4(%esp)
cmp $0x3fd28f00,%eax
ja 1f
cmp $0x00100000,%eax
jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
# subnormal x, return x with underflow
2: fnstsw %ax
and $16,%ax
jnz 1f
fsts 4(%esp)
fstp %st(1)
1: ret

10
src/math/i386/log1pf.s

@ -7,9 +7,19 @@ log1pf:
flds 4(%esp)
cmp $0x3e940000,%eax
ja 1f
cmp $0x00800000,%eax
jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
# subnormal x, return x with underflow
2: fnstsw %ax
and $16,%ax
jnz 1f
fxch
fmul %st(1)
fstps 4(%esp)
1: ret

Loading…
Cancel
Save