Browse Source

math: fix exp2l asm on x86 (raise underflow correctly)

there were two problems:
* omitted underflow on subnormal results: exp2l(-16383.5) was calculated
as sqrt(2)*2^-16384, the last bits of sqrt(2) are zero so the down scaling
does not underflow eventhough the result is in subnormal range
* spurious underflow for subnormal inputs: exp2l(0x1p-16400) was evaluated
as f2xm1(x)+1 and f2xm1 raised underflow (because inexact subnormal result)

the first issue is fixed by raising underflow manually if x is in
(-32768,-16382] and not integer (x-0x1p63+0x1p63 != x)

the second issue is fixed by treating x in (-0x1p64,0x1p64) specially

for these fixes the special case handling was completely rewritten
rs-1.0
Szabolcs Nagy 13 years ago
parent
commit
07039ed856
  1. 70
      src/math/i386/exp.s
  2. 75
      src/math/x86_64/exp2l.s

70
src/math/i386/exp.s

@ -95,42 +95,32 @@ exp:
.type exp2,@function
exp2:
fldl 4(%esp)
1: pushl $0x467ff000
flds (%esp) # 16380
xorl %eax,%eax
pushl $0x80000000
push %eax
fld %st(1)
fabs
fucomp %st(1)
fnstsw
fstp %st(0)
sahf
ja 3f # |x| > 16380
jp 2f # x is nan (avoid invalid except in fistp)
1: sub $12,%esp
fld %st(0)
fistpl 8(%esp)
fildl 8(%esp)
fxch %st(1)
fsub %st(1)
mov $0x3fff,%eax
add %eax,8(%esp)
f2xm1
fld1
faddp # 2^(x-rint(x))
fldt (%esp) # 2^rint(x)
fmulp
fstp %st(1)
2: add $12,%esp
ret
3: fld %st(0)
fstpt (%esp)
fld1
mov 8(%esp),%ax
and $0x7fff,%ax
cmp $0x7fff,%ax
je 1f # x = +-inf
cmp $0x3fff+13,%ax
jb 4f # |x| < 8192
cmp $0x3fff+15,%ax
jae 3f # |x| >= 32768
fsts (%esp)
cmpl $0xc67ff800,(%esp)
jb 2f # x > -16382
movl $0x5f000000,(%esp)
flds (%esp) # 0x1p63
fld %st(1)
fsub %st(1)
faddp
fucomp %st(1)
fnstsw
sahf
je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%esp)
flds (%esp) # 0x1p-149
fdiv %st(1)
fstps (%esp) # raise underflow
2: fld1
fld %st(1)
frndint
fxch %st(2)
@ -141,3 +131,19 @@ exp2:
fstp %st(1)
add $12,%esp
ret
3: xor %eax,%eax
4: cmp $0x3fff-64,%ax
fld1
jb 1b # |x| < 0x1p-64
fstpt (%esp)
fistl 8(%esp)
fildl 8(%esp)
fsubrp %st(1)
addl $0x3fff,8(%esp)
f2xm1
fld1
faddp # 2^(x-rint(x))
fldt (%esp) # 2^rint(x)
fmulp
add $12,%esp
ret

75
src/math/x86_64/exp2l.s

@ -26,44 +26,32 @@ expm1l:
.type exp2l,@function
exp2l:
fldt 8(%rsp)
1: mov $0x467ff000,%eax
mov %eax,-16(%rsp)
mov $0x80000000,%eax
mov %eax,-20(%rsp)
xor %eax,%eax
mov %eax,-24(%rsp)
flds -16(%rsp) # 16380
1: fld %st(0)
sub $16,%rsp
fstpt (%rsp)
mov 8(%rsp),%ax
and $0x7fff,%ax
cmp $0x3fff+13,%ax
jb 4f # |x| < 8192
cmp $0x3fff+15,%ax
jae 3f # |x| >= 32768
fsts (%rsp)
cmpl $0xc67ff800,(%rsp)
jb 2f # x > -16382
movl $0x5f000000,(%rsp)
flds (%rsp) # 0x1p63
fld %st(1)
fabs
fucom %st(1)
fsub %st(1)
faddp
fucomp %st(1)
fnstsw
fstp %st(0)
fstp %st(0)
sahf
ja 3f # |x| > 16380
jp 2f # x is nan (avoid invalid except in fistp)
fld %st(0)
fistpl -16(%rsp)
fildl -16(%rsp)
fxch %st(1)
fsub %st(1)
mov $0x3fff,%eax
add %eax,-16(%rsp)
f2xm1
fld1
faddp # 2^(x-rint(x))
fldt -24(%rsp) # 2^rint(x)
fmulp
2: fstp %st(1)
ret
3: fld %st(0)
fstpt -24(%rsp)
fld1
mov -15(%rsp),%ax
and $0x7fff,%ax
cmp $0x7fff,%ax
je 1f # x = +-inf
je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%rsp)
flds (%rsp) # 0x1p-149
fdiv %st(1)
fstps (%rsp) # raise underflow
2: fld1
fld %st(1)
frndint
fxch %st(2)
@ -72,4 +60,21 @@ exp2l:
faddp # 2^(x-rint(x))
1: fscale
fstp %st(1)
add $16,%rsp
ret
3: xor %eax,%eax
4: cmp $0x3fff-64,%ax
fld1
jb 1b # |x| < 0x1p-64
fstpt (%rsp)
fistl 8(%rsp)
fildl 8(%rsp)
fsubrp %st(1)
addl $0x3fff,8(%rsp)
f2xm1
fld1
faddp # 2^(x-rint(x))
fldt (%rsp) # 2^rint(x)
fmulp
add $16,%rsp
ret

Loading…
Cancel
Save