math: fix x86 asin, atan, exp, log1p to raise underflow

underflow is raised by an inexact subnormal float store, since subnormal operations are slow, check the underflow flag and skip the store if it's already raised
13 years ago · 31c5fb80b9
6 changed files with 98 additions and 3 deletions
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@ -2,7 +2,18 @@
 .type asinf,@function
 asinf:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret

 .global asinl
 .type asinl,@function
@ -14,6 +25,16 @@ asinl:
 .type asin,@function
 asin:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@ -2,6 +2,16 @@
 .type atan,@function
 atan:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@ -2,6 +2,18 @@
 .type atanf,@function
 atanf:
 	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@ -2,7 +2,18 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret

 .global expm1l
 .type expm1l,@function
@ -14,10 +25,32 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+		# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@ -7,9 +7,18 @@ log1p:
 	fldl 4(%esp)
 	cmp $0x3fd28f00,%eax
 	ja 1f
+	cmp $0x00100000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fsts 4(%esp)
+	fstp %st(1)
+1:	ret
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@ -7,9 +7,19 @@ log1pf:
 	flds 4(%esp)
 	cmp $0x3e940000,%eax
 	ja 1f
+	cmp $0x00800000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fxch
+	fmul %st(1)
+	fstps 4(%esp)
+1:	ret