Browse Source

x86_64: add single instruction fma

Hardware fma is only available on recent x86_64 CPUs, and it is much faster
than a software fma, so it should ideally be selected with a runtime check.
However, that requires more changes; this patch just adds the code so it can
be tested when musl is compiled with -mfma or -mfma4.
master
Szabolcs Nagy 8 years ago
committed by Rich Felker
parent
commit
e901613888
  1. 23
      src/math/x32/fma.c
  2. 23
      src/math/x32/fmaf.c
  3. 23
      src/math/x86_64/fma.c
  4. 23
      src/math/x86_64/fmaf.c

23
src/math/x32/fma.c

@ -0,0 +1,23 @@
#include <math.h>
/*
 * fma(x, y, z) for x32: returns x*y + z.
 *
 * The implementation is chosen at compile time, not at run time:
 *   - __FMA__  set (e.g. built with -mfma):  one FMA3 instruction
 *   - __FMA4__ set (e.g. built with -mfma4): one FMA4 instruction
 *   - otherwise: the implementation in ../fma.c is compiled instead
 * There is no CPU feature check here, so a binary built with one of
 * those flags only runs on CPUs that implement that extension.
 */
#if __FMA__
/* FMA3 variant: x is both a source and the destination. */
double fma(double x, double y, double z)
{
/*
 * AT&T operand order: vfmadd132sd src3, src2, dst computes
 * dst = dst*src3 + src2, i.e. %0 = %0*%1 + %2 = x*y + z.
 * "+x" makes x a read-write operand in an SSE register; y and z
 * are read-only SSE register inputs.
 */
__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
return x;
}
#elif __FMA4__
/* FMA4 variant: four-operand form with a separate destination. */
double fma(double x, double y, double z)
{
/*
 * AT&T operand order: vfmaddsd src3, src2, src1, dst computes
 * dst = src1*src2 + src3, i.e. %0 = %1*%2 + %3 = x*y + z.
 * x appears twice: as the write-only output %0 and as input %1.
 */
__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
#else
/* No FMA extension enabled at build time: use the parent directory's fma.c. */
#include "../fma.c"
#endif

23
src/math/x32/fmaf.c

@ -0,0 +1,23 @@
#include <math.h>
/*
 * fmaf(x, y, z) for x32: returns x*y + z in single precision.
 *
 * The implementation is chosen at compile time, not at run time:
 *   - __FMA__  set (e.g. built with -mfma):  one FMA3 instruction
 *   - __FMA4__ set (e.g. built with -mfma4): one FMA4 instruction
 *   - otherwise: the implementation in ../fmaf.c is compiled instead
 * There is no CPU feature check here, so a binary built with one of
 * those flags only runs on CPUs that implement that extension.
 */
#if __FMA__
/* FMA3 variant: x is both a source and the destination. */
float fmaf(float x, float y, float z)
{
/*
 * AT&T operand order: vfmadd132ss src3, src2, dst computes
 * dst = dst*src3 + src2, i.e. %0 = %0*%1 + %2 = x*y + z.
 * "+x" makes x a read-write operand in an SSE register; y and z
 * are read-only SSE register inputs.
 */
__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
return x;
}
#elif __FMA4__
/* FMA4 variant: four-operand form with a separate destination. */
float fmaf(float x, float y, float z)
{
/*
 * AT&T operand order: vfmaddss src3, src2, src1, dst computes
 * dst = src1*src2 + src3, i.e. %0 = %1*%2 + %3 = x*y + z.
 * x appears twice: as the write-only output %0 and as input %1.
 */
__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
#else
/* No FMA extension enabled at build time: use the parent directory's fmaf.c. */
#include "../fmaf.c"
#endif

23
src/math/x86_64/fma.c

@ -0,0 +1,23 @@
#include <math.h>
/*
 * fma(x, y, z) for x86_64: returns x*y + z.
 *
 * The implementation is chosen at compile time, not at run time:
 *   - __FMA__  set (e.g. built with -mfma):  one FMA3 instruction
 *   - __FMA4__ set (e.g. built with -mfma4): one FMA4 instruction
 *   - otherwise: the implementation in ../fma.c is compiled instead
 * There is no CPU feature check here, so a binary built with one of
 * those flags only runs on CPUs that implement that extension.
 */
#if __FMA__
/* FMA3 variant: x is both a source and the destination. */
double fma(double x, double y, double z)
{
/*
 * AT&T operand order: vfmadd132sd src3, src2, dst computes
 * dst = dst*src3 + src2, i.e. %0 = %0*%1 + %2 = x*y + z.
 * "+x" makes x a read-write operand in an SSE register; y and z
 * are read-only SSE register inputs.
 */
__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
return x;
}
#elif __FMA4__
/* FMA4 variant: four-operand form with a separate destination. */
double fma(double x, double y, double z)
{
/*
 * AT&T operand order: vfmaddsd src3, src2, src1, dst computes
 * dst = src1*src2 + src3, i.e. %0 = %1*%2 + %3 = x*y + z.
 * x appears twice: as the write-only output %0 and as input %1.
 */
__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
#else
/* No FMA extension enabled at build time: use the parent directory's fma.c. */
#include "../fma.c"
#endif

23
src/math/x86_64/fmaf.c

@ -0,0 +1,23 @@
#include <math.h>
/*
 * fmaf(x, y, z) for x86_64: returns x*y + z in single precision.
 *
 * The implementation is chosen at compile time, not at run time:
 *   - __FMA__  set (e.g. built with -mfma):  one FMA3 instruction
 *   - __FMA4__ set (e.g. built with -mfma4): one FMA4 instruction
 *   - otherwise: the implementation in ../fmaf.c is compiled instead
 * There is no CPU feature check here, so a binary built with one of
 * those flags only runs on CPUs that implement that extension.
 */
#if __FMA__
/* FMA3 variant: x is both a source and the destination. */
float fmaf(float x, float y, float z)
{
/*
 * AT&T operand order: vfmadd132ss src3, src2, dst computes
 * dst = dst*src3 + src2, i.e. %0 = %0*%1 + %2 = x*y + z.
 * "+x" makes x a read-write operand in an SSE register; y and z
 * are read-only SSE register inputs.
 */
__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
return x;
}
#elif __FMA4__
/* FMA4 variant: four-operand form with a separate destination. */
float fmaf(float x, float y, float z)
{
/*
 * AT&T operand order: vfmaddss src3, src2, src1, dst computes
 * dst = src1*src2 + src3, i.e. %0 = %1*%2 + %3 = x*y + z.
 * x appears twice: as the write-only output %0 and as input %1.
 */
__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
#else
/* No FMA extension enabled at build time: use the parent directory's fmaf.c. */
#include "../fmaf.c"
#endif
Loading…
Cancel
Save