mirror of git://sourceware.org/git/glibc.git
x86: Use "%v" to emit VEX encoded instructions for AVX targets
Legacy encodings of SSE instructions incur AVX-SSE domain transition penalties on some Intel microarchitectures (e.g. Haswell, Broadwell). Using the VEX forms avoids these penalties and keeps all instructions in the VEX decode domain. Use the "%v" sequence to emit the "v" prefix for opcodes when compiling with -mavx. No functional changes intended. Signed-off-by: Uros Bizjak <ubizjak@gmail.com> Reviewed-by: Florian Weimer <fweimer@redhat.com> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
parent
3014dec3ad
commit
ff8be6152b
|
|
@ -44,13 +44,13 @@ __feclearexcept (int excepts)
|
||||||
unsigned int xnew_exc;
|
unsigned int xnew_exc;
|
||||||
|
|
||||||
/* Get the current MXCSR. */
|
/* Get the current MXCSR. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xnew_exc));
|
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
|
||||||
|
|
||||||
/* Clear the relevant bits. */
|
/* Clear the relevant bits. */
|
||||||
xnew_exc &= ~excepts;
|
xnew_exc &= ~excepts;
|
||||||
|
|
||||||
/* Put the new data in effect. */
|
/* Put the new data in effect. */
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xnew_exc));
|
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
|
|
|
||||||
|
|
@ -41,11 +41,11 @@ fedisableexcept (int excepts)
|
||||||
unsigned int xnew_exc;
|
unsigned int xnew_exc;
|
||||||
|
|
||||||
/* Get the current control word. */
|
/* Get the current control word. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xnew_exc));
|
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
|
||||||
|
|
||||||
xnew_exc |= excepts << 7;
|
xnew_exc |= excepts << 7;
|
||||||
|
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xnew_exc));
|
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
|
||||||
}
|
}
|
||||||
|
|
||||||
return old_exc;
|
return old_exc;
|
||||||
|
|
|
||||||
|
|
@ -41,11 +41,11 @@ feenableexcept (int excepts)
|
||||||
unsigned int xnew_exc;
|
unsigned int xnew_exc;
|
||||||
|
|
||||||
/* Get the current control word. */
|
/* Get the current control word. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xnew_exc));
|
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
|
||||||
|
|
||||||
xnew_exc &= ~(excepts << 7);
|
xnew_exc &= ~(excepts << 7);
|
||||||
|
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xnew_exc));
|
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
|
||||||
}
|
}
|
||||||
|
|
||||||
return old_exc;
|
return old_exc;
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ __fegetenv (fenv_t *envp)
|
||||||
__asm__ ("fldenv %0" : : "m" (*envp));
|
__asm__ ("fldenv %0" : : "m" (*envp));
|
||||||
|
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
__asm__ ("stmxcsr %0" : "=m" (envp->__eip));
|
__asm__ ("%vstmxcsr %0" : "=m" (envp->__eip));
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,6 @@ fegetmode (femode_t *modep)
|
||||||
{
|
{
|
||||||
_FPU_GETCW (modep->__control_word);
|
_FPU_GETCW (modep->__control_word);
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
__asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,12 +33,12 @@ __feholdexcept (fenv_t *envp)
|
||||||
unsigned int xwork;
|
unsigned int xwork;
|
||||||
|
|
||||||
/* Get the current control word. */
|
/* Get the current control word. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (envp->__eip));
|
__asm__ ("%vstmxcsr %0" : "=m" (envp->__eip));
|
||||||
|
|
||||||
/* Set all exceptions to non-stop and clear them. */
|
/* Set all exceptions to non-stop and clear them. */
|
||||||
xwork = (envp->__eip | 0x1f80) & ~0x3f;
|
xwork = (envp->__eip | 0x1f80) & ~0x3f;
|
||||||
|
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xwork));
|
__asm__ ("%vldmxcsr %0" : : "m" (xwork));
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ __fesetenv (const fenv_t *envp)
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
|
|
||||||
if (envp == FE_DFL_ENV)
|
if (envp == FE_DFL_ENV)
|
||||||
{
|
{
|
||||||
|
|
@ -111,7 +111,7 @@ __fesetenv (const fenv_t *envp)
|
||||||
else
|
else
|
||||||
mxcsr = envp->__eip;
|
mxcsr = envp->__eip;
|
||||||
|
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
|
|
|
||||||
|
|
@ -33,13 +33,13 @@ fesetexcept (int excepts)
|
||||||
{
|
{
|
||||||
/* Get the control word of the SSE unit. */
|
/* Get the control word of the SSE unit. */
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
|
|
||||||
/* Set relevant flags. */
|
/* Set relevant flags. */
|
||||||
mxcsr |= excepts;
|
mxcsr |= excepts;
|
||||||
|
|
||||||
/* Put the new data in effect. */
|
/* Put the new data in effect. */
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ fesetmode (const femode_t *modep)
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
/* Preserve SSE exception flags but restore other state in
|
/* Preserve SSE exception flags but restore other state in
|
||||||
MXCSR. */
|
MXCSR. */
|
||||||
mxcsr &= FE_ALL_EXCEPT_X86;
|
mxcsr &= FE_ALL_EXCEPT_X86;
|
||||||
|
|
@ -47,7 +47,7 @@ fesetmode (const femode_t *modep)
|
||||||
mxcsr |= FE_ALL_EXCEPT_X86 << 7;
|
mxcsr |= FE_ALL_EXCEPT_X86 << 7;
|
||||||
else
|
else
|
||||||
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
|
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,10 +39,10 @@ __fesetround (int round)
|
||||||
{
|
{
|
||||||
unsigned int xcw;
|
unsigned int xcw;
|
||||||
|
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xcw));
|
__asm__ ("%vstmxcsr %0" : "=m" (xcw));
|
||||||
xcw &= ~0x6000;
|
xcw &= ~0x6000;
|
||||||
xcw |= round << 3;
|
xcw |= round << 3;
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xcw));
|
__asm__ ("%vldmxcsr %0" : : "m" (xcw));
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ __feupdateenv (const fenv_t *envp)
|
||||||
|
|
||||||
/* If the CPU supports SSE we test the MXCSR as well. */
|
/* If the CPU supports SSE we test the MXCSR as well. */
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xtemp));
|
__asm__ ("%vstmxcsr %0" : "=m" (xtemp));
|
||||||
|
|
||||||
temp = (temp | xtemp) & FE_ALL_EXCEPT;
|
temp = (temp | xtemp) & FE_ALL_EXCEPT;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
|
||||||
unsigned int sse_exc;
|
unsigned int sse_exc;
|
||||||
|
|
||||||
/* Get the current MXCSR. */
|
/* Get the current MXCSR. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (sse_exc));
|
__asm__ ("%vstmxcsr %0" : "=m" (sse_exc));
|
||||||
|
|
||||||
*flagp |= sse_exc & excepts & FE_ALL_EXCEPT;
|
*flagp |= sse_exc & excepts & FE_ALL_EXCEPT;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,13 +50,13 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
|
||||||
__asm__ ("fldenv %0" : : "m" (temp));
|
__asm__ ("fldenv %0" : : "m" (temp));
|
||||||
|
|
||||||
/* And now similarly for SSE. */
|
/* And now similarly for SSE. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
|
|
||||||
/* Clear or set relevant flags. */
|
/* Clear or set relevant flags. */
|
||||||
mxcsr ^= (mxcsr ^ *flagp) & excepts;
|
mxcsr ^= (mxcsr ^ *flagp) & excepts;
|
||||||
|
|
||||||
/* Put the new data in effect. */
|
/* Put the new data in effect. */
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ __fetestexcept (int excepts)
|
||||||
|
|
||||||
/* If the CPU supports SSE we test the MXCSR as well. */
|
/* If the CPU supports SSE we test the MXCSR as well. */
|
||||||
if (CPU_FEATURE_USABLE (SSE))
|
if (CPU_FEATURE_USABLE (SSE))
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xtemp));
|
__asm__ ("%vstmxcsr %0" : "=m" (xtemp));
|
||||||
|
|
||||||
return (temp | xtemp) & excepts & FE_ALL_EXCEPT;
|
return (temp | xtemp) & excepts & FE_ALL_EXCEPT;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,11 +43,11 @@ __setfpucw (fpu_control_t set)
|
||||||
unsigned int xnew_exc;
|
unsigned int xnew_exc;
|
||||||
|
|
||||||
/* Get the current MXCSR. */
|
/* Get the current MXCSR. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (xnew_exc));
|
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
|
||||||
|
|
||||||
xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7));
|
xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7));
|
||||||
xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7);
|
xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7);
|
||||||
|
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (xnew_exc));
|
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,14 @@
|
||||||
need not care for both the 387 and the sse unit, only the one we're
|
need not care for both the 387 and the sse unit, only the one we're
|
||||||
actually using. */
|
actually using. */
|
||||||
|
|
||||||
#if defined __AVX__ || defined SSE2AVX
|
|
||||||
# define STMXCSR "vstmxcsr"
|
|
||||||
# define LDMXCSR "vldmxcsr"
|
|
||||||
#else
|
|
||||||
# define STMXCSR "stmxcsr"
|
|
||||||
# define LDMXCSR "ldmxcsr"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
libc_feholdexcept_sse (fenv_t *e)
|
libc_feholdexcept_sse (fenv_t *e)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
e->__mxcsr = mxcsr;
|
e->__mxcsr = mxcsr;
|
||||||
mxcsr = (mxcsr | 0x1f80) & ~0x3f;
|
mxcsr = (mxcsr | 0x1f80) & ~0x3f;
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
|
|
@ -51,9 +43,9 @@ static __always_inline void
|
||||||
libc_fesetround_sse (int r)
|
libc_fesetround_sse (int r)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
|
|
@ -69,10 +61,10 @@ static __always_inline void
|
||||||
libc_feholdexcept_setround_sse (fenv_t *e, int r)
|
libc_feholdexcept_setround_sse (fenv_t *e, int r)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
e->__mxcsr = mxcsr;
|
e->__mxcsr = mxcsr;
|
||||||
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
|
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set both rounding mode and precision. A convenience function for use
|
/* Set both rounding mode and precision. A convenience function for use
|
||||||
|
|
@ -104,7 +96,7 @@ static __always_inline int
|
||||||
libc_fetestexcept_sse (int e)
|
libc_fetestexcept_sse (int e)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm volatile (STMXCSR " %0" : "=m" (mxcsr));
|
asm volatile ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
return mxcsr & e & FE_ALL_EXCEPT;
|
return mxcsr & e & FE_ALL_EXCEPT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,7 +111,7 @@ libc_fetestexcept_387 (int ex)
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
libc_fesetenv_sse (fenv_t *e)
|
libc_fesetenv_sse (fenv_t *e)
|
||||||
{
|
{
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (e->__mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
|
|
@ -137,13 +129,13 @@ static __always_inline int
|
||||||
libc_feupdateenv_test_sse (fenv_t *e, int ex)
|
libc_feupdateenv_test_sse (fenv_t *e, int ex)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr, old_mxcsr, cur_ex;
|
unsigned int mxcsr, old_mxcsr, cur_ex;
|
||||||
asm volatile (STMXCSR " %0" : "=m" (mxcsr));
|
asm volatile ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
cur_ex = mxcsr & FE_ALL_EXCEPT;
|
cur_ex = mxcsr & FE_ALL_EXCEPT;
|
||||||
|
|
||||||
/* Merge current exceptions with the old environment. */
|
/* Merge current exceptions with the old environment. */
|
||||||
old_mxcsr = e->__mxcsr;
|
old_mxcsr = e->__mxcsr;
|
||||||
mxcsr = old_mxcsr | cur_ex;
|
mxcsr = old_mxcsr | cur_ex;
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
/* Raise SIGFPE for any new exceptions since the hold. Expect that
|
/* Raise SIGFPE for any new exceptions since the hold. Expect that
|
||||||
the normal environment has all exceptions masked. */
|
the normal environment has all exceptions masked. */
|
||||||
|
|
@ -189,10 +181,10 @@ static __always_inline void
|
||||||
libc_feholdsetround_sse (fenv_t *e, int r)
|
libc_feholdsetround_sse (fenv_t *e, int r)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
e->__mxcsr = mxcsr;
|
e->__mxcsr = mxcsr;
|
||||||
mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
|
|
@ -223,9 +215,9 @@ static __always_inline void
|
||||||
libc_feresetround_sse (fenv_t *e)
|
libc_feresetround_sse (fenv_t *e)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
|
mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
|
|
@ -315,13 +307,13 @@ static __always_inline void
|
||||||
libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
|
libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr, new_mxcsr;
|
unsigned int mxcsr, new_mxcsr;
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
|
new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
|
||||||
|
|
||||||
ctx->env.__mxcsr = mxcsr;
|
ctx->env.__mxcsr = mxcsr;
|
||||||
if (__glibc_unlikely (mxcsr != new_mxcsr))
|
if (__glibc_unlikely (mxcsr != new_mxcsr))
|
||||||
{
|
{
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (new_mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr));
|
||||||
ctx->updated_status = true;
|
ctx->updated_status = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -412,13 +404,13 @@ libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr, new_mxcsr;
|
unsigned int mxcsr, new_mxcsr;
|
||||||
|
|
||||||
asm (STMXCSR " %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
new_mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
new_mxcsr = (mxcsr & ~0x6000) | (r << 3);
|
||||||
|
|
||||||
ctx->env.__mxcsr = mxcsr;
|
ctx->env.__mxcsr = mxcsr;
|
||||||
if (__glibc_unlikely (new_mxcsr != mxcsr))
|
if (__glibc_unlikely (new_mxcsr != mxcsr))
|
||||||
{
|
{
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (new_mxcsr));
|
asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr));
|
||||||
ctx->updated_status = true;
|
ctx->updated_status = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -39,15 +39,9 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
|
||||||
|
|
||||||
# define FP_RND_MASK 0x6000
|
# define FP_RND_MASK 0x6000
|
||||||
|
|
||||||
# ifdef __AVX__
|
|
||||||
# define AVX_INSN_PREFIX "v"
|
|
||||||
# else
|
|
||||||
# define AVX_INSN_PREFIX ""
|
|
||||||
# endif
|
|
||||||
|
|
||||||
# define FP_INIT_ROUNDMODE \
|
# define FP_INIT_ROUNDMODE \
|
||||||
do { \
|
do { \
|
||||||
__asm__ __volatile__ (AVX_INSN_PREFIX "stmxcsr\t%0" : "=m" (_fcw)); \
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#else
|
#else
|
||||||
# define _FP_W_TYPE_SIZE 32
|
# define _FP_W_TYPE_SIZE 32
|
||||||
|
|
|
||||||
|
|
@ -29,14 +29,14 @@ static uint32_t
|
||||||
get_sse_mxcsr (void)
|
get_sse_mxcsr (void)
|
||||||
{
|
{
|
||||||
uint32_t temp;
|
uint32_t temp;
|
||||||
__asm__ __volatile__ ("stmxcsr %0" : "=m" (temp));
|
__asm__ __volatile__ ("%vstmxcsr %0" : "=m" (temp));
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_sse_mxcsr (uint32_t val)
|
set_sse_mxcsr (uint32_t val)
|
||||||
{
|
{
|
||||||
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (val));
|
__asm__ __volatile__ ("%vldmxcsr %0" : : "m" (val));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
|
|
@ -38,13 +38,13 @@ __feclearexcept (int excepts)
|
||||||
__asm__ ("fldenv %0" : : "m" (temp));
|
__asm__ ("fldenv %0" : : "m" (temp));
|
||||||
|
|
||||||
/* And the same procedure for SSE. */
|
/* And the same procedure for SSE. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
|
|
||||||
/* Clear the relevant bits. */
|
/* Clear the relevant bits. */
|
||||||
mxcsr &= ~excepts;
|
mxcsr &= ~excepts;
|
||||||
|
|
||||||
/* And put them into effect. */
|
/* And put them into effect. */
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -35,11 +35,11 @@ fedisableexcept (int excepts)
|
||||||
__asm__ ("fldcw %0" : : "m" (new_exc));
|
__asm__ ("fldcw %0" : : "m" (new_exc));
|
||||||
|
|
||||||
/* And now the same for the SSE MXCSR register. */
|
/* And now the same for the SSE MXCSR register. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (new));
|
__asm__ ("%vstmxcsr %0" : "=m" (new));
|
||||||
|
|
||||||
/* The SSE exception masks are shifted by 7 bits. */
|
/* The SSE exception masks are shifted by 7 bits. */
|
||||||
new |= excepts << 7;
|
new |= excepts << 7;
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (new));
|
__asm__ ("%vldmxcsr %0" : : "m" (new));
|
||||||
|
|
||||||
return old_exc;
|
return old_exc;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,11 +35,11 @@ feenableexcept (int excepts)
|
||||||
__asm__ ("fldcw %0" : : "m" (new_exc));
|
__asm__ ("fldcw %0" : : "m" (new_exc));
|
||||||
|
|
||||||
/* And now the same for the SSE MXCSR register. */
|
/* And now the same for the SSE MXCSR register. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (new));
|
__asm__ ("%vstmxcsr %0" : "=m" (new));
|
||||||
|
|
||||||
/* The SSE exception masks are shifted by 7 bits. */
|
/* The SSE exception masks are shifted by 7 bits. */
|
||||||
new &= ~(excepts << 7);
|
new &= ~(excepts << 7);
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (new));
|
__asm__ ("%vldmxcsr %0" : : "m" (new));
|
||||||
|
|
||||||
return old_exc;
|
return old_exc;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ __fegetenv (fenv_t *envp)
|
||||||
/* fnstenv changes the exception mask, so load back the
|
/* fnstenv changes the exception mask, so load back the
|
||||||
stored environment. */
|
stored environment. */
|
||||||
"fldenv %0\n"
|
"fldenv %0\n"
|
||||||
"stmxcsr %1" : "=m" (*envp), "=m" (envp->__mxcsr));
|
"%vstmxcsr %1" : "=m" (*envp), "=m" (envp->__mxcsr));
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,6 @@ int
|
||||||
fegetmode (femode_t *modep)
|
fegetmode (femode_t *modep)
|
||||||
{
|
{
|
||||||
_FPU_GETCW (modep->__control_word);
|
_FPU_GETCW (modep->__control_word);
|
||||||
__asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,13 @@ __feholdexcept (fenv_t *envp)
|
||||||
/* Store the environment. Recall that fnstenv has a side effect of
|
/* Store the environment. Recall that fnstenv has a side effect of
|
||||||
masking all exceptions. Then clear all exceptions. */
|
masking all exceptions. Then clear all exceptions. */
|
||||||
__asm__ ("fnstenv %0\n\t"
|
__asm__ ("fnstenv %0\n\t"
|
||||||
"stmxcsr %1\n\t"
|
"%vstmxcsr %1\n\t"
|
||||||
"fnclex"
|
"fnclex"
|
||||||
: "=m" (*envp), "=m" (envp->__mxcsr));
|
: "=m" (*envp), "=m" (envp->__mxcsr));
|
||||||
|
|
||||||
/* Set the SSE MXCSR register. */
|
/* Set the SSE MXCSR register. */
|
||||||
mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f;
|
mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f;
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ __fesetenv (const fenv_t *envp)
|
||||||
Therefore, we get the current environment and replace the values
|
Therefore, we get the current environment and replace the values
|
||||||
we want to use from the environment specified by the parameter. */
|
we want to use from the environment specified by the parameter. */
|
||||||
__asm__ ("fnstenv %0\n"
|
__asm__ ("fnstenv %0\n"
|
||||||
"stmxcsr %1" : "=m" (temp), "=m" (temp.__mxcsr));
|
"%vstmxcsr %1" : "=m" (temp), "=m" (temp.__mxcsr));
|
||||||
|
|
||||||
if (envp == FE_DFL_ENV)
|
if (envp == FE_DFL_ENV)
|
||||||
{
|
{
|
||||||
|
|
@ -104,7 +104,7 @@ __fesetenv (const fenv_t *envp)
|
||||||
}
|
}
|
||||||
|
|
||||||
__asm__ ("fldenv %0\n"
|
__asm__ ("fldenv %0\n"
|
||||||
"ldmxcsr %1" : : "m" (temp), "m" (temp.__mxcsr));
|
"%vldmxcsr %1" : : "m" (temp), "m" (temp.__mxcsr));
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -23,9 +23,9 @@ fesetexcept (int excepts)
|
||||||
{
|
{
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
|
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
mxcsr |= excepts & FE_ALL_EXCEPT;
|
mxcsr |= excepts & FE_ALL_EXCEPT;
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ fesetmode (const femode_t *modep)
|
||||||
{
|
{
|
||||||
fpu_control_t cw;
|
fpu_control_t cw;
|
||||||
unsigned int mxcsr;
|
unsigned int mxcsr;
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
/* Preserve SSE exception flags but restore other state in
|
/* Preserve SSE exception flags but restore other state in
|
||||||
MXCSR. */
|
MXCSR. */
|
||||||
mxcsr &= FE_ALL_EXCEPT_X86;
|
mxcsr &= FE_ALL_EXCEPT_X86;
|
||||||
|
|
@ -45,6 +45,6 @@ fesetmode (const femode_t *modep)
|
||||||
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
|
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
|
||||||
}
|
}
|
||||||
_FPU_SETCW (cw);
|
_FPU_SETCW (cw);
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,10 +36,10 @@ __fesetround (int round)
|
||||||
|
|
||||||
/* And now the MXCSR register for SSE, the rounding mode is at different bit
|
/* And now the MXCSR register for SSE, the rounding mode is at different bit
|
||||||
positions in the different units, we need to shift it 3 bits. */
|
positions in the different units, we need to shift it 3 bits. */
|
||||||
asm ("stmxcsr %0" : "=m" (mxcsr));
|
asm ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
mxcsr &= ~ 0x6000;
|
mxcsr &= ~ 0x6000;
|
||||||
mxcsr |= round << 3;
|
mxcsr |= round << 3;
|
||||||
asm ("ldmxcsr %0" : : "m" (mxcsr));
|
asm ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ __feupdateenv (const fenv_t *envp)
|
||||||
unsigned int xtemp;
|
unsigned int xtemp;
|
||||||
|
|
||||||
/* Save current exceptions. */
|
/* Save current exceptions. */
|
||||||
__asm__ ("fnstsw %0\n\tstmxcsr %1" : "=m" (temp), "=m" (xtemp));
|
__asm__ ("fnstsw %0\n\t%vstmxcsr %1" : "=m" (temp), "=m" (xtemp));
|
||||||
temp = (temp | xtemp) & FE_ALL_EXCEPT;
|
temp = (temp | xtemp) & FE_ALL_EXCEPT;
|
||||||
|
|
||||||
/* Install new environment. */
|
/* Install new environment. */
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ fegetexceptflag (fexcept_t *flagp, int excepts)
|
||||||
|
|
||||||
/* Get the current exceptions for the x87 FPU and SSE unit. */
|
/* Get the current exceptions for the x87 FPU and SSE unit. */
|
||||||
__asm__ ("fnstsw %0\n"
|
__asm__ ("fnstsw %0\n"
|
||||||
"stmxcsr %1" : "=m" (temp), "=m" (mxscr));
|
"%vstmxcsr %1" : "=m" (temp), "=m" (mxscr));
|
||||||
|
|
||||||
*flagp = (temp | mxscr) & FE_ALL_EXCEPT & excepts;
|
*flagp = (temp | mxscr) & FE_ALL_EXCEPT & excepts;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ __feraiseexcept (int excepts)
|
||||||
/* One example of an invalid operation is 0.0 / 0.0. */
|
/* One example of an invalid operation is 0.0 / 0.0. */
|
||||||
float f = 0.0;
|
float f = 0.0;
|
||||||
|
|
||||||
__asm__ __volatile__ ("divss %0, %0 " : "+x" (f));
|
__asm__ __volatile__ ("%vdivss %0, %0 " : "+x" (f));
|
||||||
(void) &f;
|
(void) &f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ __feraiseexcept (int excepts)
|
||||||
float f = 1.0;
|
float f = 1.0;
|
||||||
float g = 0.0;
|
float g = 0.0;
|
||||||
|
|
||||||
__asm__ __volatile__ ("divss %1, %0" : "+x" (f) : "x" (g));
|
__asm__ __volatile__ ("%vdivss %1, %0" : "+x" (f) : "x" (g));
|
||||||
(void) &f;
|
(void) &f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,13 +44,13 @@ fesetexceptflag (const fexcept_t *flagp, int excepts)
|
||||||
__asm__ ("fldenv %0" : : "m" (temp));
|
__asm__ ("fldenv %0" : : "m" (temp));
|
||||||
|
|
||||||
/* And now similarly for SSE. */
|
/* And now similarly for SSE. */
|
||||||
__asm__ ("stmxcsr %0" : "=m" (mxcsr));
|
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
|
||||||
|
|
||||||
/* Clear or set relevant flags. */
|
/* Clear or set relevant flags. */
|
||||||
mxcsr ^= (mxcsr ^ *flagp) & excepts;
|
mxcsr ^= (mxcsr ^ *flagp) & excepts;
|
||||||
|
|
||||||
/* Put the new data in effect. */
|
/* Put the new data in effect. */
|
||||||
__asm__ ("ldmxcsr %0" : : "m" (mxcsr));
|
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ __fetestexcept (int excepts)
|
||||||
|
|
||||||
/* Get current exceptions. */
|
/* Get current exceptions. */
|
||||||
__asm__ ("fnstsw %0\n"
|
__asm__ ("fnstsw %0\n"
|
||||||
"stmxcsr %1" : "=m" (temp), "=m" (mxscr));
|
"%vstmxcsr %1" : "=m" (temp), "=m" (mxscr));
|
||||||
|
|
||||||
return (temp | mxscr) & excepts & FE_ALL_EXCEPT;
|
return (temp | mxscr) & excepts & FE_ALL_EXCEPT;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue