mirror of git://sourceware.org/git/glibc.git
i386: Replace assembly versions of e_log2f with generic e_log2f.c
This patch replaces the i386 assembly version of e_log2f with the generic
e_log2f.c. For workload-spec2017.wrf, on Nehalem, it improves
performance by:
Before After Improvement
reciprocal-throughput 92.3845 30.8752 199%
latency 112.855 54.8645 105%
On Skylake, it improves performance by:
Before After Improvement
reciprocal-throughput 98.7488 22.7507 334%
latency 118.01 51.6083 128%
On IvyBridge with --disable-multi-arch, it improves performance by:
Before After Improvement
reciprocal-throughput 106.635 28.8596 269%
latency 129.888 56.9187 128%
* sysdeps/i386/fpu/e_log2f.S: Removed.
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
* sysdeps/i386/fpu/w_log2f.c: Likewise.
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
Add e_log2f-sse2.
(CFLAGS-e_log2f-sse2.c): New.
* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
This commit is contained in:
parent
80bb593563
commit
6089a3ee24
13
ChangeLog
13
ChangeLog
|
|
@ -1,3 +1,16 @@
|
||||||
|
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* sysdeps/i386/fpu/e_log2f.S: Removed.
|
||||||
|
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
|
||||||
|
* sysdeps/i386/fpu/w_log2f.c: Likewise.
|
||||||
|
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||||
|
Add e_log2f-sse2.
|
||||||
|
(CFLAGS-e_log2f-sse2.c): New.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
|
||||||
|
|
||||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
|
* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||||
|
|
|
||||||
|
|
@ -1,69 +0,0 @@
|
||||||
/*
|
|
||||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
|
||||||
* Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
|
|
||||||
* Public domain.
|
|
||||||
*
|
|
||||||
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <machine/asm.h>
|
|
||||||
|
|
||||||
.section .rodata.cst8,"aM",@progbits,8
|
|
||||||
|
|
||||||
.p2align 3
|
|
||||||
.type one,@object
|
|
||||||
one: .double 1.0
|
|
||||||
ASM_SIZE_DIRECTIVE(one)
|
|
||||||
/* It is not important that this constant is precise. It is only
|
|
||||||
a value which is known to be on the safe side for using the
|
|
||||||
fyl2xp1 instruction. */
|
|
||||||
.type limit,@object
|
|
||||||
limit: .double 0.29
|
|
||||||
ASM_SIZE_DIRECTIVE(limit)
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef PIC
|
|
||||||
# define MO(op) op##@GOTOFF(%edx)
|
|
||||||
#else
|
|
||||||
# define MO(op) op
|
|
||||||
#endif
|
|
||||||
|
|
||||||
.text
|
|
||||||
ENTRY(__ieee754_log2f)
|
|
||||||
#ifdef PIC
|
|
||||||
LOAD_PIC_REG (dx)
|
|
||||||
#endif
|
|
||||||
fldl MO(one)
|
|
||||||
flds 4(%esp) // x : 1
|
|
||||||
fxam
|
|
||||||
fnstsw
|
|
||||||
fld %st // x : x : 1
|
|
||||||
sahf
|
|
||||||
jc 3f // in case x is NaN or ±Inf
|
|
||||||
4: fsub %st(2), %st // x-1 : x : 1
|
|
||||||
fld %st // x-1 : x-1 : x : 1
|
|
||||||
fabs // |x-1| : x-1 : x : 1
|
|
||||||
fcompl MO(limit) // x-1 : x : 1
|
|
||||||
fnstsw // x-1 : x : 1
|
|
||||||
andb $0x45, %ah
|
|
||||||
jz 2f
|
|
||||||
fxam
|
|
||||||
fnstsw
|
|
||||||
andb $0x45, %ah
|
|
||||||
cmpb $0x40, %ah
|
|
||||||
jne 5f
|
|
||||||
fabs // log2(1) is +0 in all rounding modes.
|
|
||||||
5: fstp %st(1) // x-1 : 1
|
|
||||||
fyl2xp1 // log2(x)
|
|
||||||
ret
|
|
||||||
|
|
||||||
2: fstp %st(0) // x : 1
|
|
||||||
fyl2x // log2(x)
|
|
||||||
ret
|
|
||||||
|
|
||||||
3: jp 4b // in case x is ±Inf
|
|
||||||
fstp %st(1)
|
|
||||||
fstp %st(1)
|
|
||||||
ret
|
|
||||||
END (__ieee754_log2f)
|
|
||||||
strong_alias (__ieee754_log2f, __log2f_finite)
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
/* Not needed. */
|
|
||||||
|
|
@ -2300,8 +2300,10 @@ ldouble: 3
|
||||||
|
|
||||||
Function: "log2":
|
Function: "log2":
|
||||||
double: 1
|
double: 1
|
||||||
|
float: 1
|
||||||
float128: 2
|
float128: 2
|
||||||
idouble: 1
|
idouble: 1
|
||||||
|
ifloat: 1
|
||||||
ifloat128: 2
|
ifloat128: 2
|
||||||
ildouble: 1
|
ildouble: 1
|
||||||
ldouble: 1
|
ldouble: 1
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
#include <sysdeps/../math/w_log2f.c>
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
ifeq ($(subdir),math)
|
ifeq ($(subdir),math)
|
||||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
|
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
|
||||||
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
|
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
|
||||||
|
|
||||||
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
||||||
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
||||||
|
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
|
||||||
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
|
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
|
||||||
endif
|
endif
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
#define __log2f __log2f_sse2
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/flt-32/e_log2f.c>
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
/* Multiple versions of log2f.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
extern float __redirect_log2f (float);
|
||||||
|
|
||||||
|
#define SYMBOL_NAME log2f
|
||||||
|
#include "ifunc-sse2.h"
|
||||||
|
|
||||||
|
libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
|
||||||
|
|
||||||
|
#ifdef SHARED
|
||||||
|
__hidden_ver1 (__log2f_ia32, __GI___log2f, __redirect_log2f)
|
||||||
|
__attribute__ ((visibility ("hidden")));
|
||||||
|
|
||||||
|
# include <shlib-compat.h>
|
||||||
|
versioned_symbol (libm, __log2f, log2f, GLIBC_2_27);
|
||||||
|
#else
|
||||||
|
weak_alias (__log2f, log2f)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
strong_alias (__log2f, __ieee754_log2f)
|
||||||
|
strong_alias (__log2f, __log2f_finite)
|
||||||
|
|
||||||
|
#define __log2f __log2f_ia32
|
||||||
|
#include <sysdeps/ieee754/flt-32/e_log2f.c>
|
||||||
|
|
@ -2300,8 +2300,10 @@ ldouble: 3
|
||||||
|
|
||||||
Function: "log2":
|
Function: "log2":
|
||||||
double: 1
|
double: 1
|
||||||
|
float: 1
|
||||||
float128: 2
|
float128: 2
|
||||||
idouble: 1
|
idouble: 1
|
||||||
|
ifloat: 1
|
||||||
ifloat128: 2
|
ifloat128: 2
|
||||||
ildouble: 1
|
ildouble: 1
|
||||||
ldouble: 1
|
ldouble: 1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue