x86-64: Properly compile ISA optimized modf and modff

There are 3 variants of modf and modff: SSE2, SSE4.1 and AVX.  s_modf.c
and s_modff.c include the generic implementation compiled with the minimum
x86 ISA level.  The IFUNC selector is used only if the minimum ISA level
is less than AVX.  SSE4.1 variant is included only if the ISA level is
less than SSE4.1.  AVX variant is included only if the ISA level is less than
AVX.

The AVX variant should be compiled with -mavx, not -msse2avx -DSSE2AVX, which
are used to encode SSE assembly sources with VEX encoding.

The routines that are shared between libc and libm should use different
rules to avoid using the same MODULE_NAME, to avoid potential issues
like BZ #33165, where __stack_chk_fail is not routed to the internal
symbol.

Tested with -march=x86-64, -march=x86-64-v2, -march=x86-64-v3 and
-march=x86-64-v4.

This fixes BZ #33165 and BZ #33173.

Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
This commit is contained in:
H.J. Lu 2025-07-17 16:00:42 -07:00
parent 13bf7812ef
commit aec8498873
12 changed files with 90 additions and 47 deletions

View File

@ -171,8 +171,12 @@ fi
config_vars="$config_vars
have-x86-isa-level = $libc_cv_have_x86_isa_level"
config_vars="$config_vars
x86-isa-level-2-or-above = 2 3 4"
config_vars="$config_vars
x86-isa-level-3-or-above = 3 4"
config_vars="$config_vars
x86-isa-level-4-or-above = 4"
config_vars="$config_vars
enable-x86-isa-level = $libc_cv_include_x86_isa_level"

View File

@ -117,7 +117,9 @@ else
AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
fi
LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
LIBC_CONFIG_VAR([x86-isa-level-2-or-above], [2 3 4])
LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
LIBC_CONFIG_VAR([x86-isa-level-4-or-above], [4])
LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
dnl Check if TEST_CC supports -mfpmath=387

View File

@ -26,19 +26,21 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
CFLAGS-s_cosf-fma.c = -mfma -mavx2
CFLAGS-s_sincosf-fma.c = -mfma -mavx2
# Check if ISA level is not 2 or above.
ifeq (,$(filter $(have-x86-isa-level),$(x86-isa-level-2-or-above)))
sysdep_calls += \
s_modf-sse4_1 \
s_modff-sse4_1 \
# sysdep_calls
endif
# Check if ISA level is 3 or above.
ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
sysdep_routines += \
s_modf-avx \
s_modff-avx \
# sysdep_routines
libm-sysdep_routines += \
s_ceil-avx \
s_ceilf-avx \
s_floor-avx \
s_floorf-avx \
s_modf-avx \
s_modff-avx \
s_nearbyint-avx \
s_nearbyintf-avx \
s_rint-avx \
@ -49,6 +51,10 @@ libm-sysdep_routines += \
s_truncf-avx \
# libm-sysdep_routines
else
sysdep_calls += \
s_modf-avx \
s_modff-avx \
# sysdep_calls
ifeq (no,$(have-x86-apx))
libm-sysdep_routines += \
e_asin-fma4 \
@ -62,10 +68,6 @@ libm-sysdep_routines += \
s_tan-fma4 \
# libm-sysdep_routines
endif
sysdep_routines += \
s_modf-sse4_1 \
s_modff-sse4_1 \
# sysdep_routines
libm-sysdep_routines += \
e_asin-fma \
e_atan2-avx \
@ -95,8 +97,6 @@ libm-sysdep_routines += \
s_floor-sse4_1 \
s_floorf-sse4_1 \
s_log1p-fma \
s_modf-sse4_1 \
s_modff-sse4_1 \
s_nearbyint-sse4_1 \
s_nearbyintf-sse4_1 \
s_rint-sse4_1 \
@ -118,17 +118,11 @@ libm-sysdep_routines += \
s_truncf-sse4_1 \
# libm-sysdep_routines
ifeq ($(have-x86-isa-level),baseline)
sysdep_routines += \
s_modf-c \
s_modff-c \
# sysdep-routines
libm-sysdep_routines += \
s_ceil-c \
s_ceilf-c \
s_floor-c \
s_floorf-c \
s_modf-c \
s_modff-c \
s_nearbyint-c \
s_nearbyintf-c \
s_rint-c \
@ -139,6 +133,11 @@ libm-sysdep_routines += \
s_truncf-c \
# libm-sysdep_routines
endif
# $(sysdep_calls) functions are built both for libc and libm. While the
# libc objects have the prefix s_, the libm ones are prefixed with m_.
sysdep_routines += $(sysdep_calls)
libm-sysdep_routines += $(sysdep_calls:s_%=m_%)
endif
CFLAGS-e_asin-fma4.c = -mfma4
@ -159,18 +158,11 @@ CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sincos-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_modf-c.c = $(no-stack-protector)
CFLAGS-s_modff-c.c = $(no-stack-protector)
CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x -fno-builtin-modff64
CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32
CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x \
-fno-builtin-modff64 $(no-stack-protector)
CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32 \
$(no-stack-protector)
CFLAGS-s_modf-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32x \
$(no-stack-protector) -fno-builtin-modff64
CFLAGS-s_modff-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32 \
$(no-stack-protector)
CFLAGS-s_modf-avx.c = -mavx -fno-builtin-modff32x -fno-builtin-modff64
CFLAGS-s_modff-avx.c = -mavx -fno-builtin-modff32
endif
ifeq ($(subdir),mathvec)

View File

@ -0,0 +1,41 @@
/* Common definition for ifunc selections optimized with SSE4.1 and AVX.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
/* Pick the modf/modff implementation for the running CPU.  The AVX
   variant is returned when AVX is usable.  Otherwise the SSE4.1 variant
   is returned, unless the minimum ISA level differs from the SSE4.1
   level and SSE4.1 is not usable, in which case the SSE2 variant is
   returned.  */
static inline void *
IFUNC_SELECTOR (void)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
    return OPTIMIZE (avx);

#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL
  /* The SSE4.1 variant is taken unconditionally only when the minimum
     ISA level equals the SSE4.1 level; here it must be tested at run
     time, with the SSE2 variant as the fallback.  */
  if (!CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
    return OPTIMIZE (sse2);
#endif

  return OPTIMIZE (sse41);
}

View File

@ -1 +1,3 @@
#define __modf __modf_avx
#include <sysdeps/ieee754/dbl-64/s_modf.c>

View File

@ -1,2 +0,0 @@
#define __modf __modf_c
#include <sysdeps/ieee754/dbl-64/s_modf.c>

View File

@ -1,7 +1,3 @@
#include <sysdeps/x86/isa-level.h>
#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL
# define __modf __modf_sse41
#endif
#define __modf __modf_sse41
#include <sysdeps/ieee754/dbl-64/s_modf.c>

View File

@ -17,7 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <sysdeps/x86/isa-level.h>
#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
# define NO_MATH_REDIRECT
# include <libm-alias-double.h>
@ -28,8 +28,14 @@
# undef __modf
# define SYMBOL_NAME modf
# include "ifunc-sse4_1.h"
# include "ifunc-sse4_1-avx.h"
libc_ifunc_redirected (__redirect_modf, __modf, IFUNC_SELECTOR ());
libm_alias_double (__modf, modf)
# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
# define __modf __modf_sse41
# else
# define __modf __modf_sse2
# endif
#endif
#include <sysdeps/ieee754/dbl-64/s_modf.c>

View File

@ -1 +1,3 @@
#define __modff __modff_avx
#include <sysdeps/ieee754/flt-32/s_modff.c>

View File

@ -1,2 +0,0 @@
#define __modff __modff_c
#include <sysdeps/ieee754/flt-32/s_modff.c>

View File

@ -1,7 +1,3 @@
#include <sysdeps/x86/isa-level.h>
#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL
# define __modff __modff_sse41
#endif
#define __modff __modff_sse41
#include <sysdeps/ieee754/flt-32/s_modff.c>

View File

@ -17,7 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <sysdeps/x86/isa-level.h>
#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
# define NO_MATH_REDIRECT
# include <libm-alias-float.h>
@ -28,8 +28,14 @@
# undef __modff
# define SYMBOL_NAME modff
# include "ifunc-sse4_1.h"
# include "ifunc-sse4_1-avx.h"
libc_ifunc_redirected (__redirect_modff, __modff, IFUNC_SELECTOR ());
libm_alias_float (__modf, modf)
# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
# define __modff __modff_sse41
# else
# define __modff __modff_sse2
# endif
#endif
#include <sysdeps/ieee754/flt-32/s_modff.c>