i386: Update ___tls_get_addr to preserve vector registers

Compiler generates the following instruction sequence for dynamic TLS
access:

	leal	tls_var@tlsgd(,%ebx,1), %eax
	call	___tls_get_addr@PLT

The CALL instruction is transparent to the compiler, which assumes that
all registers, except for EFLAGS, AX, CX, and DX, are unchanged after the
CALL.  But ___tls_get_addr is a normal function which doesn't preserve
any vector registers.

1. Rename the generic __tls_get_addr function to ___tls_get_addr_internal.
2. Change ___tls_get_addr to a wrapper function with implementations for
FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector registers.
3. dl-tlsdesc-dynamic.h has:

_dl_tlsdesc_dynamic:
	/* Like all TLS resolvers, preserve call-clobbered registers.
	   We need two scratch regs anyway.  */
	subl	$32, %esp
	cfi_adjust_cfa_offset (32)

It is wrong to use

	movl	%ebx, -28(%esp)
	movl	%esp, %ebx
	cfi_def_cfa_register(%ebx)
	...
	mov	%ebx, %esp
	cfi_def_cfa_register(%esp)
	movl	-28(%esp), %ebx

to preserve EBX on the stack, because -28(%esp) lies below the stack
pointer, outside the 32 bytes that were just reserved.  Fix it with:

	movl	%ebx, 28(%esp)
	movl	%esp, %ebx
	cfi_def_cfa_register(%ebx)
	...
	mov	%ebx, %esp
	cfi_def_cfa_register(%esp)
	movl	28(%esp), %ebx

4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal directly.
5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with
traditional TLS variant to verify the fix.
6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h.

This fixes BZ #32996.

Co-Authored-By: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
H.J. Lu 2025-06-09 05:22:10 +08:00
parent abc2e954af
commit 848f0e46f0
25 changed files with 623 additions and 169 deletions

38
configure vendored
View File

@ -4931,6 +4931,9 @@ with_fp_cond=1
# A preconfigure script may define another name to TLS descriptor variant # A preconfigure script may define another name to TLS descriptor variant
mtls_descriptor=gnu2 mtls_descriptor=gnu2
# A preconfigure script may define another name to traditional TLS variant
mtls_traditional=gnu
if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
then then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
@ -7490,6 +7493,41 @@ rm -f conftest*
config_vars="$config_vars config_vars="$config_vars
have-test-mtls-descriptor = $libc_cv_test_mtls_descriptor" have-test-mtls-descriptor = $libc_cv_test_mtls_descriptor"
cat > conftest.c <<EOF
$conftest_code
EOF
saved_CC="$CC"
CC="$TEST_CC"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for traditional tls support in testing" >&5
printf %s "checking for traditional tls support in testing... " >&6; }
if test ${libc_cv_test_mtls_traditional+y}
then :
printf %s "(cached) " >&6
else case e in #(
e) if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared conftest.c -o conftest 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }
then
libc_cv_test_mtls_traditional=$mtls_traditional
else
libc_cv_test_mtls_traditional=no
fi ;;
esac
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5
printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; }
CC="$saved_CC"
rm -f conftest*
config_vars="$config_vars
have-test-mtls-traditional = $libc_cv_test_mtls_traditional"
conftest_code=" conftest_code="
void __foo (void) void __foo (void)
{ {

View File

@ -483,6 +483,9 @@ with_fp_cond=1
# A preconfigure script may define another name to TLS descriptor variant # A preconfigure script may define another name to TLS descriptor variant
mtls_descriptor=gnu2 mtls_descriptor=gnu2
# A preconfigure script may define another name to traditional TLS variant
mtls_traditional=gnu
dnl Let sysdeps/*/preconfigure act here. dnl Let sysdeps/*/preconfigure act here.
LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
@ -1401,6 +1404,16 @@ LIBC_TRY_TEST_CC_COMMAND([for tls descriptor support],
LIBC_CONFIG_VAR([have-test-mtls-descriptor], LIBC_CONFIG_VAR([have-test-mtls-descriptor],
[$libc_cv_test_mtls_descriptor]) [$libc_cv_test_mtls_descriptor])
dnl Check if TEST_CC supports traditional tls.
LIBC_TRY_TEST_CC_COMMAND([for traditional tls support],
[$conftest_code],
[-fPIC -mtls-dialect=$mtls_traditional -nostdlib -nostartfiles -shared],
libc_cv_test_mtls_traditional,
[libc_cv_test_mtls_traditional=$mtls_traditional],
[libc_cv_test_mtls_traditional=no])
LIBC_CONFIG_VAR([have-test-mtls-traditional],
[$libc_cv_test_mtls_traditional])
dnl clang emits a warning for a double alias redirection, to warn the dnl clang emits a warning for a double alias redirection, to warn the
dnl original symbol is used even when weak definition overrides it. dnl original symbol is used even when weak definition overrides it.
dnl It is a usual pattern for weak_alias, where multiple alias point to dnl It is a usual pattern for weak_alias, where multiple alias point to

View File

@ -496,6 +496,7 @@ tests += \
tst-tls21 \ tst-tls21 \
tst-tls22 \ tst-tls22 \
tst-tls22-gnu2 \ tst-tls22-gnu2 \
tst-tls23 \
tst-tlsalign \ tst-tlsalign \
tst-tlsalign-extern \ tst-tlsalign-extern \
tst-tlsgap \ tst-tlsgap \
@ -1023,6 +1024,7 @@ modules-names += \
tst-tls22-mod1-gnu2 \ tst-tls22-mod1-gnu2 \
tst-tls22-mod2 \ tst-tls22-mod2 \
tst-tls22-mod2-gnu2 \ tst-tls22-mod2-gnu2 \
tst-tls23-mod \
tst-tlsalign-lib \ tst-tlsalign-lib \
tst-tlsgap-mod0 \ tst-tlsgap-mod0 \
tst-tlsgap-mod1 \ tst-tlsgap-mod1 \
@ -3410,6 +3412,13 @@ tst-tls22-mod1-gnu2.so-no-z-defs = yes
tst-tls22-mod2.so-no-z-defs = yes tst-tls22-mod2.so-no-z-defs = yes
tst-tls22-mod2-gnu2.so-no-z-defs = yes tst-tls22-mod2-gnu2.so-no-z-defs = yes
# tst-tls23 links against the thread library and needs tst-tls23-mod.so
# to be built before the test runs.
$(objpfx)tst-tls23: $(shared-thread-library)
$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so
# Build the module with the traditional TLS dialect when configure found
# that the test compiler accepts it.
ifneq (no,$(have-test-mtls-traditional))
CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional)
endif
ifeq ($(have-test-cc-cflags-fsemantic-interposition),yes) ifeq ($(have-test-cc-cflags-fsemantic-interposition),yes)
# Compiler may default to -fno-semantic-interposition. These modules # Compiler may default to -fno-semantic-interposition. These modules
# must be compiled with -fsemantic-interposition. # must be compiled with -fsemantic-interposition.

32
elf/tst-tls23-mod.c Normal file
View File

@ -0,0 +1,32 @@
/* DSO used by tst-tls23.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <tst-tls23.h>
__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
/* Copy *P into the hidden TLS variable tls_var0 and return the address
   of tls_var0.  The INIT_TLS_CALL, BEFORE_TLS_CALL and AFTER_TLS_CALL
   hooks come from <tst-tls23.h>; they bracket the TLS access so that an
   architecture can check that caller-saved registers are unchanged.  */
struct tls *
apply_tls (struct tls *p)
{
INIT_TLS_CALL ();
BEFORE_TLS_CALL ();
/* Accessing tls_var0 is the TLS access under test.  */
tls_var0 = *p;
struct tls *ret = &tls_var0;
AFTER_TLS_CALL ();
return ret;
}

106
elf/tst-tls23.c Normal file
View File

@ -0,0 +1,106 @@
/* Test that __tls_get_addr preserves caller-saved registers.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include <pthread.h>
#include <support/xdlfcn.h>
#include <support/xthread.h>
#include <support/check.h>
#include <support/test-driver.h>
#include <tst-tls23.h>
#ifndef IS_SUPPORTED
# define IS_SUPPORTED() true
#endif
/* An architecture can define it to clobber caller-saved registers in
malloc below to verify that __tls_get_addr won't change caller-saved
registers. */
#ifndef PREPARE_MALLOC
# define PREPARE_MALLOC()
#endif
extern void * __libc_malloc (size_t);
size_t malloc_counter = 0;
/* Interposed malloc.  Runs the architecture hook PREPARE_MALLOC, counts
   the call in malloc_counter, and hands the request to __libc_malloc.  */
void *
malloc (size_t n)
{
  PREPARE_MALLOC ();
  malloc_counter += 1;
  void *block = __libc_malloc (n);
  return block;
}
static void *mod;
static const char *modname = "tst-tls23-mod.so";
/* Load the test module with lazy binding, remember its handle in MOD,
   and log the action.  */
static void
open_mod (void)
{
  void *handle = xdlopen (modname, RTLD_LAZY);
  mod = handle;
  printf ("open %s\n", modname);
}
/* Unload the test module, reset the MOD handle, and log the action.  */
static void
close_mod (void)
{
  void *handle = mod;
  xdlclose (handle);
  mod = NULL;
  printf ("close %s\n", modname);
}
/* Look up SYM in the loaded module and call it with a known struct tls
   value.  Verify that the call went through our malloc, that the
   returned object holds a copy of the value passed in, and that the
   returned object is writable.  */
static void
access_mod (const char *sym)
{
  struct tls var = { -4, -4, -4, -4 };
  struct tls *(*f) (struct tls *) = xdlsym (mod, sym);
  /* Check that our malloc is called.  */
  malloc_counter = 0;
  struct tls *p = f (&var);
  TEST_VERIFY (malloc_counter != 0);
  /* The %p conversion requires a void * argument.  */
  printf ("access %s: %s() = %p\n", modname, sym, (void *) p);
  TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
  /* Write to the returned object as well as reading it.  */
  ++(p->a);
}
/* Thread entry point: exercise apply_tls from the loaded module and
   hand ARG back as the thread result.  */
static void *
start (void *arg)
{
  static const char symbol[] = "apply_tls";
  access_mod (symbol);
  return arg;
}
/* Test driver entry point.  When the architecture supports the test,
   load the module, run the TLS access in a new thread, and unload the
   module again; otherwise report the test as unsupported.  */
static int
do_test (void)
{
  if (IS_SUPPORTED ())
    {
      open_mod ();
      pthread_t thr = xpthread_create (NULL, start, NULL);
      xpthread_join (thr);
      close_mod ();
      return 0;
    }

  return EXIT_UNSUPPORTED;
}
#include <support/test-driver.c>

View File

@ -1,5 +1,5 @@
/* x86-64 PLT trampoline register save macros. /* Test that __tls_get_addr preserves caller-saved registers.
Copyright (C) 2024-2025 Free Software Foundation, Inc. Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -16,19 +16,25 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */ <https://www.gnu.org/licenses/>. */
#ifndef DL_STACK_ALIGNMENT #include <stdint.h>
/* Due to GCC bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 struct tls
{
int64_t a, b, c, d;
};
__tls_get_addr may be called with 8-byte stack alignment. Although extern struct tls *apply_tls (struct tls *);
this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
that stack will be always aligned at 16 bytes. */ /* An architecture can define them to verify that caller-saved registers
# define DL_STACK_ALIGNMENT 8 aren't changed by __tls_get_addr. */
#ifndef INIT_TLS_CALL
# define INIT_TLS_CALL()
#endif #endif
/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align #ifndef BEFORE_TLS_CALL
stack to 16 bytes before calling _dl_fixup. */ # define BEFORE_TLS_CALL()
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ #endif
(STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
|| 16 > DL_STACK_ALIGNMENT) #ifndef AFTER_TLS_CALL
# define AFTER_TLS_CALL()
#endif

View File

@ -3,5 +3,6 @@ aarch64*)
base_machine=aarch64 base_machine=aarch64
machine=aarch64 machine=aarch64
mtls_descriptor=desc mtls_descriptor=desc
mtls_traditional=trad
;; ;;
esac esac

View File

@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double
endif endif
ifeq ($(subdir),elf) ifeq ($(subdir),elf)
sysdep-dl-routines += tlsdesc dl-tlsdesc sysdep-dl-routines += \
dl-tls-get-addr \
# sysdep-dl-routines
tests += tst-audit3 tests += tst-audit3
modules-names += tst-auditmod3a tst-auditmod3b modules-names += tst-auditmod3a tst-auditmod3b

View File

@ -0,0 +1,68 @@
/* Ifunc selector for ___tls_get_addr.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifdef SHARED
# define ___tls_get_addr __redirect____tls_get_addr
# include <dl-tls.h>
# undef ___tls_get_addr
# undef __tls_get_addr
# define SYMBOL_NAME ___tls_get_addr
# include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden;
/* Choose the ___tls_get_addr implementation for this CPU.  Preference
   order: XSAVEC, XSAVE, FXSAVE, and finally FNSAVE.  */
static inline void *
IFUNC_SELECTOR (void)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  /* With no XSAVE state size recorded, only FXSAVE or FNSAVE remain.  */
  if (cpu_features->xsave_state_size == 0)
    {
      if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
	return OPTIMIZE (fxsave);
      return OPTIMIZE (fnsave);
    }

  if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
    return OPTIMIZE (xsavec);
  return OPTIMIZE (xsave);
}
libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr,
IFUNC_SELECTOR ());
/* The special thing about the x86 TLS ABI is that we have two
variants of the __tls_get_addr function with different calling
conventions. The GNU version, which we are mostly concerned here,
takes the parameter in a register. The name is changed by adding
an additional underscore at the beginning. The Sun version uses
the normal calling convention. */
rtld_hidden_proto (___tls_get_addr)
rtld_hidden_def (___tls_get_addr)
/* Variant using the normal calling convention.  It forwards TI to
   ___tls_get_addr, the variant that takes its argument in a register.  */
void *
__tls_get_addr (tls_index *ti)
{
return ___tls_get_addr (ti);
}
#endif

View File

@ -37,34 +37,14 @@ typedef struct dl_tls_index
/* This is the prototype for the GNU version. */ /* This is the prototype for the GNU version. */
extern void *___tls_get_addr (tls_index *ti) extern void *___tls_get_addr (tls_index *ti)
__attribute__ ((__regparm__ (1))); __attribute__ ((__regparm__ (1)));
extern void *___tls_get_addr_internal (tls_index *ti)
__attribute__ ((__regparm__ (1))) attribute_hidden;
# if IS_IN (rtld) # if IS_IN (rtld)
/* The special thing about the x86 TLS ABI is that we have two
variants of the __tls_get_addr function with different calling
conventions. The GNU version, which we are mostly concerned here,
takes the parameter in a register. The name is changed by adding
an additional underscore at the beginning. The Sun version uses
the normal calling convention. */
void *
__tls_get_addr (tls_index *ti)
{
return ___tls_get_addr_internal (ti);
}
/* Prepare using the definition of __tls_get_addr in the generic /* Prepare using the definition of __tls_get_addr in the generic
version of this file. */ version of this file. */
# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr # define __tls_get_addr \
strong_alias (___tls_get_addr, ___tls_get_addr_internal) __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal
rtld_hidden_proto (___tls_get_addr) # else
rtld_hidden_def (___tls_get_addr)
#else
/* Users should get the better interface. */ /* Users should get the better interface. */
# define __tls_get_addr ___tls_get_addr # define __tls_get_addr ___tls_get_addr
# endif # endif
#endif #endif

View File

@ -16,34 +16,6 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */ <https://www.gnu.org/licenses/>. */
#undef REGISTER_SAVE_AREA
#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
# error STATE_SAVE_ALIGNMENT must be multiple of 16
#endif
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
# ifdef USE_FNSAVE
# error USE_FNSAVE shouldn't be defined
# endif
# ifdef USE_FXSAVE
/* Use fxsave to save all registers. */
# define REGISTER_SAVE_AREA 512
# endif
#else
# ifdef USE_FNSAVE
/* Use fnsave to save x87 FPU stack registers. */
# define REGISTER_SAVE_AREA 108
# else
# ifndef USE_FXSAVE
# error USE_FXSAVE must be defined
# endif
/* Use fxsave to save all registers. Add 12 bytes to align the stack
to 16 bytes. */
# define REGISTER_SAVE_AREA (512 + 12)
# endif
#endif
.hidden _dl_tlsdesc_dynamic .hidden _dl_tlsdesc_dynamic
.global _dl_tlsdesc_dynamic .global _dl_tlsdesc_dynamic
.type _dl_tlsdesc_dynamic,@function .type _dl_tlsdesc_dynamic,@function
@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic:
ret ret
.p2align 4,,7 .p2align 4,,7
2: 2:
cfi_adjust_cfa_offset (32) #include "tls-get-addr-wrapper.h"
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
movl %ebx, -28(%esp)
movl %esp, %ebx
cfi_def_cfa_register(%ebx)
and $-STATE_SAVE_ALIGNMENT, %esp
#endif
#ifdef REGISTER_SAVE_AREA
subl $REGISTER_SAVE_AREA, %esp
# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
# endif
#else
# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
# endif
/* Allocate stack space of the required size to save the state. */
LOAD_PIC_REG (cx)
subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
#endif
#ifdef USE_FNSAVE
fnsave (%esp)
#elif defined USE_FXSAVE
fxsave (%esp)
#else
/* Save the argument for ___tls_get_addr in EAX. */
movl %eax, %ecx
movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
xorl %edx, %edx
/* Clear the XSAVE Header. */
# ifdef USE_XSAVE
movl %edx, (512)(%esp)
movl %edx, (512 + 4 * 1)(%esp)
movl %edx, (512 + 4 * 2)(%esp)
movl %edx, (512 + 4 * 3)(%esp)
# endif
movl %edx, (512 + 4 * 4)(%esp)
movl %edx, (512 + 4 * 5)(%esp)
movl %edx, (512 + 4 * 6)(%esp)
movl %edx, (512 + 4 * 7)(%esp)
movl %edx, (512 + 4 * 8)(%esp)
movl %edx, (512 + 4 * 9)(%esp)
movl %edx, (512 + 4 * 10)(%esp)
movl %edx, (512 + 4 * 11)(%esp)
movl %edx, (512 + 4 * 12)(%esp)
movl %edx, (512 + 4 * 13)(%esp)
movl %edx, (512 + 4 * 14)(%esp)
movl %edx, (512 + 4 * 15)(%esp)
# ifdef USE_XSAVE
xsave (%esp)
# else
xsavec (%esp)
# endif
/* Restore the argument for ___tls_get_addr in EAX. */
movl %ecx, %eax
#endif
call HIDDEN_JUMPTARGET (___tls_get_addr)
/* Get register content back. */
#ifdef USE_FNSAVE
frstor (%esp)
#elif defined USE_FXSAVE
fxrstor (%esp)
#else
/* Save and retore ___tls_get_addr return value stored in EAX. */
movl %eax, %ecx
movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
xorl %edx, %edx
xrstor (%esp)
movl %ecx, %eax
#endif
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
mov %ebx, %esp
cfi_def_cfa_register(%esp)
movl -28(%esp), %ebx
cfi_restore(%ebx)
#else
addl $REGISTER_SAVE_AREA, %esp
cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
#endif
jmp 1b jmp 1b
cfi_endproc cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic

View File

@ -22,23 +22,6 @@
#include <features-offsets.h> #include <features-offsets.h>
#include "tlsdesc.h" #include "tlsdesc.h"
#ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
__tls_get_addr may be called with 4-byte stack alignment. Although
this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
that stack will be always aligned at 16 bytes. */
# define DL_STACK_ALIGNMENT 4
#endif
/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
(STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
|| MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
.text .text
/* This function is used to compute the TP offset for symbols in /* This function is used to compute the TP offset for symbols in

View File

@ -0,0 +1,127 @@
/* Wrapper of i386 ___tls_get_addr to save and restore vector registers.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Select REGISTER_SAVE_AREA, the fixed number of stack bytes reserved
   for the saved state.  It stays undefined when neither USE_FNSAVE nor
   USE_FXSAVE is defined; in that case the size is read at run time.  */
#undef REGISTER_SAVE_AREA
#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
# error STATE_SAVE_ALIGNMENT must be multiple of 16
#endif
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* The stack is realigned at run time, so no padding is needed.  */
# ifdef USE_FNSAVE
# error USE_FNSAVE shouldn't be defined
# endif
# ifdef USE_FXSAVE
/* Use fxsave to save all registers. */
# define REGISTER_SAVE_AREA 512
# endif
#else
/* The stack is not realigned; only fixed-size save areas are allowed.  */
# ifdef USE_FNSAVE
/* Use fnsave to save x87 FPU stack registers. */
# define REGISTER_SAVE_AREA 108
# else
# ifndef USE_FXSAVE
# error USE_FXSAVE must be defined
# endif
/* Use fxsave to save all registers. Add 12 bytes to align the stack
to 16 bytes. */
# define REGISTER_SAVE_AREA (512 + 12)
# endif
#endif
/* Save the processor state on the stack, call ___tls_get_addr_internal
   with its argument in EAX, then restore the state.  The result is left
   in EAX.  ECX and EDX are used as scratch registers.  */
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Stash EBX at 28(%esp); the including code is expected to have
   reserved that slot.  Keep the unaligned stack pointer in EBX, then
   align ESP for the state save area.  */
movl %ebx, 28(%esp)
movl %esp, %ebx
cfi_def_cfa_register(%ebx)
and $-STATE_SAVE_ALIGNMENT, %esp
#endif
#ifdef REGISTER_SAVE_AREA
subl $REGISTER_SAVE_AREA, %esp
# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
# endif
#else
# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
# endif
/* Allocate stack space of the required size to save the state. */
LOAD_PIC_REG (cx)
subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \
+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
#endif
#ifdef USE_FNSAVE
fnsave (%esp)
#elif defined USE_FXSAVE
fxsave (%esp)
#else
/* Save the argument for ___tls_get_addr in EAX. */
movl %eax, %ecx
/* EDX:EAX is the state-component mask for XSAVE/XSAVEC.  */
movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
xorl %edx, %edx
/* Clear the XSAVE Header, which starts at offset 512 of the save area.
   The first four dwords are cleared only when XSAVE is used.  */
# ifdef USE_XSAVE
movl %edx, (512)(%esp)
movl %edx, (512 + 4 * 1)(%esp)
movl %edx, (512 + 4 * 2)(%esp)
movl %edx, (512 + 4 * 3)(%esp)
# endif
movl %edx, (512 + 4 * 4)(%esp)
movl %edx, (512 + 4 * 5)(%esp)
movl %edx, (512 + 4 * 6)(%esp)
movl %edx, (512 + 4 * 7)(%esp)
movl %edx, (512 + 4 * 8)(%esp)
movl %edx, (512 + 4 * 9)(%esp)
movl %edx, (512 + 4 * 10)(%esp)
movl %edx, (512 + 4 * 11)(%esp)
movl %edx, (512 + 4 * 12)(%esp)
movl %edx, (512 + 4 * 13)(%esp)
movl %edx, (512 + 4 * 14)(%esp)
movl %edx, (512 + 4 * 15)(%esp)
# ifdef USE_XSAVE
xsave (%esp)
# else
xsavec (%esp)
# endif
/* Restore the argument for ___tls_get_addr in EAX. */
movl %ecx, %eax
#endif
call ___tls_get_addr_internal
/* Get register content back. */
#ifdef USE_FNSAVE
frstor (%esp)
#elif defined USE_FXSAVE
fxrstor (%esp)
#else
/* Save and restore ___tls_get_addr return value stored in EAX. */
movl %eax, %ecx
movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
xorl %edx, %edx
xrstor (%esp)
movl %ecx, %eax
#endif
#if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Return to the unaligned stack pointer kept in EBX, then reload the
   original EBX from its slot.  */
mov %ebx, %esp
cfi_def_cfa_register(%esp)
movl 28(%esp), %ebx
cfi_restore(%ebx)
#else
addl $REGISTER_SAVE_AREA, %esp
cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
#endif
/* Undefine it so that a file which includes this header more than once
   can define a new value for the next inclusion.  */
#undef STATE_SAVE_ALIGNMENT

View File

@ -0,0 +1,57 @@
/* Thread-local storage handling in the ELF dynamic linker. i386 version.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <tls.h>
#include <cpu-features-offsets.h>
#include <features-offsets.h>
.text
#ifdef SHARED
/* Instantiate tls_get_addr.h once per state-saving method.  For each
   instantiation ___tls_get_addr is renamed to a distinct symbol, and
   STATE_SAVE_ALIGNMENT is defined again.  */

/* FNSAVE variant.  */
# define USE_FNSAVE
# define MINIMUM_ALIGNMENT 4
# define STATE_SAVE_ALIGNMENT 4
# define ___tls_get_addr _____tls_get_addr_fnsave
# include "tls_get_addr.h"
# undef ___tls_get_addr
# undef MINIMUM_ALIGNMENT
# undef USE_FNSAVE
/* MINIMUM_ALIGNMENT stays at 16 for all remaining variants.  */
# define MINIMUM_ALIGNMENT 16
/* FXSAVE variant.  */
# define USE_FXSAVE
# define STATE_SAVE_ALIGNMENT 16
# define ___tls_get_addr _____tls_get_addr_fxsave
# include "tls_get_addr.h"
# undef ___tls_get_addr
# undef USE_FXSAVE
/* XSAVE variant.  */
# define USE_XSAVE
# define STATE_SAVE_ALIGNMENT 64
# define ___tls_get_addr _____tls_get_addr_xsave
# include "tls_get_addr.h"
# undef ___tls_get_addr
# undef USE_XSAVE
/* XSAVEC variant.  */
# define USE_XSAVEC
# define STATE_SAVE_ALIGNMENT 64
# define ___tls_get_addr _____tls_get_addr_xsavec
# include "tls_get_addr.h"
# undef ___tls_get_addr
# undef USE_XSAVEC
#endif /* SHARED */

View File

@ -0,0 +1,42 @@
/* Thread-local storage handling in the ELF dynamic linker. i386 version.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Template for one ___tls_get_addr variant.  The including file defines
   ___tls_get_addr as the variant's symbol name together with the macros
   that select the state-saving method.  */
.hidden ___tls_get_addr
.global ___tls_get_addr
.type ___tls_get_addr,@function
/* This function is a wrapper of ___tls_get_addr_internal to
preserve caller-saved vector registers. */
cfi_startproc
.align 16
___tls_get_addr:
/* Like all TLS resolvers, preserve call-clobbered registers.
We need two scratch regs anyway. */
subl $32, %esp
cfi_adjust_cfa_offset (32)
/* Save ECX and EDX in the reserved area.  The included wrapper uses the
   slot at 28(%esp) for EBX when it realigns the stack.  EAX is not
   saved: it carries the argument in and the result out.  */
movl %ecx, 20(%esp)
movl %edx, 24(%esp)
#include "tls-get-addr-wrapper.h"
movl 20(%esp), %ecx
movl 24(%esp), %edx
addl $32, %esp
cfi_adjust_cfa_offset (-32)
ret
cfi_endproc
.size ___tls_get_addr, .-___tls_get_addr

View File

@ -44,6 +44,7 @@ loongarch*)
base_machine=loongarch base_machine=loongarch
mtls_descriptor=desc mtls_descriptor=desc
mtls_traditional=trad
;; ;;
esac esac

View File

@ -42,6 +42,7 @@ loongarch*)
base_machine=loongarch base_machine=loongarch
mtls_descriptor=desc mtls_descriptor=desc
mtls_traditional=trad
;; ;;
esac esac

View File

@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c
$(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
$(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
# The test checks if the __tls_get_addr does not clobber caller-saved
# register, so disable the powerpc specific optimization to force a
# __tls_get_addr call.
LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize
endif endif
ifneq (no,$(multi-arch)) ifneq (no,$(multi-arch))

View File

@ -4,7 +4,13 @@ endif
ifeq ($(subdir),elf) ifeq ($(subdir),elf)
sysdep_routines += get-cpuid-feature-leaf sysdep_routines += get-cpuid-feature-leaf
sysdep-dl-routines += dl-get-cpu-features sysdep-dl-routines += \
dl-get-cpu-features \
dl-tlsdesc \
tls_get_addr \
tlsdesc \
# sysdep-dl-routines
sysdep_headers += \ sysdep_headers += \
bits/platform/features.h \ bits/platform/features.h \
bits/platform/x86.h \ bits/platform/x86.h \
@ -113,6 +119,14 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \
$(objpfx)tst-gnu2-tls2mod0.so \ $(objpfx)tst-gnu2-tls2mod0.so \
$(objpfx)tst-gnu2-tls2mod1.so \ $(objpfx)tst-gnu2-tls2mod1.so \
$(objpfx)tst-gnu2-tls2mod2.so $(objpfx)tst-gnu2-tls2mod2.so
CFLAGS-tst-tls23.c += -msse2
CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell
LDFLAGS-tst-tls23 += -rdynamic
tst-tls23-mod.so-no-z-defs = yes
$(objpfx)tst-tls23-mod.so: $(libsupport)
endif endif
ifeq ($(subdir),math) ifeq ($(subdir),math)

View File

@ -183,6 +183,29 @@
#define atom_text_section .section ".text.atom", "ax" #define atom_text_section .section ".text.atom", "ax"
/* DL_STACK_ALIGNMENT is the stack alignment that may be assumed on
   entry.  MINIMUM_ALIGNMENT is defined here only for x86-64, and only
   when DL_STACK_ALIGNMENT was not already defined.
   NOTE(review): other users appear to define MINIMUM_ALIGNMENT
   themselves before using the macro below -- confirm.  */
#ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
__tls_get_addr may be called with 8-byte/4-byte stack alignment.
Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
assume that stack will be always aligned at 16 bytes. */
# ifdef __x86_64__
# define DL_STACK_ALIGNMENT 8
# define MINIMUM_ALIGNMENT 16
# else
# define DL_STACK_ALIGNMENT 4
# endif
#endif
/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling
_dl_fixup/__tls_get_addr. */
/* STATE_SAVE_ALIGNMENT and MINIMUM_ALIGNMENT are evaluated where this
   macro is used, so both must be defined at that point.  */
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
(STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
|| MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
#endif /* __ASSEMBLER__ */ #endif /* __ASSEMBLER__ */
#endif /* _X86_SYSDEP_H */ #endif /* _X86_SYSDEP_H */

22
sysdeps/x86/tst-tls23.c Normal file
View File

@ -0,0 +1,22 @@
/* On non-x86-64 targets, run the test only when SSE2 is active.
   NOTE(review): presumably x86-64 needs no check because SSE2 is always
   available there -- confirm.  */
#ifndef __x86_64__
#include <sys/platform/x86.h>
#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
#endif
/* Set XMM0...XMM7 to all 1s. */
/* The generic test expands this at the start of its malloc, so the
   registers are overwritten whenever that malloc is called.  */
#define PREPARE_MALLOC() \
{ \
asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \
asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \
asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \
asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \
asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \
asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \
asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \
asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \
}
#include <elf/tst-tls23.c>
/* Definitions of the vectors declared extern in tst-tls23.h.  */
v2di v1, v2, v3;

35
sysdeps/x86/tst-tls23.h Normal file
View File

@ -0,0 +1,35 @@
/* Test that __tls_get_addr preserves XMM registers.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <support/check.h>
/* 16-byte vector of two long long elements.  */
typedef long long v2di __attribute__((vector_size(16)));
extern v2di v1, v2, v3;
/* Before the TLS access: store zero vectors to v1 and v2.  */
#define BEFORE_TLS_CALL() \
v1 = __extension__(v2di){0, 0}; \
v2 = __extension__(v2di){0, 0};
/* After the TLS access: store a zero vector to v3, pass v3 through the
   empty asm with the "+x" constraint, and verify that both halves read
   back as zero.
   NOTE(review): presumably the compiler keeps the zero in an XMM
   register across the TLS access, so a register clobbered in between
   makes the check fail -- confirm.  */
#define AFTER_TLS_CALL() \
v3 = __extension__(v2di){0, 0}; \
asm volatile ("" : "+x" (v3)); \
union { v2di x; long long a[2]; } u; \
u.x = v3; \
TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
#include <elf/tst-tls23.h>

View File

@ -41,9 +41,6 @@ ifeq ($(subdir),elf)
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
-mno-mmx) -mno-mmx)
sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
tests += ifuncmain8
modules-names += ifuncmod8 modules-names += ifuncmod8
$(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so

View File

@ -22,7 +22,6 @@
#include <features-offsets.h> #include <features-offsets.h>
#include <isa-level.h> #include <isa-level.h>
#include "tlsdesc.h" #include "tlsdesc.h"
#include "dl-trampoline-save.h"
/* Area on stack to save and restore registers used for parameter /* Area on stack to save and restore registers used for parameter
passing when calling _dl_tlsdesc_dynamic. */ passing when calling _dl_tlsdesc_dynamic. */

View File

@ -22,7 +22,6 @@
#include <features-offsets.h> #include <features-offsets.h>
#include <link-defines.h> #include <link-defines.h>
#include <isa-level.h> #include <isa-level.h>
#include "dl-trampoline-save.h"
/* Area on stack to save and restore registers used for parameter /* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */ passing when calling _dl_fixup. */