mirror of git://sourceware.org/git/glibc.git
x86: Optimize xstate size calculation
Scan xstate IDs up to the maximum supported xstate ID. Remove the separate AMX xstate calculation; instead, exclude the AMX space, from the start of TILECFG to the end of TILEDATA, from xsave_state_size. Validated on SKL/SKX/SPR/SDE and compared the xsave state size using the "ld.so --list-diagnostics" option; no regression was found. Co-Authored-By: H.J. Lu <hjl.tools@gmail.com> Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
This commit is contained in:
parent
8322e93dcd
commit
70b6488551
|
|
@ -325,13 +325,8 @@ update_active (struct cpu_features *cpu_features)
|
||||||
/* Check if XSAVEC is available. */
|
/* Check if XSAVEC is available. */
|
||||||
if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
|
if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
|
||||||
{
|
{
|
||||||
unsigned int xstate_comp_offsets[32];
|
unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1];
|
||||||
unsigned int xstate_comp_sizes[32];
|
unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1];
|
||||||
#ifdef __x86_64__
|
|
||||||
unsigned int xstate_amx_comp_offsets[32];
|
|
||||||
unsigned int xstate_amx_comp_sizes[32];
|
|
||||||
unsigned int amx_ecx;
|
|
||||||
#endif
|
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
xstate_comp_offsets[0] = 0;
|
xstate_comp_offsets[0] = 0;
|
||||||
|
|
@ -339,39 +334,16 @@ update_active (struct cpu_features *cpu_features)
|
||||||
xstate_comp_offsets[2] = 576;
|
xstate_comp_offsets[2] = 576;
|
||||||
xstate_comp_sizes[0] = 160;
|
xstate_comp_sizes[0] = 160;
|
||||||
xstate_comp_sizes[1] = 256;
|
xstate_comp_sizes[1] = 256;
|
||||||
#ifdef __x86_64__
|
|
||||||
xstate_amx_comp_offsets[0] = 0;
|
|
||||||
xstate_amx_comp_offsets[1] = 160;
|
|
||||||
xstate_amx_comp_offsets[2] = 576;
|
|
||||||
xstate_amx_comp_sizes[0] = 160;
|
|
||||||
xstate_amx_comp_sizes[1] = 256;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (i = 2; i < 32; i++)
|
for (i = 2; i <= X86_XSTATE_MAX_ID; i++)
|
||||||
{
|
{
|
||||||
if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
|
if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
|
||||||
{
|
{
|
||||||
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
|
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
|
||||||
#ifdef __x86_64__
|
|
||||||
/* Include this in xsave_state_full_size. */
|
|
||||||
amx_ecx = ecx;
|
|
||||||
xstate_amx_comp_sizes[i] = eax;
|
|
||||||
if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
|
|
||||||
{
|
|
||||||
/* Exclude this from xsave_state_size. */
|
|
||||||
ecx = 0;
|
|
||||||
xstate_comp_sizes[i] = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
xstate_comp_sizes[i] = eax;
|
xstate_comp_sizes[i] = eax;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifdef __x86_64__
|
|
||||||
amx_ecx = 0;
|
|
||||||
xstate_amx_comp_sizes[i] = 0;
|
|
||||||
#endif
|
|
||||||
ecx = 0;
|
ecx = 0;
|
||||||
xstate_comp_sizes[i] = 0;
|
xstate_comp_sizes[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -380,42 +352,32 @@ update_active (struct cpu_features *cpu_features)
|
||||||
{
|
{
|
||||||
xstate_comp_offsets[i]
|
xstate_comp_offsets[i]
|
||||||
= (xstate_comp_offsets[i - 1]
|
= (xstate_comp_offsets[i - 1]
|
||||||
+ xstate_comp_sizes[i -1]);
|
+ xstate_comp_sizes[i - 1]);
|
||||||
if ((ecx & (1 << 1)) != 0)
|
if ((ecx & (1 << 1)) != 0)
|
||||||
xstate_comp_offsets[i]
|
xstate_comp_offsets[i]
|
||||||
= ALIGN_UP (xstate_comp_offsets[i], 64);
|
= ALIGN_UP (xstate_comp_offsets[i], 64);
|
||||||
#ifdef __x86_64__
|
|
||||||
xstate_amx_comp_offsets[i]
|
|
||||||
= (xstate_amx_comp_offsets[i - 1]
|
|
||||||
+ xstate_amx_comp_sizes[i - 1]);
|
|
||||||
if ((amx_ecx & (1 << 1)) != 0)
|
|
||||||
xstate_amx_comp_offsets[i]
|
|
||||||
= ALIGN_UP (xstate_amx_comp_offsets[i],
|
|
||||||
64);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Use XSAVEC. */
|
/* Use XSAVEC. */
|
||||||
unsigned int size
|
unsigned int size
|
||||||
= xstate_comp_offsets[31] + xstate_comp_sizes[31];
|
= (xstate_comp_offsets[X86_XSTATE_MAX_ID]
|
||||||
|
+ xstate_comp_sizes[X86_XSTATE_MAX_ID]);
|
||||||
if (size)
|
if (size)
|
||||||
{
|
{
|
||||||
|
size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
|
||||||
|
64);
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
unsigned int amx_size
|
_dl_x86_features_tlsdesc_state_size = size;
|
||||||
= (xstate_amx_comp_offsets[31]
|
/* Exclude the AMX space from the start of TILECFG
|
||||||
+ xstate_amx_comp_sizes[31]);
|
space to the end of TILEDATA space. If CPU
|
||||||
amx_size
|
doesn't support AMX, TILECFG offset is the same
|
||||||
= ALIGN_UP ((amx_size
|
as TILEDATA + 1 offset. Otherwise, they are
|
||||||
+ TLSDESC_CALL_REGISTER_SAVE_AREA),
|
multiples of 64. */
|
||||||
64);
|
size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
|
||||||
/* Set TLSDESC state size to the compact AMX
|
- xstate_comp_offsets[X86_XSTATE_TILECFG_ID]);
|
||||||
state size for XSAVEC. */
|
|
||||||
_dl_x86_features_tlsdesc_state_size = amx_size;
|
|
||||||
#endif
|
#endif
|
||||||
cpu_features->xsave_state_size
|
cpu_features->xsave_state_size = size;
|
||||||
= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
|
|
||||||
64);
|
|
||||||
CPU_FEATURE_SET (cpu_features, XSAVEC);
|
CPU_FEATURE_SET (cpu_features, XSAVEC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,9 @@
|
||||||
| (1 << X86_XSTATE_ZMM_ID) \
|
| (1 << X86_XSTATE_ZMM_ID) \
|
||||||
| (1 << X86_XSTATE_APX_F_ID))
|
| (1 << X86_XSTATE_APX_F_ID))
|
||||||
|
|
||||||
|
/* The maximum supported xstate ID. */
|
||||||
|
# define X86_XSTATE_MAX_ID X86_XSTATE_APX_F_ID
|
||||||
|
|
||||||
/* AMX state mask. */
|
/* AMX state mask. */
|
||||||
# define AMX_STATE_SAVE_MASK \
|
# define AMX_STATE_SAVE_MASK \
|
||||||
((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
|
((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
|
||||||
|
|
@ -123,6 +126,9 @@
|
||||||
| (1 << X86_XSTATE_K_ID) \
|
| (1 << X86_XSTATE_K_ID) \
|
||||||
| (1 << X86_XSTATE_ZMM_H_ID))
|
| (1 << X86_XSTATE_ZMM_H_ID))
|
||||||
|
|
||||||
|
/* The maximum supported xstate ID. */
|
||||||
|
# define X86_XSTATE_MAX_ID X86_XSTATE_ZMM_H_ID
|
||||||
|
|
||||||
/* States to be included in xsave_state_size. */
|
/* States to be included in xsave_state_size. */
|
||||||
# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
|
# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue