X86-64: Prepare memmove-vec-unaligned-erms.S

Prepare memmove-vec-unaligned-erms.S to make the SSE2 version the
default memcpy, mempcpy and memmove.

	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
	(MEMCPY_SYMBOL): New.
	(MEMPCPY_SYMBOL): Likewise.
	(MEMMOVE_CHK_SYMBOL): Likewise.
	Replace MEMMOVE_SYMBOL with MEMMOVE_CHK_SYMBOL on __mempcpy_chk
	symbols.  Replace MEMMOVE_SYMBOL with MEMPCPY_SYMBOL on
	__mempcpy symbols.  Provide alias for __memcpy_chk in libc.a.
	Provide alias for memcpy in libc.a and ld.so.
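
For context, the new MEMCPY_SYMBOL, MEMPCPY_SYMBOL and MEMMOVE_CHK_SYMBOL
hooks default to MEMMOVE_SYMBOL, so existing multiarch users see no change,
while a non-multiarch build can override them to emit the public symbol
names directly.  A rough sketch of how a default SSE2 wrapper might include
this file is shown below; the wrapper file name and the exact macro values
are illustrative assumptions, not taken from this commit.

/* Hypothetical non-multiarch wrapper (say, sysdeps/x86_64/memmove.S),
   for illustration only.  It maps the internal names onto the public
   symbols before pulling in the implementation.  */
#define VEC_SIZE		16
#define VEC(i)			xmm##i
#define VMOVU			movdqu
#define VMOVA			movdqa
#define SECTION(p)		p
#define MEMMOVE_SYMBOL(p,s)	memmove
#define MEMCPY_SYMBOL(p,s)	memcpy
#define MEMPCPY_SYMBOL(p,s)	__mempcpy
/* Keeping the first argument yields __memmove_chk, __memcpy_chk and
   __mempcpy_chk for the checked entry points.  */
#define MEMMOVE_CHK_SYMBOL(p,s)	p
#include "multiarch/memmove-vec-unaligned-erms.S"

Because each hook falls back to MEMMOVE_SYMBOL when left undefined, the
existing multiarch wrappers that only define MEMMOVE_SYMBOL keep building
the same variants as before.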
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   2016-04-06 10:19:16 -07:00
Parent: 4af1bb06c5
Commit: a7d1c51482
2 changed files with 95 additions and 54 deletions

ChangeLog
@@ -1,3 +1,14 @@
+2016-04-06  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+	(MEMCPY_SYMBOL): New.
+	(MEMPCPY_SYMBOL): Likewise.
+	(MEMMOVE_CHK_SYMBOL): Likewise.
+	Replace MEMMOVE_SYMBOL with MEMMOVE_CHK_SYMBOL on __mempcpy_chk
+	symbols.  Replace MEMMOVE_SYMBOL with MEMPCPY_SYMBOL on
+	__mempcpy symbols.  Provide alias for __memcpy_chk in libc.a.
+	Provide alias for memcpy in libc.a and ld.so.
+
 2016-04-06  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S

sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -32,10 +32,19 @@
       8 * VEC_SIZE at a time.
    8. Otherwise, forward copy 8 * VEC_SIZE at a time.  */
 
-#if IS_IN (libc)
-
 #include <sysdep.h>
-# include "asm-syntax.h"
+
+#ifndef MEMCPY_SYMBOL
+# define MEMCPY_SYMBOL(p,s)		MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMPCPY_SYMBOL
+# define MEMPCPY_SYMBOL(p,s)		MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMMOVE_CHK_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s)	MEMMOVE_SYMBOL(p, s)
+#endif
 
 #ifndef VZEROUPPER
 # if VEC_SIZE > 16
@@ -59,24 +68,28 @@
 #ifndef SECTION
 # error SECTION is not defined!
 #endif
 
 	.section SECTION(.text),"ax",@progbits
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
+#endif
 
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+#if VEC_SIZE == 16 || defined SHARED
+ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+END (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
+#endif
 
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
 #endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
@@ -86,24 +99,29 @@ L(start):
 	jb	L(less_vec)
 	cmpq	$(VEC_SIZE * 2), %rdx
 	ja	L(more_2x_vec)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(last_2x_vec):
+#endif
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
 	VMOVU	(%rsi), %VEC(0)
 	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(1)
 	VMOVU	%VEC(0), (%rdi)
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
 	VZEROUPPER
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(nop):
+#endif
 	ret
+#if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
 
-# if VEC_SIZE == 16
+# if VEC_SIZE == 16 && defined SHARED
 /* Only used to measure performance of REP MOVSB.  */
-#  ifdef SHARED
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
-#  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
@@ -132,11 +150,10 @@ strong_alias (__memmove_erms, __memcpy_erms)
 # endif
 
 # ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 
 ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 	movq	%rdi, %rax
@@ -144,11 +161,10 @@ ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 	jmp	L(start_erms)
 END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 # endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -192,6 +208,7 @@ L(movsb_more_2x_vec):
 	/* Force 32-bit displacement to avoid long nop between
 	   instructions.  */
 	ja.d32	L(movsb)
+#endif
 	.p2align 4
 L(more_2x_vec):
 	/* More than 2 * VEC.  */
@@ -228,7 +245,13 @@ L(copy_forward):
 	VMOVU	%VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
 	cmpq	$(VEC_SIZE * 8), %rdx
 #if VEC_SIZE == 16
+# if defined USE_MULTIARCH && IS_IN (libc)
 	jbe	L(return)
+# else
+	/* Use 32-bit displacement to avoid long nop between
+	   instructions.  */
+	jbe.d32	L(return)
+# endif
 #else
 	/* Use 8-bit displacement to avoid long nop between
 	   instructions.  */
@@ -263,6 +286,9 @@ L(loop):
 	addq	$(VEC_SIZE * 4), %rcx
 	cmpq	%rcx, %rdx
 	jne	L(loop)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(return):
+#endif
 L(return_disp8):
 	VZEROUPPER
 	ret
@@ -455,14 +481,18 @@ L(loop_8x_vec_backward):
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 
 #ifdef SHARED
+# if IS_IN (libc)
+#  ifdef USE_MULTIARCH
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
 strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
-
+#  endif
+strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2),
+	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned_2))
+# endif
+#endif
+#if VEC_SIZE == 16 || defined SHARED
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_2),
-	      MEMMOVE_SYMBOL (__memcpy, unaligned_2))
-strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2),
-	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_2))
-# endif
+	      MEMCPY_SYMBOL (__memcpy, unaligned_2))
 #endif