Improve x86-32 strcat functions with SSE2/SSSE3

This commit is contained in:
Liubov Dmitrieva 2011-08-04 15:33:38 -04:00 committed by Ulrich Drepper
parent 8c1a459f9a
commit 5fa16e9b01
11 changed files with 2342 additions and 319 deletions

View File

@ -1,3 +1,29 @@
2011-07-22 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
* sysdeps/i386/i686/multiarch/strcat-sse2.S: Update.
Fix overflow bug in strncat.
* sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise.
* string/test-strncat.c: Update.
Add new tests for checking overflow bugs.
2011-07-15 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
strcat-ssse3 strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c.
* sysdeps/i386/i686/multiarch/strcat.S: New file.
* sysdeps/i386/i686/multiarch/strcat-c.c: New file.
* sysdeps/i386/i686/multiarch/strcat-sse2.S: New file.
* sysdeps/i386/i686/multiarch/strcat-ssse3.S: New file.
* sysdeps/i386/i686/multiarch/strncat.S: New file.
* sysdeps/i386/i686/multiarch/strncat-sse2.S: New file.
* sysdeps/i386/i686/multiarch/strncat-ssse3.S: New file.
* sysdeps/i386/i686/multiarch/strcpy-ssse3.S
(USE_AS_STRCAT): Define.
Add strcat and strncat support.
* sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise.
2011-07-25 Andreas Schwab <schwab@redhat.com> 2011-07-25 Andreas Schwab <schwab@redhat.com>
* sysdeps/i386/i486/bits/string.h (__strncat_g): Correctly handle * sysdeps/i386/i486/bits/string.h (__strncat_g): Correctly handle

View File

@ -12,7 +12,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \ memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c
ifeq (yes,$(config-cflags-sse4)) ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4 CFLAGS-varshift.c += -msse4

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,573 @@
/* strcat with SSSE3
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
# include <sysdep.h>
/* CFI_PUSH / CFI_POP emit the unwind annotations that go with a 4-byte
   push / pop of REG; PUSH / POP pair the instruction with its
   annotation.  */
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
/* The including file may choose the symbol name (the strncat wrapper
   does); the default is the SSSE3 strcat.  */
# ifndef STRCAT
# define STRCAT __strcat_ssse3
# endif
/* Stack offsets of the arguments.  STR1 is PARMS+4 because %edi is
   pushed before the first argument is read.  */
# define PARMS 4
# define STR1 PARMS+4
# define STR2 STR1+4
# ifdef USE_AS_STRNCAT
/* LEN is read after a second push (%ebx), hence 8 rather than 4 bytes
   past STR2.  */
# define LEN STR2+8
# endif
/* Tells the included strlen-sse2.S to omit its own entry and exit.  */
# define USE_AS_STRCAT
.text
ENTRY (STRCAT)
PUSH (%edi)
/* %edi keeps the destination pointer for the whole function; every
   return path copies it into %eax.  %edx is the working copy handed to
   the included strlen code.  */
mov STR1(%esp), %edi
mov %edi, %edx
/* NOTE(review): RETURN is presumably what the exit paths of the
   included strlen code expand, so that they continue at
   L(StartStrcpyPart) instead of returning -- confirm against
   strlen-sse2.S.  */
# define RETURN jmp L(StartStrcpyPart)
# include "strlen-sse2.S"
/* Copy part.  On entry %edi = destination; %eax is expected to hold the
   length of the destination string as computed by the included strlen
   code (assumption -- that code is not fully visible here).
   After the two instructions below: %ecx = source, %edx = %edi + %eax,
   i.e. the address where the first source byte is stored.  */
L(StartStrcpyPart):
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
# ifdef USE_AS_STRNCAT
/* strncat only: %ebx = n.  n == 0 returns immediately; n <= 8 takes the
   path that checks the count after every byte.  */
PUSH (%ebx)
mov LEN(%esp), %ebx
test %ebx, %ebx
jz L(StrncatExit0)
cmp $8, %ebx
jbe L(StrncatExit8Bytes)
# endif
/* Probe the first source bytes one at a time.  A NUL at offset K-1
   branches to L(ExitK), which copies exactly K bytes (terminator
   included).  */
cmpb $0, (%ecx)
jz L(Exit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmpb $0, 7(%ecx)
jz L(Exit8)
cmpb $0, 8(%ecx)
jz L(Exit9)
# ifdef USE_AS_STRNCAT
/* Here n > 8 and source bytes 0..8 are non-NUL.  With fewer than 16
   bytes allowed, switch to the path that interleaves count checks.  */
cmp $16, %ebx
jb L(StrncatExit15Bytes)
# endif
cmpb $0, 9(%ecx)
jz L(Exit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmpb $0, 14(%ecx)
jz L(Exit15)
cmpb $0, 15(%ecx)
jz L(Exit16)
# ifdef USE_AS_STRNCAT
/* The first 16 source bytes are non-NUL.  If exactly 16 bytes are
   allowed, copy them and append the terminator.  */
	cmp	$16, %ebx
	je	L(StrncatExit16)
/* RETURN1 restores the saved registers and returns.  The CFI_PUSH
   annotations after the ret re-establish the unwind state for the code
   that follows the macro, which still runs with both registers on the
   stack.  %edi was pushed first and %ebx second, so the annotations
   must be replayed in that same order; otherwise the unwind info
   records the two save slots swapped.  */
#  define RETURN1 \
	POP (%ebx); \
	POP (%edi); \
	ret; \
	CFI_PUSH (%edi); \
	CFI_PUSH (%ebx)
/* Select the length-limited code paths of the included copy code.  */
#  define USE_AS_STRNCPY
# else
/* Plain strcat saves only %edi.  */
#  define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
# endif
# include "strcpy-ssse3.S"
/* Tail copy of 1..16 bytes selected by a bit mask in %eax.
   %esi is added to both pointers and then restored from the stack; it
   is presumably an offset pushed and maintained by the included
   strcpy-ssse3.S -- confirm there.  Bit K-1 of the mask selects
   L(ExitK) (presumably the mask marks the position of the NUL byte).  */
.p2align 4
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
/* Low byte clear: the selecting bit is in %ah.  */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* %al is non-zero and bits 0..6 are clear, so bit 7 is set: copy 8
   bytes.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHigh):
/* Same dispatch on the second mask byte: bit K of %ah selects
   L(Exit(9+K)).  */
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* None of bits 0..6 of %ah matched: copy the full 16 bytes.  */
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
/* Exit stubs.
   L(ExitK) copies exactly K bytes from (%ecx) to (%edx), loads the
   destination pointer saved in %edi into %eax and returns via RETURN1.
   Sizes that are not a single move are built from two moves, some of
   which overlap.
   L(StrncatExitK) first stores %bh at K(%edx) and then falls through to
   L(ExitK).  Every branch to these labels visible in this file is taken
   with %ebx == K (K <= 16), so %bh is zero and the store writes the
   terminating NUL after the K copied bytes.  The store precedes the
   copy and lies just past the copied range, so the copy does not
   overwrite it.  */
.p2align 4
L(StrncatExit1):
movb %bh, 1(%edx)
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit2):
movb %bh, 2(%edx)
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit3):
movb %bh, 3(%edx)
L(Exit3):
/* 2 + 1 bytes.  */
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit4):
movb %bh, 4(%edx)
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit5):
movb %bh, 5(%edx)
L(Exit5):
/* 4 + 1 bytes.  */
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit6):
movb %bh, 6(%edx)
L(Exit6):
/* 4 + 2 bytes.  */
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit7):
movb %bh, 7(%edx)
L(Exit7):
/* Two 4-byte moves at offsets 0 and 3; byte 3 is written twice.  */
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit8):
movb %bh, 8(%edx)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit9):
movb %bh, 9(%edx)
L(Exit9):
/* 8 + 1 bytes.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit10):
movb %bh, 10(%edx)
L(Exit10):
/* 8 + 2 bytes.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit11):
movb %bh, 11(%edx)
L(Exit11):
/* 8 bytes at offset 0 plus 4 bytes at offset 7; byte 7 is written
   twice.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit12):
movb %bh, 12(%edx)
L(Exit12):
/* 8 + 4 bytes.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit13):
movb %bh, 13(%edx)
L(Exit13):
/* Two 8-byte moves at offsets 0 and 5 (overlapping).  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit14):
movb %bh, 14(%edx)
L(Exit14):
/* Two 8-byte moves at offsets 0 and 6 (overlapping).  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit15):
movb %bh, 15(%edx)
L(Exit15):
/* Two 8-byte moves at offsets 0 and 7 (overlapping).  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit16):
movb %bh, 16(%edx)
L(Exit16):
/* 8 + 8 bytes.  */
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
# ifdef USE_AS_STRNCPY
/* Length-limited tails (strncat build only).  The code below pops %esi,
   so the unwind state is annotated as having %esi pushed on entry.  */
CFI_PUSH(%esi)
.p2align 4
/* Case 2: both a mask bit in %eax and the remaining count in %ebx may
   end the copy within this block; whichever comes first decides.
   NOTE(review): the add of 16 presumably undoes a bias applied to %ebx
   by the included strcpy-ssse3.S loop -- confirm there.  */
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
/* %esi = destination + offset; kept there while %edx serves as
   scratch.  */
lea (%esi, %edx), %esi
/* %dh = (bit 15 of %ebx - 9, moved to bit 7) | %al.  ZF after the test
   is set only if %al == 0 and that bit is clear.  Assuming the
   remaining count is in 1..16 (TODO confirm), this means: no mask bit
   in the low 8 positions and count > 8.  */
lea -9(%ebx), %edx
and $1<<7, %dh
or %al, %dh
test %dh, %dh
/* lea and pop leave the flags untouched, so the jz below still acts on
   the test above.  */
lea (%esi), %edx
POP (%esi)
jz L(ExitHighCase2)
/* For each position K: a mask bit ends the copy at L(ExitK); otherwise
   an exhausted count ends it at L(StrncatExitK), which also writes the
   terminator.  */
test $0x01, %al
jnz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
/* Copy 8 bytes, then place the terminator: cmpb sets the carry flag
   when the byte at offset 7 is zero, and sbb $-1 computes
   %eax + 1 - CF.  So %eax stays at offset 7 if that byte is already
   NUL and advances to offset 8 otherwise; a zero byte is stored
   there.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
xor %cl, %cl
movb %cl, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
/* Same scheme for positions 9..15, using the second mask byte.  */
L(ExitHighCase2):
test $0x01, %ah
jnz L(Exit9)
cmp $9, %ebx
je L(StrncatExit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %ebx
je L(StrncatExit15)
/* Fall-through: copy 16 bytes.  No separate terminator is stored on
   this path.  NOTE(review): presumably the 16th source byte is the NUL
   here (mask bit 15) -- confirm against the caller in
   strcpy-ssse3.S.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
/* Entered with %esi pushed (it is popped below).  */
CFI_PUSH(%esi)
/* A non-zero mask in %eax means Case 2 (mask and count both matter);
   otherwise fall into Case 3, where only the count ends the copy.  */
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
.p2align 4
/* Case 3: no mask bit in this block; copy the remaining %ebx bytes and
   append the terminator.  NOTE(review): the add of 16 presumably undoes
   a bias applied to %ebx by the included strcpy-ssse3.S -- confirm
   there.  */
L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %esi, %edx
add %esi, %ecx
POP (%esi)
cmp $8, %ebx
ja L(ExitHighCase3)
cmp $1, %ebx
je L(StrncatExit1)
cmp $2, %ebx
je L(StrncatExit2)
cmp $3, %ebx
je L(StrncatExit3)
cmp $4, %ebx
je L(StrncatExit4)
cmp $5, %ebx
je L(StrncatExit5)
cmp $6, %ebx
je L(StrncatExit6)
cmp $7, %ebx
je L(StrncatExit7)
/* %ebx <= 8 and not 1..7: copy 8 bytes and store %bh (zero, since
   %ebx <= 8) as the terminator at offset 8.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb %bh, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHighCase3):
cmp $9, %ebx
je L(StrncatExit9)
cmp $10, %ebx
je L(StrncatExit10)
cmp $11, %ebx
je L(StrncatExit11)
cmp $12, %ebx
je L(StrncatExit12)
cmp $13, %ebx
je L(StrncatExit13)
cmp $14, %ebx
je L(StrncatExit14)
cmp $15, %ebx
je L(StrncatExit15)
/* Fall-through: copy 16 bytes and store %bh at offset 16 as the
   terminator.  NOTE(review): this relies on %bh being zero, i.e. on
   %ebx < 256 (presumably %ebx == 16 here) -- confirm.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movb %bh, 16(%edx)
movl %edi, %eax
RETURN1
.p2align 4
/* n == 0: nothing is appended; return the destination pointer.  */
L(StrncatExit0):
movl %edi, %eax
RETURN1
.p2align 4
/* Reached from the start of the copy part when 8 < n < 16 and source
   bytes 0..8 are non-NUL.  Alternates "count exhausted?" and "next byte
   NUL?" for positions 9..14.  */
L(StrncatExit15Bytes):
cmp $9, %ebx
je L(StrncatExit9)
cmpb $0, 9(%ecx)
jz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
/* n == 15 and bytes 0..13 are non-NUL: copy 15 bytes with two
   overlapping 8-byte moves.  Then cmpb sets the carry flag if the byte
   at offset 14 is zero and sbb $-1 computes %eax + 1 - CF, so the
   terminator (%bh, zero because %ebx < 16) goes to offset 14 if that
   byte is already NUL and to offset 15 otherwise.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
lea 14(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
/* Reached from the start of the copy part when 1 <= n <= 8.  For each
   position: a NUL source byte ends the copy at L(ExitK); otherwise an
   exhausted count ends it at L(StrncatExitK).  */
L(StrncatExit8Bytes):
cmpb $0, (%ecx)
jz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
/* n == 8 and bytes 0..6 are non-NUL: copy 8 bytes; the terminator goes
   to offset 7 if that byte is already NUL, else to offset 8 (same
   cmpb/sbb trick as above).  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
# endif
END (STRCAT)
#endif

View File

@ -0,0 +1,131 @@
/* Multiple versions of strcat
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <init-arch.h>
/* Symbol names used by the resolver below.  The same source serves
   strcat and, when USE_AS_STRNCAT is defined by the including file,
   strncat.  STRCAT defaults to strcat only in the strcat build.  */
#ifndef USE_AS_STRNCAT
# ifndef STRCAT
# define STRCAT strcat
# endif
#endif
/* Per-variant implementation names and the internal alias name.  */
#ifdef USE_AS_STRNCAT
# define STRCAT_SSSE3 __strncat_ssse3
# define STRCAT_SSE2 __strncat_sse2
# define STRCAT_IA32 __strncat_ia32
# define __GI_STRCAT __GI_strncat
#else
# define STRCAT_SSSE3 __strcat_ssse3
# define STRCAT_SSE2 __strcat_sse2
# define STRCAT_IA32 __strcat_ia32
# define __GI_STRCAT __GI_strcat
#endif
/* Define multiple versions only for the definition in libc. Don't
define multiple versions for strncat in static library since we
need strncat before the initialization happened. */
#ifndef NOT_IN_libc
# ifdef SHARED
/* PIC helper: returns the address of the instruction after the call in
   %ebx.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
.hidden __i686.get_pc_thunk.bx
.p2align 4
.type __i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
movl (%esp), %ebx
ret
.text
/* Indirect-function resolver: returns in %eax the address of the
   implementation to use.  */
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
pushl %ebx
cfi_adjust_cfa_offset (4)
cfi_rel_offset (ebx, 0)
/* %ebx = GOT base, for the @GOTOFF references below.  */
call __i686.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
/* Call __init_cpu_features when the kind field of __cpu_features is
   still zero (presumably meaning "not initialized yet").  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
jne 1f
call __init_cpu_features
/* Selection order: IA32 by default; SSE2 if the SSE2 bit is set; SSSE3
   only if additionally the Fast_Unaligned_Load bit is clear and the
   SSSE3 bit is set.  */
1: leal STRCAT_IA32@GOTOFF(%ebx), %eax
testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
jz 2f
leal STRCAT_SSE2@GOTOFF(%ebx), %eax
testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
jnz 2f
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
jz 2f
leal STRCAT_SSSE3@GOTOFF(%ebx), %eax
2: popl %ebx
cfi_adjust_cfa_offset (-4)
cfi_restore (ebx)
ret
END(STRCAT)
# else
/* Non-PIC resolver: same selection logic with absolute addresses.  */
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
cmpl $0, KIND_OFFSET+__cpu_features
jne 1f
call __init_cpu_features
1: leal STRCAT_IA32, %eax
testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
jz 2f
leal STRCAT_SSE2, %eax
testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
jnz 2f
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
jz 2f
leal STRCAT_SSSE3, %eax
2: ret
END(STRCAT)
# endif
/* Redefine ENTRY and END so that an implementation included after this
   point is emitted under the name STRCAT_IA32; the name argument is
   ignored and no .globl is emitted.  */
# undef ENTRY
# define ENTRY(name) \
.type STRCAT_IA32, @function; \
.align 16; \
STRCAT_IA32: cfi_startproc; \
CALL_MCOUNT
# undef END
# define END(name) \
cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
# ifdef SHARED
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal strcat calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
# define libc_hidden_builtin_def(name) \
.globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
/* NOTE(review): __GI___STRCAT is a single preprocessor token, so STRCAT
   is not expanded inside it; if this macro is ever used it defines a
   symbol literally named __GI___STRCAT.  Confirm whether that is
   intended.  */
# undef libc_hidden_def
# define libc_hidden_def(name) \
.globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
# endif
#endif
/* Only the strcat build includes the i486 assembly here.  The strncat
   build includes nothing, so its __strncat_ia32 has to be provided by
   another file.  */
#ifndef USE_AS_STRNCAT
# include "../../i486/strcat.S"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/* strlen with SSE2 /* strlen with SSE2
Copyright (C) 2010 Free Software Foundation, Inc. Copyright (C) 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation. Contributed by Intel Corporation.
This file is part of the GNU C Library. This file is part of the GNU C Library.
@ -18,30 +18,32 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#if defined SHARED && !defined NOT_IN_libc #if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
# ifndef USE_AS_STRCAT
#include <sysdep.h> # include <sysdep.h>
#include "asm-syntax.h" # include "asm-syntax.h"
#define CFI_PUSH(REG) \ # define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \ cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0) cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \ # define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \ cfi_adjust_cfa_offset (-4); \
cfi_restore (REG) cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG) # define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG) # define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4 # define PARMS 4
#define STR PARMS # define STR PARMS
#define ENTRANCE # define ENTRANCE
#define RETURN ret # define RETURN ret
.text .text
ENTRY (__strlen_sse2) ENTRY (__strlen_sse2)
ENTRANCE ENTRANCE
mov STR(%esp), %edx mov STR(%esp), %edx
# endif
xor %eax, %eax xor %eax, %eax
cmpb $0, (%edx) cmpb $0, (%edx)
jz L(exit_tail0) jz L(exit_tail0)
@ -77,9 +79,8 @@ ENTRY (__strlen_sse2)
jz L(exit_tail15) jz L(exit_tail15)
pxor %xmm0, %xmm0 pxor %xmm0, %xmm0
mov %edx, %eax mov %edx, %eax
mov %edx, %ecx lea 16(%edx), %ecx
and $-16, %eax and $-16, %eax
add $16, %ecx
add $16, %eax add $16, %eax
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
@ -183,51 +184,41 @@ ENTRY (__strlen_sse2)
jnz L(exit) jnz L(exit)
and $-0x40, %eax and $-0x40, %eax
PUSH (%esi)
PUSH (%edi)
PUSH (%ebx)
PUSH (%ebp)
xor %ebp, %ebp
L(aligned_64): L(aligned_64):
pcmpeqb (%eax), %xmm0 movaps (%eax), %xmm0
pcmpeqb 16(%eax), %xmm1 movaps 16(%eax), %xmm1
pcmpeqb 32(%eax), %xmm2 movaps 32(%eax), %xmm2
pcmpeqb 48(%eax), %xmm3 movaps 48(%eax), %xmm6
pmovmskb %xmm0, %edx pminub %xmm1, %xmm0
pmovmskb %xmm1, %esi pminub %xmm6, %xmm2
pmovmskb %xmm2, %edi pminub %xmm0, %xmm2
pmovmskb %xmm3, %ebx pcmpeqb %xmm3, %xmm2
or %edx, %ebp pmovmskb %xmm2, %edx
or %esi, %ebp test %edx, %edx
or %edi, %ebp
or %ebx, %ebp
lea 64(%eax), %eax lea 64(%eax), %eax
jz L(aligned_64) jz L(aligned_64)
L(48leave):
pcmpeqb -64(%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx test %edx, %edx
jnz L(aligned_64_exit_16) lea 48(%ecx), %ecx
test %esi, %esi jnz L(exit)
jnz L(aligned_64_exit_32)
test %edi, %edi pcmpeqb %xmm1, %xmm3
jnz L(aligned_64_exit_48) pmovmskb %xmm3, %edx
mov %ebx, %edx test %edx, %edx
lea (%eax), %eax lea -16(%ecx), %ecx
jmp L(aligned_64_exit) jnz L(exit)
L(aligned_64_exit_48):
lea -16(%eax), %eax pcmpeqb -32(%eax), %xmm3
mov %edi, %edx pmovmskb %xmm3, %edx
jmp L(aligned_64_exit) test %edx, %edx
L(aligned_64_exit_32): lea -16(%ecx), %ecx
lea -32(%eax), %eax jnz L(exit)
mov %esi, %edx
jmp L(aligned_64_exit) pcmpeqb %xmm6, %xmm3
L(aligned_64_exit_16): pmovmskb %xmm3, %edx
lea -48(%eax), %eax lea -16(%ecx), %ecx
L(aligned_64_exit):
POP (%ebp)
POP (%ebx)
POP (%edi)
POP (%esi)
L(exit): L(exit):
sub %ecx, %eax sub %ecx, %eax
test %dl, %dl test %dl, %dl
@ -340,8 +331,9 @@ L(exit_tail14):
L(exit_tail15): L(exit_tail15):
add $15, %eax add $15, %eax
# ifndef USE_AS_STRCAT
ret ret
END (__strlen_sse2) END (__strlen_sse2)
# endif
#endif #endif

View File

@ -0,0 +1,8 @@
/* Build string/strncat.c with STRNCAT defined as __strncat_ia32.  */
#define STRNCAT __strncat_ia32
#ifdef SHARED
/* In shared builds, replace libc_hidden_def so that it expands to a
   __hidden_ver1 declaration tying __GI___strncat to __strncat_ia32;
   the macro argument is ignored.
   NOTE(review): exact semantics of __hidden_ver1 are defined elsewhere
   -- confirm.  */
#undef libc_hidden_def
#define libc_hidden_def(name) \
__hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
#endif
#include "string/strncat.c"

View File

@ -0,0 +1,4 @@
/* strncat, SSE2 variant: includes strcat-sse2.S with USE_AS_STRNCAT
   defined and the entry point named __strncat_sse2.  */
#define STRCAT __strncat_sse2
#define USE_AS_STRNCAT
#include "strcat-sse2.S"

View File

@ -0,0 +1,4 @@
/* strncat, SSSE3 variant: includes strcat-ssse3.S with USE_AS_STRNCAT
   defined (which enables its length-limited paths) and the entry point
   named __strncat_ssse3.  */
#define STRCAT __strncat_ssse3
#define USE_AS_STRNCAT
#include "strcat-ssse3.S"

View File

@ -0,0 +1,3 @@
/* strncat entry point: includes strcat.S with USE_AS_STRNCAT defined, so
   its resolver is emitted under the name strncat and selects among the
   __strncat_* implementations.  */
#define STRCAT strncat
#define USE_AS_STRNCAT
#include "strcat.S"