i586: Use conditional branches in strcpy.S [BZ #22353]

i586 strcpy.S used a clever trick with LEA to implement a jump table:

/* ECX has the last 2 bits of the address of source - 1.  */
	andl	$3, %ecx

        call    2f
2:      popl    %edx
	/* 0xb is the distance between 2: and 1:.  */
        leal    0xb(%edx,%ecx,8), %ecx
        jmp     *%ecx

        .align 8
1:  /* ECX == 0 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 1 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 2 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 3 */
L(1):   movl    (%esi), %ecx
        leal    4(%esi),%esi

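This fails if the length of any instruction before L(1) changes, because
the jump target is computed from the hard-coded 0xb offset and a fixed
stride of 8 bytes per entry.  This patch replaces it with conditional
branches: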

	cmpb	$2, %cl
	je	L(Src2)
	ja	L(Src3)
	cmpb	$1, %cl
	je	L(Src1)

L(Src0):

which have similar performance and work with any instruction lengths.

Tested on i586 and i686 with and without --disable-multi-arch.

	[BZ #22353]
	* sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches.
	(1): Renamed to ...
	(L(Src0)): This.
	(L(Src1)): New.
	(L(Src2)): Likewise.
	(L(1)): Renamed to ...
	(L(Src3)): This.
This commit is contained in:
H.J. Lu 2017-10-30 10:02:16 -07:00
parent ce12269fac
commit c5cc45148c
2 changed files with 22 additions and 17 deletions

View File

@@ -1,3 +1,14 @@
+2017-10-30 H.J. Lu <hongjiu.lu@intel.com>
+[BZ #22353]
+* sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches.
+(1): Renamed to ...
+(L(Src0)): This.
+(L(Src1)): New.
+(L(Src2)): Likewise.
+(L(1)): Renamed to ...
+(L(Src3)): This.
 2017-10-30 Joseph Myers <joseph@codesourcery.com>
 * math/math.h [__HAVE_FLOAT16 && __USE_GNU] (M_Ef16): New macro.

View File

@@ -53,41 +53,35 @@ ENTRY (STRCPY)
 cfi_rel_offset (ebx, 0)
 andl $3, %ecx
-#ifdef PIC
-call 2f
-cfi_adjust_cfa_offset (4)
-2: popl %edx
-cfi_adjust_cfa_offset (-4)
-/* 0xb is the distance between 2: and 1: but we avoid writing
-1f-2b because the assembler generates worse code. */
-leal 0xb(%edx,%ecx,8), %ecx
-#else
-leal 1f(,%ecx,8), %ecx
-#endif
+cmpb $2, %cl
+je L(Src2)
+ja L(Src3)
+cmpb $1, %cl
+je L(Src1)
-jmp *%ecx
-.align 8
-1:
+L(Src0):
 orb (%esi), %al
 jz L(end)
 stosb
 xorl %eax, %eax
 incl %esi
+L(Src1):
 orb (%esi), %al
 jz L(end)
 stosb
 xorl %eax, %eax
 incl %esi
+L(Src2):
 orb (%esi), %al
 jz L(end)
 stosb
 xorl %eax, %eax
 incl %esi
-L(1): movl (%esi), %ecx
+L(Src3):
+movl (%esi), %ecx
 leal 4(%esi),%esi
 subl %ecx, %eax
@@ -107,7 +101,7 @@ L(1): movl (%esi), %ecx
 movl %edx, (%edi)
 leal 4(%edi),%edi
-jmp L(1)
+jmp L(Src3)
 L(3): movl %ecx, %edx