* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.

* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.

	* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
	defining.
This commit is contained in:
Roland McGrath 2003-04-04 22:03:25 +00:00
parent 91613ed9d8
commit beb03cee27
3 changed files with 74 additions and 46 deletions

View File

@ -1,3 +1,11 @@
2003-04-04 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
defining.
2003-04-04 Alexandre Oliva <aoliva@redhat.com> 2003-04-04 Alexandre Oliva <aoliva@redhat.com>
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust * sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust

View File

@ -1,5 +1,5 @@
/* Optimized strchr implementation for PowerPC64. /* Optimized strchr implementation for PowerPC64.
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -29,6 +29,11 @@ ENTRY (BP_SYM (strchr))
#define rTMP1 r0 #define rTMP1 r0
#define rRTN r3 /* outgoing result */ #define rRTN r3 /* outgoing result */
/* Note: The Bounded pointer support in this code is broken. This code
was inherited from PPC32 and that support was never completed.
Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
These artifacts are left in the code as a reminder in case we need
bounded pointer support in the future. */
#if __BOUNDED_POINTERS__ #if __BOUNDED_POINTERS__
# define rSTR r4 # define rSTR r4
# define rCHR r5 /* byte we're looking for, spread over the whole word */ # define rCHR r5 /* byte we're looking for, spread over the whole word */
@ -39,8 +44,8 @@ ENTRY (BP_SYM (strchr))
# define rWORD r5 /* the current word */ # define rWORD r5 /* the current word */
#endif #endif
#define rCLZB rCHR /* leading zero byte count */ #define rCLZB rCHR /* leading zero byte count */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ #define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */ #define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
#define rTMP2 r9 #define rTMP2 r9
#define rIGN r10 /* number of bits we should ignore in the first word */ #define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */ #define rMASK r11 /* mask with the bits to ignore set to 0 */
@ -49,18 +54,23 @@ ENTRY (BP_SYM (strchr))
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2) CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
STORE_RETURN_BOUNDS (rTMP1, rTMP2) STORE_RETURN_BOUNDS (rTMP1, rTMP2)
dcbt 0,rRTN
rlwimi rCHR, rCHR, 8, 16, 23 rlwimi rCHR, rCHR, 8, 16, 23
li rMASK, -1 li rMASK, -1
rlwimi rCHR, rCHR, 16, 0, 15 rlwimi rCHR, rCHR, 16, 0, 15
rlwinm rIGN, rRTN, 3, 27, 28 rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101 lis rFEFE, -0x101
lis r7F7F, 0x7f7f lis r7F7F, 0x7f7f
clrrdi rSTR, rRTN, 2 clrrdi rSTR, rRTN, 3
addi rFEFE, rFEFE, -0x101 addi rFEFE, rFEFE, -0x101
addi r7F7F, r7F7F, 0x7f7f addi r7F7F, r7F7F, 0x7f7f
sldi rTMP1, rFEFE, 32
insrdi r7F7F, r7F7F, 32, 0
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */ /* Test the first (partial?) word. */
lwz rWORD, 0(rSTR) ld rWORD, 0(rSTR)
srw rMASK, rMASK, rIGN srd rMASK, rMASK, rIGN
orc rWORD, rWORD, rMASK orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD nor rTMP2, r7F7F, rWORD
@ -71,7 +81,7 @@ ENTRY (BP_SYM (strchr))
/* The loop. */ /* The loop. */
L(loop):lwzu rWORD, 4(rSTR) L(loop):ldu rWORD, 8(rSTR)
and. rTMP1, rTMP1, rTMP2 and. rTMP1, rTMP1, rTMP2
/* Test for 0. */ /* Test for 0. */
add rTMP1, rFEFE, rWORD add rTMP1, rFEFE, rWORD
@ -104,12 +114,12 @@ L(missed):
add rTMP1, rTMP1, r7F7F add rTMP1, rTMP1, r7F7F
nor rWORD, rMASK, rFEFE nor rWORD, rMASK, rFEFE
nor rTMP2, rIGN, rTMP1 nor rTMP2, rIGN, rTMP1
cmplw rWORD, rTMP2 cmpld rWORD, rTMP2
bgtlr bgtlr
cntlzw rCLZB, rTMP2 cntlzd rCLZB, rTMP2
srwi rCLZB, rCLZB, 3 srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR) STORE_RETURN_VALUE (rSTR)
blr blr
@ -118,11 +128,11 @@ L(foundit):
or rIGN, r7F7F, rTMP3 or rIGN, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F add rTMP1, rTMP1, r7F7F
nor rTMP2, rIGN, rTMP1 nor rTMP2, rIGN, rTMP1
cntlzw rCLZB, rTMP2 cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 4 subi rSTR, rSTR, 8
srwi rCLZB, rCLZB, 3 srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR) STORE_RETURN_VALUE (rSTR)
blr blr
END (BP_SYM (strchr)) END (BP_SYM (strchr))

View File

@ -1,5 +1,5 @@
/* Optimized strlen implementation for PowerPC64. /* Optimized strlen implementation for PowerPC64.
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -60,7 +60,12 @@
2) How popular are bytes with the high bit set? If they are very rare, 2) How popular are bytes with the high bit set? If they are very rare,
on some processors it might be useful to use the simpler expression on some processors it might be useful to use the simpler expression
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
ALU), but this fails when any character has its high bit set. */ ALU), but this fails when any character has its high bit set.
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
effective since most strings will be shorter than the cache line.*/
/* Some notes on register usage: Under the SVR4 ABI, we can use registers /* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without 0 and 3 through 12 (so long as we don't call any procedures) without
@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
#define rSTR r4 /* current string position */ #define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the #define rPADN r5 /* number of padding bits we prepend to the
string to make it start at a word boundary */ string to make it start at a word boundary */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ #define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */ #define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
#define rWORD1 r8 /* current string word */ #define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string word */ #define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string word */ #define rMASK r9 /* mask for first string doubleword */
#define rTMP2 r10 #define rTMP2 r10
#define rTMP3 r11 #define rTMP3 r11
#define rTMP4 r12 #define rTMP4 r12
/* Note: The Bounded pointer support in this code is broken. This code
was inherited from PPC32 and that support was never completed.
Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
These artifacts are left in the code as a reminder in case we need
bounded pointer support in the future. */
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2) CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
clrrdi rSTR, rRTN, 2 dcbt 0,rRTN
clrrdi rSTR, rRTN, 3
lis r7F7F, 0x7f7f lis r7F7F, 0x7f7f
rlwinm rPADN, rRTN, 3, 27, 28 rlwinm rPADN, rRTN, 3, 26, 28
lwz rWORD1, 0(rSTR) ld rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words. li rMASK, -1
We make an exception and use method (2) on the first two words, to reduce insrdi r7F7F, r7F7F, 32, 0
overhead. */ /* That's the setup done, now do the first pair of doublewords.
srw rMASK, rMASK, rPADN We make an exception and use method (2) on the first two doublewords,
to reduce overhead. */
srd rMASK, rMASK, rPADN
and rTMP1, r7F7F, rWORD1 and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1 or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
nor rTMP1, rTMP2, rTMP1 nor rTMP1, rTMP2, rTMP1
and. rWORD1, rTMP1, rMASK and. rWORD1, rTMP1, rMASK
mtcrf 0x01, rRTN mtcrf 0x01, rRTN
bne L(done0) bne L(done0)
lis rFEFE, -0x101 sldi rTMP1, rFEFE, 32
addi rFEFE, rFEFE, -0x101 add rFEFE, rFEFE, rTMP1
clrldi rFEFE,rFEFE,32 /* clear upper 32 */
/* Are we now aligned to a doubleword boundary? */ /* Are we now aligned to a doubleword boundary? */
bt 29, L(loop) bt 28, L(loop)
/* Handle second word of pair. */ /* Handle second doubleword of pair. */
lwzu rWORD1, 4(rSTR) ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1 and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1 or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F add rTMP1, rTMP1, r7F7F
nor. rWORD1, rTMP2, rTMP1 nor. rWORD1, rTMP2, rTMP1
clrldi. rWORD1,rWORD1,32 /* clear upper 32 */
bne L(done0) bne L(done0)
/* The loop. */ /* The loop. */
L(loop): L(loop):
lwz rWORD1, 4(rSTR) ld rWORD1, 8(rSTR)
lwzu rWORD2, 8(rSTR) ldu rWORD2, 16(rSTR)
add rTMP1, rFEFE, rWORD1 add rTMP1, rFEFE, rWORD1
nor rTMP2, r7F7F, rWORD1 nor rTMP2, r7F7F, rWORD1
and. rTMP1, rTMP1, rTMP2 and. rTMP1, rTMP1, rTMP2
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
add rTMP3, rFEFE, rWORD2 add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2 nor rTMP4, r7F7F, rWORD2
bne L(done1) bne L(done1)
and. rTMP1, rTMP3, rTMP4 and. rTMP1, rTMP3, rTMP4
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
beq L(loop) beq L(loop)
and rTMP1, r7F7F, rWORD2 and rTMP1, r7F7F, rWORD2
@ -146,17 +156,17 @@ L(loop):
L(done1): L(done1):
and rTMP1, r7F7F, rWORD1 and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4 subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP2, rTMP1 andc rWORD1, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that /* When we get to here, rSTR points to the first doubleword in the string that
contains a zero byte, and the most significant set bit in rWORD1 is in that contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */ byte. */
L(done0): L(done0):
cntlzw rTMP3, rWORD1 cntlzd rTMP3, rWORD1
subf rTMP1, rRTN, rSTR subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3 srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3 add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */ /* GKM FIXME: check high bound. */
blr blr