mirror of git://sourceware.org/git/glibc.git
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations. * sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before defining.
This commit is contained in:
parent
91613ed9d8
commit
beb03cee27
|
@ -1,3 +1,11 @@
|
||||||
|
2003-04-04 Steven Munroe <sjmunroe@us.ibm.com>
|
||||||
|
|
||||||
|
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
|
||||||
|
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
|
||||||
|
|
||||||
|
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
|
||||||
|
defining.
|
||||||
|
|
||||||
2003-04-04 Alexandre Oliva <aoliva@redhat.com>
|
2003-04-04 Alexandre Oliva <aoliva@redhat.com>
|
||||||
|
|
||||||
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust
|
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Optimized strchr implementation for PowerPC64.
|
/* Optimized strchr implementation for PowerPC64.
|
||||||
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
|
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
@ -29,6 +29,11 @@ ENTRY (BP_SYM (strchr))
|
||||||
|
|
||||||
#define rTMP1 r0
|
#define rTMP1 r0
|
||||||
#define rRTN r3 /* outgoing result */
|
#define rRTN r3 /* outgoing result */
|
||||||
|
/* Note: The Bounded pointer support in this code is broken. This code
|
||||||
|
was inherited from PPC32 and that support was never completed.
|
||||||
|
Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
|
||||||
|
These artifacts are left in the code as a reminder in case we need
|
||||||
|
bounded pointer support in the future. */
|
||||||
#if __BOUNDED_POINTERS__
|
#if __BOUNDED_POINTERS__
|
||||||
# define rSTR r4
|
# define rSTR r4
|
||||||
# define rCHR r5 /* byte we're looking for, spread over the whole word */
|
# define rCHR r5 /* byte we're looking for, spread over the whole word */
|
||||||
|
@ -39,8 +44,8 @@ ENTRY (BP_SYM (strchr))
|
||||||
# define rWORD r5 /* the current word */
|
# define rWORD r5 /* the current word */
|
||||||
#endif
|
#endif
|
||||||
#define rCLZB rCHR /* leading zero byte count */
|
#define rCLZB rCHR /* leading zero byte count */
|
||||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
|
||||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
|
||||||
#define rTMP2 r9
|
#define rTMP2 r9
|
||||||
#define rIGN r10 /* number of bits we should ignore in the first word */
|
#define rIGN r10 /* number of bits we should ignore in the first word */
|
||||||
#define rMASK r11 /* mask with the bits to ignore set to 0 */
|
#define rMASK r11 /* mask with the bits to ignore set to 0 */
|
||||||
|
@ -49,18 +54,23 @@ ENTRY (BP_SYM (strchr))
|
||||||
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
|
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
|
||||||
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
|
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
|
||||||
|
|
||||||
|
dcbt 0,rRTN
|
||||||
rlwimi rCHR, rCHR, 8, 16, 23
|
rlwimi rCHR, rCHR, 8, 16, 23
|
||||||
li rMASK, -1
|
li rMASK, -1
|
||||||
rlwimi rCHR, rCHR, 16, 0, 15
|
rlwimi rCHR, rCHR, 16, 0, 15
|
||||||
rlwinm rIGN, rRTN, 3, 27, 28
|
rlwinm rIGN, rRTN, 3, 26, 28
|
||||||
|
insrdi rCHR, rCHR, 32, 0
|
||||||
lis rFEFE, -0x101
|
lis rFEFE, -0x101
|
||||||
lis r7F7F, 0x7f7f
|
lis r7F7F, 0x7f7f
|
||||||
clrrdi rSTR, rRTN, 2
|
clrrdi rSTR, rRTN, 3
|
||||||
addi rFEFE, rFEFE, -0x101
|
addi rFEFE, rFEFE, -0x101
|
||||||
addi r7F7F, r7F7F, 0x7f7f
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
|
sldi rTMP1, rFEFE, 32
|
||||||
|
insrdi r7F7F, r7F7F, 32, 0
|
||||||
|
add rFEFE, rFEFE, rTMP1
|
||||||
/* Test the first (partial?) word. */
|
/* Test the first (partial?) word. */
|
||||||
lwz rWORD, 0(rSTR)
|
ld rWORD, 0(rSTR)
|
||||||
srw rMASK, rMASK, rIGN
|
srd rMASK, rMASK, rIGN
|
||||||
orc rWORD, rWORD, rMASK
|
orc rWORD, rWORD, rMASK
|
||||||
add rTMP1, rFEFE, rWORD
|
add rTMP1, rFEFE, rWORD
|
||||||
nor rTMP2, r7F7F, rWORD
|
nor rTMP2, r7F7F, rWORD
|
||||||
|
@ -71,7 +81,7 @@ ENTRY (BP_SYM (strchr))
|
||||||
|
|
||||||
/* The loop. */
|
/* The loop. */
|
||||||
|
|
||||||
L(loop):lwzu rWORD, 4(rSTR)
|
L(loop):ldu rWORD, 8(rSTR)
|
||||||
and. rTMP1, rTMP1, rTMP2
|
and. rTMP1, rTMP1, rTMP2
|
||||||
/* Test for 0. */
|
/* Test for 0. */
|
||||||
add rTMP1, rFEFE, rWORD
|
add rTMP1, rFEFE, rWORD
|
||||||
|
@ -104,12 +114,12 @@ L(missed):
|
||||||
add rTMP1, rTMP1, r7F7F
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor rWORD, rMASK, rFEFE
|
nor rWORD, rMASK, rFEFE
|
||||||
nor rTMP2, rIGN, rTMP1
|
nor rTMP2, rIGN, rTMP1
|
||||||
cmplw rWORD, rTMP2
|
cmpld rWORD, rTMP2
|
||||||
bgtlr
|
bgtlr
|
||||||
cntlzw rCLZB, rTMP2
|
cntlzd rCLZB, rTMP2
|
||||||
srwi rCLZB, rCLZB, 3
|
srdi rCLZB, rCLZB, 3
|
||||||
add rRTN, rSTR, rCLZB
|
add rRTN, rSTR, rCLZB
|
||||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
|
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
|
||||||
STORE_RETURN_VALUE (rSTR)
|
STORE_RETURN_VALUE (rSTR)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
|
@ -118,11 +128,11 @@ L(foundit):
|
||||||
or rIGN, r7F7F, rTMP3
|
or rIGN, r7F7F, rTMP3
|
||||||
add rTMP1, rTMP1, r7F7F
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor rTMP2, rIGN, rTMP1
|
nor rTMP2, rIGN, rTMP1
|
||||||
cntlzw rCLZB, rTMP2
|
cntlzd rCLZB, rTMP2
|
||||||
subi rSTR, rSTR, 4
|
subi rSTR, rSTR, 8
|
||||||
srwi rCLZB, rCLZB, 3
|
srdi rCLZB, rCLZB, 3
|
||||||
add rRTN, rSTR, rCLZB
|
add rRTN, rSTR, rCLZB
|
||||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
|
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
|
||||||
STORE_RETURN_VALUE (rSTR)
|
STORE_RETURN_VALUE (rSTR)
|
||||||
blr
|
blr
|
||||||
END (BP_SYM (strchr))
|
END (BP_SYM (strchr))
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Optimized strlen implementation for PowerPC64.
|
/* Optimized strlen implementation for PowerPC64.
|
||||||
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
|
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
@ -60,7 +60,12 @@
|
||||||
2) How popular are bytes with the high bit set? If they are very rare,
|
2) How popular are bytes with the high bit set? If they are very rare,
|
||||||
on some processors it might be useful to use the simpler expression
|
on some processors it might be useful to use the simpler expression
|
||||||
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
|
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
|
||||||
ALU), but this fails when any character has its high bit set. */
|
ALU), but this fails when any character has its high bit set.
|
||||||
|
|
||||||
|
Answer:
|
||||||
|
1) Added a Data Cache Block Touch early to prefetch the first 128
|
||||||
|
byte cache line. Adding dcbt instructions to the loop would not be
|
||||||
|
effective since most strings will be shorter than the cache line. */
|
||||||
|
|
||||||
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
|
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
|
||||||
0 and 3 through 12 (so long as we don't call any procedures) without
|
0 and 3 through 12 (so long as we don't call any procedures) without
|
||||||
|
@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
|
||||||
#define rSTR r4 /* current string position */
|
#define rSTR r4 /* current string position */
|
||||||
#define rPADN r5 /* number of padding bits we prepend to the
|
#define rPADN r5 /* number of padding bits we prepend to the
|
||||||
string to make it start at a word boundary */
|
string to make it start at a word boundary */
|
||||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
|
||||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
|
||||||
#define rWORD1 r8 /* current string word */
|
#define rWORD1 r8 /* current string doubleword */
|
||||||
#define rWORD2 r9 /* next string word */
|
#define rWORD2 r9 /* next string doubleword */
|
||||||
#define rMASK r9 /* mask for first string word */
|
#define rMASK r9 /* mask for first string doubleword */
|
||||||
#define rTMP2 r10
|
#define rTMP2 r10
|
||||||
#define rTMP3 r11
|
#define rTMP3 r11
|
||||||
#define rTMP4 r12
|
#define rTMP4 r12
|
||||||
|
|
||||||
|
/* Note: The Bounded pointer support in this code is broken. This code
|
||||||
|
was inherited from PPC32 and that support was never completed.
|
||||||
|
Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
|
||||||
|
These artifacts are left in the code as a reminder in case we need
|
||||||
|
bounded pointer support in the future. */
|
||||||
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
|
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
|
||||||
|
|
||||||
clrrdi rSTR, rRTN, 2
|
dcbt 0,rRTN
|
||||||
|
clrrdi rSTR, rRTN, 3
|
||||||
lis r7F7F, 0x7f7f
|
lis r7F7F, 0x7f7f
|
||||||
rlwinm rPADN, rRTN, 3, 27, 28
|
rlwinm rPADN, rRTN, 3, 26, 28
|
||||||
lwz rWORD1, 0(rSTR)
|
ld rWORD1, 0(rSTR)
|
||||||
li rMASK, -1
|
|
||||||
addi r7F7F, r7F7F, 0x7f7f
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
/* That's the setup done, now do the first pair of words.
|
li rMASK, -1
|
||||||
We make an exception and use method (2) on the first two words, to reduce
|
insrdi r7F7F, r7F7F, 32, 0
|
||||||
overhead. */
|
/* That's the setup done, now do the first pair of doublewords.
|
||||||
srw rMASK, rMASK, rPADN
|
We make an exception and use method (2) on the first two doublewords,
|
||||||
|
to reduce overhead. */
|
||||||
|
srd rMASK, rMASK, rPADN
|
||||||
and rTMP1, r7F7F, rWORD1
|
and rTMP1, r7F7F, rWORD1
|
||||||
or rTMP2, r7F7F, rWORD1
|
or rTMP2, r7F7F, rWORD1
|
||||||
|
lis rFEFE, -0x101
|
||||||
add rTMP1, rTMP1, r7F7F
|
add rTMP1, rTMP1, r7F7F
|
||||||
|
addi rFEFE, rFEFE, -0x101
|
||||||
nor rTMP1, rTMP2, rTMP1
|
nor rTMP1, rTMP2, rTMP1
|
||||||
and. rWORD1, rTMP1, rMASK
|
and. rWORD1, rTMP1, rMASK
|
||||||
mtcrf 0x01, rRTN
|
mtcrf 0x01, rRTN
|
||||||
bne L(done0)
|
bne L(done0)
|
||||||
lis rFEFE, -0x101
|
sldi rTMP1, rFEFE, 32
|
||||||
addi rFEFE, rFEFE, -0x101
|
add rFEFE, rFEFE, rTMP1
|
||||||
clrldi rFEFE,rFEFE,32 /* clear upper 32 */
|
|
||||||
/* Are we now aligned to a doubleword boundary? */
|
/* Are we now aligned to a doubleword boundary? */
|
||||||
bt 29, L(loop)
|
bt 28, L(loop)
|
||||||
|
|
||||||
/* Handle second word of pair. */
|
/* Handle second doubleword of pair. */
|
||||||
lwzu rWORD1, 4(rSTR)
|
ldu rWORD1, 8(rSTR)
|
||||||
and rTMP1, r7F7F, rWORD1
|
and rTMP1, r7F7F, rWORD1
|
||||||
or rTMP2, r7F7F, rWORD1
|
or rTMP2, r7F7F, rWORD1
|
||||||
add rTMP1, rTMP1, r7F7F
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor. rWORD1, rTMP2, rTMP1
|
nor. rWORD1, rTMP2, rTMP1
|
||||||
clrldi. rWORD1,rWORD1,32 /* clear upper 32 */
|
|
||||||
bne L(done0)
|
bne L(done0)
|
||||||
|
|
||||||
/* The loop. */
|
/* The loop. */
|
||||||
|
|
||||||
L(loop):
|
L(loop):
|
||||||
lwz rWORD1, 4(rSTR)
|
ld rWORD1, 8(rSTR)
|
||||||
lwzu rWORD2, 8(rSTR)
|
ldu rWORD2, 16(rSTR)
|
||||||
add rTMP1, rFEFE, rWORD1
|
add rTMP1, rFEFE, rWORD1
|
||||||
nor rTMP2, r7F7F, rWORD1
|
nor rTMP2, r7F7F, rWORD1
|
||||||
and. rTMP1, rTMP1, rTMP2
|
and. rTMP1, rTMP1, rTMP2
|
||||||
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
|
|
||||||
add rTMP3, rFEFE, rWORD2
|
add rTMP3, rFEFE, rWORD2
|
||||||
nor rTMP4, r7F7F, rWORD2
|
nor rTMP4, r7F7F, rWORD2
|
||||||
bne L(done1)
|
bne L(done1)
|
||||||
and. rTMP1, rTMP3, rTMP4
|
and. rTMP1, rTMP3, rTMP4
|
||||||
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
|
|
||||||
beq L(loop)
|
beq L(loop)
|
||||||
|
|
||||||
and rTMP1, r7F7F, rWORD2
|
and rTMP1, r7F7F, rWORD2
|
||||||
|
@ -146,17 +156,17 @@ L(loop):
|
||||||
|
|
||||||
L(done1):
|
L(done1):
|
||||||
and rTMP1, r7F7F, rWORD1
|
and rTMP1, r7F7F, rWORD1
|
||||||
subi rSTR, rSTR, 4
|
subi rSTR, rSTR, 8
|
||||||
add rTMP1, rTMP1, r7F7F
|
add rTMP1, rTMP1, r7F7F
|
||||||
andc rWORD1, rTMP2, rTMP1
|
andc rWORD1, rTMP2, rTMP1
|
||||||
|
|
||||||
/* When we get to here, rSTR points to the first word in the string that
|
/* When we get to here, rSTR points to the first doubleword in the string that
|
||||||
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
||||||
byte. */
|
byte. */
|
||||||
L(done0):
|
L(done0):
|
||||||
cntlzw rTMP3, rWORD1
|
cntlzd rTMP3, rWORD1
|
||||||
subf rTMP1, rRTN, rSTR
|
subf rTMP1, rRTN, rSTR
|
||||||
srwi rTMP3, rTMP3, 3
|
srdi rTMP3, rTMP3, 3
|
||||||
add rRTN, rTMP1, rTMP3
|
add rRTN, rTMP1, rTMP3
|
||||||
/* GKM FIXME: check high bound. */
|
/* GKM FIXME: check high bound. */
|
||||||
blr
|
blr
|
||||||
|
|
Loading…
Reference in New Issue