mirror of git://sourceware.org/git/glibc.git
Correct cacheline size to 32-bytes for ppc405 memset.S (bug 14595).
This patch also creates a version of memset.S for the ppc476 processor which uses a 128-byte cacheline size for dcbz insns.
This commit is contained in:
parent
9f45bfe790
commit
09dec6c37e
4
NEWS
4
NEWS
|
|
@ -16,8 +16,8 @@ Version 2.17
|
||||||
14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
|
14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
|
||||||
14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
|
14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
|
||||||
14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
|
14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
|
||||||
14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683,
|
14587, 14595, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661,
|
||||||
14694, 14716, 14743, 14767, 14783.
|
14683, 14694, 14716, 14743, 14767, 14783.
|
||||||
|
|
||||||
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
|
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
|
||||||
Optimized versions of memcpy, memset, and memcmp added for System z10 and
|
Optimized versions of memcpy, memset, and memcmp added for System z10 and
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
|
2012-09-25 Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
|
||||||
|
Ryan S. Arnold <rsa@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
[BZ #14595]
|
||||||
|
* sysdeps/powerpc/powerpc32/476/memset.S: New file copied from
|
||||||
|
405/memset.S to preserve 128-byte cacheline size.
|
||||||
|
* sysdeps/powerpc/powerpc32/405/memset.S (memset): Fix cacheline size
|
||||||
|
to 32-bytes for 405, 440, and 464 processors.
|
||||||
|
|
||||||
2012-10-19 Roland McGrath <roland@hack.frob.com>
|
2012-10-19 Roland McGrath <roland@hack.frob.com>
|
||||||
|
|
||||||
* sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
|
* sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* Optimized memset implementation for PowerPC476.
|
/* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
|
||||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
|
@ -104,7 +104,7 @@ L(use_dcbz):
|
||||||
add r3,r3,r7
|
add r3,r3,r7
|
||||||
|
|
||||||
L(skip_string_loop):
|
L(skip_string_loop):
|
||||||
clrlwi r8,r6,25
|
clrlwi r8,r6,27
|
||||||
srwi. r8,r8,4
|
srwi. r8,r8,4
|
||||||
beq L(dcbz_pre_loop)
|
beq L(dcbz_pre_loop)
|
||||||
mtctr r8
|
mtctr r8
|
||||||
|
|
@ -119,14 +119,14 @@ L(word_loop):
|
||||||
bdnz L(word_loop)
|
bdnz L(word_loop)
|
||||||
|
|
||||||
L(dcbz_pre_loop):
|
L(dcbz_pre_loop):
|
||||||
srwi r6,r5,7
|
srwi r6,r5,5
|
||||||
mtctr r6
|
mtctr r6
|
||||||
addi r7,0,0
|
addi r7,0,0
|
||||||
|
|
||||||
L(dcbz_loop):
|
L(dcbz_loop):
|
||||||
dcbz r3,r7
|
dcbz r3,r7
|
||||||
addi r3,r3,0x80
|
addi r3,r3,0x20
|
||||||
subi r5,r5,0x80
|
subi r5,r5,0x20
|
||||||
bdnz L(dcbz_loop)
|
bdnz L(dcbz_loop)
|
||||||
srwi. r6,r5,4
|
srwi. r6,r5,4
|
||||||
beq L(postword2_count_loop)
|
beq L(postword2_count_loop)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,154 @@
|
||||||
|
/* Optimized memset for PowerPC476 (128-byte cacheline).
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library. If not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <bp-sym.h>
|
||||||
|
#include <bp-asm.h>
|
||||||
|
|
||||||
|
/* memset
|
||||||
|
|
||||||
|
r3:destination address and return address
|
||||||
|
r4:fill value (only the low byte is used)
|
||||||
|
r5:byte count
|
||||||
|
r11:low byte of r4 replicated into all four bytes of the register
|
||||||
|
r12:saved copy of the destination address (the return value)
|
||||||
|
|
||||||
|
Save the destination address (the return value) in r12
|
||||||
|
If destination is unaligned and count is greater than 255 bytes
|
||||||
|
set 0-3 bytes to make destination aligned
|
||||||
|
If count is greater than 255 bytes and setting zero to memory
|
||||||
|
use dcbz to set memory when we can
|
||||||
|
otherwise do the following
|
||||||
|
If 16 or more bytes remain to set we use a 4-word (16-byte) store loop.
|
||||||
|
Finally we set 0-15 extra bytes with string store. */
|
||||||
|
|
||||||
|
EALIGN (BP_SYM (memset), 5, 0)
|
||||||
|
rlwinm r11,r4,0,24,31
|
||||||
|
rlwimi r11,r4,8,16,23
|
||||||
|
rlwimi r11,r11,16,0,15
|
||||||
|
addi r12,r3,0
|
||||||
|
cmpwi r5,0x00FF
|
||||||
|
ble L(preword8_count_loop)
|
||||||
|
cmpwi r4,0x00
|
||||||
|
beq L(use_dcbz)
|
||||||
|
neg r6,r3
|
||||||
|
clrlwi. r6,r6,30
|
||||||
|
beq L(preword8_count_loop)
|
||||||
|
addi r8,0,1
|
||||||
|
mtctr r6
|
||||||
|
subi r3,r3,1
|
||||||
|
|
||||||
|
L(unaligned_bytecopy_loop):
|
||||||
|
stbu r11,0x1(r3)
|
||||||
|
subf. r5,r8,r5
|
||||||
|
beq L(end_memset)
|
||||||
|
bdnz L(unaligned_bytecopy_loop)
|
||||||
|
addi r3,r3,1
|
||||||
|
|
||||||
|
L(preword8_count_loop):
|
||||||
|
srwi. r6,r5,4
|
||||||
|
beq L(preword2_count_loop)
|
||||||
|
mtctr r6
|
||||||
|
addi r3,r3,-4
|
||||||
|
mr r8,r11
|
||||||
|
mr r9,r11
|
||||||
|
mr r10,r11
|
||||||
|
|
||||||
|
L(word8_count_loop_no_dcbt):
|
||||||
|
stwu r8,4(r3)
|
||||||
|
stwu r9,4(r3)
|
||||||
|
subi r5,r5,0x10
|
||||||
|
stwu r10,4(r3)
|
||||||
|
stwu r11,4(r3)
|
||||||
|
bdnz L(word8_count_loop_no_dcbt)
|
||||||
|
addi r3,r3,4
|
||||||
|
|
||||||
|
L(preword2_count_loop):
|
||||||
|
clrlwi. r7,r5,28
|
||||||
|
beq L(end_memset)
|
||||||
|
mr r8,r11
|
||||||
|
mr r9,r11
|
||||||
|
mr r10,r11
|
||||||
|
mtxer r7
|
||||||
|
stswx r8,0,r3
|
||||||
|
|
||||||
|
L(end_memset):
|
||||||
|
addi r3,r12,0
|
||||||
|
blr
|
||||||
|
|
||||||
|
L(use_dcbz):
|
||||||
|
neg r6,r3
|
||||||
|
clrlwi. r7,r6,28
|
||||||
|
beq L(skip_string_loop)
|
||||||
|
mr r8,r11
|
||||||
|
mr r9,r11
|
||||||
|
mr r10,r11
|
||||||
|
subf r5,r7,r5
|
||||||
|
mtxer r7
|
||||||
|
stswx r8,0,r3
|
||||||
|
add r3,r3,r7
|
||||||
|
|
||||||
|
L(skip_string_loop):
|
||||||
|
clrlwi r8,r6,25
|
||||||
|
srwi. r8,r8,4
|
||||||
|
beq L(dcbz_pre_loop)
|
||||||
|
mtctr r8
|
||||||
|
|
||||||
|
L(word_loop):
|
||||||
|
stw r11,0(r3)
|
||||||
|
subi r5,r5,0x10
|
||||||
|
stw r11,4(r3)
|
||||||
|
stw r11,8(r3)
|
||||||
|
stw r11,12(r3)
|
||||||
|
addi r3,r3,0x10
|
||||||
|
bdnz L(word_loop)
|
||||||
|
|
||||||
|
L(dcbz_pre_loop):
|
||||||
|
srwi r6,r5,7
|
||||||
|
mtctr r6
|
||||||
|
addi r7,0,0
|
||||||
|
|
||||||
|
L(dcbz_loop):
|
||||||
|
dcbz r3,r7
|
||||||
|
addi r3,r3,0x80
|
||||||
|
subi r5,r5,0x80
|
||||||
|
bdnz L(dcbz_loop)
|
||||||
|
srwi. r6,r5,4
|
||||||
|
beq L(postword2_count_loop)
|
||||||
|
mtctr r6
|
||||||
|
|
||||||
|
L(postword8_count_loop):
|
||||||
|
stw r11,0(r3)
|
||||||
|
subi r5,r5,0x10
|
||||||
|
stw r11,4(r3)
|
||||||
|
stw r11,8(r3)
|
||||||
|
stw r11,12(r3)
|
||||||
|
addi r3,r3,0x10
|
||||||
|
bdnz L(postword8_count_loop)
|
||||||
|
|
||||||
|
L(postword2_count_loop):
|
||||||
|
clrlwi. r7,r5,28
|
||||||
|
beq L(end_memset)
|
||||||
|
mr r8,r11
|
||||||
|
mr r9,r11
|
||||||
|
mr r10,r11
|
||||||
|
mtxer r7
|
||||||
|
stswx r8,0,r3
|
||||||
|
b L(end_memset)
|
||||||
|
END (BP_SYM (memset))
|
||||||
|
libc_hidden_builtin_def (memset)
|
||||||
Loading…
Reference in New Issue