mirror of git://sourceware.org/git/glibc.git
powerpc: Remove assembler workarounds
Now that we require at least binutils 2.39, support for the POWER9 and POWER10 instructions can be assumed.
parent c274c3c07f
commit eae5bb0f60
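Before this change, the files below hand-encoded each of these instructions as a raw .long word built from its opcode fields. As a minimal sketch, the arithmetic of the removed VEXTRACTBM macro can be reproduced in C (the field layout is taken from the macro itself; vextractbm_word is an illustrative name, not something in the patch):

#include <stdio.h>

/* Same arithmetic as the removed assembler macro: primary opcode 4,
   RT in bits 6-10, a constant 8 in bits 11-15, VRB in bits 16-20 and
   the extended opcode 1602 in the low bits (IBM bit numbering, hence
   the 32-minus-N shifts).  */
static unsigned int
vextractbm_word (unsigned int rt, unsigned int vrb)
{
  return (4u << (32 - 6))
	 | (rt << (32 - 11))
	 | (8u << (32 - 16))
	 | (vrb << (32 - 21))
	 | 1602u;
}

int
main (void)
{
  /* VEXTRACTBM(r7,v1) from the patch: the .long that stood in for
     "vextractbm r7,v1" until the assembler could encode it itself.  */
  printf ("0x%08x\n", vextractbm_word (7, 1));  /* 0x10e80e42 */
  return 0;
}

With binutils >= 2.39 assumed, the assembler accepts the mnemonics directly, so both the hand-encoded words and the workaround macros can go.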
@@ -18,26 +18,10 @@
 #include <sysdep.h>
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6)) \
-	      | ((rt)<<(32-11)) \
-	      | ((8)<<(32-16)) \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)	 \
-	.long(((6)<<(32-6)) \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11)) \
-	      | ((ra)<<(32-16)) \
-	      | dq)
-
 /* Compare 32 bytes.  */
 #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
-	LXVP(32+vr1,offset,r3);	\
-	LXVP(32+vr2,offset,r4);	\
+	lxvp	  32+vr1,offset(r3);	\
+	lxvp	  32+vr2,offset(r4);	\
 	vcmpneb.  v5,vr1+1,vr2+1;	\
 	bne	  cr6,L(tail_2);	\
 	vcmpneb.  v4,vr1,vr2;	\

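One detail in the hunk above: lxvp loads 32 bytes into a pair of adjacent VSX registers, and the instruction encodes the pair number rather than the raw register number, which is why the deleted macro shifts ((xtp)-32)>>1 into place. A small C sketch of that computation (operand values chosen for illustration; lxvp_word is a hypothetical helper name):

#include <stdio.h>

/* Same arithmetic as the removed LXVP macro: primary opcode 6, the
   register-pair number (XTp - 32) >> 1 in bits 6-9, a fixed 1 in bit
   10, RA in bits 11-15 and the DQ offset in the low bits.  */
static unsigned int
lxvp_word (unsigned int xtp, unsigned int dq, unsigned int ra)
{
  return (6u << (32 - 6))
	 | (((xtp - 32) >> 1) << (32 - 10))
	 | (1u << (32 - 11))
	 | (ra << (32 - 16))
	 | dq;
}

int
main (void)
{
  /* For example, a load through r3 into the VSR pair 34:35.  */
  printf ("0x%08x\n", lxvp_word (34, 0, 3));  /* 0x18630000 */
  return 0;
}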
@@ -63,22 +63,6 @@
 	blr
 #endif /* USE_AS_RAWMEMCHR */
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6)) \
-	      | ((rt)<<(32-11)) \
-	      | ((8)<<(32-16)) \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)	 \
-	.long(((6)<<(32-6)) \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11)) \
-	      | ((ra)<<(32-16)) \
-	      | dq)
-
 #define CHECK16(vreg,offset,addr,label)	\
 	lxv	  vreg+32,offset(addr);	\
 	vcmpequb. vreg,vreg,v18;	\
@@ -88,8 +72,8 @@
    of bytes already checked.  */
 #define CHECK64(offset,addr,label)	\
 	li	  r6,offset;	\
-	LXVP(v4+32,offset,addr);	\
-	LXVP(v6+32,offset+32,addr);	\
+	lxvp	  v4+32,offset(addr);	\
+	lxvp	  v6+32,offset+32(addr);	\
 	RAWMEMCHR_SUBTRACT_VECTORS;	\
 	vminub	  v14,v4,v5;	\
 	vminub	  v15,v6,v7;	\
@@ -234,10 +218,10 @@ L(tail_64b):
 	add	  r5,r5,r6
 
 	/* Extract first bit of each byte.  */
-	VEXTRACTBM(r7,v1)
-	VEXTRACTBM(r8,v2)
-	VEXTRACTBM(r9,v3)
-	VEXTRACTBM(r10,v4)
+	vextractbm r7,v1
+	vextractbm r8,v2
+	vextractbm r9,v3
+	vextractbm r10,v4
 
 	/* Shift each value into their corresponding position.  */
 	sldi	  r8,r8,16

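In the last hunk, vextractbm gathers the most-significant bit of each byte of a vector register into a 16-bit mask in a GPR, and the sldi shifts that follow merge the four masks into a single 64-bit value. A rough C model of the per-byte extraction (a paraphrase of the semantics for illustration; the exact lane ordering is simplified):

/* Rough model of "vextractbm rt,vrb": collect the top bit of each of
   the 16 bytes of a vector into a 16-bit mask, one result bit per
   input byte (the exact lane order is simplified here).  */
static unsigned int
vextractbm_model (const unsigned char v[16])
{
  unsigned int mask = 0;
  for (int i = 0; i < 16; i++)
    mask = (mask << 1) | (v[i] >> 7);
  return mask;
}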
@@ -28,21 +28,6 @@
    The implementation uses unaligned doubleword access for first 32 bytes
    as in POWER8 patch and uses vectorised loops after that.  */
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-		| ((t)<<(32-11))  \
-		| ((a)<<(32-16))  \
-		| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-		| ((t)<<(32-11))  \
-		| ((a)<<(32-16))  \
-		| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -61,10 +46,7 @@
 2:	\
 	vperm	reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when the minimum required binutils
-   allows it.  */
-
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRCMP, 4)
 	li	r0, 0
 
@@ -116,7 +98,7 @@ L(align):
 	/* Both s1 and s2 are unaligned.  */
 	GET16BYTES(v4, r7, v10)
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -136,28 +118,28 @@ L(match):
 L(s1_align):
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(aligned)
 
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vctzlsbb r6, v7
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr

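With the macros gone, the tail of the routine reads directly off the mnemonics: vcmpnezb. flags every byte position where the operands differ or either byte is NUL, vctzlsbb turns those flags into the index of the first flagged byte, and the two vextubrx pull out the bytes at that index so their difference can be returned. A C sketch of what the sequence computes (a paraphrase under this reading of the instructions, not glibc code):

/* What the vctzlsbb/vextubrx/subf/extsw tail computes once a 16-byte
   chunk is known to hold a mismatch or a NUL: the index of the first
   such byte, then the difference of the two bytes at that index.  */
static int
strcmp_tail (const unsigned char a[16], const unsigned char b[16])
{
  int i = 0;
  while (i < 15 && a[i] == b[i] && a[i] != 0)
    i++;
  return (int) a[i] - (int) b[i];
}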
@@ -29,21 +29,6 @@
 # define STRNCMP strncmp
 #endif
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-		| ((t)<<(32-11))  \
-		| ((a)<<(32-16))  \
-		| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-		| ((t)<<(32-11))  \
-		| ((a)<<(32-16))  \
-		| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -64,9 +49,7 @@
 2:	\
 	vperm	reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when minimum binutils
-   is upgraded to 2.27.  */
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRNCMP, 4)
 	/* Check if size is 0.  */
 	cmpdi	cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
 	clrldi	r6, r3, 60
 	subfic	r11, r6, 16
 	GET16BYTES(v4, r3, v10)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -186,7 +169,7 @@ L(match):
 L(s1_align):
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
 	b	L(aligned)
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
+	vctzlsbb r6, v7
 	cmplw	cr7, r5, r6
 	ble	cr7, L(ret0)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr
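The strncmp tail differs from the strcmp one only by the "cmplw cr7, r5, r6" / "ble cr7, L(ret0)" guard: when the first flagged index is at or past the remaining length kept in r5, the strings are equal within the limit and the result is 0. The same logic as a C sketch (again a paraphrase, extending the strcmp_tail example above):

/* strncmp variant of the tail above: a byte at index i >= n, the
   remaining count held in r5 in the assembly, must not decide the
   result, so the guard returns 0 first.  */
static int
strncmp_tail (const unsigned char a[16], const unsigned char b[16],
	      unsigned int n)
{
  int i = 0;
  while (i < 15 && a[i] == b[i] && a[i] != 0)
    i++;
  if ((unsigned int) i >= n)
    return 0;
  return (int) a[i] - (int) b[i];
}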