powerpc: Remove assembler workarounds

Now that we require at least binutils 2.39, support for POWER9 and
POWER10 instructions can be assumed.
Andreas Schwab 2025-06-05 10:24:46 +02:00
parent c274c3c07f
commit eae5bb0f60
4 changed files with 34 additions and 101 deletions
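The deleted macros all follow the same trick: while the minimum binutils could not assemble a mnemonic, glibc emitted the 32-bit instruction word itself with .long, composing the opcode and register fields by hand. As a rough illustration (plain C, not glibc code; the helper name and printed value are ours), this is what the removed VCMPNEZB macro computed:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: what the removed VCMPNEZB macro computed.  The
   constant 0x10000507 carries primary opcode 4, the extended opcode of
   vcmpnezb and the record bit, so the result is the "dot" form whose
   outcome can be tested in cr6.  */
static uint32_t vcmpnezb_dot (unsigned vrt, unsigned vra, unsigned vrb)
{
  return 0x10000507u
         | (vrt << (32 - 11))   /* VRT, bits 6-10 */
         | (vra << (32 - 16))   /* VRA, bits 11-15 */
         | (vrb << (32 - 21));  /* VRB, bits 16-20 */
}

int main (void)
{
  /* The word for "vcmpnezb. v7, v5, v4" as used in strcmp below.  */
  printf ("0x%08x\n", vcmpnezb_dot (7, 5, 4));  /* 0x10e52507 */
  return 0;
}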

sysdeps/powerpc/powerpc64/le/power10/memcmp.S

@@ -18,26 +18,10 @@
 #include <sysdep.h>
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)       \
-        .long(((4)<<(32-6))      \
-              | ((rt)<<(32-11))  \
-              | ((8)<<(32-16))   \
-              | ((vrb)<<(32-21)) \
-              | 1602)
-
-#define LXVP(xtp,dq,ra)                    \
-        .long(((6)<<(32-6))                \
-              | ((((xtp)-32)>>1)<<(32-10)) \
-              | ((1)<<(32-11))             \
-              | ((ra)<<(32-16))            \
-              | dq)
-
 /* Compare 32 bytes.  */
 #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
-        LXVP(32+vr1,offset,r3);         \
-        LXVP(32+vr2,offset,r4);         \
+        lxvp    32+vr1,offset(r3);      \
+        lxvp    32+vr2,offset(r4);      \
         vcmpneb.        v5,vr1+1,vr2+1; \
         bne     cr6,L(tail_2);          \
         vcmpneb.        v4,vr1,vr2;     \
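For the pair of macros removed above: VEXTRACTBM hand-encodes the VX-form vextractbm, and LXVP hand-encodes the DQ-form lxvp load of a VSX register pair. A sketch of what LXVP computed (plain C; field names per the ISA, lxvp_word is our illustrative name, ra is the plain GPR number):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: what the removed LXVP macro computed (DQ-form).
   xtp is the VSX register number; the callers pass 32+vr, so TX is
   hard-wired to 1 and Tp gets (xtp-32)/2.  dq is the byte offset; it
   must be a multiple of 16, which both keeps the access quadword
   aligned and makes OR-ing it in land exactly in the DQ field with the
   low four opcode bits left zero.  */
static uint32_t lxvp_word (unsigned xtp, unsigned dq, unsigned ra)
{
  return (6u << (32 - 6))                    /* primary opcode 6    */
         | (((xtp - 32) >> 1) << (32 - 10))  /* Tp, bits 6-9        */
         | (1u << (32 - 11))                 /* TX = 1: VSRs 32-63  */
         | (ra << (32 - 16))                 /* RA, bits 11-15      */
         | dq;                               /* DQ field, offset/16 */
}

int main (void)
{
  /* For example, the word for "lxvp 34,0(r3)", i.e. vr1 = v2.  */
  printf ("0x%08x\n", lxvp_word (34, 0, 3));
  return 0;
}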

sysdeps/powerpc/powerpc64/le/power10/rawmemchr.S

@@ -63,22 +63,6 @@
         blr
 #endif /* USE_AS_RAWMEMCHR */
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)       \
-        .long(((4)<<(32-6))      \
-              | ((rt)<<(32-11))  \
-              | ((8)<<(32-16))   \
-              | ((vrb)<<(32-21)) \
-              | 1602)
-
-#define LXVP(xtp,dq,ra)                    \
-        .long(((6)<<(32-6))                \
-              | ((((xtp)-32)>>1)<<(32-10)) \
-              | ((1)<<(32-11))             \
-              | ((ra)<<(32-16))            \
-              | dq)
-
 #define CHECK16(vreg,offset,addr,label) \
         lxv     vreg+32,offset(addr);   \
         vcmpequb.       vreg,vreg,v18;  \
@@ -88,8 +72,8 @@
    of bytes already checked.  */
 #define CHECK64(offset,addr,label)      \
         li      r6,offset;              \
-        LXVP(v4+32,offset,addr);        \
-        LXVP(v6+32,offset+32,addr);     \
+        lxvp    v4+32,offset(addr);     \
+        lxvp    v6+32,offset+32(addr);  \
         RAWMEMCHR_SUBTRACT_VECTORS;     \
         vminub  v14,v4,v5;              \
         vminub  v15,v6,v7;              \
@@ -234,10 +218,10 @@ L(tail_64b):
         add     r5,r5,r6
         /* Extract first bit of each byte.  */
-        VEXTRACTBM(r7,v1)
-        VEXTRACTBM(r8,v2)
-        VEXTRACTBM(r9,v3)
-        VEXTRACTBM(r10,v4)
+        vextractbm      r7,v1
+        vextractbm      r8,v2
+        vextractbm      r9,v3
+        vextractbm      r10,v4
         /* Shift each value into their corresponding position.  */
         sldi    r8,r8,16
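Context for the four vextractbm above: each extracts the top bit of every byte of one 128-bit vector into a 16-bit GPR mask, and the shift/OR sequence that follows merges the four masks into a single 64-bit mask covering the whole 64-byte block. A small C model (names are ours; the 32- and 48-bit shifts are inferred from the 16-bit "sldi r8,r8,16" shown):

#include <stdint.h>

/* Model of the mask merge after the four vextractbm instructions: one
   16-bit mask per 128-bit vector register, one bit per byte, combined
   into a 64-bit mask for the 64-byte block.  */
static uint64_t
combine_masks (uint16_t m1, uint16_t m2, uint16_t m3, uint16_t m4)
{
  return (uint64_t) m1
         | ((uint64_t) m2 << 16)        /* sldi r8,r8,16 */
         | ((uint64_t) m3 << 32)
         | ((uint64_t) m4 << 48);
}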

sysdeps/powerpc/powerpc64/le/power9/strcmp.S

@@ -28,21 +28,6 @@
    The implementation uses unaligned doubleword access for first 32 bytes
    as in POWER8 patch and uses vectorised loops after that.  */
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-                | ((t)<<(32-11))  \
-                | ((a)<<(32-16))  \
-                | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-                | ((t)<<(32-11))  \
-                | ((a)<<(32-16))  \
-                | ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -61,10 +46,7 @@
 2:      \
         vperm reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when the minimum required binutils
-   allows it.  */
-        .machine power7
-
+        .machine power9
 ENTRY_TOCLESS (STRCMP, 4)
         li      r0, 0
@@ -116,7 +98,7 @@ L(align):
         /* Both s1 and s2 are unaligned.  */
         GET16BYTES(v4, r7, v10)
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         beq     cr6, L(match)
         b       L(different)
@@ -136,28 +118,28 @@ L(match):
 L(s1_align):
         lvx     v4, r7, r0
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, r7, r0
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, r7, r0
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, r7, r0
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         beq     cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
 L(aligned):
         lvx     v4, 0, r7
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, 0, r7
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, 0, r7
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         bne     cr6, L(different)
 
         lvx     v4, 0, r7
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         addi    r7, r7, 16
         addi    r4, r4, 16
         beq     cr6, L(aligned)
 
         /* Calculate and return the difference.  */
 L(different):
-        VCTZLSBB(r6, v7)
-        VEXTUBRX(r5, r6, v4)
-        VEXTUBRX(r4, r6, v5)
+        vctzlsbb        r6, v7
+        vextubrx        r5, r6, v4
+        vextubrx        r4, r6, v5
         subf    r3, r4, r5
         extsw   r3, r3
         blr
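The rewritten L(different) tail is the usual POWER9 sequence: vcmpnezb. leaves 0xff in every byte lane where the strings differ or contain a NUL, vctzlsbb yields the index of the first such lane, and the two vextubrx pull the bytes out so their difference can be returned. A C model of that tail (illustrative only, names are ours):

#include <stdint.h>

/* C model of the L(different) tail above.  neq plays the role of v7
   after "vcmpnezb. v7, v5, v4": 0xff in every lane where the strings
   differ or hold a NUL.  The scan stands in for vctzlsbb, the indexed
   accesses for the two vextubrx, and the subtraction for subf/extsw.  */
static int
strcmp_tail (const uint8_t s1[16], const uint8_t s2[16],
             const uint8_t neq[16])
{
  int i = 0;
  while (i < 16 && (neq[i] & 1) == 0)   /* vctzlsbb r6, v7 */
    i++;
  if (i == 16)                          /* no mismatch in this chunk */
    return 0;
  return (int) s1[i] - (int) s2[i];     /* vextubrx, subf, extsw */
}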

sysdeps/powerpc/powerpc64/le/power9/strncmp.S

@@ -29,21 +29,6 @@
 # define STRNCMP strncmp
 #endif
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-                | ((t)<<(32-11))  \
-                | ((a)<<(32-16))  \
-                | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-                | ((t)<<(32-11))  \
-                | ((a)<<(32-16))  \
-                | ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -64,9 +49,7 @@
 2:      \
         vperm reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when minimum binutils
-   is upgraded to 2.27.  */
-        .machine power7
+        .machine power9
 ENTRY_TOCLESS (STRNCMP, 4)
         /* Check if size is 0.  */
         cmpdi   cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
         clrldi  r6, r3, 60
         subfic  r11, r6, 16
         GET16BYTES(v4, r3, v10)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         beq     cr6, L(match)
         b       L(different)
@@ -186,7 +169,7 @@ L(match):
 L(s1_align):
         lvx     v4, 0, r3
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
         lvx     v4, 0, r3
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
         lvx     v4, 0, r3
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
         lvx     v4, 0, r3
         GET16BYTES(v5, r4, v6)
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
 L(aligned):
         lvx     v4, 0, r3
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
         lvx     v4, 0, r3
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
         lvx     v4, 0, r3
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
         lvx     v4, 0, r3
         lvx     v5, 0, r4
-        VCMPNEZB(v7, v5, v4)
+        vcmpnezb.       v7, v5, v4
         bne     cr6, L(different)
         cmpldi  cr7, r5, 16
         ble     cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
         b       L(aligned)
         /* Calculate and return the difference.  */
 L(different):
-        VCTZLSBB(r6, v7)
+        vctzlsbb        r6, v7
         cmplw   cr7, r5, r6
         ble     cr7, L(ret0)
-        VEXTUBRX(r5, r6, v4)
-        VEXTUBRX(r4, r6, v5)
+        vextubrx        r5, r6, v4
+        vextubrx        r4, r6, v5
         subf    r3, r4, r5
         extsw   r3, r3
         blr
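strncmp's version of L(different) adds one guard over strcmp's: if the index of the first mismatch (r6) lies at or beyond the remaining length (r5), the strings compare equal within the bound and 0 is returned. In C terms (illustrative only, names are ours):

#include <stdint.h>

/* C model of strncmp's L(different): the length bound is checked
   before the differing bytes are extracted.  */
static int
strncmp_tail (unsigned remaining, unsigned idx, uint8_t b1, uint8_t b2)
{
  if (remaining <= idx)         /* cmplw cr7, r5, r6; ble cr7, L(ret0) */
    return 0;
  return (int) b1 - (int) b2;   /* vextubrx, subf, extsw */
}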