mirror of git://sourceware.org/git/glibc.git
math: Optimize f{max,min}imum{f,l,f128}
Add an isunordered check as a fast path, simplify the sign check, and use the fmax/fmin builtins when possible.

With gcc-15 on aarch64:

* master:

0000000000000000 <__fmaximum>:
   0: d503245f bti c
   4: 1e612000 fcmp d0, d1
   8: 5400008d b.le 18 <__fmaximum+0x18>
   c: 1e60401f fmov d31, d0
  10: 1e6043e0 fmov d0, d31
  14: d65f03c0 ret
  18: 54000085 b.pl 28 <__fmaximum+0x28> // b.nfrst
  1c: 1e60403f fmov d31, d1
  20: 1e6043e0 fmov d0, d31
  24: d65f03c0 ret
  28: 54000161 b.ne 54 <__fmaximum+0x54> // b.any
  2c: 4f000402 movi v2.4s, #0x0
  30: 1e6e101e fmov d30, #1.000000000000000000e+00
  34: 6ee0f842 fneg v2.2d, v2.2d
  38: 4ea21c5d mov v29.16b, v2.16b
  3c: 2e7e1c22 bsl v2.8b, v1.8b, v30.8b
  40: 2e7e1c1d bsl v29.8b, v0.8b, v30.8b
  44: 1e6223b0 fcmpe d29, d2
  48: 1e61ac1f fcsel d31, d0, d1, ge // ge = tcont
  4c: 1e6043e0 fmov d0, d31
  50: d65f03c0 ret
  54: 1e61281f fadd d31, d0, d1
  58: 1e6043e0 fmov d0, d31
  5c: d65f03c0 ret

* patched:

0000000000000000 <__fmaximum>:
   0: d503245f bti c
   4: 1e612000 fcmp d0, d1
   8: 54000086 b.vs 18 <__fmaximum+0x18>
   c: 1e61681f fmaxnm d31, d0, d1
  10: 1e6043e0 fmov d0, d31
  14: d65f03c0 ret
  18: 1e61281f fadd d31, d0, d1
  1c: 1e6043e0 fmov d0, d31
  20: d65f03c0 ret

And with gcc-15 on x86_64:

* master:

0000000000000000 <__fmaximum>:
   0: 66 0f 2e c1 ucomisd %xmm1,%xmm0
   4: 77 56 ja 5c <__fmaximum+0x5c>
   6: 66 0f 2e c8 ucomisd %xmm0,%xmm1
   a: 77 4c ja 58 <__fmaximum+0x58>
   c: 66 0f 2e c1 ucomisd %xmm1,%xmm0
  10: 7a 4e jp 60 <__fmaximum+0x60>
  12: 75 4c jne 60 <__fmaximum+0x60>
  14: f3 0f 7e 1d 00 00 00 movq 0x0(%rip),%xmm3 # 1c <__fmaximum+0x1c>
  1b: 00
  1c: f2 0f 10 15 00 00 00 movsd 0x0(%rip),%xmm2 # 24 <__fmaximum+0x24>
  23: 00
  24: 66 0f 28 e3 movapd %xmm3,%xmm4
  28: 66 0f 54 15 00 00 00 andpd 0x0(%rip),%xmm2 # 30 <__fmaximum+0x30>
  2f: 00
  30: 66 0f 54 e0 andpd %xmm0,%xmm4
  34: 66 0f 54 d9 andpd %xmm1,%xmm3
  38: 66 0f 56 e2 orpd %xmm2,%xmm4
  3c: 66 0f 56 d3 orpd %xmm3,%xmm2
  40: f2 0f c2 d4 02 cmplesd %xmm4,%xmm2
  45: 66 0f 54 c2 andpd %xmm2,%xmm0
  49: 66 0f 55 d1 andnpd %xmm1,%xmm2
  4d: 66 0f 56 c2 orpd %xmm2,%xmm0
  51: c3 ret
  52: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
  58: 66 0f 28 c1 movapd %xmm1,%xmm0
  5c: c3 ret
  5d: 0f 1f 00 nopl (%rax)
  60: f2 0f 58 c1 addsd %xmm1,%xmm0
  64: c3 ret

* patched:

0000000000000000 <__fmaximum>:
   0: 66 0f 2e c1 ucomisd %xmm1,%xmm0
   4: 7a 2a jp 30 <__fmaximum+0x30>
   6: 77 18 ja 20 <__fmaximum+0x20>
   8: 66 0f 2e c8 ucomisd %xmm0,%xmm1
   c: 77 08 ja 16 <__fmaximum+0x16>
   e: 66 0f 50 c0 movmskpd %xmm0,%eax
  12: a8 01 test $0x1,%al
  14: 74 0a je 20 <__fmaximum+0x20>
  16: 66 0f 28 c1 movapd %xmm1,%xmm0
  1a: c3 ret
  1b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
  20: 66 0f 28 c8 movapd %xmm0,%xmm1
  24: 66 0f 28 c1 movapd %xmm1,%xmm0
  28: c3 ret
  29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
  30: f2 0f 58 c8 addsd %xmm0,%xmm1
  34: 66 0f 28 c1 movapd %xmm1,%xmm0
  38: c3 ret

Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64.

Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
This commit is contained in:
parent
822bb11278
commit
a95db490b5
|
|
@ -21,12 +21,18 @@
|
|||
/* Rendered diff hunk for __fmaximum (x, y), which selects the larger of
   its two floating-point arguments.  The hunk shows the lines the commit
   removes, then the lines it adds, then unchanged context; the `|'
   rows are gutter residue from the web view, not C code.
   NOTE(review): the removed/added split below is inferred from the
   hunk header's 12-old/18-new line counts -- confirm against the raw
   diff.  */
FLOAT
|
||||
M_DECL_FUNC (__fmaximum) (FLOAT x, FLOAT y)
|
||||
{
|
||||
/* Removed by the commit: an ordered greater/less/equal chain in which
   equal operands are disambiguated by comparing M_COPYSIGN (1, x)
   against M_COPYSIGN (1, y), i.e. by their signs only.  */
if (isgreater (x, y))
|
||||
return x;
|
||||
else if (isless (x, y))
|
||||
return y;
|
||||
else if (x == y)
|
||||
return (M_COPYSIGN (1, x) >= M_COPYSIGN (1, y) ? x : y);
|
||||
/* Added by the commit: test for the ordered case once, up front, and
   mark it as the likely path.  */
if (__glibc_likely (!isunordered (x, y)))
|
||||
{
|
||||
/* When M_USE_BUILTIN (FMAX) is true, the ordered case is handed to the
   type-suffixed __builtin_fmax.  */
#if M_USE_BUILTIN (FMAX)
|
||||
return M_SUF (__builtin_fmax) (x, y);
|
||||
#else
|
||||
if (isgreater (x, y))
|
||||
return x;
|
||||
else if (isless (x, y))
|
||||
return y;
|
||||
/* Ordered, and neither greater nor less, so x == y here.  The operands
   may still differ in sign (e.g. +0 and -0): return y when x has its
   sign bit set, otherwise x.  */
return signbit (x) ? y : x;
|
||||
#endif
|
||||
}
|
||||
/* Unchanged context: in both versions unordered operands reach this
   branch and the result is x + y.  */
else
|
||||
return x + y;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,12 +21,18 @@
|
|||
/* Rendered diff hunk for __fminimum (x, y), which selects the smaller
   of its two floating-point arguments.  The hunk shows the lines the
   commit removes, then the lines it adds, then unchanged context; the
   `|' rows are gutter residue from the web view, not C code.
   NOTE(review): the removed/added split below is inferred from the
   hunk header's 12-old/18-new line counts -- confirm against the raw
   diff.  */
FLOAT
|
||||
M_DECL_FUNC (__fminimum) (FLOAT x, FLOAT y)
|
||||
{
|
||||
/* Removed by the commit: an ordered less/greater/equal chain in which
   equal operands are disambiguated by comparing M_COPYSIGN (1, x)
   against M_COPYSIGN (1, y), i.e. by their signs only.  */
if (isless (x, y))
|
||||
return x;
|
||||
else if (isgreater (x, y))
|
||||
return y;
|
||||
else if (x == y)
|
||||
return (M_COPYSIGN (1, x) <= M_COPYSIGN (1, y) ? x : y);
|
||||
/* Added by the commit: test for the ordered case once, up front, and
   mark it as the likely path.  */
if (__glibc_likely (!isunordered (x, y)))
|
||||
{
|
||||
/* When M_USE_BUILTIN (FMIN) is true, the ordered case is handed to the
   type-suffixed __builtin_fmin.  */
#if M_USE_BUILTIN (FMIN)
|
||||
return M_SUF (__builtin_fmin) (x, y);
|
||||
#else
|
||||
if (isless (x, y))
|
||||
return x;
|
||||
else if (isgreater (x, y))
|
||||
return y;
|
||||
/* Ordered, and neither less nor greater, so x == y here.  The operands
   may still differ in sign (e.g. +0 and -0): return x when x has its
   sign bit set, otherwise y.  */
return signbit (x) ? x : y;
|
||||
#endif
|
||||
}
|
||||
/* Unchanged context: in both versions unordered operands reach this
   branch and the result is x + y.  */
else
|
||||
return x + y;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue