mirror of git://sourceware.org/git/glibc.git
AArch64: Remove -0.0 check from vector sin
Remove the unnecessary extra checks for sin (-0.0) from the vector sin/sinf routines, improving performance. Passes the regression tests. Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
parent
fd134feba3
commit
6b695e5c62
|
|
@ -56,7 +56,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
|
||||||
{
|
{
|
||||||
const struct data *d = ptr_barrier (&data);
|
const struct data *d = ptr_barrier (&data);
|
||||||
float64x2_t n, r, r2, r3, r4, y, t1, t2, t3;
|
float64x2_t n, r, r2, r3, r4, y, t1, t2, t3;
|
||||||
uint64x2_t odd, cmp, eqz;
|
uint64x2_t odd, cmp;
|
||||||
|
|
||||||
#if WANT_SIMD_EXCEPT
|
#if WANT_SIMD_EXCEPT
|
||||||
/* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be
|
/* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be
|
||||||
|
|
@ -70,7 +70,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
|
||||||
cmp = vcageq_f64 (d->range_val, x);
|
cmp = vcageq_f64 (d->range_val, x);
|
||||||
cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */
|
cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */
|
||||||
#endif
|
#endif
|
||||||
eqz = vceqzq_f64 (x);
|
|
||||||
|
|
||||||
/* n = rint(|x|/pi). */
|
/* n = rint(|x|/pi). */
|
||||||
n = vfmaq_f64 (d->shift, d->inv_pi, r);
|
n = vfmaq_f64 (d->shift, d->inv_pi, r);
|
||||||
|
|
@ -96,10 +95,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
|
||||||
y = vfmaq_f64 (t3, y, r4);
|
y = vfmaq_f64 (t3, y, r4);
|
||||||
y = vfmaq_f64 (r, y, r3);
|
y = vfmaq_f64 (r, y, r3);
|
||||||
|
|
||||||
/* Sign of 0 is discarded by polynomial, so copy it back here. */
|
|
||||||
if (__glibc_unlikely (v_any_u64 (eqz)))
|
|
||||||
y = vbslq_f64 (eqz, x, y);
|
|
||||||
|
|
||||||
if (__glibc_unlikely (v_any_u64 (cmp)))
|
if (__glibc_unlikely (v_any_u64 (cmp)))
|
||||||
return special_case (x, y, odd, cmp);
|
return special_case (x, y, odd, cmp);
|
||||||
return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
|
return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
|
||||||
{
|
{
|
||||||
const struct data *d = ptr_barrier (&data);
|
const struct data *d = ptr_barrier (&data);
|
||||||
float32x4_t n, r, r2, y;
|
float32x4_t n, r, r2, y;
|
||||||
uint32x4_t odd, cmp, eqz;
|
uint32x4_t odd, cmp;
|
||||||
|
|
||||||
#if WANT_SIMD_EXCEPT
|
#if WANT_SIMD_EXCEPT
|
||||||
uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
|
uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
|
||||||
|
|
@ -70,7 +70,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
|
||||||
cmp = vcageq_f32 (d->range_val, x);
|
cmp = vcageq_f32 (d->range_val, x);
|
||||||
cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */
|
cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */
|
||||||
#endif
|
#endif
|
||||||
eqz = vceqzq_f32 (x);
|
|
||||||
|
|
||||||
/* n = rint(|x|/pi) */
|
/* n = rint(|x|/pi) */
|
||||||
n = vfmaq_f32 (d->shift, d->inv_pi, r);
|
n = vfmaq_f32 (d->shift, d->inv_pi, r);
|
||||||
|
|
@ -89,10 +88,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
|
||||||
y = vfmaq_f32 (C (0), y, r2);
|
y = vfmaq_f32 (C (0), y, r2);
|
||||||
y = vfmaq_f32 (r, vmulq_f32 (y, r2), r);
|
y = vfmaq_f32 (r, vmulq_f32 (y, r2), r);
|
||||||
|
|
||||||
/* Sign of 0 is discarded by polynomial, so copy it back here. */
|
|
||||||
if (__glibc_unlikely (v_any_u32 (eqz)))
|
|
||||||
y = vbslq_f32 (eqz, x, y);
|
|
||||||
|
|
||||||
if (__glibc_unlikely (v_any_u32 (cmp)))
|
if (__glibc_unlikely (v_any_u32 (cmp)))
|
||||||
return special_case (x, y, odd, cmp);
|
return special_case (x, y, odd, cmp);
|
||||||
return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
|
return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue