mirror of git://sourceware.org/git/glibc.git
AArch64: Fix SVE powf routine [BZ #33299]
Fix a bug in the predicate logic introduced in the last change. This also gives a slight performance improvement by relying on all-true predicates during the conversion from single to double precision. This fixes BZ #33299. Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
This commit is contained in:
parent
6f999af332
commit
aac077645a
|
|
@@ -223,15 +223,15 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
|
||||||
const svbool_t ptrue = svptrue_b64 ();
|
const svbool_t ptrue = svptrue_b64 ();
|
||||||
|
|
||||||
/* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
|
/* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
|
||||||
* in order to perform core computation in double precision. */
|
in order to perform core computation in double precision. */
|
||||||
const svbool_t pg_lo = svunpklo (pg);
|
const svbool_t pg_lo = svunpklo (pg);
|
||||||
const svbool_t pg_hi = svunpkhi (pg);
|
const svbool_t pg_hi = svunpkhi (pg);
|
||||||
svfloat64_t y_lo
|
svfloat64_t y_lo = svcvt_f64_x (
|
||||||
= svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
|
ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
|
||||||
svfloat64_t y_hi
|
svfloat64_t y_hi = svcvt_f64_x (
|
||||||
= svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
|
ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
|
||||||
svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
|
svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));
|
||||||
svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
|
svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));
|
||||||
svuint64_t i_lo = svunpklo (i);
|
svuint64_t i_lo = svunpklo (i);
|
||||||
svuint64_t i_hi = svunpkhi (i);
|
svuint64_t i_hi = svunpkhi (i);
|
||||||
svint64_t k_lo = svunpklo (k);
|
svint64_t k_lo = svunpklo (k);
|
||||||
|
|
@@ -312,7 +312,7 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
|
||||||
(23 - V_POWF_EXP2_TABLE_BITS));
|
(23 - V_POWF_EXP2_TABLE_BITS));
|
||||||
|
|
||||||
/* Compute core in extended precision and return intermediate ylogx results
|
/* Compute core in extended precision and return intermediate ylogx results
|
||||||
* to handle cases of underflow and underflow in exp. */
|
to handle cases of underflow and overflow in exp. */
|
||||||
svfloat32_t ylogx;
|
svfloat32_t ylogx;
|
||||||
svfloat32_t ret
|
svfloat32_t ret
|
||||||
= sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
|
= sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue