1998-04-03 23:38  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/libm-ieee754/e_acos.c: Optimize by splitting large
	expressions and using array variables.
	* sysdeps/libm-ieee754/e_asin.c: Likewise.
	* sysdeps/libm-ieee754/e_j0.c: Likewise.
	* sysdeps/libm-ieee754/e_j1.c: Likewise.
	* sysdeps/libm-ieee754/e_log.c: Likewise.
	* sysdeps/libm-ieee754/e_pow.c: Likewise.
	* sysdeps/libm-ieee754/k_cos.c: Likewise.
	* sysdeps/libm-ieee754/k_sin.c: Likewise.
	* sysdeps/libm-ieee754/k_tan.c: Likewise.
	* sysdeps/libm-ieee754/s_atan.c: Likewise.
	* sysdeps/libm-ieee754/s_erf.c: Likewise.
	* sysdeps/libm-ieee754/s_expm1.c: Likewise.
	* sysdeps/libm-ieee754/s_log1p.c: Likewise.
	Patch by Naohiko Shimizu <nshimizu@et.u-tokai.ac.jp>.
This commit is contained in:
Ulrich Drepper 1998-04-04 07:46:55 +00:00
parent 0d9f67937f
commit 923609d149
14 changed files with 662 additions and 257 deletions

View File

@ -1,3 +1,21 @@
1998-04-03 23:38 Ulrich Drepper <drepper@cygnus.com>
* sysdeps/libm-ieee754/e_acos.c: Optimize by splitting large
expressions and using array variables.
* sysdeps/libm-ieee754/e_asin.c: Likewise.
* sysdeps/libm-ieee754/e_j0.c: Likewise.
* sysdeps/libm-ieee754/e_j1.c: Likewise.
* sysdeps/libm-ieee754/e_log.c: Likewise.
* sysdeps/libm-ieee754/e_pow.c: Likewise.
* sysdeps/libm-ieee754/k_cos.c: Likewise.
* sysdeps/libm-ieee754/k_sin.c: Likewise.
* sysdeps/libm-ieee754/k_tan.c: Likewise.
* sysdeps/libm-ieee754/s_atan.c: Likewise.
* sysdeps/libm-ieee754/s_erf.c: Likewise.
* sysdeps/libm-ieee754/s_expm1.c: Likewise.
* sysdeps/libm-ieee754/s_log1p.c: Likewise.
Patch by Naohiko Shimizu <nshimizu@et.u-tokai.ac.jp>.
1998-04-03 23:17 Ulrich Drepper <drepper@cygnus.com> 1998-04-03 23:17 Ulrich Drepper <drepper@cygnus.com>
* iconv/gconv.c: Rewrite of the low-level of gconv. * iconv/gconv.c: Rewrite of the low-level of gconv.

View File

@ -5,24 +5,27 @@
* *
* Developed at SunPro, a Sun Microsystems, Inc. business. * Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this * Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice * software is freely granted, provided that this notice
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $"; static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $";
#endif #endif
/* __ieee754_acos(x) /* __ieee754_acos(x)
* Method : * Method :
* acos(x) = pi/2 - asin(x) * acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x) * acos(-x) = pi/2 + asin(x)
* For |x|<=0.5 * For |x|<=0.5
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* For x>0.5 * For x>0.5
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
* = 2asin(sqrt((1-x)/2)) * = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z)) * = 2f + (2c + 2s*z*R(z))
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
@ -40,26 +43,26 @@ static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#define one qS[0]
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
static double static double
#endif #endif
one= 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ qS[] ={1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */
#ifdef __STDC__ #ifdef __STDC__
double __ieee754_acos(double x) double __ieee754_acos(double x)
@ -68,7 +71,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
double x; double x;
#endif #endif
{ {
double z,p,q,r,w,s,c,df; double z,p,q,r,w,s,c,df,p1,p2,p3,q1,q2,q3,z2,z4,z6;
int32_t hx,ix; int32_t hx,ix;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff; ix = hx&0x7fffffff;
@ -84,14 +87,34 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
if(ix<0x3fe00000) { /* |x| < 0.5 */ if(ix<0x3fe00000) { /* |x| < 0.5 */
if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/ if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
#else
p1 = z*pS[0]; z2=z*z;
p2 = pS[1]+z*pS[2]; z4=z2*z2;
p3 = pS[3]+z*pS[4]; z6=z4*z2;
q1 = one+z*qS[1];
q2 = qS[2]+z*qS[3];
p = p1 + z2*p2 + z4*p3 + z6*pS[5];
q = q1 + z2*q2 + z4*qS[4];
#endif
r = p/q; r = p/q;
return pio2_hi - (x - (pio2_lo-x*r)); return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */ } else if (hx<0) { /* x < -0.5 */
z = (one+x)*0.5; z = (one+x)*0.5;
#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
#else
p1 = z*pS[0]; z2=z*z;
p2 = pS[1]+z*pS[2]; z4=z2*z2;
p3 = pS[3]+z*pS[4]; z6=z4*z2;
q1 = one+z*qS[1];
q2 = qS[2]+z*qS[3];
p = p1 + z2*p2 + z4*p3 + z6*pS[5];
q = q1 + z2*q2 + z4*qS[4];
#endif
s = __ieee754_sqrt(z); s = __ieee754_sqrt(z);
r = p/q; r = p/q;
w = r*s-pio2_lo; w = r*s-pio2_lo;
@ -102,8 +125,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
df = s; df = s;
SET_LOW_WORD(df,0); SET_LOW_WORD(df,0);
c = (z-df*df)/(s+df); c = (z-df*df)/(s+df);
#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
#else
p1 = z*pS[0]; z2=z*z;
p2 = pS[1]+z*pS[2]; z4=z2*z2;
p3 = pS[3]+z*pS[4]; z6=z4*z2;
q1 = one+z*qS[1];
q2 = qS[2]+z*qS[3];
p = p1 + z2*p2 + z4*p3 + z6*pS[5];
q = q1 + z2*q2 + z4*qS[4];
#endif
r = p/q; r = p/q;
w = r*s+c; w = r*s+c;
return 2.0*(df+w); return 2.0*(df+w);

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $"; static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $";
@ -47,28 +50,27 @@ static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#define one qS[0]
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
static double static double
#endif #endif
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
huge = 1.000e+300, huge = 1.000e+300,
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
/* coefficient for R(x^2) */ /* coefficient for R(x^2) */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ qS[] = {1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */
#ifdef __STDC__ #ifdef __STDC__
double __ieee754_asin(double x) double __ieee754_asin(double x)
@ -77,7 +79,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
double x; double x;
#endif #endif
{ {
double t,w,p,q,c,r,s; double t,w,p,q,c,r,s,p1,p2,p3,q1,q2,q3,z2,z4,z6;
int32_t hx,ix; int32_t hx,ix;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff; ix = hx&0x7fffffff;
@ -93,8 +95,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
if(huge+x>one) return x;/* return x with inexact if x!=0*/ if(huge+x>one) return x;/* return x with inexact if x!=0*/
} else { } else {
t = x*x; t = x*x;
#ifdef DO_NOT_USE_THIS
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
#else
p1 = t*pS[0]; z2=t*t;
p2 = pS[1]+t*pS[2]; z4=z2*z2;
p3 = pS[3]+t*pS[4]; z6=z4*z2;
q1 = one+t*qS[1];
q2 = qS[2]+t*qS[3];
p = p1 + z2*p2 + z4*p3 + z6*pS[5];
q = q1 + z2*q2 + z4*qS[4];
#endif
w = p/q; w = p/q;
return x+x*w; return x+x*w;
} }
@ -102,8 +114,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
/* 1> |x|>= 0.5 */ /* 1> |x|>= 0.5 */
w = one-fabs(x); w = one-fabs(x);
t = w*0.5; t = w*0.5;
#ifdef DO_NOT_USE_THIS
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
#else
p1 = t*pS[0]; z2=t*t;
p2 = pS[1]+t*pS[2]; z4=z2*z2;
p3 = pS[3]+t*pS[4]; z6=z4*z2;
q1 = one+t*qS[1];
q2 = qS[2]+t*qS[3];
p = p1 + z2*p2 + z4*p3 + z6*pS[5];
q = q1 + z2*q2 + z4*qS[4];
#endif
s = __ieee754_sqrt(t); s = __ieee754_sqrt(t);
if(ix>=0x3FEF3333) { /* if |x| > 0.975 */ if(ix>=0x3FEF3333) { /* if |x| > 0.975 */
w = p/q; w = p/q;

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_j0.c,v 1.8 1995/05/10 20:45:23 jtc Exp $"; static char rcsid[] = "$NetBSD: e_j0.c,v 1.8 1995/05/10 20:45:23 jtc Exp $";
@ -78,14 +81,14 @@ one = 1.0,
invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
/* R0/S0 on [0, 2.00] */ /* R0/S0 on [0, 2.00] */
R02 = 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */ R[] = {0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */ -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
R04 = 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */ 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
R05 = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */ -4.61832688532103189199e-09}, /* 0xBE33D5E7, 0x73D63FCE */
S01 = 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */ S[] = {0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ 1.16614003333790000205e-09}; /* 0x3E1408BC, 0xF4745D8F */
#ifdef __STDC__ #ifdef __STDC__
static const double zero = 0.0; static const double zero = 0.0;
@ -100,7 +103,7 @@ static double zero = 0.0;
double x; double x;
#endif #endif
{ {
double z, s,c,ss,cc,r,u,v; double z, s,c,ss,cc,r,u,v,r1,r2,s1,s2,z2,z4;
int32_t hx,ix; int32_t hx,ix;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
@ -135,8 +138,17 @@ static double zero = 0.0;
} }
} }
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
r = z*(R02+z*(R03+z*(R04+z*R05))); r = z*(R02+z*(R03+z*(R04+z*R05)));
s = one+z*(S01+z*(S02+z*(S03+z*S04))); s = one+z*(S01+z*(S02+z*(S03+z*S04)));
#else
r1 = z*R[2]; z2=z*z;
r2 = R[3]+z*R[4]; z4=z2*z2;
r = r1 + z2*r2 + z4*R[5];
s1 = one+z*S[1];
s2 = S[2]+z*S[3];
s = s1 + z2*s2 + z4*S[4];
#endif
if(ix < 0x3FF00000) { /* |x| < 1.00 */ if(ix < 0x3FF00000) { /* |x| < 1.00 */
return one + z*(-0.25+(r/s)); return one + z*(-0.25+(r/s));
} else { } else {
@ -150,17 +162,17 @@ static const double
#else #else
static double static double
#endif #endif
u00 = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */ U[] = {-7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
u01 = 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */ 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
u02 = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */ -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
u03 = 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */ 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
u04 = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */ -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
u05 = 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */ 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
u06 = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */ -3.98205194132103398453e-11}, /* 0xBDC5E43D, 0x693FB3C8 */
v01 = 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */ V[] = {1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */ 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */ 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ 4.41110311332675467403e-10}; /* 0x3DFE5018, 0x3BD6D9EF */
#ifdef __STDC__ #ifdef __STDC__
double __ieee754_y0(double x) double __ieee754_y0(double x)
@ -169,7 +181,7 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
double x; double x;
#endif #endif
{ {
double z, s,c,ss,cc,u,v; double z, s,c,ss,cc,u,v,z2,z4,z6,u1,u2,u3,v1,v2;
int32_t hx,ix,lx; int32_t hx,ix,lx;
EXTRACT_WORDS(hx,lx,x); EXTRACT_WORDS(hx,lx,x);
@ -211,11 +223,21 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
return z; return z;
} }
if(ix<=0x3e400000) { /* x < 2**-27 */ if(ix<=0x3e400000) { /* x < 2**-27 */
return(u00 + tpi*__ieee754_log(x)); return(U[0] + tpi*__ieee754_log(x));
} }
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06))))); u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
v = one+z*(v01+z*(v02+z*(v03+z*v04))); v = one+z*(v01+z*(v02+z*(v03+z*v04)));
#else
u1 = U[0]+z*U[1]; z2=z*z;
u2 = U[2]+z*U[3]; z4=z2*z2;
u3 = U[4]+z*U[5]; z6=z4*z2;
u = u1 + z2*u2 + z4*u3 + z6*U[6];
v1 = one+z*V[1];
v2 = V[2]+z*V[3];
v = v1 + z2*v2 + z4*V[4];
#endif
return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x))); return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x)));
} }
@ -336,7 +358,7 @@ static double pS2[5] = {
#else #else
double *p,*q; double *p,*q;
#endif #endif
double z,r,s; double z,r,s,z2,z4,r1,r2,r3,s1,s2,s3;
int32_t ix; int32_t ix;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
@ -345,8 +367,19 @@ static double pS2[5] = {
else if(ix>=0x4006DB6D){p = pR3; q= pS3;} else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
else if(ix>=0x40000000){p = pR2; q= pS2;} else if(ix>=0x40000000){p = pR2; q= pS2;}
z = one/(x*x); z = one/(x*x);
#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
#else
r1 = p[0]+z*p[1]; z2=z*z;
r2 = p[2]+z*p[3]; z4=z2*z2;
r3 = p[4]+z*p[5];
r = r1 + z2*r2 + z4*r3;
s1 = one+z*q[0];
s2 = q[1]+z*q[2];
s3 = q[3]+z*q[4];
s = s1 + z2*s2 + z4*s3;
#endif
return one+ r/s; return one+ r/s;
} }
@ -472,7 +505,7 @@ static double qS2[6] = {
#else #else
double *p,*q; double *p,*q;
#endif #endif
double s,r,z; double s,r,z,z2,z4,z6,r1,r2,r3,s1,s2,s3;
int32_t ix; int32_t ix;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
@ -481,7 +514,18 @@ static double qS2[6] = {
else if(ix>=0x4006DB6D){p = qR3; q= qS3;} else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
else if(ix>=0x40000000){p = qR2; q= qS2;} else if(ix>=0x40000000){p = qR2; q= qS2;}
z = one/(x*x); z = one/(x*x);
#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
#else
r1 = p[0]+z*p[1]; z2=z*z;
r2 = p[2]+z*p[3]; z4=z2*z2;
r3 = p[4]+z*p[5]; z6=z4*z2;
r= r1 + z2*r2 + z4*r3;
s1 = one+z*q[0];
s2 = q[1]+z*q[2];
s3 = q[3]+z*q[4];
s = s1 + z2*s2 + z4*s3 +z6*q[5];
#endif
return (-.125 + r/s)/x; return (-.125 + r/s)/x;
} }

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_j1.c,v 1.8 1995/05/10 20:45:27 jtc Exp $"; static char rcsid[] = "$NetBSD: e_j1.c,v 1.8 1995/05/10 20:45:27 jtc Exp $";
@ -78,15 +81,15 @@ one = 1.0,
invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
/* R0/S0 on [0,2] */ /* R0/S0 on [0,2] */
r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */ R[] = {-6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */ 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */ -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
r03 = 4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */ 4.96727999609584448412e-08}, /* 0x3E6AAAFA, 0x46CA0BD9 */
s01 = 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */ S[] = {0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
s02 = 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */ 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */ 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */ 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ 1.23542274426137913908e-11}; /* 0x3DAB2ACF, 0xCFB97ED8 */
#ifdef __STDC__ #ifdef __STDC__
static const double zero = 0.0; static const double zero = 0.0;
@ -101,7 +104,7 @@ static double zero = 0.0;
double x; double x;
#endif #endif
{ {
double z, s,c,ss,cc,r,u,v,y; double z, s,c,ss,cc,r,u,v,y,r1,r2,s1,s2,s3,z2,z4;
int32_t hx,ix; int32_t hx,ix;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
@ -134,9 +137,20 @@ static double zero = 0.0;
if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */ if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */
} }
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
r = z*(r00+z*(r01+z*(r02+z*r03))); r = z*(r00+z*(r01+z*(r02+z*r03)));
s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05)))); s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
r *= x; r *= x;
#else
r1 = z*R[0]; z2=z*z;
r2 = R[1]+z*R[2]; z4=z2*z2;
r = r1 + z2*r2 + z4*R[3];
r *= x;
s1 = one+z*S[1];
s2 = S[2]+z*S[3];
s3 = S[4]+z*S[5];
s = s1 + z2*s2 + z4*s3;
#endif
return(x*0.5+r/s); return(x*0.5+r/s);
} }
@ -170,7 +184,7 @@ static double V0[5] = {
double x; double x;
#endif #endif
{ {
double z, s,c,ss,cc,u,v; double z, s,c,ss,cc,u,v,u1,u2,v1,v2,v3,z2,z4;
int32_t hx,ix,lx; int32_t hx,ix,lx;
EXTRACT_WORDS(hx,lx,x); EXTRACT_WORDS(hx,lx,x);
@ -211,8 +225,18 @@ static double V0[5] = {
return(-tpi/x); return(-tpi/x);
} }
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
#else
u1 = U0[0]+z*U0[1];z2=z*z;
u2 = U0[2]+z*U0[3];z4=z2*z2;
u = u1 + z2*u2 + z4*U0[4];
v1 = one+z*V0[0];
v2 = V0[1]+z*V0[2];
v3 = V0[3]+z*V0[4];
v = v1 + z2*v2 + z4*v3;
#endif
return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x)); return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x));
} }
@ -334,7 +358,7 @@ static double ps2[5] = {
#else #else
double *p,*q; double *p,*q;
#endif #endif
double z,r,s; double z,r,s,r1,r2,r3,s1,s2,s3,z2,z4;
int32_t ix; int32_t ix;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
@ -343,8 +367,19 @@ static double ps2[5] = {
else if(ix>=0x4006DB6D){p = pr3; q= ps3;} else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
else if(ix>=0x40000000){p = pr2; q= ps2;} else if(ix>=0x40000000){p = pr2; q= ps2;}
z = one/(x*x); z = one/(x*x);
#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
#else
r1 = p[0]+z*p[1]; z2=z*z;
r2 = p[2]+z*p[3]; z4=z2*z2;
r3 = p[4]+z*p[5];
r = r1 + z2*r2 + z4*r3;
s1 = one+z*q[0];
s2 = q[1]+z*q[2];
s3 = q[3]+z*q[4];
s = s1 + z2*s2 + z4*s3;
#endif
return one+ r/s; return one+ r/s;
} }
@ -471,7 +506,7 @@ static double qs2[6] = {
#else #else
double *p,*q; double *p,*q;
#endif #endif
double s,r,z; double s,r,z,r1,r2,r3,s1,s2,s3,z2,z4,z6;
int32_t ix; int32_t ix;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
@ -480,7 +515,18 @@ static double qs2[6] = {
else if(ix>=0x4006DB6D){p = qr3; q= qs3;} else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
else if(ix>=0x40000000){p = qr2; q= qs2;} else if(ix>=0x40000000){p = qr2; q= qs2;}
z = one/(x*x); z = one/(x*x);
#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
#else
r1 = p[0]+z*p[1]; z2=z*z;
r2 = p[2]+z*p[3]; z4=z2*z2;
r3 = p[4]+z*p[5]; z6=z4*z2;
r = r1 + z2*r2 + z4*r3;
s1 = one+z*q[0];
s2 = q[1]+z*q[2];
s3 = q[3]+z*q[4];
s = s1 + z2*s2 + z4*s3 + z6*q[5];
#endif
return (.375 + r/s)/x; return (.375 + r/s)/x;
} }

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $"; static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
@ -67,7 +70,8 @@ static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#define half Lg[8]
#define two Lg[9]
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
@ -76,14 +80,16 @@ static double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ Lg[] = {0.0,
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1.479819860511658591e-01, /* 3FC2F112 DF3E5244 */
0.5,
2.0};
#ifdef __STDC__ #ifdef __STDC__
static const double zero = 0.0; static const double zero = 0.0;
#else #else
@ -97,7 +103,7 @@ static double zero = 0.0;
double x; double x;
#endif #endif
{ {
double hfsq,f,s,z,R,w,t1,t2,dk; double hfsq,f,s,z,R,w,t1,t2,dk,t11,t12,t21,t22,w2,zw2;
int32_t k,hx,i,j; int32_t k,hx,i,j;
u_int32_t lx; u_int32_t lx;
@ -121,20 +127,28 @@ static double zero = 0.0;
if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */ if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
if(f==zero) if(k==0) return zero; else {dk=(double)k; if(f==zero) if(k==0) return zero; else {dk=(double)k;
return dk*ln2_hi+dk*ln2_lo;} return dk*ln2_hi+dk*ln2_lo;}
R = f*f*(0.5-0.33333333333333333*f); R = f*f*(half-0.33333333333333333*f);
if(k==0) return f-R; else {dk=(double)k; if(k==0) return f-R; else {dk=(double)k;
return dk*ln2_hi-((R-dk*ln2_lo)-f);} return dk*ln2_hi-((R-dk*ln2_lo)-f);}
} }
s = f/(2.0+f); s = f/(two+f);
dk = (double)k; dk = (double)k;
z = s*s; z = s*s;
i = hx-0x6147a; i = hx-0x6147a;
w = z*z; w = z*z;
j = 0x6b851-hx; j = 0x6b851-hx;
#ifdef DO_NOT_USE_THIS
t1= w*(Lg2+w*(Lg4+w*Lg6)); t1= w*(Lg2+w*(Lg4+w*Lg6));
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
i |= j;
R = t2+t1; R = t2+t1;
#else
t21 = Lg[5]+w*Lg[7]; w2=w*w;
t22 = Lg[1]+w*Lg[3]; zw2=z*w2;
t11 = Lg[4]+w*Lg[6];
t12 = w*Lg[2];
R = t12 + w2*t11 + z*t22 + zw2*t21;
#endif
i |= j;
if(i>0) { if(i>0) {
hfsq=0.5*f*f; hfsq=0.5*f*f;
if(k==0) return f-(hfsq-s*(hfsq+R)); else if(k==0) return f-(hfsq-s*(hfsq+R)); else

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $"; static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
@ -61,6 +64,33 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#define zero C[0]
#define one C[1]
#define two C[2]
#define two53 C[3]
#define huge C[4]
#define tiny C[5]
#define L1 C[6]
#define L2 C[7]
#define L3 C[8]
#define L4 C[9]
#define L5 C[10]
#define L6 C[11]
#define P1 C[12]
#define P2 C[13]
#define P3 C[14]
#define P4 C[15]
#define P5 C[16]
#define lg2 C[17]
#define lg2_h C[18]
#define lg2_l C[19]
#define ovt C[20]
#define cp C[21]
#define cp_h C[22]
#define cp_l C[23]
#define ivln2 C[24]
#define ivln2_h C[25]
#define ivln2_l C[26]
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
@ -70,34 +100,34 @@ static double
bp[] = {1.0, 1.5,}, bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */ dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
zero = 0.0, C[] = {
one = 1.0, 0.0,
two = 2.0, 1.0,
two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */ 2.0,
huge = 1.0e300, 9007199254740992.0 ,
tiny = 1.0e-300, 1.0e300,
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */ 1.0e-300,
L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */ 5.99999999999994648725e-01 ,
L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */ 4.28571428578550184252e-01 ,
L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */ 3.33333329818377432918e-01 ,
L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */ 2.72728123808534006489e-01 ,
L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */ 2.30660745775561754067e-01 ,
L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */ 2.06975017800338417784e-01 ,
P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ 1.66666666666666019037e-01 ,
P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ -2.77777777770155933842e-03 ,
P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ 6.61375632143793436117e-05 ,
P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ -1.65339022054652515390e-06 ,
P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */ 4.13813679705723846039e-08 ,
lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ 6.93147180559945286227e-01 ,
lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */ 6.93147182464599609375e-01 ,
lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */ -1.90465429995776804525e-09 ,
ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */ 8.0085662595372944372e-0017 ,
cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */ 9.61796693925975554329e-01 ,
cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */ 9.61796700954437255859e-01 ,
cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/ -7.02846165095275826516e-09 ,
ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */ 1.44269504088896338700e+00 ,
ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/ 1.44269502162933349609e+00 ,
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ 1.92596299112661746887e-08 };
#ifdef __STDC__ #ifdef __STDC__
double __ieee754_pow(double x, double y) double __ieee754_pow(double x, double y)
@ -107,7 +137,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
#endif #endif
{ {
double z,ax,z_h,z_l,p_h,p_l; double z,ax,z_h,z_l,p_h,p_l;
double y1,t1,t2,r,s,t,u,v,w; double y1,t1,t2,r,s,t,u,v,w, t12,t14,r_1,r_2,r_3;
int32_t i,j,k,yisint,n; int32_t i,j,k,yisint,n;
int32_t hx,hy,ix,iy; int32_t hx,hy,ix,iy;
u_int32_t lx,ly; u_int32_t lx,ly;
@ -117,7 +147,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
ix = hx&0x7fffffff; iy = hy&0x7fffffff; ix = hx&0x7fffffff; iy = hy&0x7fffffff;
/* y==zero: x**0 = 1 */ /* y==zero: x**0 = 1 */
if((iy|ly)==0) return one; if((iy|ly)==0) return C[1];
/* +-NaN return x+y */ /* +-NaN return x+y */
if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
@ -150,12 +180,12 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(((ix-0x3ff00000)|lx)==0) if(((ix-0x3ff00000)|lx)==0)
return y - y; /* inf**+-1 is NaN */ return y - y; /* inf**+-1 is NaN */
else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */ else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
return (hy>=0)? y: zero; return (hy>=0)? y: C[0];
else /* (|x|<1)**-,+inf = inf,0 */ else /* (|x|<1)**-,+inf = inf,0 */
return (hy<0)?-y: zero; return (hy<0)?-y: C[0];
} }
if(iy==0x3ff00000) { /* y is +-1 */ if(iy==0x3ff00000) { /* y is +-1 */
if(hy<0) return one/x; else return x; if(hy<0) return C[1]/x; else return x;
} }
if(hy==0x40000000) return x*x; /* y is 2 */ if(hy==0x40000000) return x*x; /* y is 2 */
if(hy==0x3fe00000) { /* y is 0.5 */ if(hy==0x3fe00000) { /* y is 0.5 */
@ -169,7 +199,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(lx==0) { if(lx==0) {
if(ix==0x7ff00000||ix==0||ix==0x3ff00000){ if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
z = ax; /*x is +-0,+-inf,+-1*/ z = ax; /*x is +-0,+-inf,+-1*/
if(hy<0) z = one/z; /* z = (1/|x|) */ if(hy<0) z = C[1]/z; /* z = (1/|x|) */
if(hx<0) { if(hx<0) {
if(((ix-0x3ff00000)|yisint)==0) { if(((ix-0x3ff00000)|yisint)==0) {
z = (z-z)/(z-z); /* (-1)**non-int is NaN */ z = (z-z)/(z-z); /* (-1)**non-int is NaN */
@ -186,27 +216,27 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
/* |y| is huge */ /* |y| is huge */
if(iy>0x41e00000) { /* if |y| > 2**31 */ if(iy>0x41e00000) { /* if |y| > 2**31 */
if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */ if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */
if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny; if(ix<=0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny; if(ix>=0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
} }
/* over/underflow if x is not close to one */ /* over/underflow if x is not close to one */
if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny; if(ix<0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny; if(ix>0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
/* now |1-x| is tiny <= 2**-20, suffice to compute /* now |1-x| is tiny <= 2**-20, suffice to compute
log(x) by x-x^2/2+x^3/3-x^4/4 */ log(x) by x-x^2/2+x^3/3-x^4/4 */
t = x-1; /* t has 20 trailing zeros */ t = x-1; /* t has 20 trailing zeros */
w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25)); w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
u = ivln2_h*t; /* ivln2_h has 21 sig. bits */ u = C[25]*t; /* ivln2_h has 21 sig. bits */
v = t*ivln2_l-w*ivln2; v = t*C[26]-w*C[24];
t1 = u+v; t1 = u+v;
SET_LOW_WORD(t1,0); SET_LOW_WORD(t1,0);
t2 = v-(t1-u); t2 = v-(t1-u);
} else { } else {
double s2,s_h,s_l,t_h,t_l; double s2,s_h,s_l,t_h,t_l,s22,s24,s26,r1,r2,r3;
n = 0; n = 0;
/* take care subnormal number */ /* take care subnormal number */
if(ix<0x00100000) if(ix<0x00100000)
{ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); } {ax *= C[3]; n -= 53; GET_HIGH_WORD(ix,ax); }
n += ((ix)>>20)-0x3ff; n += ((ix)>>20)-0x3ff;
j = ix&0x000fffff; j = ix&0x000fffff;
/* determine interval */ /* determine interval */
@ -218,18 +248,25 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
v = one/(ax+bp[k]); v = C[1]/(ax+bp[k]);
s = u*v; s = u*v;
s_h = s; s_h = s;
SET_LOW_WORD(s_h,0); SET_LOW_WORD(s_h,0);
/* t_h=ax+bp[k] High */ /* t_h=ax+bp[k] High */
t_h = zero; t_h = C[0];
SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18)); SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18));
t_l = ax - (t_h-bp[k]); t_l = ax - (t_h-bp[k]);
s_l = v*((u-s_h*t_h)-s_h*t_l); s_l = v*((u-s_h*t_h)-s_h*t_l);
/* compute log(ax) */ /* compute log(ax) */
s2 = s*s; s2 = s*s;
#ifdef DO_NOT_USE_THIS
r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6))))); r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
#else
r1 = C[10]+s2*C[11]; s22=s2*s2;
r2 = C[8]+s2*C[9]; s24=s22*s22;
r3 = C[6]+s2*C[7]; s26=s24*s22;
r = r3*s22 + r2*s24 + r1*s26;
#endfi
r += s_l*(s_h+s); r += s_l*(s_h+s);
s2 = s_h*s_h; s2 = s_h*s_h;
t_h = 3.0+s2+r; t_h = 3.0+s2+r;
@ -242,8 +279,8 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
p_h = u+v; p_h = u+v;
SET_LOW_WORD(p_h,0); SET_LOW_WORD(p_h,0);
p_l = v-(p_h-u); p_l = v-(p_h-u);
z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ z_h = C[22]*p_h; /* cp_h+cp_l = 2/(3*log2) */
z_l = cp_l*p_h+p_l*cp+dp_l[k]; z_l = C[23]*p_h+p_l*C[21]+dp_l[k];
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (double)n; t = (double)n;
t1 = (((z_h+z_l)+dp_h[k])+t); t1 = (((z_h+z_l)+dp_h[k])+t);
@ -251,9 +288,9 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
t2 = z_l-(((t1-t)-dp_h[k])-z_h); t2 = z_l-(((t1-t)-dp_h[k])-z_h);
} }
s = one; /* s (sign of result -ve**odd) = -1 else = 1 */ s = C[1]; /* s (sign of result -ve**odd) = -1 else = 1 */
if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0) if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
s = -one;/* (-ve)**(odd int) */ s = -C[1];/* (-ve)**(odd int) */
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
y1 = y; y1 = y;
@ -264,15 +301,15 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
EXTRACT_WORDS(j,i,z); EXTRACT_WORDS(j,i,z);
if (j>=0x40900000) { /* z >= 1024 */ if (j>=0x40900000) { /* z >= 1024 */
if(((j-0x40900000)|i)!=0) /* if z > 1024 */ if(((j-0x40900000)|i)!=0) /* if z > 1024 */
return s*huge*huge; /* overflow */ return s*C[4]*C[4]; /* overflow */
else { else {
if(p_l+ovt>z-p_h) return s*huge*huge; /* overflow */ if(p_l+C[20]>z-p_h) return s*C[4]*C[4]; /* overflow */
} }
} else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */ } else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */
if(((j-0xc090cc00)|i)!=0) /* z < -1075 */ if(((j-0xc090cc00)|i)!=0) /* z < -1075 */
return s*tiny*tiny; /* underflow */ return s*C[5]*C[5]; /* underflow */
else { else {
if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */ if(p_l<=z-p_h) return s*C[5]*C[5]; /* underflow */
} }
} }
/* /*
@ -284,7 +321,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */ if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
n = j+(0x00100000>>(k+1)); n = j+(0x00100000>>(k+1));
k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */ k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */
t = zero; t = C[0];
SET_HIGH_WORD(t,n&~(0x000fffff>>k)); SET_HIGH_WORD(t,n&~(0x000fffff>>k));
n = ((n&0x000fffff)|0x00100000)>>(20-k); n = ((n&0x000fffff)|0x00100000)>>(20-k);
if(j<0) n = -n; if(j<0) n = -n;
@ -292,14 +329,21 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
} }
t = p_l+p_h; t = p_l+p_h;
SET_LOW_WORD(t,0); SET_LOW_WORD(t,0);
u = t*lg2_h; u = t*C[18];
v = (p_l-(t-p_h))*lg2+t*lg2_l; v = (p_l-(t-p_h))*C[17]+t*C[19];
z = u+v; z = u+v;
w = v-(z-u); w = v-(z-u);
t = z*z; t = z*z;
t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); #ifdef DO_NOT_USE_THIS
r = (z*t1)/(t1-two)-(w+z*w); t1 = z - t*(C[12]+t*(C[13]+t*(C[14]+t*(C[15]+t*C[16]))));
z = one-(r-z); #else
r_1 = C[15]+t*C[16]; t12 = t*t;
r_2 = C[13]+t*C[14]; t14 = t12*t12;
r_3 = t*C[12];
t1 = z - r_3 - t12*r_2 - t14*r_1;
#endif
r = (z*t1)/(t1-C[2])-(w+z*w);
z = C[1]-(r-z);
GET_HIGH_WORD(j,z); GET_HIGH_WORD(j,z);
j += (n<<20); j += (n<<20);
if((j>>20)<=0) z = __scalbn(z,n); /* subnormal output */ if((j>>20)<=0) z = __scalbn(z,n); /* subnormal output */

View File

@ -5,10 +5,13 @@
* *
* Developed at SunPro, a Sun Microsystems, Inc. business. * Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this * Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice * software is freely granted, provided that this notice
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $"; static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
@ -18,7 +21,7 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
* __kernel_cos( x, y ) * __kernel_cos( x, y )
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
* Input x is assumed to be bounded by ~pi/4 in magnitude. * Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x. * Input y is the tail of x.
* *
* Algorithm * Algorithm
* 1. Since cos(-x) = cos(x), we need only to consider positive x. * 1. Since cos(-x) = cos(x), we need only to consider positive x.
@ -28,15 +31,15 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
* 4 14 * 4 14
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
* where the remez error is * where the remez error is
* *
* | 2 4 6 8 10 12 14 | -58 * | 2 4 6 8 10 12 14 | -58
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
* | | * | |
* *
* 4 6 8 10 12 14 * 4 6 8 10 12 14
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
* cos(x) = 1 - x*x/2 + r * cos(x) = 1 - x*x/2 + r
* since cos(x+y) ~ cos(x) - sin(x)*y * since cos(x+y) ~ cos(x) - sin(x)*y
* ~ cos(x) - x*y, * ~ cos(x) - x*y,
* a correction term is necessary in cos(x) and hence * a correction term is necessary in cos(x) and hence
* cos(x+y) = 1 - (x*x/2 - (r - x*y)) * cos(x+y) = 1 - (x*x/2 - (r - x*y))
@ -53,17 +56,18 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
#include "math_private.h" #include "math_private.h"
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
static double static double
#endif #endif
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ C[] = {
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
-1.13596475577881948265e-11}; /* 0xBDA8FAE9, 0xBE8838D4 */
#ifdef __STDC__ #ifdef __STDC__
double __kernel_cos(double x, double y) double __kernel_cos(double x, double y)
@ -72,17 +76,24 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double x,y; double x,y;
#endif #endif
{ {
double a,hz,z,r,qx; double a,hz,z,r,qx,r1,r2,r3,z1,z2,z3;
int32_t ix; int32_t ix;
z = x*x;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; /* ix = |x|'s high word*/ ix &= 0x7fffffff; /* ix = |x|'s high word*/
if(ix<0x3e400000) { /* if x < 2**27 */ if(ix<0x3e400000) { /* if x < 2**27 */
if(((int)x)==0) return one; /* generate inexact */ if(((int)x)==0) return C[0]; /* generate inexact */
} }
z = x*x; #ifdef DO_NOT_USE_THIS
r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
if(ix < 0x3FD33333) /* if |x| < 0.3 */ #else
return one - (0.5*z - (z*r - x*y)); r1=z*C[6];r1=r1+C[5];z1=z*z;
r2=z*C[4];r2=r2+C[3];z2=z1*z;
r3=z*C[2];r3=r3+C[1];z3=z2*z1;
r=z3*r1+z2*r2+z*r3;
#endif
if(ix < 0x3FD33333) /* if |x| < 0.3 */
return C[0] - (0.5*z - (z*r - x*y));
else { else {
if(ix > 0x3fe90000) { /* x > 0.78125 */ if(ix > 0x3fe90000) { /* x > 0.78125 */
qx = 0.28125; qx = 0.28125;
@ -90,7 +101,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
INSERT_WORDS(qx,ix-0x00200000,0); /* x/4 */ INSERT_WORDS(qx,ix-0x00200000,0); /* x/4 */
} }
hz = 0.5*z-qx; hz = 0.5*z-qx;
a = one-qx; a = C[0]-qx;
return a - (hz - (z*r-x*y)); return a - (hz - (z*r-x*y));
} }
} }

View File

@ -5,10 +5,13 @@
* *
* Developed at SunPro, a Sun Microsystems, Inc. business. * Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this * Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice * software is freely granted, provided that this notice
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $"; static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
@ -18,24 +21,24 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
* kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude. * Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x. * Input y is the tail of x.
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0). * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
* *
* Algorithm * Algorithm
* 1. Since sin(-x) = -sin(x), we need only to consider positive x. * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
* 3. sin(x) is approximated by a polynomial of degree 13 on * 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4] * [0,pi/4]
* 3 13 * 3 13
* sin(x) ~ x + S1*x + ... + S6*x * sin(x) ~ x + S1*x + ... + S6*x
* where * where
* *
* |sin(x) 2 4 6 8 10 12 | -58 * |sin(x) 2 4 6 8 10 12 | -58
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
* | x | * | x |
* *
* 4. sin(x+y) = sin(x) + sin'(x')*y * 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y * ~ sin(x) + (1-x*x/2)*y
* For better accuracy, let * For better accuracy, let
* 3 2 2 2 2 * 3 2 2 2 2
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
* then 3 2 * then 3 2
@ -46,17 +49,18 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
#include "math_private.h" #include "math_private.h"
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
static double static double
#endif #endif
half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ S[] = {
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
1.58969099521155010221e-10}; /* 0x3DE5D93A, 0x5ACFD57C */
#ifdef __STDC__ #ifdef __STDC__
double __kernel_sin(double x, double y, int iy) double __kernel_sin(double x, double y, int iy)
@ -65,7 +69,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double x,y; int iy; /* iy=0 if y is zero */ double x,y; int iy; /* iy=0 if y is zero */
#endif #endif
{ {
double z,r,v; double z,r,v,z1,r1,r2;
int32_t ix; int32_t ix;
GET_HIGH_WORD(ix,x); GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; /* high word of x */ ix &= 0x7fffffff; /* high word of x */
@ -73,7 +77,15 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
{if((int)x==0) return x;} /* generate inexact */ {if((int)x==0) return x;} /* generate inexact */
z = x*x; z = x*x;
v = z*x; v = z*x;
#ifdef DO_NOT_USE_THIS
r = S2+z*(S3+z*(S4+z*(S5+z*S6))); r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
if(iy==0) return x+v*(S1+z*r); if(iy==0) return x+v*(S1+z*r);
else return x-((z*(half*y-v*r)-y)-v*S1); else return x-((z*(half*y-v*r)-y)-v*S1);
#else
r1 = S[5]+z*S[6]; z1 = z*z*z;
r2 = S[3]+z*S[4];
r = S[2] + z*r2 + z1*r1;
if(iy==0) return x+v*(S[1]+z*r);
else return x-((z*(S[0]*y-v*r)-y)-v*S[1]);
#endif
} }

View File

@ -5,10 +5,13 @@
* *
* Developed at SunPro, a Sun Microsystems, Inc. business. * Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this * Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice * software is freely granted, provided that this notice
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $"; static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
@ -18,25 +21,25 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
* kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude. * Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x. * Input y is the tail of x.
* Input k indicates whether tan (if k=1) or * Input k indicates whether tan (if k=1) or
* -1/tan (if k= -1) is returned. * -1/tan (if k= -1) is returned.
* *
* Algorithm * Algorithm
* 1. Since tan(-x) = -tan(x), we need only to consider positive x. * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
* 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
* 3. tan(x) is approximated by a odd polynomial of degree 27 on * 3. tan(x) is approximated by a odd polynomial of degree 27 on
* [0,0.67434] * [0,0.67434]
* 3 27 * 3 27
* tan(x) ~ x + T1*x + ... + T13*x * tan(x) ~ x + T1*x + ... + T13*x
* where * where
* *
* |tan(x) 2 4 26 | -59.2 * |tan(x) 2 4 26 | -59.2
* |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
* | x | * | x |
* *
* Note: tan(x+y) = tan(x) + tan'(x)*y * Note: tan(x+y) = tan(x) + tan'(x)*y
* ~ tan(x) + (1+x*x)*y * ~ tan(x) + (1+x*x)*y
* Therefore, for better accuracy in computing tan(x+y), let * Therefore, for better accuracy in computing tan(x+y), let
* 3 2 2 2 2 * 3 2 2 2 2
* r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
* then * then
@ -51,9 +54,9 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
static double static double
#endif #endif
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
@ -81,7 +84,7 @@ T[] = {
double x,y; int iy; double x,y; int iy;
#endif #endif
{ {
double z,r,v,w,s; double z,r,v,w,s,r1,r2,r3,v1,v2,v3,w2,w4;
int32_t ix,hx; int32_t ix,hx;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff; /* high word of |x| */ ix = hx&0x7fffffff; /* high word of |x| */
@ -105,8 +108,19 @@ T[] = {
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
*/ */
#ifdef DO_NOT_USE_THIS
r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11])))); r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12]))))); v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
#else
v1 = T[10]+w*T[12]; w2=w*w;
v2 = T[6]+w*T[8]; w4=w2*w2;
v3 = T[2]+w*T[4]; v1=z*v1;
r1 = T[9]+w*T[11]; v2=z*v2;
r2 = T[5]+w*T[7]; v3=z*v3;
r3 = T[1]+w*T[3];
v = v3 + w2*v2 + w4*v1;
r = r3 + w2*r2 + w4*r1;
#endif
s = z*x; s = z*x;
r = y + z*(s*(r+v)+y); r = y + z*(s*(r+v)+y);
r += T[0]*s; r += T[0]*s;
@ -116,7 +130,7 @@ T[] = {
return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r))); return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
} }
if(iy==1) return w; if(iy==1) return w;
else { /* if allow error up to 2 ulp, else { /* if allow error up to 2 ulp,
simply return -1.0/(x+r) here */ simply return -1.0/(x+r) here */
/* compute -1.0/(x+r) accurately */ /* compute -1.0/(x+r) accurately */
double a,t; double a,t;

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_atan.c,v 1.8 1995/05/10 20:46:45 jtc Exp $"; static char rcsid[] = "$NetBSD: s_atan.c,v 1.8 1995/05/10 20:46:45 jtc Exp $";
@ -92,7 +95,7 @@ huge = 1.0e300;
double x; double x;
#endif #endif
{ {
double w,s1,s2,z; double w,s1,z,s,w2,w4,s11,s12,s13,s21,s22,s23;
int32_t ix,hx,id; int32_t ix,hx,id;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
@ -129,6 +132,7 @@ huge = 1.0e300;
z = x*x; z = x*x;
w = z*z; w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
#ifdef DO_NOT_USE_THIS
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10]))))); s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9])))); s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
if (id<0) return x - x*(s1+s2); if (id<0) return x - x*(s1+s2);
@ -136,6 +140,21 @@ huge = 1.0e300;
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return (hx<0)? -z:z; return (hx<0)? -z:z;
} }
#else
s11 = aT[8]+w*aT[10]; w2=w*w;
s12 = aT[4]+w*aT[6]; w4=w2*w2;
s13 = aT[0]+w*aT[2];
s21 = aT[7]+w*aT[9];
s22 = aT[3]+w*aT[5];
s23 = w*aT[1];
s1 = s13 + w2*s12 + w4*s11;
s = s23 + w2*s22 + w4*s21 + z*s1;
if (id<0) return x - x*(s);
else {
z = atanhi[id] - ((x*(s) - atanlo[id]) - x);
return (hx<0)? -z:z;
}
#endif
} }
weak_alias (__atan, atan) weak_alias (__atan, atan)
#ifdef NO_LONG_DOUBLE #ifdef NO_LONG_DOUBLE

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $"; static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $";
@ -128,68 +131,68 @@ erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
*/ */
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */ efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */ efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ pp[] = {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */ -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ qq[] = {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */ -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
/* /*
* Coefficients for approximation to erf in [0.84375,1.25] * Coefficients for approximation to erf in [0.84375,1.25]
*/ */
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ pa[] = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */ -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ qa[] = {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */ 1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
/* /*
* Coefficients for approximation to erfc in [1.25,1/0.35] * Coefficients for approximation to erfc in [1.25,1/0.35]
*/ */
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ ra[] = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */ -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ sa[] = {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */ -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
/* /*
* Coefficients for approximation to erfc in [1/.35,28] * Coefficients for approximation to erfc in [1/.35,28]
*/ */
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ rb[] = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */ -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ sb[] = {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */
#ifdef __STDC__ #ifdef __STDC__
double __erf(double x) double __erf(double x)
@ -208,21 +211,46 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
} }
if(ix < 0x3feb0000) { /* |x|<0.84375 */ if(ix < 0x3feb0000) { /* |x|<0.84375 */
double r1,r2,s1,s2,s3,z2,z4;
if(ix < 0x3e300000) { /* |x|<2**-28 */ if(ix < 0x3e300000) { /* |x|<2**-28 */
if (ix < 0x00800000) if (ix < 0x00800000)
return 0.125*(8.0*x+efx8*x); /*avoid underflow */ return 0.125*(8.0*x+efx8*x); /*avoid underflow */
return x + efx*x; return x + efx*x;
} }
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
#else
r1 = pp[0]+z*pp[1]; z2=z*z;
r2 = pp[2]+z*pp[3]; z4=z2*z2;
s1 = one+z*qq[1];
s2 = qq[2]+z*qq[3];
s3 = qq[4]+z*qq[5];
r = r1 + z2*r2 + z4*pp[4];
s = s1 + z2*s2 + z4*s3;
#endif
y = r/s; y = r/s;
return x + x*y; return x + x*y;
} }
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
s = fabs(x)-one; s = fabs(x)-one;
#ifdef DO_NOT_USE_THIS
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
#else
P1 = pa[0]+s*pa[1]; s2=s*s;
Q1 = one+s*qa[1]; s4=s2*s2;
P2 = pa[2]+s*pa[3]; s6=s4*s2;
Q2 = qa[2]+s*qa[3];
P3 = pa[4]+s*pa[5];
Q3 = qa[4]+s*qa[5];
P4 = s6*pa[6];
Q4 = s6*qa[6];
P = P1 + s2*P2 + s4*P3 + s6*P4;
Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
#endif
if(hx>=0) return erx + P/Q; else return -erx - P/Q; if(hx>=0) return erx + P/Q; else return -erx - P/Q;
} }
if (ix >= 0x40180000) { /* inf>|x|>=6 */ if (ix >= 0x40180000) { /* inf>|x|>=6 */
@ -231,15 +259,42 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
x = fabs(x); x = fabs(x);
s = one/(x*x); s = one/(x*x);
if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */ if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */
#ifdef DO_NOT_USE_THIS
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7)))))); ra5+s*(ra6+s*ra7))))));
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8))))))); sa5+s*(sa6+s*(sa7+s*sa8)))))));
#else
double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
R1 = ra[0]+s*ra[1];s2 = s*s;
S1 = one+s*sa[1]; s4 = s2*s2;
R2 = ra[2]+s*ra[3];s6 = s4*s2;
S2 = sa[2]+s*sa[3];s8 = s4*s4;
R3 = ra[4]+s*ra[5];
S3 = sa[4]+s*sa[5];
R4 = ra[6]+s*ra[7];
S4 = sa[6]+s*sa[7];
R = R1 + s2*R2 + s4*R3 + s6*R4;
S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
#endif
} else { /* |x| >= 1/0.35 */ } else { /* |x| >= 1/0.35 */
#ifdef DO_NOT_USE_THIS
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6))))); rb5+s*rb6)))));
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7)))))); sb5+s*(sb6+s*sb7))))));
#else
double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
R1 = rb[0]+s*rb[1];s2 = s*s;
S1 = one+s*sb[1]; s4 = s2*s2;
R2 = rb[2]+s*rb[3];s6 = s4*s2;
S2 = sb[2]+s*sb[3];
R3 = rb[4]+s*rb[5];
S3 = sb[4]+s*sb[5];
S4 = sb[6]+s*sb[7];
R = R1 + s2*R2 + s4*R3 + s6*rb[6];
S = S1 + s2*S2 + s4*S3 + s6*S4;
#endif
} }
z = x; z = x;
SET_LOW_WORD(z,0); SET_LOW_WORD(z,0);
@ -269,11 +324,22 @@ weak_alias (__erf, erfl)
} }
if(ix < 0x3feb0000) { /* |x|<0.84375 */ if(ix < 0x3feb0000) { /* |x|<0.84375 */
double r1,r2,s1,s2,s3,z2,z4;
if(ix < 0x3c700000) /* |x|<2**-56 */ if(ix < 0x3c700000) /* |x|<2**-56 */
return one-x; return one-x;
z = x*x; z = x*x;
#ifdef DO_NOT_USE_THIS
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
#else
r1 = pp[0]+z*pp[1]; z2=z*z;
r2 = pp[2]+z*pp[3]; z4=z2*z2;
s1 = one+z*qq[1];
s2 = qq[2]+z*qq[3];
s3 = qq[4]+z*qq[5];
r = r1 + z2*r2 + z4*pp[4];
s = s1 + z2*s2 + z4*s3;
#endif
y = r/s; y = r/s;
if(hx < 0x3fd00000) { /* x<1/4 */ if(hx < 0x3fd00000) { /* x<1/4 */
return one-(x+x*y); return one-(x+x*y);
@ -284,9 +350,23 @@ weak_alias (__erf, erfl)
} }
} }
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
s = fabs(x)-one; s = fabs(x)-one;
#ifdef DO_NOT_USE_THIS
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
#else
P1 = pa[0]+s*pa[1]; s2=s*s;
Q1 = one+s*qa[1]; s4=s2*s2;
P2 = pa[2]+s*pa[3]; s6=s4*s2;
Q2 = qa[2]+s*qa[3];
P3 = pa[4]+s*pa[5];
Q3 = qa[4]+s*qa[5];
P4 = s6*pa[6];
Q4 = s6*qa[6];
P = P1 + s2*P2 + s4*P3 + s6*P4;
Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
#endif
if(hx>=0) { if(hx>=0) {
z = one-erx; return z - P/Q; z = one-erx; return z - P/Q;
} else { } else {
@ -297,16 +377,43 @@ weak_alias (__erf, erfl)
x = fabs(x); x = fabs(x);
s = one/(x*x); s = one/(x*x);
if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/ if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
#ifdef DO_NOT_USE_THIS
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7)))))); ra5+s*(ra6+s*ra7))))));
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8))))))); sa5+s*(sa6+s*(sa7+s*sa8)))))));
#else
double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
R1 = ra[0]+s*ra[1];s2 = s*s;
S1 = one+s*sa[1]; s4 = s2*s2;
R2 = ra[2]+s*ra[3];s6 = s4*s2;
S2 = sa[2]+s*sa[3];s8 = s4*s4;
R3 = ra[4]+s*ra[5];
S3 = sa[4]+s*sa[5];
R4 = ra[6]+s*ra[7];
S4 = sa[6]+s*sa[7];
R = R1 + s2*R2 + s4*R3 + s6*R4;
S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
#endif
} else { /* |x| >= 1/.35 ~ 2.857143 */ } else { /* |x| >= 1/.35 ~ 2.857143 */
double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */ if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
#ifdef DO_NOT_USE_THIS
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6))))); rb5+s*rb6)))));
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7)))))); sb5+s*(sb6+s*sb7))))));
#else
R1 = rb[0]+s*rb[1];s2 = s*s;
S1 = one+s*sb[1]; s4 = s2*s2;
R2 = rb[2]+s*rb[3];s6 = s4*s2;
S2 = sb[2]+s*sb[3];
R3 = rb[4]+s*rb[5];
S3 = sb[4]+s*sb[5];
S4 = sb[6]+s*sb[7];
R = R1 + s2*R2 + s4*R3 + s6*rb[6];
S = S1 + s2*S2 + s4*S3 + s6*S4;
#endif
} }
z = x; z = x;
SET_LOW_WORD(z,0); SET_LOW_WORD(z,0);

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $"; static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $";
@ -111,7 +114,7 @@ static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $";
#include "math.h" #include "math.h"
#include "math_private.h" #include "math_private.h"
#define one Q[0]
#ifdef __STDC__ #ifdef __STDC__
static const double static const double
#else #else
@ -125,11 +128,11 @@ ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */
ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */ ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */
invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */ invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */
/* scaled coefficients related to expm1 */ /* scaled coefficients related to expm1 */
Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */ Q[] = {1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ -2.01099218183624371326e-07}; /* BE8AFDB7 6E09C32D */
#ifdef __STDC__ #ifdef __STDC__
double __expm1(double x) double __expm1(double x)
@ -138,7 +141,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double x; double x;
#endif #endif
{ {
double y,hi,lo,c,t,e,hxs,hfx,r1; double y,hi,lo,c,t,e,hxs,hfx,r1,h2,h4,R1,R2,R3;
int32_t k,xsb; int32_t k,xsb;
u_int32_t hx; u_int32_t hx;
@ -190,7 +193,14 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
/* x is now in primary range */ /* x is now in primary range */
hfx = 0.5*x; hfx = 0.5*x;
hxs = x*hfx; hxs = x*hfx;
#ifdef DO_NOT_USE_THIS
r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
#else
R1 = one+hxs*Q[1]; h2 = hxs*hxs;
R2 = Q[2]+hxs*Q[3]; h4 = h2*h2;
R3 = Q[4]+hxs*Q[5];
r1 = R1 + h2*R2 + h4*R3;
#endif
t = 3.0-r1*hfx; t = 3.0-r1*hfx;
e = hxs*((r1-t)/(6.0 - x*t)); e = hxs*((r1-t)/(6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */ if(k==0) return x - (x*e-hxs); /* c is 0 */

View File

@ -9,6 +9,9 @@
* is preserved. * is preserved.
* ==================================================== * ====================================================
*/ */
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
*/
#if defined(LIBM_SCCS) && !defined(lint) #if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_log1p.c,v 1.8 1995/05/10 20:47:46 jtc Exp $"; static char rcsid[] = "$NetBSD: s_log1p.c,v 1.8 1995/05/10 20:47:46 jtc Exp $";
@ -90,13 +93,13 @@ static double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ Lp[] = {0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ 1.479819860511658591e-01}; /* 3FC2F112 DF3E5244 */
#ifdef __STDC__ #ifdef __STDC__
static const double zero = 0.0; static const double zero = 0.0;
@ -111,7 +114,7 @@ static double zero = 0.0;
double x; double x;
#endif #endif
{ {
double hfsq,f,c,s,z,R,u; double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4;
int32_t k,hx,hu,ax; int32_t k,hx,hu,ax;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
@ -167,7 +170,15 @@ static double zero = 0.0;
} }
s = f/(2.0+f); s = f/(2.0+f);
z = s*s; z = s*s;
#ifdef DO_NOT_USE_THIS
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
#else
R1 = z*Lp[1]; z2=z*z;
R2 = Lp[2]+z*Lp[3]; z4=z2*z2;
R3 = Lp[4]+z*Lp[5]; z6=z4*z2;
R4 = Lp[6]+z*Lp[7];
R = R1 + z2*R2 + z4*R3 + z6*R4;
#endif
if(k==0) return f-(hfsq-s*(hfsq+R)); else if(k==0) return f-(hfsq-s*(hfsq+R)); else
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
} }