Fix powl inaccuracy for x86_64 and x86 (bug 13881).
This commit is contained in:
parent
0817d63dd1
commit
1bead169c3
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
||||
2012-11-28 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
[BZ #13881]
|
||||
* sysdeps/x86/fpu/powl_helper.c: New file.
|
||||
* sysdeps/x86/fpu/Makefile: Likewise.
|
||||
* sysdeps/i386/fpu/e_powl.S (limit): Remove object.
|
||||
(p3): New object.
|
||||
(__ieee754_powl): Use __powl_helper for finite arguments except
|
||||
integer exponents below 8.
|
||||
* sysdeps/x86_64/fpu/e_powl.S (limit): Remove object.
|
||||
(p3): New object.
|
||||
(__ieee754_powl): Use __powl_helper for finite arguments except
|
||||
integer exponents below 8.
|
||||
* math/libm-test.inc (pow_test): Add more tests and enable some
|
||||
previously disabled tests.
|
||||
* sysdeps/i386/fpu/libm-test-ulps: Update.
|
||||
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
|
||||
|
||||
2012-11-28 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||
Carlos O'Donell <carlos_odonell@mentor.com>
|
||||
|
||||
|
21
NEWS
21
NEWS
@ -12,16 +12,17 @@ Version 2.17
|
||||
1349, 3439, 3479, 3665, 5044, 5246, 5298, 5400, 6530, 6778, 6808, 9685,
|
||||
9914, 10014, 10038, 10631, 10873, 11438, 11607, 11638, 11741, 12140,
|
||||
13412, 13542, 13601, 13603, 13604, 13629, 13679, 13696, 13698, 13717,
|
||||
13741, 13759, 13763, 13939, 13950, 13952, 13966, 14042, 14047, 14090,
|
||||
14150, 14151, 14152, 14154, 14157, 14166, 14173, 14195, 14237, 14251,
|
||||
14252, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337, 14347,
|
||||
14349, 14368, 14376, 14417, 14459, 14476, 14477, 14501, 14505, 14510,
|
||||
14516, 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557,
|
||||
14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621,
|
||||
14638, 14645, 14648, 14652, 14660, 14661, 14669, 14672, 14683, 14694,
|
||||
14716, 14719, 14743, 14767, 14783, 14784, 14785, 14793, 14796, 14797,
|
||||
14801, 14805, 14807, 14809, 14811, 14815, 14821, 14822, 14824, 14828,
|
||||
14831, 14835, 14838, 14856, 14863, 14865, 14866, 14868, 14869, 14871.
|
||||
13741, 13759, 13763, 13881, 13939, 13950, 13952, 13966, 14042, 14047,
|
||||
14090, 14150, 14151, 14152, 14154, 14157, 14166, 14173, 14195, 14237,
|
||||
14251, 14252, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337,
|
||||
14347, 14349, 14368, 14376, 14417, 14459, 14476, 14477, 14501, 14505,
|
||||
14510, 14516, 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545,
|
||||
14557, 14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610,
|
||||
14621, 14638, 14645, 14648, 14652, 14660, 14661, 14669, 14672, 14683,
|
||||
14694, 14716, 14719, 14743, 14767, 14783, 14784, 14785, 14793, 14796,
|
||||
14797, 14801, 14805, 14807, 14809, 14811, 14815, 14821, 14822, 14824,
|
||||
14828, 14831, 14835, 14838, 14856, 14863, 14865, 14866, 14868, 14869,
|
||||
14871.
|
||||
|
||||
* Port to ARM AArch64 contributed by Linaro.
|
||||
|
||||
|
@ -8431,7 +8431,6 @@ pow_test (void)
|
||||
#endif
|
||||
TEST_ff_f (pow, -min_value, max_value, plus_zero, UNDERFLOW_EXCEPTION);
|
||||
|
||||
#ifndef TEST_LDOUBLE /* Bug 13881. */
|
||||
TEST_ff_f (pow, 0x0.ffffffp0, 10, 0.999999403953712118183885036774764444747L);
|
||||
TEST_ff_f (pow, 0x0.ffffffp0, 100, 0.999994039553108359406305079606228341585L);
|
||||
TEST_ff_f (pow, 0x0.ffffffp0, 1000, 0.9999403971297699052276650144650733772182L);
|
||||
@ -8447,16 +8446,44 @@ pow_test (void)
|
||||
TEST_ff_f (pow, 0x1.000002p0, 0x1p24, 7.3890552180866447284268641248075832310141L);
|
||||
TEST_ff_f (pow, 0x1.000002p0, 0x1.234566p29, 4.2107033006507495188536371520637025716256e+31L);
|
||||
TEST_ff_f (pow, 0x1.000002p0, -0x1.234566p29, 2.3749001736727769098946062325205705312166e-32L);
|
||||
#endif
|
||||
|
||||
/* Bug 13881: powl inaccurate so these tests disabled for long double. */
|
||||
#if !defined TEST_FLOAT && !defined TEST_LDOUBLE
|
||||
#if !defined TEST_FLOAT
|
||||
TEST_ff_f (pow, 0x0.fffffffffffff8p0L, 0x1.23456789abcdfp62L, 1.0118762747827252817436395051178295138220e-253L);
|
||||
TEST_ff_f (pow, 0x0.fffffffffffff8p0L, -0x1.23456789abcdfp62L, 9.8826311568054561811190162420900667121992e+252L);
|
||||
TEST_ff_f (pow, 0x1.0000000000001p0L, 0x1.23456789abcdfp61L, 9.8826311568044974397135026217687399395481e+252L);
|
||||
TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
|
||||
#endif
|
||||
|
||||
#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 64 && LDBL_MAX_EXP >= 16384
|
||||
TEST_ff_f (pow, 0x0.ffffffffffffffffp0L, 0x1.23456789abcdef0ep77L, 1.2079212226420368189981778807634890018840e-4048L);
|
||||
TEST_ff_f (pow, 0x0.ffffffffffffffffp0L, -0x1.23456789abcdef0ep77L, 8.2786855736563746280496724205839522148001e+4047L);
|
||||
TEST_ff_f (pow, 0x1.0000000000000002p0L, 0x1.23456789abcdef0ep76L, 8.2786855736563683535324500168799315131570e+4047L);
|
||||
TEST_ff_f (pow, 0x1.0000000000000002p0L, -0x1.23456789abcdef0ep76L, 1.2079212226420377344964713407722652880280e-4048L);
|
||||
#endif
|
||||
|
||||
#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 113
|
||||
TEST_ff_f (pow, 0x0.ffffffffffffffffffffffffffff8p0L, 0x1.23456789abcdef0123456789abcdp126L, 1.2079212226420440237790185999151440179953e-4048L);
|
||||
TEST_ff_f (pow, 0x0.ffffffffffffffffffffffffffff8p0L, -0x1.23456789abcdef0123456789abcdp126L, 8.2786855736563252489063231915535105363602e+4047L);
|
||||
TEST_ff_f (pow, 0x1.0000000000000000000000000001p0L, 0x1.23456789abcdef0123456789abcdp125L, 8.2786855736563252489063231915423647547782e+4047L);
|
||||
TEST_ff_f (pow, 0x1.0000000000000000000000000001p0L, -0x1.23456789abcdef0123456789abcdp125L, 1.2079212226420440237790185999167702696503e-4048L);
|
||||
#endif
|
||||
|
||||
#if defined TEST_LDOUBLE && LDBL_MAX_EXP >= 16384
|
||||
TEST_ff_f (pow, 1e4932L, 0.75L, 1e3699L);
|
||||
TEST_ff_f (pow, 1e4928L, 0.75L, 1e3696L);
|
||||
TEST_ff_f (pow, 1e4924L, 0.75L, 1e3693L);
|
||||
TEST_ff_f (pow, 1e4920L, 0.75L, 1e3690L);
|
||||
TEST_ff_f (pow, 10.0L, 4932.0L, 1e4932L);
|
||||
TEST_ff_f (pow, 10.0L, 4931.0L, 1e4931L);
|
||||
TEST_ff_f (pow, 10.0L, 4930.0L, 1e4930L);
|
||||
TEST_ff_f (pow, 10.0L, 4929.0L, 1e4929L);
|
||||
TEST_ff_f (pow, 10.0L, -4931.0L, 1e-4931L);
|
||||
TEST_ff_f (pow, 10.0L, -4930.0L, 1e-4930L);
|
||||
TEST_ff_f (pow, 10.0L, -4929.0L, 1e-4929L);
|
||||
TEST_ff_f (pow, 1e27L, 182.0L, 1e4914L);
|
||||
TEST_ff_f (pow, 1e27L, -182.0L, 1e-4914L);
|
||||
#endif
|
||||
|
||||
TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
|
||||
TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
|
||||
TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
|
||||
|
@ -26,9 +26,9 @@
|
||||
.type one,@object
|
||||
one: .double 1.0
|
||||
ASM_SIZE_DIRECTIVE(one)
|
||||
.type limit,@object
|
||||
limit: .double 0.29
|
||||
ASM_SIZE_DIRECTIVE(limit)
|
||||
.type p3,@object
|
||||
p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
|
||||
ASM_SIZE_DIRECTIVE(p3)
|
||||
.type p63,@object
|
||||
p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
|
||||
ASM_SIZE_DIRECTIVE(p63)
|
||||
@ -141,7 +141,15 @@ ENTRY(__ieee754_powl)
|
||||
fchs // -0x1p-79 : x
|
||||
jmp 3f
|
||||
|
||||
9: /* OK, we have an integer value for y. */
|
||||
9: /* OK, we have an integer value for y. Unless very small
|
||||
(we use < 8), use the algorithm for real exponent to avoid
|
||||
accumulation of errors. */
|
||||
fld %st // y : y : x
|
||||
fabs // |y| : y : x
|
||||
fcompl MO(p3) // y : x
|
||||
fnstsw
|
||||
sahf
|
||||
jnc 2f
|
||||
popl %eax
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
popl %edx
|
||||
@ -182,7 +190,7 @@ ENTRY(__ieee754_powl)
|
||||
|
||||
cfi_adjust_cfa_offset (8)
|
||||
.align ALIGNARG(4)
|
||||
2: // y is a large integer (absolute value at least 1L<<63), but
|
||||
2: // y is a large integer (absolute value at least 8), but
|
||||
// may be odd unless at least 1L<<64. So it may be necessary
|
||||
// to adjust the sign of a negative result afterwards.
|
||||
fxch // x : y
|
||||
@ -205,34 +213,21 @@ ENTRY(__ieee754_powl)
|
||||
fchs // -(1L<<78) : |x|
|
||||
.align ALIGNARG(4)
|
||||
3: /* y is a real number. */
|
||||
fxch // x : y
|
||||
fldl MO(one) // 1.0 : x : y
|
||||
fldl MO(limit) // 0.29 : 1.0 : x : y
|
||||
fld %st(2) // x : 0.29 : 1.0 : x : y
|
||||
fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
|
||||
fabs // |x-1| : 0.29 : 1.0 : x : y
|
||||
fucompp // 1.0 : x : y
|
||||
fnstsw
|
||||
fxch // x : 1.0 : y
|
||||
sahf
|
||||
ja 7f
|
||||
fsub %st(1) // x-1 : 1.0 : y
|
||||
fyl2xp1 // log2(x) : y
|
||||
jmp 8f
|
||||
|
||||
7: fyl2x // log2(x) : y
|
||||
8: fmul %st(1) // y*log2(x) : y
|
||||
fst %st(1) // y*log2(x) : y*log2(x)
|
||||
frndint // int(y*log2(x)) : y*log2(x)
|
||||
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
|
||||
fxch // fract(y*log2(x)) : int(y*log2(x))
|
||||
f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
|
||||
faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
|
||||
fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
|
||||
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
|
||||
subl $28, %esp
|
||||
cfi_adjust_cfa_offset (28)
|
||||
fstpt 12(%esp) // x
|
||||
fstpt (%esp) // <empty>
|
||||
mov %edx, 24(%esp)
|
||||
call HIDDEN_JUMPTARGET (__powl_helper) // <result>
|
||||
mov 24(%esp), %edx
|
||||
addl $28, %esp
|
||||
cfi_adjust_cfa_offset (-28)
|
||||
testb $2, %dh
|
||||
jz 292f
|
||||
// x is negative. If y is an odd integer, negate the result.
|
||||
#ifdef PIC
|
||||
LOAD_PIC_REG (cx)
|
||||
#endif
|
||||
fldt 24(%esp) // y : abs(result)
|
||||
fld %st // y : y : abs(result)
|
||||
fabs // |y| : y : abs(result)
|
||||
|
@ -2480,6 +2480,11 @@ ifloat: 1
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
||||
# pow
|
||||
Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
||||
# pow_downward
|
||||
Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674":
|
||||
double: 1
|
||||
@ -3782,6 +3787,10 @@ ifloat: 1
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "pow":
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "pow_downward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
3
sysdeps/x86/fpu/Makefile
Normal file
3
sysdeps/x86/fpu/Makefile
Normal file
@ -0,0 +1,3 @@
|
||||
ifeq ($(subdir),math)
|
||||
libm-support += powl_helper
|
||||
endif
|
211
sysdeps/x86/fpu/powl_helper.c
Normal file
211
sysdeps/x86/fpu/powl_helper.c
Normal file
@ -0,0 +1,211 @@
|
||||
/* Implement powl for x86 using extra-precision log.
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
|
||||
/* High parts and low parts of -log (k/16), for integer k from 12 to
|
||||
24. */
|
||||
|
||||
static const long double powl_log_table[] =
|
||||
{
|
||||
0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L,
|
||||
0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L,
|
||||
0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L,
|
||||
0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L,
|
||||
-0x0p+0L, 0x0p+0L,
|
||||
-0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L,
|
||||
-0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L,
|
||||
-0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L,
|
||||
-0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L,
|
||||
-0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L,
|
||||
-0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L,
|
||||
-0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L,
|
||||
-0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L,
|
||||
};
|
||||
|
||||
/* High 32 bits of log2 (e), and remainder rounded to 64 bits. */
|
||||
static const long double log2e_hi = 0x1.71547652p+0L;
|
||||
static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L;
|
||||
|
||||
/* Given a number with high part HI and low part LO, add the number X
|
||||
to it and store the result in *RHI and *RLO. It is given that
|
||||
either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are
|
||||
small enough that no overflow occurs. The result does not need to
|
||||
be exact to 128 bits; 78-bit accuracy of the final accumulated
|
||||
result suffices. */
|
||||
|
||||
static inline void
|
||||
acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
|
||||
long double x)
|
||||
{
|
||||
long double thi = hi + x;
|
||||
long double tlo = (hi - thi) + x + lo;
|
||||
*rhi = thi + tlo;
|
||||
*rlo = (thi - *rhi) + tlo;
|
||||
}
|
||||
|
||||
extern long double __powl_helper (long double x, long double y);
|
||||
libm_hidden_proto (__powl_helper)
|
||||
|
||||
/* Given X a value that is finite and nonzero, or a NaN, and only
|
||||
negative if Y is not an integer, and Y a finite nonzero value with
|
||||
0x1p-79 <= |Y| <= 0x1p78, compute X to the power Y. */
|
||||
|
||||
long double
|
||||
__powl_helper (long double x, long double y)
|
||||
{
|
||||
if (isnan (x) || x < 0)
|
||||
return __ieee754_expl (y * __ieee754_logl (x));
|
||||
|
||||
/* We need to compute Y * log2 (X) to at least 64 bits after the
|
||||
point for normal results (that is, to at least 78 bits
|
||||
precision). */
|
||||
int x_int_exponent;
|
||||
long double x_frac;
|
||||
x_frac = __frexpl (x, &x_int_exponent);
|
||||
if (x_frac <= 0x0.aaaaaaaaaaaaaaaap0L) /* 2.0L / 3.0L, rounded down */
|
||||
{
|
||||
x_frac *= 2.0;
|
||||
x_int_exponent--;
|
||||
}
|
||||
|
||||
long double log_x_frac_hi, log_x_frac_lo;
|
||||
/* Determine an initial approximation to log (X_FRAC) using
|
||||
POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an
|
||||
interval (24/25, 26/25). */
|
||||
int k = (int) ((16.0L / x_frac) + 0.5L);
|
||||
log_x_frac_hi = powl_log_table[2 * k - 24];
|
||||
log_x_frac_lo = powl_log_table[2 * k - 23];
|
||||
long double x_frac_low;
|
||||
if (k == 16)
|
||||
x_frac_low = 0.0L;
|
||||
else
|
||||
{
|
||||
/* Mask off low 5 bits of X_FRAC so the multiplication by K/16
|
||||
is exact. These bits are small enough that they can be
|
||||
corrected for by adding log2 (e) * X_FRAC_LOW to the final
|
||||
result. */
|
||||
int32_t se;
|
||||
u_int32_t i0, i1;
|
||||
GET_LDOUBLE_WORDS (se, i0, i1, x_frac);
|
||||
x_frac_low = x_frac;
|
||||
i1 &= 0xffffffe0;
|
||||
SET_LDOUBLE_WORDS (x_frac, se, i0, i1);
|
||||
x_frac_low -= x_frac;
|
||||
x_frac_low /= x_frac;
|
||||
x_frac *= k / 16.0L;
|
||||
}
|
||||
|
||||
/* Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25). Separate
|
||||
W = X_FRAC - 1 into high 16 bits and remaining bits, so that
|
||||
multiplications for low-order power series terms are exact. The
|
||||
remaining bits are small enough that adding a 64-bit value of
|
||||
log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for
|
||||
them. */
|
||||
long double w = x_frac - 1;
|
||||
long double w_hi, w_lo;
|
||||
int32_t se;
|
||||
u_int32_t i0, i1;
|
||||
GET_LDOUBLE_WORDS (se, i0, i1, w);
|
||||
i0 &= 0xffff0000;
|
||||
i1 = 0;
|
||||
SET_LDOUBLE_WORDS (w_hi, se, i0, i1);
|
||||
w_lo = w - w_hi;
|
||||
long double wp = w_hi;
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, wp);
|
||||
wp *= -w_hi;
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
|
||||
wp / 2.0L);
|
||||
wp *= -w_hi;
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
|
||||
wp * 0x0.5555p0L); /* -W_HI**3 / 3, high part. */
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
|
||||
wp * 0x0.5555555555555555p-16L); /* -W_HI**3 / 3, low part. */
|
||||
wp *= -w_hi;
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
|
||||
wp / 4.0L);
|
||||
/* Subsequent terms are small enough that they only need be computed
|
||||
to 64 bits. */
|
||||
for (int i = 5; i <= 17; i++)
|
||||
{
|
||||
wp *= -w_hi;
|
||||
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
|
||||
wp / i);
|
||||
}
|
||||
|
||||
/* Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm. */
|
||||
long double log2_x_frac_hi, log2_x_frac_lo;
|
||||
long double log_x_frac_hi32, log_x_frac_lo64;
|
||||
GET_LDOUBLE_WORDS (se, i0, i1, log_x_frac_hi);
|
||||
i1 = 0;
|
||||
SET_LDOUBLE_WORDS (log_x_frac_hi32, se, i0, i1);
|
||||
log_x_frac_lo64 = (log_x_frac_hi - log_x_frac_hi32) + log_x_frac_lo;
|
||||
long double log2_x_frac_hi1 = log_x_frac_hi32 * log2e_hi;
|
||||
long double log2_x_frac_lo1
|
||||
= log_x_frac_lo64 * log2e_hi + log_x_frac_hi * log2e_lo;
|
||||
log2_x_frac_hi = log2_x_frac_hi1 + log2_x_frac_lo1;
|
||||
log2_x_frac_lo = (log2_x_frac_hi1 - log2_x_frac_hi) + log2_x_frac_lo1;
|
||||
|
||||
/* Correct for the masking off of W_LO. */
|
||||
long double log2_1p_w_lo;
|
||||
asm ("fyl2xp1"
|
||||
: "=t" (log2_1p_w_lo)
|
||||
: "0" (w_lo / (1.0L + w_hi)), "u" (1.0L)
|
||||
: "st(1)");
|
||||
acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
|
||||
log2_1p_w_lo);
|
||||
|
||||
/* Correct for the masking off of X_FRAC_LOW. */
|
||||
acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
|
||||
x_frac_low * M_LOG2El);
|
||||
|
||||
/* Add the integer and fractional parts of the base-2 logarithm. */
|
||||
long double log2_x_hi, log2_x_lo;
|
||||
log2_x_hi = x_int_exponent + log2_x_frac_hi;
|
||||
log2_x_lo = ((x_int_exponent - log2_x_hi) + log2_x_frac_hi) + log2_x_frac_lo;
|
||||
|
||||
/* Compute the base-2 logarithm of the result. */
|
||||
long double log2_res_hi, log2_res_lo;
|
||||
long double log2_x_hi32, log2_x_lo64;
|
||||
GET_LDOUBLE_WORDS (se, i0, i1, log2_x_hi);
|
||||
i1 = 0;
|
||||
SET_LDOUBLE_WORDS (log2_x_hi32, se, i0, i1);
|
||||
log2_x_lo64 = (log2_x_hi - log2_x_hi32) + log2_x_lo;
|
||||
long double y_hi32, y_lo32;
|
||||
GET_LDOUBLE_WORDS (se, i0, i1, y);
|
||||
i1 = 0;
|
||||
SET_LDOUBLE_WORDS (y_hi32, se, i0, i1);
|
||||
y_lo32 = y - y_hi32;
|
||||
log2_res_hi = log2_x_hi32 * y_hi32;
|
||||
log2_res_lo = log2_x_hi32 * y_lo32 + log2_x_lo64 * y;
|
||||
|
||||
/* Split the base-2 logarithm of the result into integer and
|
||||
fractional parts. */
|
||||
long double log2_res_int = __roundl (log2_res_hi);
|
||||
long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
|
||||
|
||||
/* Compute the final result. */
|
||||
long double res;
|
||||
asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
|
||||
res += 1.0L;
|
||||
asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
|
||||
return res;
|
||||
}
|
||||
|
||||
libm_hidden_def (__powl_helper)
|
@ -26,9 +26,9 @@
|
||||
.type one,@object
|
||||
one: .double 1.0
|
||||
ASM_SIZE_DIRECTIVE(one)
|
||||
.type limit,@object
|
||||
limit: .double 0.29
|
||||
ASM_SIZE_DIRECTIVE(limit)
|
||||
.type p3,@object
|
||||
p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
|
||||
ASM_SIZE_DIRECTIVE(p3)
|
||||
.type p63,@object
|
||||
p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
|
||||
ASM_SIZE_DIRECTIVE(p63)
|
||||
@ -131,7 +131,15 @@ ENTRY(__ieee754_powl)
|
||||
fchs // -0x1p-79 : x
|
||||
jmp 3f
|
||||
|
||||
9: /* OK, we have an integer value for y. */
|
||||
9: /* OK, we have an integer value for y. Unless very small
|
||||
(we use < 8), use the algorithm for real exponent to avoid
|
||||
accumulation of errors. */
|
||||
fldl MO(p3) // 8 : y : x
|
||||
fld %st(1) // y : 8 : y : x
|
||||
fabs // |y| : 8 : y : x
|
||||
fcomip %st(1), %st // 8 : y : x
|
||||
fstp %st(0) // y : x
|
||||
jnc 2f
|
||||
mov -8(%rsp),%eax
|
||||
mov -4(%rsp),%edx
|
||||
orl $0, %edx
|
||||
@ -167,7 +175,7 @@ ENTRY(__ieee754_powl)
|
||||
ret
|
||||
|
||||
.align ALIGNARG(4)
|
||||
2: // y is a large integer (absolute value at least 1L<<63), but
|
||||
2: // y is a large integer (absolute value at least 8), but
|
||||
// may be odd unless at least 1L<<64. So it may be necessary
|
||||
// to adjust the sign of a negative result afterwards.
|
||||
fxch // x : y
|
||||
@ -190,31 +198,15 @@ ENTRY(__ieee754_powl)
|
||||
fchs // -(1L<<78) : |x|
|
||||
.align ALIGNARG(4)
|
||||
3: /* y is a real number. */
|
||||
fxch // x : y
|
||||
fldl MO(one) // 1.0 : x : y
|
||||
fldl MO(limit) // 0.29 : 1.0 : x : y
|
||||
fld %st(2) // x : 0.29 : 1.0 : x : y
|
||||
fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
|
||||
fabs // |x-1| : 0.29 : 1.0 : x : y
|
||||
fucompp // 1.0 : x : y
|
||||
fnstsw
|
||||
fxch // x : 1.0 : y
|
||||
test $0x4500,%eax
|
||||
jz 7f
|
||||
fsub %st(1) // x-1 : 1.0 : y
|
||||
fyl2xp1 // log2(x) : y
|
||||
jmp 8f
|
||||
|
||||
7: fyl2x // log2(x) : y
|
||||
8: fmul %st(1) // y*log2(x) : y
|
||||
fst %st(1) // y*log2(x) : y*log2(x)
|
||||
frndint // int(y*log2(x)) : y*log2(x)
|
||||
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
|
||||
fxch // fract(y*log2(x)) : int(y*log2(x))
|
||||
f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
|
||||
faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
|
||||
fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
|
||||
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
|
||||
subq $40, %rsp
|
||||
cfi_adjust_cfa_offset (40)
|
||||
fstpt 16(%rsp) // x
|
||||
fstpt (%rsp) // <empty>
|
||||
mov %edx, 32(%rsp)
|
||||
call HIDDEN_JUMPTARGET (__powl_helper) // <result>
|
||||
mov 32(%rsp), %edx
|
||||
addq $40, %rsp
|
||||
cfi_adjust_cfa_offset (-40)
|
||||
testb $2, %dh
|
||||
jz 292f
|
||||
// x is negative. If y is an odd integer, negate the result.
|
||||
|
@ -2398,6 +2398,8 @@ ifloat: 1
|
||||
Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
|
||||
float: 1
|
||||
ifloat: 1
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
|
||||
float: 1
|
||||
ifloat: 1
|
||||
@ -3575,6 +3577,8 @@ ifloat: 1
|
||||
Function: "pow":
|
||||
float: 1
|
||||
ifloat: 1
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "pow_downward":
|
||||
float: 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user