Fix powl inaccuracy for x86_64 and x86 (bug 13881).

This commit is contained in:
Joseph Myers 2012-11-28 13:40:54 +00:00
parent 0817d63dd1
commit 1bead169c3
9 changed files with 334 additions and 74 deletions

View File

@ -1,3 +1,21 @@
2012-11-28 Joseph Myers <joseph@codesourcery.com>
[BZ #13881]
* sysdeps/x86/fpu/powl_helper.c: New file.
* sysdeps/x86/fpu/Makefile: Likewise.
* sysdeps/i386/fpu/e_powl.S (limit): Remove object.
(p3): New object.
(__ieee754_powl): Use __powl_helper for finite arguments except
integer exponents below 8.
* sysdeps/x86_64/fpu/e_powl.S (limit): Remove object.
(p3): New object.
(__ieee754_powl): Use __powl_helper for finite arguments except
integer exponents below 8.
* math/libm-test.inc (pow_test): Add more tests and enable some
previously disabled tests.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
2012-11-28 Siddhesh Poyarekar <siddhesh@redhat.com> 2012-11-28 Siddhesh Poyarekar <siddhesh@redhat.com>
Carlos O'Donell <carlos_odonell@mentor.com> Carlos O'Donell <carlos_odonell@mentor.com>

21
NEWS
View File

@ -12,16 +12,17 @@ Version 2.17
1349, 3439, 3479, 3665, 5044, 5246, 5298, 5400, 6530, 6778, 6808, 9685, 1349, 3439, 3479, 3665, 5044, 5246, 5298, 5400, 6530, 6778, 6808, 9685,
9914, 10014, 10038, 10631, 10873, 11438, 11607, 11638, 11741, 12140, 9914, 10014, 10038, 10631, 10873, 11438, 11607, 11638, 11741, 12140,
13412, 13542, 13601, 13603, 13604, 13629, 13679, 13696, 13698, 13717, 13412, 13542, 13601, 13603, 13604, 13629, 13679, 13696, 13698, 13717,
13741, 13759, 13763, 13939, 13950, 13952, 13966, 14042, 14047, 14090, 13741, 13759, 13763, 13881, 13939, 13950, 13952, 13966, 14042, 14047,
14150, 14151, 14152, 14154, 14157, 14166, 14173, 14195, 14237, 14251, 14090, 14150, 14151, 14152, 14154, 14157, 14166, 14173, 14195, 14237,
14252, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337, 14347, 14251, 14252, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337,
14349, 14368, 14376, 14417, 14459, 14476, 14477, 14501, 14505, 14510, 14347, 14349, 14368, 14376, 14417, 14459, 14476, 14477, 14501, 14505,
14516, 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14510, 14516, 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545,
14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14557, 14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610,
14638, 14645, 14648, 14652, 14660, 14661, 14669, 14672, 14683, 14694, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14669, 14672, 14683,
14716, 14719, 14743, 14767, 14783, 14784, 14785, 14793, 14796, 14797, 14694, 14716, 14719, 14743, 14767, 14783, 14784, 14785, 14793, 14796,
14801, 14805, 14807, 14809, 14811, 14815, 14821, 14822, 14824, 14828, 14797, 14801, 14805, 14807, 14809, 14811, 14815, 14821, 14822, 14824,
14831, 14835, 14838, 14856, 14863, 14865, 14866, 14868, 14869, 14871. 14828, 14831, 14835, 14838, 14856, 14863, 14865, 14866, 14868, 14869,
14871.
* Port to ARM AArch64 contributed by Linaro. * Port to ARM AArch64 contributed by Linaro.

View File

@ -8431,7 +8431,6 @@ pow_test (void)
#endif #endif
TEST_ff_f (pow, -min_value, max_value, plus_zero, UNDERFLOW_EXCEPTION); TEST_ff_f (pow, -min_value, max_value, plus_zero, UNDERFLOW_EXCEPTION);
#ifndef TEST_LDOUBLE /* Bug 13881. */
TEST_ff_f (pow, 0x0.ffffffp0, 10, 0.999999403953712118183885036774764444747L); TEST_ff_f (pow, 0x0.ffffffp0, 10, 0.999999403953712118183885036774764444747L);
TEST_ff_f (pow, 0x0.ffffffp0, 100, 0.999994039553108359406305079606228341585L); TEST_ff_f (pow, 0x0.ffffffp0, 100, 0.999994039553108359406305079606228341585L);
TEST_ff_f (pow, 0x0.ffffffp0, 1000, 0.9999403971297699052276650144650733772182L); TEST_ff_f (pow, 0x0.ffffffp0, 1000, 0.9999403971297699052276650144650733772182L);
@ -8447,16 +8446,44 @@ pow_test (void)
TEST_ff_f (pow, 0x1.000002p0, 0x1p24, 7.3890552180866447284268641248075832310141L); TEST_ff_f (pow, 0x1.000002p0, 0x1p24, 7.3890552180866447284268641248075832310141L);
TEST_ff_f (pow, 0x1.000002p0, 0x1.234566p29, 4.2107033006507495188536371520637025716256e+31L); TEST_ff_f (pow, 0x1.000002p0, 0x1.234566p29, 4.2107033006507495188536371520637025716256e+31L);
TEST_ff_f (pow, 0x1.000002p0, -0x1.234566p29, 2.3749001736727769098946062325205705312166e-32L); TEST_ff_f (pow, 0x1.000002p0, -0x1.234566p29, 2.3749001736727769098946062325205705312166e-32L);
#endif
/* Bug 13881: powl inaccurate so these tests disabled for long double. */ #if !defined TEST_FLOAT
#if !defined TEST_FLOAT && !defined TEST_LDOUBLE
TEST_ff_f (pow, 0x0.fffffffffffff8p0L, 0x1.23456789abcdfp62L, 1.0118762747827252817436395051178295138220e-253L); TEST_ff_f (pow, 0x0.fffffffffffff8p0L, 0x1.23456789abcdfp62L, 1.0118762747827252817436395051178295138220e-253L);
TEST_ff_f (pow, 0x0.fffffffffffff8p0L, -0x1.23456789abcdfp62L, 9.8826311568054561811190162420900667121992e+252L); TEST_ff_f (pow, 0x0.fffffffffffff8p0L, -0x1.23456789abcdfp62L, 9.8826311568054561811190162420900667121992e+252L);
TEST_ff_f (pow, 0x1.0000000000001p0L, 0x1.23456789abcdfp61L, 9.8826311568044974397135026217687399395481e+252L); TEST_ff_f (pow, 0x1.0000000000001p0L, 0x1.23456789abcdfp61L, 9.8826311568044974397135026217687399395481e+252L);
TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L); TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
#endif #endif
#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 64 && LDBL_MAX_EXP >= 16384
TEST_ff_f (pow, 0x0.ffffffffffffffffp0L, 0x1.23456789abcdef0ep77L, 1.2079212226420368189981778807634890018840e-4048L);
TEST_ff_f (pow, 0x0.ffffffffffffffffp0L, -0x1.23456789abcdef0ep77L, 8.2786855736563746280496724205839522148001e+4047L);
TEST_ff_f (pow, 0x1.0000000000000002p0L, 0x1.23456789abcdef0ep76L, 8.2786855736563683535324500168799315131570e+4047L);
TEST_ff_f (pow, 0x1.0000000000000002p0L, -0x1.23456789abcdef0ep76L, 1.2079212226420377344964713407722652880280e-4048L);
#endif
#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 113
TEST_ff_f (pow, 0x0.ffffffffffffffffffffffffffff8p0L, 0x1.23456789abcdef0123456789abcdp126L, 1.2079212226420440237790185999151440179953e-4048L);
TEST_ff_f (pow, 0x0.ffffffffffffffffffffffffffff8p0L, -0x1.23456789abcdef0123456789abcdp126L, 8.2786855736563252489063231915535105363602e+4047L);
TEST_ff_f (pow, 0x1.0000000000000000000000000001p0L, 0x1.23456789abcdef0123456789abcdp125L, 8.2786855736563252489063231915423647547782e+4047L);
TEST_ff_f (pow, 0x1.0000000000000000000000000001p0L, -0x1.23456789abcdef0123456789abcdp125L, 1.2079212226420440237790185999167702696503e-4048L);
#endif
#if defined TEST_LDOUBLE && LDBL_MAX_EXP >= 16384
TEST_ff_f (pow, 1e4932L, 0.75L, 1e3699L);
TEST_ff_f (pow, 1e4928L, 0.75L, 1e3696L);
TEST_ff_f (pow, 1e4924L, 0.75L, 1e3693L);
TEST_ff_f (pow, 1e4920L, 0.75L, 1e3690L);
TEST_ff_f (pow, 10.0L, 4932.0L, 1e4932L);
TEST_ff_f (pow, 10.0L, 4931.0L, 1e4931L);
TEST_ff_f (pow, 10.0L, 4930.0L, 1e4930L);
TEST_ff_f (pow, 10.0L, 4929.0L, 1e4929L);
TEST_ff_f (pow, 10.0L, -4931.0L, 1e-4931L);
TEST_ff_f (pow, 10.0L, -4930.0L, 1e-4930L);
TEST_ff_f (pow, 10.0L, -4929.0L, 1e-4929L);
TEST_ff_f (pow, 1e27L, 182.0L, 1e4914L);
TEST_ff_f (pow, 1e27L, -182.0L, 1e-4914L);
#endif
TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L); TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L); TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L); TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);

View File

@ -26,9 +26,9 @@
.type one,@object .type one,@object
one: .double 1.0 one: .double 1.0
ASM_SIZE_DIRECTIVE(one) ASM_SIZE_DIRECTIVE(one)
.type limit,@object .type p3,@object
limit: .double 0.29 p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
ASM_SIZE_DIRECTIVE(limit) ASM_SIZE_DIRECTIVE(p3)
.type p63,@object .type p63,@object
p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
ASM_SIZE_DIRECTIVE(p63) ASM_SIZE_DIRECTIVE(p63)
@ -141,7 +141,15 @@ ENTRY(__ieee754_powl)
fchs // -0x1p-79 : x fchs // -0x1p-79 : x
jmp 3f jmp 3f
9: /* OK, we have an integer value for y. */ 9: /* OK, we have an integer value for y. Unless very small
(we use < 8), use the algorithm for real exponent to avoid
accumulation of errors. */
fld %st // y : y : x
fabs // |y| : y : x
fcompl MO(p3) // y : x
fnstsw
sahf
jnc 2f
popl %eax popl %eax
cfi_adjust_cfa_offset (-4) cfi_adjust_cfa_offset (-4)
popl %edx popl %edx
@ -182,7 +190,7 @@ ENTRY(__ieee754_powl)
cfi_adjust_cfa_offset (8) cfi_adjust_cfa_offset (8)
.align ALIGNARG(4) .align ALIGNARG(4)
2: // y is a large integer (absolute value at least 1L<<63), but 2: // y is a large integer (absolute value at least 8), but
// may be odd unless at least 1L<<64. So it may be necessary // may be odd unless at least 1L<<64. So it may be necessary
// to adjust the sign of a negative result afterwards. // to adjust the sign of a negative result afterwards.
fxch // x : y fxch // x : y
@ -205,34 +213,21 @@ ENTRY(__ieee754_powl)
fchs // -(1L<<78) : |x| fchs // -(1L<<78) : |x|
.align ALIGNARG(4) .align ALIGNARG(4)
3: /* y is a real number. */ 3: /* y is a real number. */
fxch // x : y subl $28, %esp
fldl MO(one) // 1.0 : x : y cfi_adjust_cfa_offset (28)
fldl MO(limit) // 0.29 : 1.0 : x : y fstpt 12(%esp) // x
fld %st(2) // x : 0.29 : 1.0 : x : y fstpt (%esp) // <empty>
fsub %st(2) // x-1 : 0.29 : 1.0 : x : y mov %edx, 24(%esp)
fabs // |x-1| : 0.29 : 1.0 : x : y call HIDDEN_JUMPTARGET (__powl_helper) // <result>
fucompp // 1.0 : x : y mov 24(%esp), %edx
fnstsw addl $28, %esp
fxch // x : 1.0 : y cfi_adjust_cfa_offset (-28)
sahf
ja 7f
fsub %st(1) // x-1 : 1.0 : y
fyl2xp1 // log2(x) : y
jmp 8f
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
fxch // fract(y*log2(x)) : int(y*log2(x))
f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
testb $2, %dh testb $2, %dh
jz 292f jz 292f
// x is negative. If y is an odd integer, negate the result. // x is negative. If y is an odd integer, negate the result.
#ifdef PIC
LOAD_PIC_REG (cx)
#endif
fldt 24(%esp) // y : abs(result) fldt 24(%esp) // y : abs(result)
fld %st // y : y : abs(result) fld %st // y : y : abs(result)
fabs // |y| : y : abs(result) fabs // |y| : y : abs(result)

View File

@ -2480,6 +2480,11 @@ ifloat: 1
ildouble: 1 ildouble: 1
ldouble: 1 ldouble: 1
# pow
Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
ildouble: 1
ldouble: 1
# pow_downward # pow_downward
Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674": Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674":
double: 1 double: 1
@ -3782,6 +3787,10 @@ ifloat: 1
ildouble: 1 ildouble: 1
ldouble: 1 ldouble: 1
Function: "pow":
ildouble: 1
ldouble: 1
Function: "pow_downward": Function: "pow_downward":
double: 1 double: 1
float: 1 float: 1

3
sysdeps/x86/fpu/Makefile Normal file
View File

@ -0,0 +1,3 @@
ifeq ($(subdir),math)
libm-support += powl_helper
endif

View File

@ -0,0 +1,211 @@
/* Implement powl for x86 using extra-precision log.
Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <math.h>
#include <math_private.h>
/* High parts and low parts of -log (k/16), for integer k from 12 to
24. */
static const long double powl_log_table[] =
{
0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L,
0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L,
0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L,
0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L,
-0x0p+0L, 0x0p+0L,
-0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L,
-0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L,
-0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L,
-0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L,
-0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L,
-0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L,
-0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L,
-0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L,
};
/* High 32 bits of log2 (e), and remainder rounded to 64 bits. */
static const long double log2e_hi = 0x1.71547652p+0L;
static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L;
/* Given a number with high part HI and low part LO, add the number X
to it and store the result in *RHI and *RLO. It is given that
either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are
small enough that no overflow occurs. The result does not need to
be exact to 128 bits; 78-bit accuracy of the final accumulated
result suffices. */
static inline void
acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
long double x)
{
long double thi = hi + x;
long double tlo = (hi - thi) + x + lo;
*rhi = thi + tlo;
*rlo = (thi - *rhi) + tlo;
}
extern long double __powl_helper (long double x, long double y);
libm_hidden_proto (__powl_helper)
/* Given X a value that is finite and nonzero, or a NaN, and only
negative if Y is not an integer, and Y a finite nonzero value with
0x1p-79 <= |Y| <= 0x1p78, compute X to the power Y. */
long double
__powl_helper (long double x, long double y)
{
if (isnan (x) || x < 0)
return __ieee754_expl (y * __ieee754_logl (x));
/* We need to compute Y * log2 (X) to at least 64 bits after the
point for normal results (that is, to at least 78 bits
precision). */
int x_int_exponent;
long double x_frac;
x_frac = __frexpl (x, &x_int_exponent);
if (x_frac <= 0x0.aaaaaaaaaaaaaaaap0L) /* 2.0L / 3.0L, rounded down */
{
x_frac *= 2.0;
x_int_exponent--;
}
long double log_x_frac_hi, log_x_frac_lo;
/* Determine an initial approximation to log (X_FRAC) using
POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an
interval (24/25, 26/25). */
int k = (int) ((16.0L / x_frac) + 0.5L);
log_x_frac_hi = powl_log_table[2 * k - 24];
log_x_frac_lo = powl_log_table[2 * k - 23];
long double x_frac_low;
if (k == 16)
x_frac_low = 0.0L;
else
{
/* Mask off low 5 bits of X_FRAC so the multiplication by K/16
is exact. These bits are small enough that they can be
corrected for by adding log2 (e) * X_FRAC_LOW to the final
result. */
int32_t se;
u_int32_t i0, i1;
GET_LDOUBLE_WORDS (se, i0, i1, x_frac);
x_frac_low = x_frac;
i1 &= 0xffffffe0;
SET_LDOUBLE_WORDS (x_frac, se, i0, i1);
x_frac_low -= x_frac;
x_frac_low /= x_frac;
x_frac *= k / 16.0L;
}
/* Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25). Separate
W = X_FRAC - 1 into high 16 bits and remaining bits, so that
multiplications for low-order power series terms are exact. The
remaining bits are small enough that adding a 64-bit value of
log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for
them. */
long double w = x_frac - 1;
long double w_hi, w_lo;
int32_t se;
u_int32_t i0, i1;
GET_LDOUBLE_WORDS (se, i0, i1, w);
i0 &= 0xffff0000;
i1 = 0;
SET_LDOUBLE_WORDS (w_hi, se, i0, i1);
w_lo = w - w_hi;
long double wp = w_hi;
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, wp);
wp *= -w_hi;
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
wp / 2.0L);
wp *= -w_hi;
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
wp * 0x0.5555p0L); /* -W_HI**3 / 3, high part. */
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
wp * 0x0.5555555555555555p-16L); /* -W_HI**3 / 3, low part. */
wp *= -w_hi;
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
wp / 4.0L);
/* Subsequent terms are small enough that they only need be computed
to 64 bits. */
for (int i = 5; i <= 17; i++)
{
wp *= -w_hi;
acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
wp / i);
}
/* Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm. */
long double log2_x_frac_hi, log2_x_frac_lo;
long double log_x_frac_hi32, log_x_frac_lo64;
GET_LDOUBLE_WORDS (se, i0, i1, log_x_frac_hi);
i1 = 0;
SET_LDOUBLE_WORDS (log_x_frac_hi32, se, i0, i1);
log_x_frac_lo64 = (log_x_frac_hi - log_x_frac_hi32) + log_x_frac_lo;
long double log2_x_frac_hi1 = log_x_frac_hi32 * log2e_hi;
long double log2_x_frac_lo1
= log_x_frac_lo64 * log2e_hi + log_x_frac_hi * log2e_lo;
log2_x_frac_hi = log2_x_frac_hi1 + log2_x_frac_lo1;
log2_x_frac_lo = (log2_x_frac_hi1 - log2_x_frac_hi) + log2_x_frac_lo1;
/* Correct for the masking off of W_LO. */
long double log2_1p_w_lo;
asm ("fyl2xp1"
: "=t" (log2_1p_w_lo)
: "0" (w_lo / (1.0L + w_hi)), "u" (1.0L)
: "st(1)");
acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
log2_1p_w_lo);
/* Correct for the masking off of X_FRAC_LOW. */
acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
x_frac_low * M_LOG2El);
/* Add the integer and fractional parts of the base-2 logarithm. */
long double log2_x_hi, log2_x_lo;
log2_x_hi = x_int_exponent + log2_x_frac_hi;
log2_x_lo = ((x_int_exponent - log2_x_hi) + log2_x_frac_hi) + log2_x_frac_lo;
/* Compute the base-2 logarithm of the result. */
long double log2_res_hi, log2_res_lo;
long double log2_x_hi32, log2_x_lo64;
GET_LDOUBLE_WORDS (se, i0, i1, log2_x_hi);
i1 = 0;
SET_LDOUBLE_WORDS (log2_x_hi32, se, i0, i1);
log2_x_lo64 = (log2_x_hi - log2_x_hi32) + log2_x_lo;
long double y_hi32, y_lo32;
GET_LDOUBLE_WORDS (se, i0, i1, y);
i1 = 0;
SET_LDOUBLE_WORDS (y_hi32, se, i0, i1);
y_lo32 = y - y_hi32;
log2_res_hi = log2_x_hi32 * y_hi32;
log2_res_lo = log2_x_hi32 * y_lo32 + log2_x_lo64 * y;
/* Split the base-2 logarithm of the result into integer and
fractional parts. */
long double log2_res_int = __roundl (log2_res_hi);
long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
/* Compute the final result. */
long double res;
asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
res += 1.0L;
asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
return res;
}
libm_hidden_def (__powl_helper)

View File

@ -26,9 +26,9 @@
.type one,@object .type one,@object
one: .double 1.0 one: .double 1.0
ASM_SIZE_DIRECTIVE(one) ASM_SIZE_DIRECTIVE(one)
.type limit,@object .type p3,@object
limit: .double 0.29 p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
ASM_SIZE_DIRECTIVE(limit) ASM_SIZE_DIRECTIVE(p3)
.type p63,@object .type p63,@object
p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
ASM_SIZE_DIRECTIVE(p63) ASM_SIZE_DIRECTIVE(p63)
@ -131,7 +131,15 @@ ENTRY(__ieee754_powl)
fchs // -0x1p-79 : x fchs // -0x1p-79 : x
jmp 3f jmp 3f
9: /* OK, we have an integer value for y. */ 9: /* OK, we have an integer value for y. Unless very small
(we use < 8), use the algorithm for real exponent to avoid
accumulation of errors. */
fldl MO(p3) // 8 : y : x
fld %st(1) // y : 8 : y : x
fabs // |y| : 8 : y : x
fcomip %st(1), %st // 8 : y : x
fstp %st(0) // y : x
jnc 2f
mov -8(%rsp),%eax mov -8(%rsp),%eax
mov -4(%rsp),%edx mov -4(%rsp),%edx
orl $0, %edx orl $0, %edx
@ -167,7 +175,7 @@ ENTRY(__ieee754_powl)
ret ret
.align ALIGNARG(4) .align ALIGNARG(4)
2: // y is a large integer (absolute value at least 1L<<63), but 2: // y is a large integer (absolute value at least 8), but
// may be odd unless at least 1L<<64. So it may be necessary // may be odd unless at least 1L<<64. So it may be necessary
// to adjust the sign of a negative result afterwards. // to adjust the sign of a negative result afterwards.
fxch // x : y fxch // x : y
@ -190,31 +198,15 @@ ENTRY(__ieee754_powl)
fchs // -(1L<<78) : |x| fchs // -(1L<<78) : |x|
.align ALIGNARG(4) .align ALIGNARG(4)
3: /* y is a real number. */ 3: /* y is a real number. */
fxch // x : y subq $40, %rsp
fldl MO(one) // 1.0 : x : y cfi_adjust_cfa_offset (40)
fldl MO(limit) // 0.29 : 1.0 : x : y fstpt 16(%rsp) // x
fld %st(2) // x : 0.29 : 1.0 : x : y fstpt (%rsp) // <empty>
fsub %st(2) // x-1 : 0.29 : 1.0 : x : y mov %edx, 32(%rsp)
fabs // |x-1| : 0.29 : 1.0 : x : y call HIDDEN_JUMPTARGET (__powl_helper) // <result>
fucompp // 1.0 : x : y mov 32(%rsp), %edx
fnstsw addq $40, %rsp
fxch // x : 1.0 : y cfi_adjust_cfa_offset (-40)
test $0x4500,%eax
jz 7f
fsub %st(1) // x-1 : 1.0 : y
fyl2xp1 // log2(x) : y
jmp 8f
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
fxch // fract(y*log2(x)) : int(y*log2(x))
f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
testb $2, %dh testb $2, %dh
jz 292f jz 292f
// x is negative. If y is an odd integer, negate the result. // x is negative. If y is an odd integer, negate the result.

View File

@ -2398,6 +2398,8 @@ ifloat: 1
Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416": Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
float: 1 float: 1
ifloat: 1 ifloat: 1
ildouble: 1
ldouble: 1
Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744": Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
float: 1 float: 1
ifloat: 1 ifloat: 1
@ -3575,6 +3577,8 @@ ifloat: 1
Function: "pow": Function: "pow":
float: 1 float: 1
ifloat: 1 ifloat: 1
ildouble: 1
ldouble: 1
Function: "pow_downward": Function: "pow_downward":
float: 1 float: 1