41e8926aa4
This patch improves the performance of some math functions by adding libc_fexxx variants of the inline functions that handle both FPU rounding-mode and exception set/restore, and by using them in the libc_fexxx_ctx functions. It is based on the already-coded fexxx family of functions for PPC with FPU. Here is a summary of the performance improvements due to this patch (measured on a POWER7 machine): Before: cos(): ITERS:9.5895e+07: TOTAL:5116.03Mcy, MAX:77.6cy, MIN:49.792cy, 18744 calls/Mcy exp(): ITERS:2.827e+07: TOTAL:5187.15Mcy, MAX:494.018cy, MIN:38.422cy, 5450.01 calls/Mcy pow(): ITERS:6.1705e+07: TOTAL:5144.26Mcy, MAX:171.95cy, MIN:29.935cy, 11994.9 calls/Mcy sin(): ITERS:8.6898e+07: TOTAL:5117.06Mcy, MAX:83.841cy, MIN:46.582cy, 16982 calls/Mcy tan(): ITERS:2.9473e+07: TOTAL:5115.39Mcy, MAX:191.017cy, MIN:172.352cy, 5761.63 calls/Mcy After: cos(): ITERS:2.05265e+08: TOTAL:5111.37Mcy, MAX:78.754cy, MIN:24.196cy, 40158.5 calls/Mcy exp(): ITERS:3.341e+07: TOTAL:5170.84Mcy, MAX:476.317cy, MIN:15.574cy, 6461.23 calls/Mcy pow(): ITERS:7.6153e+07: TOTAL:5129.1Mcy, MAX:147.5cy, MIN:30.916cy, 14847.2 calls/Mcy sin(): ITERS:1.58816e+08: TOTAL:5115.11Mcy, MAX:1490.39cy, MIN:22.341cy, 31048.4 calls/Mcy tan(): ITERS:3.4964e+07: TOTAL:5114.18Mcy, MAX:177.422cy, MIN:146.115cy, 6836.68 calls/Mcy
156 lines
3.9 KiB
C
156 lines
3.9 KiB
C
/* Round to long int long double floating-point values.
|
|
IBM extended format long double version.
|
|
Copyright (C) 2006-2013 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <math.h>
|
|
#include <fenv_libc.h>
|
|
#include <math_ldbl_opt.h>
|
|
#include <float.h>
|
|
#include <ieee754.h>
|
|
|
|
|
|
/* Round the IBM extended format long double X to a long, honouring the
   rounding mode obtained from __fegetround.

   X is unpacked into a high double XH and a low double XL.  XH is
   converted to an integer first; what is left of XH is renormalized
   together with XL and converted in turn.  The sum of both conversions
   is then adjusted by at most one unit, depending on the rounding mode
   and on the sign of the remaining fraction.

   Returns the rounded value.  If the high part is out of range, or if
   one of the integer additions overflows, the function returns HI as
   computed so far (the most positive or most negative long for an
   out-of-range XH, 0 for a NaN) and raises FE_INVALID when that macro
   is defined.

   All integer additions that may overflow are carried out in unsigned
   long, where wraparound is well defined, and converted back to long;
   the sign-bit tests that follow detect the wraparound.  Performing
   them directly on long would be undefined behaviour and would allow
   the compiler to discard those tests.  */
long
__lrintl (long double x)
{
  double xh, xl;
  long res, hi, lo;
  int save_round;

  ldbl_unpack (x, &xh, &xl);

  /* Limit the range of values handled by the conversion to long.
     We do this because we aren't sure whether that conversion properly
     raises FE_INVALID.  */
  if (
#if __LONG_MAX__ == 2147483647
      __builtin_expect
      ((__builtin_fabs (xh) <= (double) __LONG_MAX__ + 2), 1)
#else
      __builtin_expect
      ((__builtin_fabs (xh) <= -(double) (-__LONG_MAX__ - 1)), 1)
#endif
#if !defined (FE_INVALID)
      || 1
#endif
    )
    {
      save_round = __fegetround ();

#if __LONG_MAX__ == 2147483647
      /* Convert through long long and saturate by hand when the value
	 does not fit in a 32-bit long.  */
      long long llhi = (long long) xh;
      if (llhi != (long) llhi)
	hi = llhi < 0 ? -__LONG_MAX__ - 1 : __LONG_MAX__;
      else
	hi = llhi;
      xh -= hi;
#else
      if (__builtin_expect ((xh == -(double) (-__LONG_MAX__ - 1)), 0))
	{
	  /* When XH is 9223372036854775808.0, converting to long long will
	     overflow, resulting in an invalid operation.  However, XL might
	     be negative and of sufficient magnitude that the overall long
	     double is in fact in range.  Avoid raising an exception.  In any
	     case we need to convert this value specially, because
	     the converted value is not exactly represented as a double
	     thus subtracting HI from XH suffers rounding error.  */
	  hi = __LONG_MAX__;
	  xh = 1.0;
	}
      else
	{
	  hi = (long) xh;
	  xh -= hi;
	}
#endif
      ldbl_canonicalize (&xh, &xl);

      lo = (long) xh;

      /* Peg at max/min values, assuming that the above conversions do so.
	 Strictly speaking, we can return anything for values that overflow,
	 but this is more useful.  The addition is done in unsigned long so
	 that an overflow wraps instead of being undefined.  */
      res = (long) ((unsigned long) hi + (unsigned long) lo);

      /* This is just sign(hi) == sign(lo) && sign(res) != sign(hi).  */
      if (__builtin_expect (((~(hi ^ lo) & (res ^ hi)) < 0), 0))
	goto overflow;

      xh -= lo;
      ldbl_canonicalize (&xh, &xl);

      /* Remember the unadjusted sum: it is both the reference for the
	 final overflow test and the value returned on overflow.  */
      hi = res;
      switch (save_round)
	{
	case FE_TONEAREST:
	  /* No adjustment when the fraction is below one half, or is
	     exactly one half and either XL pulls it back below one half
	     or RES is already even (ties-to-even).  */
	  if (fabs (xh) < 0.5
	      || (fabs (xh) == 0.5
		  && ((xh > 0.0 && xl < 0.0)
		      || (xh < 0.0 && xl > 0.0)
		      || (xl == 0.0 && (res & 1) == 0))))
	    return res;

	  if (xh < 0.0)
	    res = (long) ((unsigned long) res - 1UL);
	  else
	    res = (long) ((unsigned long) res + 1UL);
	  break;

	case FE_TOWARDZERO:
	  /* These adjustments only move RES towards zero, so they cannot
	     overflow and no overflow test is needed.  */
	  if (res > 0 && (xh < 0.0 || (xh == 0.0 && xl < 0.0)))
	    res -= 1;
	  else if (res < 0 && (xh > 0.0 || (xh == 0.0 && xl > 0.0)))
	    res += 1;
	  return res;

	case FE_UPWARD:
	  if (xh > 0.0 || (xh == 0.0 && xl > 0.0))
	    res = (long) ((unsigned long) res + 1UL);
	  break;

	case FE_DOWNWARD:
	  if (xh < 0.0 || (xh == 0.0 && xl < 0.0))
	    res = (long) ((unsigned long) res - 1UL);
	  break;

	default:
	  /* Any other rounding mode value: leave RES unadjusted.  */
	  break;
	}

      /* Same sign test as above, applied to the adjustment RES - HI
	 (computed with wraparound so it is well defined even when the
	 adjustment overflowed).  */
      if (__builtin_expect (((~(hi ^ (long) ((unsigned long) res
					     - (unsigned long) hi))
			      & (res ^ hi)) < 0), 0))
	goto overflow;

      return res;
    }
  else
    {
      if (xh > 0.0)
	hi = __LONG_MAX__;
      else if (xh < 0.0)
	hi = -__LONG_MAX__ - 1;
      else
	/* Nan */
	hi = 0;
    }

overflow:
#ifdef FE_INVALID
  feraiseexcept (FE_INVALID);
#endif
  return hi;
}
|
|
|
|
/* NOTE(review): long_double_symbol is not defined in this file;
   presumably it exports __lrintl from libm under the public name
   lrintl -- confirm against <math_ldbl_opt.h>.  */
long_double_symbol (libm, __lrintl, lrintl);
|