Alan Modra 4cf69995e2 Fix for [BZ #15680] IBM long double inaccuracy
http://sourceware.org/ml/libc-alpha/2013-06/msg00919.html

I discovered a number of places where denormals and other corner cases
were being handled wrongly.

- printf_fphex.c: Testing for the low double exponent being zero is
unnecessary.  If the difference in exponents is less than 53 then the
high double exponent must be nearing the low end of its range, and the
low double exponent hit rock bottom.

- ldbl2mpn.c: A denormal (ie. exponent of zero) value is treated as
if the exponent was one, so shift mantissa left by one.  Code handling
normalisation of the low double mantissa lacked a test for shift count
greater than bits in type being shifted, and lacked anything to handle
the case where the difference in exponents is less than 53 as in
printf_fphex.c.

- math_ldbl.h (ldbl_extract_mantissa): Same as above, but worse, with
code testing for exponent > 1 for some reason, probably a typo for >= 1.

- math_ldbl.h (ldbl_insert_mantissa): Round the high double as per
mpn2ldbl.c (hi is odd or explicit mantissas non-zero) so that the
number we return won't change when applying ldbl_canonicalize().
Add missing overflow checks and normalisation of high mantissa.
Correct misleading comment: "The hidden bit of the lo mantissa is
zero" is not always true as can be seen from the code rounding the hi
mantissa.  Also by inspection, lzcount can never be less than zero so
remove that test.  Lastly, masking bitfields to their widths can be
left to the compiler.

- mpn2ldbl.c: The overflow checks here on rounding of high double were
just plain wrong.  Incrementing the exponent must be accompanied by a
shift right of the mantissa to keep the value unchanged.  Above notes
for ldbl_insert_mantissa are also relevant.

	[BZ #15680]
	* sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c: Comment fix.
	* sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
	(PRINT_FPHEX_LONG_DOUBLE): Tidy code by moving -53 into ediff
	calculation.  Remove unnecessary test for denormal exponent.
	* sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c (__mpn_extract_long_double):
	Correct handling of denormals.  Avoid undefined shift behaviour.
	Correct normalisation of low mantissa when low double is denormal.
	* sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
	(ldbl_extract_mantissa): Likewise.  Comment.  Use uint64_t* for hi64.
	(ldbl_insert_mantissa): Make both hi64 and lo64 parms uint64_t.
	Correct normalisation of low mantissa.  Test for overflow of high
	mantissa and normalise.
	(ldbl_nearbyint): Use more readable constant for two52.
	* sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
	(__mpn_construct_long_double): Fix test for overflow of high
	mantissa and correct normalisation.  Avoid undefined shift.
2013-10-04 10:30:56 +09:30

187 lines
5.3 KiB
C

/* Copyright (C) 1995-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#include <ieee754.h>
#include <float.h>
#include <math.h>
#include <stdlib.h>
/* Convert a `long double' in IBM extended format to a multi-precision
integer representing the significand scaled up by its number of
bits (106 for long double) and an integral power of two (MPN
frexpl). */
mp_size_t
__mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
int *expt, int *is_neg,
long double value)
{
union ibm_extended_long_double u;
unsigned long long hi, lo;
int ediff;
u.ld = value;
*is_neg = u.d[0].ieee.negative;
*expt = (int) u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
/* If the lower double is not a denormal or zero then set the hidden
53rd bit. */
if (u.d[1].ieee.exponent != 0)
lo |= 1ULL << 52;
else
lo = lo << 1;
/* The lower double is normalized separately from the upper. We may
need to adjust the lower manitissa to reflect this. */
ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
if (ediff > 0)
{
if (ediff < 64)
lo = lo >> ediff;
else
lo = 0;
}
else if (ediff < 0)
lo = lo << -ediff;
/* The high double may be rounded and the low double reflects the
difference between the long double and the rounded high double
value. This is indicated by a differnce between the signs of the
high and low doubles. */
if (u.d[0].ieee.negative != u.d[1].ieee.negative
&& lo != 0)
{
lo = (1ULL << 53) - lo;
if (hi == 0)
{
/* we have a borrow from the hidden bit, so shift left 1. */
hi = 0x0ffffffffffffeLL | (lo >> 51);
lo = 0x1fffffffffffffLL & (lo << 1);
(*expt)--;
}
else
hi--;
}
#if BITS_PER_MP_LIMB == 32
/* Combine the mantissas to be contiguous. */
res_ptr[0] = lo;
res_ptr[1] = (hi << (53 - 32)) | (lo >> 32);
res_ptr[2] = hi >> 11;
res_ptr[3] = hi >> (32 + 11);
#define N 4
#elif BITS_PER_MP_LIMB == 64
/* Combine the two mantissas to be contiguous. */
res_ptr[0] = (hi << 53) | lo;
res_ptr[1] = hi >> 11;
#define N 2
#else
#error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
/* The format does not fill the last limb. There are some zeros. */
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (LDBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
if (u.d[0].ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
if (res_ptr[0] == 0 && res_ptr[1] == 0
&& res_ptr[N - 2] == 0 && res_ptr[N - 1] == 0) /* Assumes N<=4. */
/* It's zero. */
*expt = 0;
else
{
/* It is a denormal number, meaning it has no implicit leading
one bit, and its exponent is in fact the format minimum. We
use DBL_MIN_EXP instead of LDBL_MIN_EXP below because the
latter describes the properties of both parts together, but
the exponent is computed from the high part only. */
int cnt;
#if N == 2
if (res_ptr[N - 1] != 0)
{
count_leading_zeros (cnt, res_ptr[N - 1]);
cnt -= NUM_LEADING_ZEROS;
res_ptr[N - 1] = res_ptr[N - 1] << cnt
| (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
res_ptr[0] <<= cnt;
*expt = DBL_MIN_EXP - 1 - cnt;
}
else
{
count_leading_zeros (cnt, res_ptr[0]);
if (cnt >= NUM_LEADING_ZEROS)
{
res_ptr[N - 1] = res_ptr[0] << (cnt - NUM_LEADING_ZEROS);
res_ptr[0] = 0;
}
else
{
res_ptr[N - 1] = res_ptr[0] >> (NUM_LEADING_ZEROS - cnt);
res_ptr[0] <<= BITS_PER_MP_LIMB - (NUM_LEADING_ZEROS - cnt);
}
*expt = DBL_MIN_EXP - 1
- (BITS_PER_MP_LIMB - NUM_LEADING_ZEROS) - cnt;
}
#else
int j, k, l;
for (j = N - 1; j > 0; j--)
if (res_ptr[j] != 0)
break;
count_leading_zeros (cnt, res_ptr[j]);
cnt -= NUM_LEADING_ZEROS;
l = N - 1 - j;
if (cnt < 0)
{
cnt += BITS_PER_MP_LIMB;
l--;
}
if (!cnt)
for (k = N - 1; k >= l; k--)
res_ptr[k] = res_ptr[k-l];
else
{
for (k = N - 1; k > l; k--)
res_ptr[k] = res_ptr[k-l] << cnt
| res_ptr[k-l-1] >> (BITS_PER_MP_LIMB - cnt);
res_ptr[k--] = res_ptr[0] << cnt;
}
for (; k >= 0; k--)
res_ptr[k] = 0;
*expt = DBL_MIN_EXP - 1 - l * BITS_PER_MP_LIMB - cnt;
#endif
}
}
else
/* Add the implicit leading one bit for a normalized number. */
res_ptr[N - 1] |= (mp_limb_t) 1 << (LDBL_MANT_DIG - 1
- ((N - 1) * BITS_PER_MP_LIMB));
return N;
}