Handle surrogate pairs in c16rtomb (bug 23794, DR#488, C2X).
The c16rtomb implementation has: // XXX The ISO C 11 spec I have does not say anything about handling // XXX surrogates in this interface. The DR#488 resolution, as applied to C2X, requires surrogate pairs to be handled here (so the first call returns 0 and stores the high surrogate in the mbstate_t, while the second call combines the surrogates, produces a multibyte character and returns the number of bytes written). This patch implements that. (mbrtoc16 already handled producing surrogates as output.) Tested for x86_64. [BZ #23794] * wcsmbs/c16rtomb.c (c16rtomb): Save first character of surrogate pair and return 0 in that case, and use saved character to interpret following character. * wcsmbs/tst-c16-surrogate.c: New file. * wcsmbs/Makefile (tests): Add tst-c16-surrogate.c. [$(run-built-tests) = yes] ($(objpfx)tst-c16-surrogate.out): Depend on $(gen-locales)
This commit is contained in:
parent
f997b4be18
commit
d0a7415979
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
||||
2018-10-19 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
[BZ #23794]
|
||||
* wcsmbs/c16rtomb.c (c16rtomb): Save first character of surrogate
|
||||
pair and return 0 in that case, and use saved character to
|
||||
interpret following character.
|
||||
* wcsmbs/tst-c16-surrogate.c: New file.
|
||||
* wcsmbs/Makefile (tests): Add tst-c16-surrogate.c.
|
||||
[$(run-built-tests) = yes] ($(objpfx)tst-c16-surrogate.out):
|
||||
Depend on $(gen-locales)
|
||||
|
||||
2018-10-19 Ilya Yu. Malakhov <malakhov@mcst.ru>
|
||||
|
||||
[BZ #23562]
|
||||
|
@ -51,7 +51,8 @@ tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
|
||||
tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
|
||||
tst-c16c32-1 wcsatcliff tst-wcstol-locale tst-wcstod-nan-locale \
|
||||
tst-wcstod-round test-char-types tst-fgetwc-after-eof \
|
||||
tst-wcstod-nan-sign $(addprefix test-,$(strop-tests))
|
||||
tst-wcstod-nan-sign tst-c16-surrogate \
|
||||
$(addprefix test-,$(strop-tests))
|
||||
|
||||
include ../Rules
|
||||
|
||||
@ -68,6 +69,7 @@ $(objpfx)tst-wcrtomb.out: $(gen-locales)
|
||||
$(objpfx)wcsmbs-tst1.out: $(gen-locales)
|
||||
$(objpfx)tst-wcstol-locale.out: $(gen-locales)
|
||||
$(objpfx)tst-wcstod-nan-locale.out: $(gen-locales)
|
||||
$(objpfx)tst-c16-surrogate.out: $(gen-locales)
|
||||
endif
|
||||
|
||||
$(objpfx)tst-wcstod-round: $(libm)
|
||||
|
@ -26,7 +26,42 @@ static mbstate_t state;
|
||||
size_t
|
||||
c16rtomb (char *s, char16_t c16, mbstate_t *ps)
|
||||
{
|
||||
// XXX The ISO C 11 spec I have does not say anything about handling
|
||||
// XXX surrogates in this interface.
|
||||
return wcrtomb (s, c16, ps ?: &state);
|
||||
wchar_t wc = c16;
|
||||
|
||||
if (ps == NULL)
|
||||
ps = &state;
|
||||
|
||||
if (s == NULL)
|
||||
{
|
||||
/* Reset any state relating to surrogate pairs. */
|
||||
ps->__count &= 0x7fffffff;
|
||||
ps->__value.__wch = 0;
|
||||
wc = 0;
|
||||
}
|
||||
|
||||
if (ps->__count & 0x80000000)
|
||||
{
|
||||
/* The previous call passed in the first surrogate of a
|
||||
surrogate pair. */
|
||||
ps->__count &= 0x7fffffff;
|
||||
if (wc >= 0xdc00 && wc < 0xe000)
|
||||
wc = (0x10000
|
||||
+ ((ps->__value.__wch & 0x3ff) << 10)
|
||||
+ (wc & 0x3ff));
|
||||
else
|
||||
/* This is not a low surrogate; ensure an EILSEQ error by
|
||||
trying to decode the high surrogate as a wide character on
|
||||
its own. */
|
||||
wc = ps->__value.__wch;
|
||||
ps->__value.__wch = 0;
|
||||
}
|
||||
else if (wc >= 0xd800 && wc < 0xdc00)
|
||||
{
|
||||
/* The high part of a surrogate pair. */
|
||||
ps->__count |= 0x80000000;
|
||||
ps->__value.__wch = wc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return wcrtomb (s, wc, ps);
|
||||
}
|
||||
|
89
wcsmbs/tst-c16-surrogate.c
Normal file
89
wcsmbs/tst-c16-surrogate.c
Normal file
@ -0,0 +1,89 @@
|
||||
/* Test c16rtomb handling of surrogate pairs (DR#488, bug 23794).
|
||||
Copyright (C) 2018 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <errno.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <uchar.h>
|
||||
#include <wchar.h>
|
||||
#include <array_length.h>
|
||||
#include <support/check.h>
|
||||
|
||||
static int
|
||||
do_test (void)
|
||||
{
|
||||
TEST_VERIFY_EXIT (setlocale (LC_ALL, "de_DE.UTF-8") != NULL);
|
||||
/* Test conversions of surrogate pairs. */
|
||||
for (char32_t c = 0x10000; c <= 0x10ffff; c += 0x123)
|
||||
{
|
||||
char32_t c_pos = c - 0x10000;
|
||||
char16_t c_hi = (c_pos >> 10) + 0xd800;
|
||||
char16_t c_lo = (c_pos & 0x3ff) + 0xdc00;
|
||||
printf ("testing U+0x%08x (0x%x 0x%x)\n",
|
||||
(unsigned int) c, (unsigned int) c_hi, (unsigned int) c_lo);
|
||||
char buf[16] = { 0 };
|
||||
size_t ret_hi = c16rtomb (buf, c_hi, NULL);
|
||||
TEST_COMPARE (ret_hi, 0);
|
||||
size_t ret_lo = c16rtomb (buf, c_lo, NULL);
|
||||
TEST_COMPARE (ret_lo, 4);
|
||||
wchar_t wc = 0;
|
||||
size_t ret_wc = mbrtowc (&wc, buf, 4, NULL);
|
||||
TEST_COMPARE (ret_wc, 4);
|
||||
TEST_COMPARE (wc, (wchar_t) c);
|
||||
}
|
||||
/* Test errors for invalid conversions. */
|
||||
static const char16_t err_cases[][2] =
|
||||
{
|
||||
/* High surrogate followed by non-surrogate. */
|
||||
{ 0xd800, 0x1 },
|
||||
/* High surrogate followed by another high surrogate. */
|
||||
{ 0xd800, 0xd800 },
|
||||
/* Low surrogate not following high surrogate. */
|
||||
{ 0xdc00, 0 }
|
||||
};
|
||||
for (size_t i = 0; i < array_length (err_cases); i++)
|
||||
{
|
||||
char16_t c_hi = err_cases[i][0];
|
||||
char16_t c_lo = err_cases[i][1];
|
||||
printf ("testing error case: 0x%x 0x%x\n", (unsigned int) c_hi,
|
||||
(unsigned int) c_lo);
|
||||
c16rtomb (NULL, 0, NULL);
|
||||
char buf[16] = { 0 };
|
||||
errno = 0;
|
||||
size_t ret_hi = c16rtomb (buf, c_hi, NULL);
|
||||
if (c_lo == 0)
|
||||
{
|
||||
/* Unmatched low surrogate in first place. */
|
||||
TEST_COMPARE (ret_hi, (size_t) -1);
|
||||
TEST_COMPARE (errno, EILSEQ);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* High surrogate; error in second place. */
|
||||
TEST_COMPARE (ret_hi, 0);
|
||||
errno = 0;
|
||||
size_t ret_lo = c16rtomb (buf, c_lo, NULL);
|
||||
TEST_COMPARE (ret_lo, (size_t) -1);
|
||||
TEST_COMPARE (errno, EILSEQ);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include <support/test-driver.c>
|
Loading…
x
Reference in New Issue
Block a user