mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-08 18:30:18 +00:00
More char16_t and char32_t support
It works now for UTF-8 locales
This commit is contained in:
parent
c3a8723670
commit
9954432e30
16
ChangeLog
16
ChangeLog
@ -1,5 +1,21 @@
|
||||
2012-01-07 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
* iconv/gconv_builtin.h: Use CHAR16 for the char16_t conversions.
|
||||
* iconv/gconv_simple.c: Rename char16_t routines. Add char16_t<->utf8
|
||||
routines.
|
||||
* iconv/gconv_int.h: Adjust prototypes for char16_t routines.
|
||||
* iconv/iconv_prog.c: Recognize CHAR16 as internal name.
|
||||
* wcsmbs/c16rtomb.c: Fix a few problems. Disable all the code and
|
||||
fall back to using wcrtomb.
|
||||
* wcsmbs/mbrtoc16.c: Fix implementation to handle real conversions.
|
||||
* wcsmbs/wcsmbsload.c: Make char16 routines optional. Adjust for
|
||||
renaming.
|
||||
* wcsmbs/Makefile (tests): Add tst-c16c32-1.
|
||||
* wcsmbs/tst-c16c32-1.c: New file.
|
||||
|
||||
* wcsmbs/wcrtomb.c: Use MB_LEN_MAX instead of MB_CUR_MAX for sizing
|
||||
local variable.
|
||||
|
||||
* libio/stdio.h: Do not declare gets at all for _GNU_SOURCE.
|
||||
|
||||
* elf/tst-unique3.cc: Add explicit declaration of gets.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Builtin transformations.
|
||||
Copyright (C) 1997-1999, 2000-2002, 2006, 2011 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997-1999, 2000-2002, 2006, 2011, 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
|
||||
|
||||
@ -124,22 +124,15 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1,
|
||||
#endif
|
||||
|
||||
|
||||
BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16//", 1, "=ascii->UTF-16",
|
||||
__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
|
||||
BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "CHAR16", 1, "=ascii->CHAR16",
|
||||
__gconv_transform_ascii_char16, NULL, 1, 1, 2, 4)
|
||||
|
||||
BUILTIN_TRANSFORMATION ("UTF-16//", "ANSI_X3.4-1968//", 1, "=UTF-16->ascii",
|
||||
__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
|
||||
BUILTIN_TRANSFORMATION ("CHAR16", "ANSI_X3.4-1968//", 1, "=CHAR16->ascii",
|
||||
__gconv_transform_char16_ascii, NULL, 2, 4, 1, 1)
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16BE//", 1, "=ascii->UTF-16BE",
|
||||
__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
|
||||
|
||||
BUILTIN_TRANSFORMATION ("UTF-16BE//", "ANSI_X3.4-1968//", 1, "=UTF-16BE->ascii",
|
||||
__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
|
||||
#else
|
||||
BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16LE//", 1, "=ascii->UTF-16LE",
|
||||
__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
|
||||
BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "CHAR16", 1, "=utf8->CHAR16",
|
||||
__gconv_transform_utf8_char16, NULL, 1, 6, 2, 4)
|
||||
|
||||
BUILTIN_TRANSFORMATION ("UTF-16LE//", "ANSI_X3.4-1968//", 1, "=UTF-16LE->ascii",
|
||||
__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
|
||||
#endif
|
||||
BUILTIN_TRANSFORMATION ("CHAR16", "ISO-10646/UTF8/", 1, "=CHAR16->utf8",
|
||||
__gconv_transform_char16_utf8, NULL, 2, 4, 1, 6)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 1997-2005, 2006, 2007, 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1997-2005, 2006, 2007, 2011, 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
|
||||
|
||||
@ -303,8 +303,10 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_internal_utf16);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_utf16_internal);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_ascii_utf16);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_utf16_ascii);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_ascii_char16);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_char16_ascii);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_utf8_char16);
|
||||
__BUILTIN_TRANSFORM (__gconv_transform_char16_utf8);
|
||||
/* Drop the declaration helper used for the prototypes above.  The name
   must match the macro that was actually used (__BUILTIN_TRANSFORM);
   with the previous misspelling this directive had no effect.  */
# undef __BUILTIN_TRANSFORM
|
||||
|
||||
/* Specialized conversion function for a single byte to INTERNAL, recognizing
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Simple transformations functions.
|
||||
Copyright (C) 1997-2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997-2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
|
||||
|
||||
@ -1065,6 +1065,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
|
||||
\
|
||||
state->__count = inend - *inptrp; \
|
||||
\
|
||||
assert (ch != 0xc0 && ch != 0xc1); \
|
||||
if (ch >= 0xc2 && ch < 0xe0) \
|
||||
{ \
|
||||
/* We expect two bytes. The first byte cannot be 0xc0 or \
|
||||
@ -1322,15 +1323,15 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
|
||||
#include <iconv/skeleton.c>
|
||||
|
||||
|
||||
/* Convert from ISO 646-IRV to UTF-16. */
|
||||
/* Convert from ISO 646-IRV to the char16_t format. */
|
||||
#define DEFINE_INIT 0
|
||||
#define DEFINE_FINI 0
|
||||
#define MIN_NEEDED_FROM 1
|
||||
#define MIN_NEEDED_TO 2
|
||||
#define FROM_DIRECTION 1
|
||||
#define FROM_LOOP ascii_utf16_loop
|
||||
#define TO_LOOP ascii_utf16_loop /* This is not used. */
|
||||
#define FUNCTION_NAME __gconv_transform_ascii_utf16
|
||||
#define FROM_LOOP ascii_char16_loop
|
||||
#define TO_LOOP ascii_char16_loop /* This is not used. */
|
||||
#define FUNCTION_NAME __gconv_transform_ascii_char16
|
||||
#define ONE_DIRECTION 1
|
||||
|
||||
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
|
||||
@ -1358,15 +1359,15 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
|
||||
#include <iconv/skeleton.c>
|
||||
|
||||
|
||||
/* Convert from UTF-16 to ISO 646-IRV. */
|
||||
/* Convert from the char16_t format to ISO 646-IRV. */
|
||||
#define DEFINE_INIT 0
|
||||
#define DEFINE_FINI 0
|
||||
#define MIN_NEEDED_FROM 2
|
||||
#define MIN_NEEDED_TO 1
|
||||
#define FROM_DIRECTION 1
|
||||
#define FROM_LOOP utf16_ascii_loop
|
||||
#define TO_LOOP utf16_ascii_loop /* This is not used. */
|
||||
#define FUNCTION_NAME __gconv_transform_utf16_ascii
|
||||
#define FROM_LOOP char16_ascii_loop
|
||||
#define TO_LOOP char16_ascii_loop /* This is not used. */
|
||||
#define FUNCTION_NAME __gconv_transform_char16_ascii
|
||||
#define ONE_DIRECTION 1
|
||||
|
||||
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
|
||||
@ -1383,9 +1384,328 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
|
||||
{ \
|
||||
/* It's an one byte sequence. */ \
|
||||
*outptr++ = *((const uint16_t *) inptr); \
|
||||
inptr += sizeof (uint16_t); \
|
||||
inptr += 2; \
|
||||
} \
|
||||
}
|
||||
#define LOOP_NEED_FLAGS
|
||||
#include <iconv/loop.c>
|
||||
#include <iconv/skeleton.c>
|
||||
|
||||
|
||||
/* Convert from the char16_t format to UTF-8.

   The input is read as native 16-bit units.  Units outside the surrogate
   range are encoded directly as one to three bytes.  A high surrogate
   (0xd800..0xdbff) must be followed by a low surrogate (0xdc00..0xdfff);
   the pair is combined into one code point and encoded as four bytes.
   A low surrogate that is not preceded by a high surrogate is reported
   as illegal input.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 2
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 1
#define MAX_NEEDED_TO 6
#define FROM_DIRECTION 1
#define FROM_LOOP char16_utf8_loop
#define TO_LOOP char16_utf8_loop /* This is not used.  */
#define FUNCTION_NAME __gconv_transform_char16_utf8
#define ONE_DIRECTION 1

#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    /* Read one 16-bit unit.  It is held in a 32-bit variable because a \
       surrogate pair is folded into a code point above 0xffff below.  */ \
    uint32_t wc = *((const uint16_t *) inptr); \
    inptr += 2; \
    \
    if (__builtin_expect (wc < 0x80, 1)) \
      /* It's a one-byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else \
      { \
        size_t nbytes; \
        \
        if (__builtin_expect (wc < 0xd800 || wc > 0xdfff, 1)) \
          nbytes = wc < 0x800 ? 2 : 3; \
        else \
          { \
            if (__builtin_expect (wc >= 0xdc00, 0)) \
              { \
                /* A low surrogate cannot be the first half of a pair. \
                   Move INPTR back to the offending unit so that it is \
                   the one reported (or skipped) as illegal input.  */ \
                inptr -= 2; \
                STANDARD_FROM_LOOP_ERR_HANDLER (2); \
              } \
            \
            if (__builtin_expect (inptr + 2 > inend, 0)) \
              { \
                /* We don't have enough input for another complete input \
                   character.  Undo the read of the first half.  */ \
                inptr -= 2; \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            uint32_t sec = *((const uint16_t *) inptr); \
            if (__builtin_expect (sec < 0xdc00, 0) \
                || __builtin_expect (sec > 0xdfff, 0)) \
              { \
                /* This is no valid second word for a surrogate.  */ \
                STANDARD_FROM_LOOP_ERR_HANDLER (2); \
              } \
            inptr += 2; \
            \
            /* 0xd7c0 is 0xd800 - (0x10000 >> 10): subtracting it removes \
               the surrogate tag and adds the 0x10000 offset at once.  */ \
            wc = ((wc - 0xd7c0) << 10) + (sec - 0xdc00); \
            \
            /* A well-formed pair always yields 0x10000..0x10ffff.  */ \
            nbytes = 4; \
          } \
        \
        if (__builtin_expect (outptr + nbytes > outend, 0)) \
          { \
            /* Too long.  Give back all input units read for this \
               character: two for a surrogate pair, one otherwise.  */ \
            result = __GCONV_FULL_OUTPUT; \
            inptr -= nbytes == 4 ? 4 : 2; \
            break; \
          } \
        \
        /* The lead byte carries NBYTES leading one bits; the payload \
           bits are filled in from the last byte backwards.  */ \
        unsigned char *start = outptr; \
        *outptr = (unsigned char) (0xff00 >> nbytes); \
        outptr += nbytes; \
        do \
          { \
            start[--nbytes] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (nbytes > 1); \
        start[0] |= wc; \
      } \
  }
|
||||
#define LOOP_NEED_FLAGS
|
||||
#include <iconv/loop.c>
|
||||
#include <iconv/skeleton.c>
|
||||
|
||||
|
||||
/* Convert from UTF-8 to the char16_t format.

   Each well-formed UTF-8 sequence is decoded to a code point.  Values
   below 0x10000 are stored as a single 16-bit unit, larger ones as a
   surrogate pair.  Overlong forms, encoded surrogates and values above
   0x10ffff (which no surrogate pair can represent) are reported as
   illegal input.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 6
#define MIN_NEEDED_TO 2
#define MAX_NEEDED_TO 4
#define FROM_DIRECTION 1
#define FROM_LOOP utf8_char16_loop
#define TO_LOOP utf8_char16_loop /* This is not used.  */
#define FUNCTION_NAME __gconv_transform_utf8_char16
#define ONE_DIRECTION 1

#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    /* Next input byte.  */ \
    uint32_t ch = *inptr; \
    \
    if (__builtin_expect (ch < 0x80, 1)) \
      { \
        /* One byte sequence.  */ \
        *((uint16_t *) outptr) = ch; \
        outptr += 2; \
        ++inptr; \
      } \
    else \
      { \
        uint_fast32_t cnt; \
        uint_fast32_t i; \
        \
        /* Classify the lead byte: CNT becomes the sequence length and \
           CH keeps only the payload bits of the lead byte.  */ \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or \
               0xc1, otherwise the wide character could have been \
               represented using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (*(inptr + i) & 0xc0) == 0x80 \
                   && i < 5); \
            \
          errout: \
            /* I is the number of bytes making up the bad sequence.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* We don't have enough input.  But before we report that \
               check that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
            \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            goto errout; \
          } \
        \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
            \
            if ((byte & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
            \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
        \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
            /* Do not accept UTF-16 surrogates.  */ \
            || (ch >= 0xd800 && ch <= 0xdfff) \
            /* Nothing above 0x10ffff fits into a surrogate pair.  */ \
            || ch > 0x10ffff) \
          { \
            /* This is an illegal encoding.  */ \
            goto errout; \
          } \
        \
        /* Now adjust the pointers and store the result.  */ \
        if (ch < 0x10000) \
          *((uint16_t *) outptr) = ch; \
        else \
          { \
            /* A surrogate pair needs room for two 16-bit units.  */ \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            /* 0xd7c0 is 0xd800 - (0x10000 >> 10).  */ \
            *((uint16_t *) outptr) = 0xd7c0 + (ch >> 10); \
            outptr += 2; \
            *((uint16_t *) outptr) = 0xdc00 + (ch & 0x3ff); \
          } \
        \
        outptr += 2; \
        inptr += cnt; \
      } \
  }
|
||||
#define LOOP_NEED_FLAGS
|
||||
|
||||
/* Save a truncated UTF-8 sequence in STATE.
   The low byte of state->__count receives the number of input bytes
   that were available (inend - *inptrp); the bits from bit 8 upwards
   receive CNT, the total length announced by the lead byte.
   state->__value.__wch receives the payload bits collected so far,
   shifted left by six bits for every byte that is still missing.
   *INPTRP is advanced to INEND while the bytes are collected.  */
#define STORE_REST \
|
||||
{ \
|
||||
/* We store the remaining bytes while converting them into the UCS4 \
|
||||
format. We can assume that the first byte in the buffer is \
|
||||
correct and that it requires a larger number of bytes than there \
|
||||
are in the input buffer. */ \
|
||||
wint_t ch = **inptrp; \
|
||||
size_t cnt, r; \
|
||||
\
|
||||
state->__count = inend - *inptrp; \
|
||||
\
|
||||
assert (ch != 0xc0 && ch != 0xc1); \
|
||||
if (ch >= 0xc2 && ch < 0xe0) \
|
||||
{ \
|
||||
/* We expect two bytes. The first byte cannot be 0xc0 or \
|
||||
0xc1, otherwise the wide character could have been \
|
||||
represented using a single byte. */ \
|
||||
cnt = 2; \
|
||||
ch &= 0x1f; \
|
||||
} \
|
||||
else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
|
||||
{ \
|
||||
/* We expect three bytes. */ \
|
||||
cnt = 3; \
|
||||
ch &= 0x0f; \
|
||||
} \
|
||||
else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
|
||||
{ \
|
||||
/* We expect four bytes. */ \
|
||||
cnt = 4; \
|
||||
ch &= 0x07; \
|
||||
} \
|
||||
else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
|
||||
{ \
|
||||
/* We expect five bytes. */ \
|
||||
cnt = 5; \
|
||||
ch &= 0x03; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* We expect six bytes. */ \
|
||||
cnt = 6; \
|
||||
ch &= 0x01; \
|
||||
} \
|
||||
\
|
||||
/* The first byte is already consumed. */ \
|
||||
r = cnt - 1; \
|
||||
while (++(*inptrp) < inend) \
|
||||
{ \
|
||||
ch <<= 6; \
|
||||
ch |= **inptrp & 0x3f; \
|
||||
--r; \
|
||||
} \
|
||||
\
|
||||
/* Shift for the so far missing bytes. */ \
|
||||
ch <<= r * 6; \
|
||||
\
|
||||
/* Store the number of bytes expected for the entire sequence. */ \
|
||||
state->__count |= cnt << 8; \
|
||||
\
|
||||
/* Store the value. */ \
|
||||
state->__value.__wch = ch; \
|
||||
|
||||
}
|
||||
|
||||
/* Rebuild in BYTEBUF the bytes of a partial UTF-8 sequence recorded in
   STATE.  The total sequence length is taken from state->__count >> 8
   and INLEN, the number of bytes that had been seen, from its low byte.
   bytebuf[0] gets the lead-byte marker for that length (INMASK) ORed
   with the remaining high bits of the saved value; each continuation
   position below INLEN gets 0x80 plus the matching six payload bits.  */
#define UNPACK_BYTES \
|
||||
{ \
|
||||
static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
|
||||
wint_t wch = state->__value.__wch; \
|
||||
size_t ntotal = state->__count >> 8; \
|
||||
\
|
||||
inlen = state->__count & 255; \
|
||||
\
|
||||
bytebuf[0] = inmask[ntotal - 2]; \
|
||||
\
|
||||
do \
|
||||
{ \
|
||||
if (--ntotal < inlen) \
|
||||
bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
|
||||
wch >>= 6; \
|
||||
} \
|
||||
while (ntotal > 1); \
|
||||
\
|
||||
bytebuf[0] |= wch; \
|
||||
|
||||
}
|
||||
|
||||
/* Reset the byte-count bookkeeping kept in state->__count to zero.  */
#define CLEAR_STATE \
|
||||
state->__count = 0
|
||||
|
||||
|
||||
#include <iconv/loop.c>
|
||||
#include <iconv/skeleton.c>
|
||||
|
@ -719,10 +719,12 @@ add_known_names (struct gconv_module *node)
|
||||
add_known_names (node->right);
|
||||
do
|
||||
{
|
||||
if (strcmp (node->from_string, "INTERNAL"))
|
||||
if (strcmp (node->from_string, "INTERNAL") != 0
|
||||
&& strcmp (node->from_string, "CHAR16") != 0)
|
||||
tsearch (node->from_string, &printlist,
|
||||
(__compar_fn_t) strverscmp);
|
||||
if (strcmp (node->to_string, "INTERNAL") != 0)
|
||||
if (strcmp (node->to_string, "INTERNAL") != 0
|
||||
&& strcmp (node->to_string, "CHAR16") != 0)
|
||||
tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
|
||||
|
||||
node = node->same;
|
||||
@ -748,7 +750,8 @@ insert_cache (void)
|
||||
{
|
||||
const char *str = strtab + hashtab[cnt].string_offset;
|
||||
|
||||
if (strcmp (str, "INTERNAL") != 0)
|
||||
if (strcmp (str, "INTERNAL") != 0
|
||||
&& strcmp (str, "CHAR16") != 0)
|
||||
tsearch (str, &printlist, (__compar_fn_t) strverscmp);
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011
|
||||
# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011,2012
|
||||
# Free Software Foundation, Inc.
|
||||
# This file is part of the GNU C Library.
|
||||
|
||||
@ -46,6 +46,7 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
|
||||
strop-tests := wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy
|
||||
tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
|
||||
tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
|
||||
tst-c16c32-1 \
|
||||
wcsatcliff $(addprefix test-,$(strop-tests))
|
||||
|
||||
include ../Rules
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011.
|
||||
Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@ -44,7 +44,12 @@ static mbstate_t state;
|
||||
size_t
|
||||
c16rtomb (char *s, char16_t c16, mbstate_t *ps)
|
||||
{
|
||||
char buf[MB_CUR_MAX];
|
||||
#if 1
|
||||
// XXX The ISO C 11 spec I have does not say anything about handling
|
||||
// XXX surrogates in this interface.
|
||||
return wcrtomb (s, c16, ps ?: &state);
|
||||
#else
|
||||
char buf[MB_LEN_MAX];
|
||||
struct __gconv_step_data data;
|
||||
int status;
|
||||
size_t result;
|
||||
@ -78,9 +83,9 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
|
||||
PTR_DEMANGLE (fct);
|
||||
#endif
|
||||
|
||||
/* If C16 is the NUL character we write into the output buffer the byte
|
||||
sequence necessary for PS to get into the initial state, followed
|
||||
by a NUL byte. */
|
||||
/* If C16 is the NUL character we write into the output buffer
|
||||
the byte sequence necessary for PS to get into the initial
|
||||
state, followed by a NUL byte. */
|
||||
if (c16 == L'\0')
|
||||
{
|
||||
status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
|
||||
@ -96,7 +101,8 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
|
||||
|
||||
status = DL_CALL_FCT (fct,
|
||||
(fcts->fromc16, &data, &inbuf,
|
||||
inbuf + sizeof (char16_t), NULL, &dummy, 0, 1));
|
||||
inbuf + sizeof (char16_t), NULL, &dummy,
|
||||
0, 1));
|
||||
}
|
||||
|
||||
/* There must not be any problems with the conversion but illegal input
|
||||
@ -118,4 +124,5 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
|
||||
}
|
||||
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
|
||||
Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@ -43,20 +43,32 @@ static mbstate_t state;
|
||||
size_t
|
||||
mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
char16_t buf[1];
|
||||
if (ps == NULL)
|
||||
ps = &state;
|
||||
|
||||
if (ps->__count & 0x80000000)
|
||||
{
|
||||
/* We have to return the second word for a surrogate. */
|
||||
ps->__count &= 0x7fffffff;
|
||||
*pc16 = ps->__value.__wch;
|
||||
ps->__value.__wch = L'\0';
|
||||
return (size_t) -3;
|
||||
}
|
||||
|
||||
char16_t buf[2];
|
||||
struct __gconv_step_data data;
|
||||
int status;
|
||||
size_t result;
|
||||
size_t dummy;
|
||||
const unsigned char *inbuf, *endbuf;
|
||||
unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
|
||||
unsigned char *outbuf = (unsigned char *) buf;
|
||||
const struct gconv_fcts *fcts;
|
||||
|
||||
/* Set information for this step. */
|
||||
data.__invocation_counter = 0;
|
||||
data.__internal_use = 1;
|
||||
data.__flags = __GCONV_IS_LAST;
|
||||
data.__statep = ps ?: &state;
|
||||
data.__statep = ps;
|
||||
data.__trans = NULL;
|
||||
|
||||
/* A first special case is if S is NULL. This means put PS in the
|
||||
@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
|
||||
if (fcts->toc16->__shlib_handle != NULL)
|
||||
PTR_DEMANGLE (fct);
|
||||
#endif
|
||||
|
||||
/* We first have to check whether the character can be represented
|
||||
without a surrogate. If we immediately pass in a buffer large
|
||||
enough to hold two char16_t values and the first character does
|
||||
not require a surrogate the routine will try to convert more
|
||||
input if N is larger than needed for the first character. */
|
||||
status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
|
||||
NULL, &dummy, 0, 1));
|
||||
|
||||
if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
|
||||
{
|
||||
data.__outbufend = outbuf + 2 * sizeof (char16_t);
|
||||
status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
|
||||
NULL, &dummy, 0, 1));
|
||||
}
|
||||
|
||||
/* There must not be any problems with the conversion but illegal input
|
||||
characters. The output buffer must be large enough, otherwise the
|
||||
definition of MB_CUR_MAX is not correct. All the other possible
|
||||
@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
|
||||
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|
||||
|| status == __GCONV_FULL_OUTPUT)
|
||||
{
|
||||
if (data.__outbuf != (unsigned char *) outbuf
|
||||
&& *(char16_t *) outbuf == U('\0'))
|
||||
if (pc16 != NULL)
|
||||
*pc16 = buf[0];
|
||||
|
||||
if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
|
||||
{
|
||||
/* The converted character is the NUL character. */
|
||||
assert (__mbsinit (data.__statep));
|
||||
result = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = inbuf - (const unsigned char *) s;
|
||||
|
||||
if (data.__outbuf != outbuf + 2)
|
||||
{
|
||||
/* This is a surrogate. */
|
||||
assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
|
||||
assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
|
||||
ps->__count |= 0x80000000;
|
||||
ps->__value.__wch = buf[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (status == __GCONV_INCOMPLETE_INPUT)
|
||||
result = (size_t) -2;
|
||||
|
131
wcsmbs/tst-c16c32-1.c
Normal file
131
wcsmbs/tst-c16c32-1.c
Normal file
@ -0,0 +1,131 @@
|
||||
#include <inttypes.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <uchar.h>
|
||||
|
||||
|
||||
/* Return nonzero if N is one of the special values (size_t) -1,
   (size_t) -2 or (size_t) -3 that the restartable conversion functions
   return in place of a byte count.  */
static int
is_conv_failure (size_t n)
{
  return n >= (size_t) -3;
}

/* Round-trip a sample of code points through c32rtomb, mbrtoc32,
   mbrtoc16 and c16rtomb in a UTF-8 locale.

   Returns 0 if every check passed and 1 if the locale could not be
   selected or any check failed; a message is printed for each
   failure.  */
static int
do_test (void)
{
  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
    {
      puts ("cannot set locale");
      return 1;
    }

  int result = 0;

  /* Walk from U+0030 up to U+12000 in steps of 0x1111 so that one-,
     two-, three- and four-byte UTF-8 sequences are all covered.  */
  for (char32_t c32 = 48; c32 <= U'\x12000'; c32 += 0x1111)
    {
      /* Surrogate code points (U+D800..U+DFFF) are not characters.  */
      if (c32 >= 0xd800 && c32 <= 0xdfff)
        continue;

      char buf[20];
      size_t n1 = c32rtomb (buf, c32, NULL);
      /* N1 is unsigned, so an error has to be detected by comparing
         against the special values rather than by testing for < 0.  */
      if (n1 == 0 || is_conv_failure (n1))
        {
          printf ("c32rtomb for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
          result = 1;
          continue;
        }

      char32_t c32out;
      size_t n2 = mbrtoc32 (&c32out, buf, n1, NULL);
      if (is_conv_failure (n2))
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
          result = 1;
          continue;
        }
      if (n2 != n1)
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' consumed %zu bytes, not %zu\n",
                  (uint32_t) c32, n2, n1);
          result = 1;
        }
      else if (c32out != c32)
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' produced U'\\x%" PRIx32 "\n",
                  (uint32_t) c32, (uint32_t) c32out);
          result = 1;
        }

      char16_t c16;
      size_t n3 = mbrtoc16 (&c16, buf, n1, NULL);
      if (n3 != n1)
        {
          printf ("mbrtoc16 for U'\\x%" PRIx32 "' did not consume all bytes\n",
                  (uint32_t) c32);
          result = 1;
          continue;
        }
      if (c32 < 0x10000)
        {
          if (c16 != c32)
            {
              printf ("mbrtoc16 for U'\\x%" PRIx32 "' produce u'\\x%" PRIx16 "'\n",
                      (uint32_t) c32, (uint16_t) c16);
              result = 1;
              continue;
            }
        }
      else
        {
          /* The first call delivered the high surrogate.  The next call
             must hand out the pending low surrogate and return -3
             without looking at the input, so give it unrelated input.  */
          buf[0] = '1';
          char16_t c16_2;
          size_t n4 = mbrtoc16 (&c16_2, buf, 1, NULL);
          if (n4 != (size_t) -3)
            {
              printf ("second mbrtoc16 for U'\\x%" PRIx32 "' did not return -3\n",
                      (uint32_t) c32);
              result = 1;
              continue;
            }

          /* Recombine the pair; 0xd7c0 is 0xd800 - (0x10000 >> 10).  */
          uint32_t combined
            = (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00);
          if (c32 != combined)
            {
              printf ("mbrtoc16 for U'\\x%" PRIx32 "' returns U'\\x%" PRIx32 "\n",
                      (uint32_t) c32, combined);
              result = 1;
              continue;
            }
        }

      /* Converting a NUL byte must yield 0.  */
      buf[0] = '\0';
      char16_t c16_nul;
      n3 = mbrtoc16 (&c16_nul, buf, n1, NULL);
      if (n3 != 0)
        {
          printf ("mbrtoc16 for '\\0' returns %zd\n", n3);
          result = 1;
          continue;
        }

      if (c32 < 0x10000)
        {
          size_t n5 = c16rtomb (buf, c16, NULL);
          if (is_conv_failure (n5))
            {
              printf ("c16rtomb for U'\\x%" PRIx32 "' failed with %zd\n",
                      (uint32_t) c32, n5);
              result = 1;
              continue;
            }
          if (n5 != n1)
            {
              printf ("c16rtomb for U'\\x%" PRIx32 "' produced %zu bytes instead of %zu bytes\n",
                      (uint32_t) c32, n5, n1);
              result = 1;
              continue;
            }
        }
    }

  return result;
}
|
||||
|
||||
#define TEST_FUNCTION do_test ()
|
||||
#include "../test-skeleton.c"
|
@ -1,4 +1,5 @@
|
||||
/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1996-1998,2000,2002,2005,2011,2012
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
|
||||
|
||||
@ -38,7 +39,7 @@ static mbstate_t state;
|
||||
size_t
|
||||
__wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
|
||||
{
|
||||
char buf[MB_CUR_MAX];
|
||||
char buf[MB_LEN_MAX];
|
||||
struct __gconv_step_data data;
|
||||
int status;
|
||||
size_t result;
|
||||
|
@ -1,4 +1,5 @@
|
||||
/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011,2012
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
|
||||
|
||||
@ -74,7 +75,7 @@ static const struct __gconv_step to_c16 =
|
||||
.__counter = INT_MAX,
|
||||
.__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
|
||||
.__to_name = (char *) "UTF-16//",
|
||||
.__fct = __gconv_transform_ascii_utf16,
|
||||
.__fct = __gconv_transform_ascii_char16,
|
||||
.__btowc_fct = NULL,
|
||||
.__init_fct = NULL,
|
||||
.__end_fct = NULL,
|
||||
@ -93,7 +94,7 @@ static const struct __gconv_step from_c16 =
|
||||
.__counter = INT_MAX,
|
||||
.__from_name = (char *) "UTF-16//",
|
||||
.__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
|
||||
.__fct = __gconv_transform_utf16_ascii,
|
||||
.__fct = __gconv_transform_char16_ascii,
|
||||
.__btowc_fct = NULL,
|
||||
.__init_fct = NULL,
|
||||
.__end_fct = NULL,
|
||||
@ -209,7 +210,7 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
|
||||
int use_translit;
|
||||
|
||||
/* Allocate the gconv_fcts structure. */
|
||||
new_fcts = malloc (sizeof *new_fcts);
|
||||
new_fcts = calloc (1, sizeof *new_fcts);
|
||||
if (new_fcts == NULL)
|
||||
goto failed;
|
||||
|
||||
@ -229,16 +230,24 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
|
||||
represent all others. */
|
||||
new_fcts->towc = __wcsmbs_getfct ("INTERNAL", complete_name,
|
||||
&new_fcts->towc_nsteps);
|
||||
new_fcts->tomb = (new_fcts->towc != NULL
|
||||
? __wcsmbs_getfct (complete_name, "INTERNAL",
|
||||
&new_fcts->tomb_nsteps)
|
||||
: NULL);
|
||||
if (new_fcts->towc != NULL)
|
||||
new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
|
||||
&new_fcts->tomb_nsteps);
|
||||
|
||||
// XXX
|
||||
new_fcts->toc16 = (struct __gconv_step *) &to_c16;
|
||||
new_fcts->toc16_nsteps = 1;
|
||||
new_fcts->fromc16 = (struct __gconv_step *) &from_c16;
|
||||
new_fcts->fromc16_nsteps = 1;
|
||||
if (new_fcts->tomb != NULL)
|
||||
{
|
||||
new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name,
|
||||
&new_fcts->toc16_nsteps);
|
||||
|
||||
if (new_fcts->toc16 != NULL)
|
||||
new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16",
|
||||
&new_fcts->fromc16_nsteps);
|
||||
else
|
||||
{
|
||||
__gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps);
|
||||
new_fcts->toc16 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* If any of the conversion functions is not available we don't
|
||||
use any since this would mean we cannot convert back and
|
||||
@ -255,6 +264,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
|
||||
}
|
||||
else
|
||||
{
|
||||
// XXX At least for now we live with the CHAR16 not being available.
|
||||
if (new_fcts->toc16 == NULL)
|
||||
new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16;
|
||||
if (new_fcts->fromc16 == NULL)
|
||||
new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16;
|
||||
|
||||
new_category->private.ctype = new_fcts;
|
||||
new_category->private.cleanup = &_nl_cleanup_ctype;
|
||||
}
|
||||
@ -277,11 +292,15 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy)
|
||||
*copy = *orig;
|
||||
|
||||
/* Now increment the usage counters.
|
||||
Note: This assumes copy->towc_nsteps == 1 and copy->tomb_nsteps == 1. */
|
||||
Note: This assumes copy->*_nsteps == 1. */
|
||||
if (copy->towc->__shlib_handle != NULL)
|
||||
++copy->towc->__counter;
|
||||
if (copy->tomb->__shlib_handle != NULL)
|
||||
++copy->tomb->__counter;
|
||||
if (copy->toc16->__shlib_handle != NULL)
|
||||
++copy->toc16->__counter;
|
||||
if (copy->fromc16->__shlib_handle != NULL)
|
||||
++copy->fromc16->__counter;
|
||||
}
|
||||
|
||||
|
||||
@ -296,30 +315,24 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
|
||||
|
||||
copy->tomb = __wcsmbs_getfct (name, "INTERNAL", ©->tomb_nsteps);
|
||||
if (copy->tomb == NULL)
|
||||
goto out_mb;
|
||||
|
||||
#if 0
|
||||
copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", ©->fromc16_nsteps);
|
||||
if (copy->fromc16 == NULL)
|
||||
goto out_fromc16;
|
||||
|
||||
copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, ©->toc16_nsteps);
|
||||
if (copy->toc16 == NULL)
|
||||
#else
|
||||
if (0)
|
||||
#endif
|
||||
{
|
||||
#if 0
|
||||
__gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
|
||||
out_fromc16:
|
||||
__gconv_close_transform (copy->tomb, copy->tomb_nsteps);
|
||||
#endif
|
||||
out_mb:
|
||||
__gconv_close_transform (copy->towc, copy->towc_nsteps);
|
||||
out_wc:
|
||||
return 1;
|
||||
}
|
||||
|
||||
copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", ©->fromc16_nsteps);
|
||||
if (copy->fromc16 == NULL)
|
||||
copy->toc16 = NULL;
|
||||
else
|
||||
{
|
||||
copy->toc16 = __wcsmbs_getfct ("CHAR16", name, ©->toc16_nsteps);
|
||||
if (copy->toc16 == NULL)
|
||||
{
|
||||
__gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
|
||||
copy->fromc16 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -335,11 +348,8 @@ _nl_cleanup_ctype (struct __locale_data *locale)
|
||||
/* Free the old conversions. */
|
||||
__gconv_close_transform (data->tomb, data->tomb_nsteps);
|
||||
__gconv_close_transform (data->towc, data->towc_nsteps);
|
||||
#if 0
|
||||
// XXX
|
||||
__gconv_close_transform (data->fromc16, data->fromc16_nsteps);
|
||||
__gconv_close_transform (data->toc16, data->toc16c_nsteps);
|
||||
#endif
|
||||
__gconv_close_transform (data->toc16, data->toc16_nsteps);
|
||||
free ((char *) data);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user