2000-09-10  Bruno Haible  <haible@clisp.cons.org>

	* locale/programs/ld-collate.c (collate_read): Fix typo in handling
	of decimal ellipsis.

2000-09-11  Bruno Haible <haible@clisp.cons.org>

	* locale/programs/ld-collate.c (collate_read): Always initialize
	error_section.next.

2000-09-10  Bruno Haible  <haible@clisp.cons.org>

	* locale/programs/ld-collate.c (collate_finish): Upper bound for
	ruleidx is 128, not 256.

2000-09-11  Ulrich Drepper  <drepper@redhat.com>

	* locale/programs/ld-collate.c (collate_read): Correct check for
	already inserted entries.

2000-09-10  Bruno Haible <haible@clisp.cons.org>

	* iconv/skeleton.c (FUNCTION_NAME): Handle unaligned access in
	second try as well.

2000-09-10  Bruno Haible <haible@clisp.cons.org>

	* iconv/skeleton.c (FUNCTION_NAME): Optimize an `if' if
	MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1.

2000-09-10  Bruno Haible <haible@clisp.cons.org>

	* iconv/skeleton.c (gconv_init): Replace all uses of RESET_STATE with
	SAVE_RESET_STATE.

2000-09-10  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/utf-7.c: New file.
	* iconvdata/gconv-modules (UTF-7): New module entries.
	* iconvdata/Makefile (modules): Add UTF-7.
	(distribute): Add utf-7.c.
	* iconvdata/testdata/UTF-7: New file.
	* iconvdata/testdata/UTF-7..UTF8: New file.
	* iconvdata/TESTS (UTF-7): New entry.
	* iconvdata/run-iconv-test.sh: Fix confusing output.
This commit is contained in:
Ulrich Drepper 2000-09-11 20:33:59 +00:00
parent b81c896174
commit f6ad47269a
10 changed files with 774 additions and 58 deletions

View File

@ -1,3 +1,49 @@
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* locale/programs/ld-collate.c (collate_read): Fix typo in handling
of decimal ellipsis.
2000-09-11 Bruno Haible <haible@clisp.cons.org>
* locale/programs/ld-collate.c (collate_read): Always initialize
error_section.next.
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* locale/programs/ld-collate.c (collate_finish): Upper bound for
ruleidx is 128, not 256.
2000-09-11 Ulrich Drepper <drepper@redhat.com>
* locale/programs/ld-collate.c (collate_read): Correct check for
already inserted entries.
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* iconv/skeleton.c (FUNCTION_NAME): Handle unaligned access in
second try as well.
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* iconv/skeleton.c (FUNCTION_NAME): Optimize an `if' if
MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1.
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* iconv/skeleton.c (gconv_init): Replace all uses of RESET_STATE with
SAVE_RESET_STATE.
2000-09-10 Bruno Haible <haible@clisp.cons.org>
* iconvdata/utf-7.c: New file.
* iconvdata/gconv-modules (UTF-7): New module entries.
* iconvdata/Makefile (modules): Add UTF-7.
(distribute): Add utf-7.c.
* iconvdata/testdata/UTF-7: New file.
* iconvdata/testdata/UTF-7..UTF8: New file.
* iconvdata/TESTS (UTF-7): New entry.
* iconvdata/run-iconv-test.sh: Fix confusing output.
2000-09-11 Ulrich Drepper <drepper@redhat.com>
* sysdeps/posix/tempname.c (__gen_tempname): Use __lxstat and __xstat.

View File

@ -57,7 +57,12 @@
from the current characters.
TO_LOOP likewise for the other direction
RESET_STATE in case of an error we must reset the state for
ONE_DIRECTION optional. If defined to 1, only one conversion
direction is defined instead of two. In this
case, FROM_DIRECTION should be defined to 1, and
FROM_LOOP and TO_LOOP should have the same value.
SAVE_RESET_STATE in case of an error we must reset the state for
the rerun so this macro must be defined for
stateful encodings. It takes an argument which
is nonzero when saving.
@ -184,8 +189,8 @@ static int to_object;
#endif
/* For conversions from a fixed width character sets to another fixed width
character set we we can define RESET_INPUT_BUFFER is necessary. */
/* For conversions from a fixed width character set to another fixed width
character set we can define RESET_INPUT_BUFFER in a very fast way. */
#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
/* We have to use these `if's here since the compiler cannot know that
@ -233,7 +238,7 @@ gconv_init (struct __gconv_step *step)
else
return __GCONV_NOCONV;
#ifdef RESET_STATE
#ifdef SAVE_RESET_STATE
step->__stateful = 1;
#else
step->__stateful = 0;
@ -245,8 +250,8 @@ gconv_init (struct __gconv_step *step)
/* The default destructor function does nothing in the moment and so
be define it at all. But we still provide the macro just in case
we need it some day. */
we don't define it at all. But we still provide the macro just in
case we need it some day. */
#if DEFINE_FINI
#endif
@ -339,7 +344,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
/* If the function is used to implement the mb*towc*() or wc*tomb*()
functions we must test whether any bytes from the last call are
stored in the `state' object. */
if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
|| (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
|| (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
&& consume_incomplete && (data->__statep->__count & 7) != 0)
{
@ -491,23 +497,44 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
SAVE_RESET_STATE (0);
# endif
/* XXX Handle unaligned access here as well. */
if (__builtin_expect (!unaligned, 1))
{
if (FROM_DIRECTION)
/* Run the conversion loop. */
nstatus = FROM_LOOP (step, data,
(const unsigned char **) inptrp,
(const unsigned char *) inend,
(unsigned char **) &outbuf,
nstatus = FROM_LOOP (step, data, inptrp, inend,
&outbuf,
(unsigned char *) outerr,
lirreversiblep EXTRA_LOOP_ARGS);
lirreversiblep
EXTRA_LOOP_ARGS);
else
/* Run the conversion loop. */
nstatus = TO_LOOP (step, data,
(const unsigned char **) inptrp,
(const unsigned char *) inend,
(unsigned char **) &outbuf,
nstatus = TO_LOOP (step, data, inptrp, inend,
&outbuf,
(unsigned char *) outerr,
lirreversiblep EXTRA_LOOP_ARGS);
lirreversiblep
EXTRA_LOOP_ARGS);
}
# if !defined _STRING_ARCH_unaligned \
&& MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
&& MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
else
{
if (FROM_DIRECTION)
/* Run the conversion loop. */
nstatus = GEN_unaligned (FROM_LOOP) (step, data,
inptrp, inend,
(unsigned char *) outerr,
lirreversiblep
EXTRA_LOOP_ARGS);
else
/* Run the conversion loop. */
nstatus = GEN_unaligned (TO_LOOP) (step, data,
inptrp, inend,
(unsigned char *) outerr,
lirreversiblep
EXTRA_LOOP_ARGS);
}
# endif
/* We must run out of output buffer space in this
rerun. */
@ -540,7 +567,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
/* If we are supposed to consume all character store now all of the
remaining characters in the `state' object. */
#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
|| (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
|| (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
&& __builtin_expect (consume_incomplete, 0)
&& status == __GCONV_INCOMPLETE_INPUT)
@ -580,7 +608,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
#undef EMIT_SHIFT_TO_INIT
#undef FROM_LOOP
#undef TO_LOOP
#undef RESET_STATE
#undef SAVE_RESET_STATE
#undef RESET_INPUT_BUFFER
#undef FUNCTION_NAME
#undef PREPARE_LOOP

View File

@ -45,7 +45,8 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \
INIS-CYRILLIC ISO_6937-2 ISO_2033 ISO_5427 ISO_5427-EXT \
ISO_5428 ISO_10367-BOX MAC-IS MAC-UK NATS-DANO NATS-SEFI \
SAMI-WS2 ISO-IR-197 TIS-620 KOI8-U GBK ISIRI-3342 GBGBK \
ISO-2022-CN libISOIR165 UTF-16 UNICODE BIG5HKSCS GB18030
ISO-2022-CN libISOIR165 UTF-16 UNICODE UTF-7 BIG5HKSCS \
GB18030
modules.so := $(addsuffix .so, $(modules))
@ -125,8 +126,8 @@ distribute := gconv-modules extra-module.mk gap.awk gaptab.awk \
macintosh.c mac-is.c mac-uk.c nats-dano.c nats-sefi.c sjis.c \
t.61.c uhc.c sami-ws2.c iso-ir-197.c tis-620.c koi8-u.c \
isiri-3342.c isiri-3342.h gbgbk.c iso-2022-cn.c cns11643l2.h \
iso8859-16.c utf-16.c unicode.c big5hkscs.c iso-ir-165.c \
iso-ir-165.h gb18030.c
iso8859-16.c utf-16.c unicode.c utf-7.c big5hkscs.c \
iso-ir-165.c iso-ir-165.h gb18030.c
# We build the transformation modules only when we build shared libs.
ifeq (yes,$(build-shared))

View File

@ -81,3 +81,4 @@ EUC-KR EUC-KR Y UTF8
EUC-CN EUC-CN Y UTF8
GBK GBK Y UTF8
BIG5HKSCS BIG5HKSCS Y UTF8
UTF-7 UTF-7 N UTF8

View File

@ -1194,6 +1194,10 @@ alias CSUNICODE// UNICODE//
module UNICODE// INTERNAL UNICODE 1
module INTERNAL UNICODE// UNICODE 1
# from to module cost
module UTF-7// INTERNAL UTF-7 1
module INTERNAL UTF-7// UTF-7 1
# from to module cost
module GB18030// INTERNAL GB18030 1
module INTERNAL GB18030// GB18030 1

View File

@ -100,7 +100,7 @@ while read from to subset targets; do
{ echo "/FAILED";
failed=1; continue; }
else
echo $ac_n " suntzu: $from -> ASCII -> $to $ac_c"
echo $ac_n " suntzu: ASCII -> $to -> ASCII $ac_c"
$PROG -f ASCII -t $to testdata/suntzus |
$PROG -f $to -t ASCII > $temp1 ||
{ if test $? -gt 128; then exit 1; fi

25
iconvdata/testdata/UTF-7 vendored Normal file
View File

@ -0,0 +1,25 @@
+EqASGxItEps Amharic
+AQ0-esky Czech
Dansk Danish
English English
Suomi Finnish
Fran+AOc-ais French
Deutsch German
+A5UDuwO7A7cDvQO5A7oDrA Greek
+BeIF0QXoBdkF6g Hebrew
Italiano Italian
Norsk Norwegian
+BCAEQwRBBEEEOgQ4BDk Russian
Espa+APE-ol Spanish
Svenska Swedish
+DiAOMg4pDjIORA4XDiI Thai
T+APw-rk+AOc-e Turkish
Ti+Hr8-ng Vi+Hsc-t Vietnamese
+ZeVnLIqe Japanese
+Ti1lhw Chinese
+1VyuAA Korean
// The last line of this file is missing the end-of-line terminator
// on purpose, in order to test that the conversion empties the bit buffer
// and shifts back to the initial state at the end of the conversion.
A+ImIDkQ-

25
iconvdata/testdata/UTF-7..UTF8 vendored Normal file
View File

@ -0,0 +1,25 @@
አማርኛ Amharic
česky Czech
Dansk Danish
English English
Suomi Finnish
Français French
Deutsch German
Ελληνικά Greek
עברית Hebrew
Italiano Italian
Norsk Norwegian
Русский Russian
Español Spanish
Svenska Swedish
ภาษาไทย Thai
Türkçe Turkish
Tiếng Việt Vietnamese
日本語 Japanese
中文 Chinese
한글 Korean
// The last line of this file is missing the end-of-line terminator
// on purpose, in order to test that the conversion empties the bit buffer
// and shifts back to the initial state at the end of the conversion.
A≢Α

559
iconvdata/utf-7.c Normal file
View File

@ -0,0 +1,559 @@
/* Conversion module for UTF-7.
Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* UTF-7 is a legacy encoding used for transmitting Unicode within the
ASCII character set, used primarily by mail agents. New programs
are encouraged to use UTF-8 instead.
UTF-7 is specified in RFC 2152 (and old RFC 1641, RFC 1642). The
original Base64 encoding is defined in RFC 2045. */
#include <dlfcn.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
/* Define this to 1 if you want the so-called "optional direct" characters
! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
to be encoded. Define to 0 if you want them to be passed straight
through, like the so-called "direct" characters.
We set this to 1 because it's safer.
*/
#define UTF7_ENCODE_OPTIONAL_CHARS 1
/* The set of "direct characters":
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
*/
/* Bitmap over the 128 ASCII codes: bit (c & 7) of byte (c >> 3) is set
   exactly when c is one of the "direct characters" listed above.  */
static const unsigned char direct_tab[128/8] =
{
  0x00, 0x26, 0x00, 0x00,	/* 0x00..0x1f: tab, lf, cr */
  0x81, 0xf3, 0xff, 0x87,	/* 0x20..0x3f */
  0xfe, 0xff, 0xff, 0x07,	/* 0x40..0x5f */
  0xfe, 0xff, 0xff, 0x07	/* 0x60..0x7f */
};

/* Return 1 if CH is a "direct character", 0 otherwise.  Anything outside
   the 7-bit range is never direct.  */
static inline int
isdirect (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  unsigned int bits = direct_tab[ch / 8];
  return (bits >> (ch % 8)) & 1;
}
/* The set of "direct and optional direct characters":
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
*/
/* Bitmap over the 128 ASCII codes: bit (c & 7) of byte (c >> 3) is set
   exactly when c is a "direct" or "optional direct" character.  */
static const unsigned char xdirect_tab[128/8] =
{
  0x00, 0x26, 0x00, 0x00,	/* 0x00..0x1f: tab, lf, cr */
  0xff, 0xf7, 0xff, 0xff,	/* 0x20..0x3f: everything except '+' */
  0xff, 0xff, 0xff, 0xef,	/* 0x40..0x5f: everything except '\\' */
  0xff, 0xff, 0xff, 0x3f	/* 0x60..0x7f: everything except '~', DEL */
};

/* Return 1 if CH is a "direct" or "optional direct" character, 0
   otherwise.  Anything outside the 7-bit range is rejected.  */
static inline int
isxdirect (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  unsigned int bits = xdirect_tab[ch / 8];
  return (bits >> (ch % 8)) & 1;
}
/* The set of "extended base64 characters":
A-Z a-z 0-9 + / -
*/
/* Bitmap over the 128 ASCII codes: bit (c & 7) of byte (c >> 3) is set
   exactly when c is an "extended base64 character".  */
static const unsigned char xbase64_tab[128/8] =
{
  0x00, 0x00, 0x00, 0x00,	/* 0x00..0x1f: none */
  0x00, 0xa8, 0xff, 0x03,	/* 0x20..0x3f: + - / and 0-9 */
  0xfe, 0xff, 0xff, 0x07,	/* 0x40..0x5f: A-Z */
  0xfe, 0xff, 0xff, 0x07	/* 0x60..0x7f: a-z */
};

/* Return 1 if CH is an "extended base64 character", 0 otherwise.
   Anything outside the 7-bit range is rejected.  */
static inline int
isxbase64 (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  unsigned int bits = xbase64_tab[ch / 8];
  return (bits >> (ch % 8)) & 1;
}
/* Converts a value in the range 0..63 to a base64 encoded char. */
/* Map I, a 6-bit value, to its base64 digit.  The alphabet is A-Z, a-z,
   0-9, '+', '/', in that order.  Any value above 63 is a caller bug and
   aborts the program.  */
static inline unsigned char
base64 (unsigned int i)
{
  static const char alphabet[64 + 1] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  if (i >= 64)
    abort ();

  return (unsigned char) alphabet[i];
}
/* Definitions used in the body of the `gconv' function. */
#define CHARSET_NAME "UTF-7//"
#define DEFINE_INIT 1
#define DEFINE_FINI 1
/* Names given to the two conversion loops generated further down.  */
#define FROM_LOOP from_utf7_loop
#define TO_LOOP to_utf7_loop
/* Byte counts per character on the UTF-7 side.  The upper bound of 6
   matches the longest sequence the encoder below emits for a single
   character.  */
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 6
/* Byte counts per character on the other side: both loops move four
   bytes at a time (put32/get32 with a step of 4).  */
#define MIN_NEEDED_TO 4
#define MAX_NEEDED_TO 4
/* Locals declared for the conversion function: a scratch copy of the
   shift state used by SAVE_RESET_STATE, and a pointer to the live
   state.  */
#define PREPARE_LOOP \
mbstate_t saved_state; \
mbstate_t *statep = data->__statep;
/* Hands the state pointer to the loops as a trailing argument; the
   matching parameter is declared through EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS , statep
/* Since we might have to reset input pointer we must be able to save
and restore the state. */
/* A nonzero SAVE copies the live state into saved_state; zero copies
   saved_state back into the live state.  */
#define SAVE_RESET_STATE(Save) \
if (Save) \
saved_state = *statep; \
else \
*statep = saved_state
/* First define the conversion function from UTF-7 to UCS4.
The state is structured as follows:
__count bit 2..0: zero
__count bit 8..3: shift
__wch: data
Precise meaning:
shift data
0 -- not inside base64 encoding
1..32 XX..XX00..00 inside base64, (32 - shift) bits pending
This state layout is simpler than relying on STORE_REST/UNPACK_BYTES.
When shift = 0, __wch needs to store at most one lookahead byte (see
__GCONV_INCOMPLETE_INPUT below).
*/
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* One step of the UTF-7 decoder.  It looks at the byte at INPTR and,
   depending on the shift state in STATEP:
     - outside base64: copies a direct character, handles the "+-"
       escape for a literal '+', or switches into base64 mode on '+';
     - inside base64: appends six bits to the accumulator in __wch and
       emits a character whenever a full UTF-16 unit (or surrogate pair)
       has been collected; any non-base64 byte ends the base64 run.
   Each emitted character is written with put32 and advances OUTPTR by
   four bytes.  Problems are reported through RESULT followed by `break';
   `continue' restarts the step on the next byte.  */
#define BODY \
  { \
    uint_fast8_t ch = *inptr; \
    \
    if ((statep->__count >> 3) == 0) \
      { \
        /* base64 encoding inactive. */ \
        if (isxdirect (ch)) \
          { \
            inptr++; \
            put32 (outptr, ch); \
            outptr += 4; \
          } \
        else if (__builtin_expect (ch == '+', 1)) \
          { \
            /* The byte after the '+' decides between the "+-" escape and \
               the start of a base64 run, so two bytes must be readable. \
               That is the case exactly when inptr + 2 <= inend; only a \
               '+' that is the very last byte is incomplete. */ \
            if (__builtin_expect (inptr + 2 > inend, 0)) \
              { \
                /* Not enough input available. */ \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            if (inptr[1] == '-') \
              { \
                inptr += 2; \
                put32 (outptr, ch); \
                outptr += 4; \
              } \
            else \
              { \
                /* Switch into base64 mode. */ \
                inptr++; \
                statep->__count = (32 << 3); \
                statep->__value.__wch = 0; \
              } \
          } \
        else \
          { \
            /* The input is invalid. */ \
            if (! ignore_errors_p ()) \
              { \
                result = __GCONV_ILLEGAL_INPUT; \
                break; \
              } \
            \
            ++inptr; \
            ++*irreversible; \
          } \
      } \
    else \
      { \
        /* base64 encoding active. */ \
        uint32_t i; \
        int shift; \
        \
        if (ch >= 'A' && ch <= 'Z') \
          i = ch - 'A'; \
        else if (ch >= 'a' && ch <= 'z') \
          i = ch - 'a' + 26; \
        else if (ch >= '0' && ch <= '9') \
          i = ch - '0' + 52; \
        else if (ch == '+') \
          i = 62; \
        else if (ch == '/') \
          i = 63; \
        else \
          { \
            /* Terminate base64 encoding. */ \
            \
            /* If accumulated data is nonzero, the input is invalid. */ \
            /* Also, partial UTF-16 characters are invalid. */ \
            if (__builtin_expect (statep->__value.__wch != 0, 0) \
                || __builtin_expect ((statep->__count >> 3) <= 26, 0)) \
              { \
                if (! ignore_errors_p ()) \
                  { \
                    result = __GCONV_ILLEGAL_INPUT; \
                    break; \
                  } \
                \
                ++inptr; \
                ++*irreversible; \
                statep->__count = 0; \
                continue; \
              } \
            \
            /* A '-' terminator is swallowed; any other terminating byte \
               is left in place and decoded as ordinary input next. */ \
            if (ch == '-') \
              inptr++; \
            \
            statep->__count = 0; \
            continue; \
          } \
        \
        /* Concatenate the base64 integer i to the accumulator. */ \
        shift = (statep->__count >> 3); \
        if (shift > 6) \
          { \
            uint32_t wch; \
            \
            shift -= 6; \
            wch = statep->__value.__wch | (i << shift); \
            \
            if (shift <= 16 && shift > 10) \
              { \
                /* A UTF-16 character has just been completed. */ \
                uint32_t wc1 = wch >> 16; \
                \
                /* UTF-16: When we see a High Surrogate, we must also decode \
                   the following Low Surrogate. */ \
                if (!(wc1 >= 0xd800 && wc1 < 0xdc00)) \
                  { \
                    wch = wch << 16; \
                    shift += 16; \
                    put32 (outptr, wc1); \
                    outptr += 4; \
                  } \
              } \
            else if (shift <= 10 && shift > 4) \
              { \
                /* After a High Surrogate, verify that the next 16 bits \
                   indeed form a Low Surrogate. */ \
                uint32_t wc2 = wch & 0xffff; \
                \
                if (! __builtin_expect (wc2 >= 0xdc00 && wc2 < 0xe000, 1)) \
                  { \
                    if (! ignore_errors_p ()) \
                      { \
                        result = __GCONV_ILLEGAL_INPUT; \
                        break; \
                      } \
                    \
                    ++inptr; \
                    ++*irreversible; \
                    statep->__count = 0; \
                    continue; \
                  } \
              } \
            \
            statep->__value.__wch = wch; \
          } \
        else \
          { \
            /* A UTF-16 surrogate pair has just been completed. */ \
            uint32_t wc1 = (uint32_t) statep->__value.__wch >> 16; \
            uint32_t wc2 = ((uint32_t) statep->__value.__wch & 0xffff) \
                           | (i >> (6 - shift)); \
            \
            /* Keep the low `shift' bits of i as the first pending bits \
               of the next UTF-16 unit. */ \
            statep->__value.__wch = (i << shift) << 26; \
            shift += 26; \
            \
            assert (wc1 >= 0xd800 && wc1 < 0xdc00); \
            assert (wc2 >= 0xdc00 && wc2 < 0xe000); \
            put32 (outptr, \
                   0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00)); \
            outptr += 4; \
          } \
        \
        statep->__count = shift << 3; \
        \
        /* Now that we digested the input increment the input pointer. */ \
        inptr++; \
      } \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , mbstate_t *statep
#include <iconv/loop.c>
/* Next, define the conversion from UCS4 to UTF-7.
The state is structured as follows:
__count bit 2..0: zero
__count bit 4..3: shift
__count bit 8..5: data
Precise meaning:
shift data
0 0 not inside base64 encoding
1 0 inside base64, no pending bits
2 XX00 inside base64, 2 bits known for next byte
3 XXXX inside base64, 4 bits known for next byte
__count bit 2..0 and __wch are always zero, because this direction
never returns __GCONV_INCOMPLETE_INPUT.
*/
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MAX_NEEDED_INPUT MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* One step of the UTF-7 encoder.  It reads one 32-bit character with
   get32 and writes its UTF-7 form at OUTPTR.  Characters accepted by
   isdirect (or isxdirect when UTF7_ENCODE_OPTIONAL_CHARS is 0) are
   copied as-is; everything else is written as base64 digits of its
   UTF-16 form, with leftover bits carried in statep->__count between
   steps.  Before writing, the step computes the number of output bytes
   it needs and sets RESULT to __GCONV_FULL_OUTPUT if they do not fit.
   Values of 0x110000 and above go to STANDARD_ERR_HANDLER.  */
#define BODY \
{ \
uint32_t ch = get32 (inptr); \
\
if ((statep->__count & 0x18) == 0) \
{ \
/* base64 encoding inactive */ \
if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch)) \
{ \
*outptr++ = (unsigned char) ch; \
} \
else \
{ \
/* Output bytes needed: "+-" for a literal '+'; otherwise '+' plus \
   2 digits (16-bit character) or '+' plus 5 digits (surrogate \
   pair). */ \
size_t count; \
\
if (ch == '+') \
count = 2; \
else if (ch < 0x10000) \
count = 3; \
else if (ch < 0x110000) \
count = 6; \
else \
STANDARD_ERR_HANDLER (4); \
\
if (__builtin_expect (outptr + count > outend, 0)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
\
*outptr++ = '+'; \
if (ch == '+') \
*outptr++ = '-'; \
else if (ch < 0x10000) \
{ \
/* 16 bits: two full digits, 4 bits left pending (shift 3). */ \
*outptr++ = base64 (ch >> 10); \
*outptr++ = base64 ((ch >> 4) & 0x3f); \
statep->__count = ((ch & 15) << 5) | (3 << 3); \
} \
else if (ch < 0x110000) \
{ \
/* Split into a surrogate pair and encode it as one 32-bit value: \
   five full digits, 2 bits left pending (shift 2). */ \
uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10); \
uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff); \
\
ch = (ch1 << 16) | ch2; \
*outptr++ = base64 (ch >> 26); \
*outptr++ = base64 ((ch >> 20) & 0x3f); \
*outptr++ = base64 ((ch >> 14) & 0x3f); \
*outptr++ = base64 ((ch >> 8) & 0x3f); \
*outptr++ = base64 ((ch >> 2) & 0x3f); \
statep->__count = ((ch & 3) << 7) | (2 << 3); \
} \
else \
abort (); \
} \
} \
else \
{ \
/* base64 encoding active */ \
if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch)) \
{ \
/* deactivate base64 encoding */ \
/* Output bytes needed: one digit to flush pending bits (shift 2 \
   or 3), a '-' if the character could otherwise be read as part \
   of the base64 run, and the character itself. */ \
size_t count; \
\
count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1; \
if (__builtin_expect (outptr + count > outend, 0)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
\
if ((statep->__count & 0x18) >= 0x10) \
*outptr++ = base64 ((statep->__count >> 3) & ~3); \
if (isxbase64 (ch)) \
*outptr++ = '-'; \
*outptr++ = (unsigned char) ch; \
statep->__count = 0; \
} \
else \
{ \
/* Stay in base64 mode.  The number of digits depends on how many \
   bits are already pending. */ \
size_t count; \
\
if (ch < 0x10000) \
count = ((statep->__count & 0x18) >= 0x10 ? 3 : 2); \
else if (ch < 0x110000) \
count = ((statep->__count & 0x18) >= 0x18 ? 6 : 5); \
else \
STANDARD_ERR_HANDLER (4); \
\
if (__builtin_expect (outptr + count > outend, 0)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
\
if (ch < 0x10000) \
{ \
/* Switch on the shift field: 1 = nothing pending, 2 = 2 bits \
   pending, 3 = 4 bits pending. */ \
switch ((statep->__count >> 3) & 3) \
{ \
case 1: \
*outptr++ = base64 (ch >> 10); \
*outptr++ = base64 ((ch >> 4) & 0x3f); \
statep->__count = ((ch & 15) << 5) | (3 << 3); \
break; \
case 2: \
*outptr++ = \
base64 (((statep->__count >> 3) & ~3) | (ch >> 12)); \
*outptr++ = base64 ((ch >> 6) & 0x3f); \
*outptr++ = base64 (ch & 0x3f); \
statep->__count = (1 << 3); \
break; \
case 3: \
*outptr++ = \
base64 (((statep->__count >> 3) & ~3) | (ch >> 14)); \
*outptr++ = base64 ((ch >> 8) & 0x3f); \
*outptr++ = base64 ((ch >> 2) & 0x3f); \
statep->__count = ((ch & 3) << 7) | (2 << 3); \
break; \
default: \
abort (); \
} \
} \
else if (ch < 0x110000) \
{ \
uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10); \
uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff); \
\
/* From here on ch holds the surrogate pair as one 32-bit value. */ \
ch = (ch1 << 16) | ch2; \
switch ((statep->__count >> 3) & 3) \
{ \
case 1: \
*outptr++ = base64 (ch >> 26); \
*outptr++ = base64 ((ch >> 20) & 0x3f); \
*outptr++ = base64 ((ch >> 14) & 0x3f); \
*outptr++ = base64 ((ch >> 8) & 0x3f); \
*outptr++ = base64 ((ch >> 2) & 0x3f); \
statep->__count = ((ch & 3) << 7) | (2 << 3); \
break; \
case 2: \
*outptr++ = \
base64 (((statep->__count >> 3) & ~3) | (ch >> 28)); \
*outptr++ = base64 ((ch >> 22) & 0x3f); \
*outptr++ = base64 ((ch >> 16) & 0x3f); \
*outptr++ = base64 ((ch >> 10) & 0x3f); \
*outptr++ = base64 ((ch >> 4) & 0x3f); \
statep->__count = ((ch & 15) << 5) | (3 << 3); \
break; \
case 3: \
*outptr++ = \
base64 (((statep->__count >> 3) & ~3) | (ch >> 30)); \
*outptr++ = base64 ((ch >> 24) & 0x3f); \
*outptr++ = base64 ((ch >> 18) & 0x3f); \
*outptr++ = base64 ((ch >> 12) & 0x3f); \
*outptr++ = base64 ((ch >> 6) & 0x3f); \
*outptr++ = base64 (ch & 0x3f); \
statep->__count = (1 << 3); \
break; \
default: \
abort (); \
} \
} \
else \
abort (); \
} \
} \
\
/* Now that we wrote the output increment the input pointer. */ \
inptr += 4; \
}
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , mbstate_t *statep
#include <iconv/loop.c>
/* Since this is a stateful encoding we have to provide code which resets
the output state to the initial state. This has to be done during the
flushing. */
#define EMIT_SHIFT_TO_INIT \
if (FROM_DIRECTION) \
/* Nothing to emit. */ \
memset (data->__statep, '\0', sizeof (mbstate_t)); \
else \
{ \
/* The "to UTF-7" direction. Flush the remaining bits and terminate \
with a '-' byte. This will guarantee correct decoding if more \
UTF-7 encoded text is added afterwards. */ \
int state = data->__statep->__count; \
\
if (state & 0x18) \
{ \
/* Deactivate base64 encoding. */ \
unsigned char *outbuf = data->__outbuf; \
size_t count = ((state & 0x18) >= 0x10) + 1; \
\
if (__builtin_expect (outbuf + count > data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
if ((state & 0x18) >= 0x10) \
*outbuf++ = base64 ((state >> 3) & ~3); \
*outbuf++ = '-'; \
\
data->__outbuf = outbuf; \
data->__statep->__count = 0; \
} \
} \
else \
data->__statep->__count = 0; \
}
/* Now define the toplevel functions. */
#include <iconv/skeleton.c>

View File

@ -65,7 +65,9 @@ struct element_t;
/* Data type for list of strings. */
struct section_list
{
/* Successor in the known_sections list. */
struct section_list *def_next;
/* Successor in the sections list. */
struct section_list *next;
/* Name of the section. */
const char *name;
@ -291,6 +293,7 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
newp->next = next;
newp->name = string;
newp->first = NULL;
newp->last = NULL;
return newp;
}
@ -336,6 +339,10 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
newp->used_in_level = 0;
newp->is_character = is_character;
/* Will be assigned later. XXX */
newp->mbseqorder = 0;
newp->wcseqorder = 0;
/* Will be allocated later. */
newp->weights = NULL;
@ -350,6 +357,9 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
newp->mbnext = NULL;
newp->mblast = NULL;
newp->wcnext = NULL;
newp->wclast = NULL;
return newp;
}
@ -619,7 +629,6 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
/* It's also no collation element. So it is a character
element defined later. */
result = new_element (collate, NULL, 0, NULL, str, len, 1);
if (result != NULL)
/* Insert it into the sequence table. */
insert_entry (&collate->seq_table, str, len, result);
}
@ -660,11 +669,11 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
/* Initialize all the fields. */
elem->file = ldfile->fname;
elem->line = ldfile->lineno;
elem->last = collate->cursor;
elem->next = collate->cursor ? collate->cursor->next : NULL;
if (collate->cursor != NULL && collate->cursor->next != NULL)
collate->cursor->next->last = elem;
elem->section = collate->current_section;
if (collate->cursor != NULL)
collate->cursor->next = elem;
if (collate->start == NULL)
@ -672,9 +681,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
assert (collate->cursor == NULL);
collate->start = elem;
}
elem->weights = (struct element_list_t *)
obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
elem->section = collate->current_section;
if (collate->current_section->first == NULL)
collate->current_section->first = elem;
@ -683,6 +691,10 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
collate->cursor = elem;
elem->weights = (struct element_list_t *)
obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
weight_cnt = 0;
arg = lr_token (ldfile, charmap, repertoire);
@ -839,8 +851,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
%s: weights must use the same ellipsis symbol as the name"),
"LC_COLLATE");
/* The weight for this level has to be ignored. We use the
null pointer to indicate this. */
/* The weight for this level will depend on the element
iterating over the range. Put a placeholder. */
elem->weights[weight_cnt].w = (struct element_t **)
obstack_alloc (&collate->mempool, sizeof (struct element_t *));
elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
@ -988,8 +1000,7 @@ insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
}
/* Test whether this element is not already in the list. */
if (elem->next != NULL || (collate->cursor != NULL
&& elem->next == collate->cursor))
if (elem->next != NULL || elem == collate->cursor)
{
lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
(int) symlen, symstr, elem->file, elem->line);
@ -1434,6 +1445,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
collate->col_weight_max = -1;
}
else
/* Reuse the copy_locale's data structures. */
collate = locale->categories[LC_COLLATE].collate =
copy_locale->categories[LC_COLLATE].collate;
}
@ -1788,9 +1800,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
while (sect != NULL && sect->rules == NULL);
}
while (sect != NULL);
/* We are currently not prepared for more than 256 rulesets. But this
/* We are currently not prepared for more than 128 rulesets. But this
should never really be a problem. */
assert (ruleidx <= 256);
assert (ruleidx <= 128);
}
@ -2529,9 +2541,18 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
struct token *now;
struct token *arg = NULL;
enum token_t nowtok;
int state = 0;
enum token_t was_ellipsis = tok_none;
struct localedef_t *copy_locale = NULL;
/* Parsing state:
0 - start
1 - between `order-start' and `order-end'
2 - after `order-end'
3 - after `reorder-after', waiting for `reorder-end'
4 - after `reorder-end'
5 - after `reorder-sections-after', waiting for `reorder-sections-end'
6 - after `reorder-sections-end'
*/
int state = 0;
/* Get the repertoire we have to use. */
if (repertoire_name != NULL)
@ -2828,9 +2849,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
}
else if (ellipsis == tok_none)
{
/* The name is already defined. */
/* A single symbol, no ellipsis. */
if (check_duplicate (ldfile, collate, charmap,
repertoire, symbol, symbol_len))
/* The name is already defined. */
goto col_sym_free;
insert_entry (&collate->sym_table, symbol, symbol_len,
@ -2884,13 +2906,13 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
/* Create the name. */
sprintf (symbuf,
ellipsis == tok_ellipsis2
? "%.*s%.*lX" : "%.*s%.*lX",
? "%.*s%.*lX" : "%.*s%.*lu",
(int) prefixlen, symbol,
(int) (symbol_len - prefixlen), from);
/* The name is already defined. */
if (check_duplicate (ldfile, collate, charmap,
repertoire, symbuf, symbol_len))
/* The name is already defined. */
goto col_sym_free;
insert_entry (&collate->sym_table, symbuf,
@ -3021,8 +3043,8 @@ error while adding equivalent collating symbol"));
}
runp = (struct section_list *) xcalloc (1, sizeof (*runp));
name = strncpy (xmalloc (arg->val.str.lenmb + 1),
arg->val.str.startmb, arg->val.str.lenmb);
name = (char *) xmalloc (arg->val.str.lenmb + 1);
memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
name[arg->val.str.lenmb] = '\0';
runp->name = name;
@ -3070,6 +3092,8 @@ error while adding equivalent collating symbol"));
if (collate->error_section.first == NULL)
{
/* Insert &collate->error_section at the end of
the collate->sections list. */
if (collate->sections == NULL)
collate->sections = &collate->error_section;
else
@ -3078,9 +3102,9 @@ error while adding equivalent collating symbol"));
while (sp->next != NULL)
sp = sp->next;
collate->error_section.next = NULL;
sp->next = &collate->error_section;
}
collate->error_section.next = NULL;
}
}
else
@ -3093,6 +3117,8 @@ error while adding equivalent collating symbol"));
"LC_COLLATE", sp->name);
else
{
/* Insert sp in the collate->sections list,
right after collate->current_section. */
if (collate->current_section == NULL)
collate->current_section = sp;
else
@ -3141,6 +3167,8 @@ error while adding equivalent collating symbol"));
"LC_COLLATE");
else
{
/* Insert &collate->unnamed_section at the beginning of
the collate->sections list. */
collate->unnamed_section.next = collate->sections;
collate->sections = &collate->unnamed_section;
}
@ -3149,7 +3177,7 @@ error while adding equivalent collating symbol"));
/* Now read the direction names. */
read_directions (ldfile, arg, charmap, repertoire, collate);
/* From now be need the strings untranslated. */
/* From now we need the strings untranslated. */
ldfile->translate_strings = 0;
break;
@ -3428,7 +3456,7 @@ error while adding equivalent collating symbol"));
/* We are outside an `order_start' region. This means
we must only accept definitions of values for
collation symbols since these are purely abstract
values and don't need dorections associated. */
values and don't need directions associated. */
struct element_t *seqp;
if (find_entry (&collate->seq_table, symstr, symlen,
@ -3510,7 +3538,7 @@ error while adding equivalent collating symbol"));
if (seqp->section->first == seqp)
{
if (seqp->section->first == seqp->section->last)
/* This setion has no content anymore. */
/* This section has no content anymore. */
seqp->section->first = seqp->section->last = NULL;
else
seqp->section->first = seqp->next;
@ -3616,8 +3644,7 @@ error while adding equivalent collating symbol"));
/* See whether UNDEFINED already appeared somewhere. */
if (collate->undefined.next != NULL
|| (collate->cursor != NULL
&& collate->undefined.next == collate->cursor))
|| &collate->undefined == collate->cursor)
{
lr_error (ldfile,
_("%s: order for `%.*s' already defined at %s:%Zu"),
@ -3632,9 +3659,9 @@ error while adding equivalent collating symbol"));
repertoire, collate, tok_none);
break;
case tok_ellipsis2:
case tok_ellipsis3:
case tok_ellipsis4:
case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
case tok_ellipsis3: /* absolute ellipsis */
case tok_ellipsis4: /* symbolic decimal ellipsis */
/* This is the symbolic (decimal or hexadecimal) or absolute
ellipsis. */
if (was_ellipsis != tok_none)