Update.

2000-09-10 Bruno Haible <haible@clisp.cons.org> * locale/programs/ld-collate.c (collate_read): Fix typo in handling of decimal ellipsis. 2000-09-11 Bruno Haible <haible@clisp.cons.org> * locale/programs/ld-collate.c (collate_read): Always initialize error_section.next. 2000-09-10 Bruno Haible <haible@clisp.cons.org> * locale/programs/ld-collate.c (collate_finish): Upper bound for ruleidx is 128, not 256. 2000-09-11 Ulrich Drepper <drepper@redhat.com> * locale/programs/ld-collate.c (collate_read): Correct check for already inserted entries. 2000-09-10 Bruno Haible <haible@clisp.cons.org> * iconv/skeleton.c (FUNCTION_NAME): Handle unaligned access in second try as well. 2000-09-10 Bruno Haible <haible@clisp.cons.org> * iconv/skeleton.c (FUNCTION_NAME): Optimize an `if' if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1. 2000-09-10 Bruno Haible <haible@clisp.cons.org> * iconv/skeleton.c (gconv_init): Replace all uses of RESET_STATE with SAVE_RESET_STATE. 2000-09-10 Bruno Haible <haible@clisp.cons.org> * iconvdata/utf-7.c: New file. * iconvdata/gconv-modules (UTF-7): New module entries. * iconvdata/Makefile (modules): Add UTF-7. (distribute): Add utf-7.c. * iconvdata/testdata/UTF-7: New file. * iconvdata/testdata/UTF-7..UTF8: New file. * iconvdata/TESTS (UTF-7): New entry. * iconvdata/run-iconv-test.sh: Fix confusing output.
2024-11-21 20:40:05 +00:00 · 2000-09-11 20:33:59 +00:00 · 2000-09-11 20:33:59 +00:00 · f6ad47269a
commit f6ad47269a
parent b81c896174
10 changed files with 774 additions and 58 deletions
--- a/46
+++ b/46
@ -1,3 +1,49 @@
+2000-09-10  Bruno Haible  <haible@clisp.cons.org>
+
+	* locale/programs/ld-collate.c (collate_read): Fix typo in handling
+	of decimal ellipsis.
+
+2000-09-11  Bruno Haible <haible@clisp.cons.org>
+
+	* locale/programs/ld-collate.c (collate_read): Always initialize
+	error_section.next.
+
+2000-09-10  Bruno Haible  <haible@clisp.cons.org>
+
+	* locale/programs/ld-collate.c (collate_finish): Upper bound for
+	ruleidx is 128, not 256.
+
+2000-09-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* locale/programs/ld-collate.c (collate_read): Correct check for
+	already inserted entries.
+
+2000-09-10  Bruno Haible <haible@clisp.cons.org>
+
+	* iconv/skeleton.c (FUNCTION_NAME): Handle unaligned access in
+	second try as well.
+
+2000-09-10  Bruno Haible <haible@clisp.cons.org>
+
+	* iconv/skeleton.c (FUNCTION_NAME): Optimize an `if' if
+	MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1.
+
+2000-09-10  Bruno Haible <haible@clisp.cons.org>
+
+	* iconv/skeleton.c (gconv_init): Replace all uses of RESET_STATE with
+	SAVE_RESET_STATE.
+
+2000-09-10  Bruno Haible  <haible@clisp.cons.org>
+
+	* iconvdata/utf-7.c: New file.
+	* iconvdata/gconv-modules (UTF-7): New module entries.
+	* iconvdata/Makefile (modules): Add UTF-7.
+	(distribute): Add utf-7.c.
+	* iconvdata/testdata/UTF-7: New file.
+	* iconvdata/testdata/UTF-7..UTF8: New file.
+	* iconvdata/TESTS (UTF-7): New entry.
+	* iconvdata/run-iconv-test.sh: Fix confusing output.
+
 2000-09-11  Ulrich Drepper  <drepper@redhat.com>

 	* sysdeps/posix/tempname.c (__gen_tempname): Use __lxstat and __xstat.
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@ -57,7 +57,12 @@
 			from the current characters.
     TO_LOOP		likewise for the other direction

-     RESET_STATE	in case of an error we must reset the state for
+     ONE_DIRECTION	optional.  If defined to 1, only one conversion
+			direction is defined instead of two.  In this
+			case, FROM_DIRECTION should be defined to 1, and
+			FROM_LOOP and TO_LOOP should have the same value.
+
+     SAVE_RESET_STATE	in case of an error we must reset the state for
 			the rerun so this macro must be defined for
 			stateful encodings.  It takes an argument which
 			is nonzero when saving.
@ -184,8 +189,8 @@ static int to_object;
 #endif


-/* For conversions from a fixed width character sets to another fixed width
-   character set we we can define RESET_INPUT_BUFFER is necessary.  */
+/* For conversions from a fixed width character set to another fixed width
+   character set we can define RESET_INPUT_BUFFER in a very fast way.  */
 #if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
 # if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
 /* We have to use these `if's here since the compiler cannot know that
@ -233,7 +238,7 @@ gconv_init (struct __gconv_step *step)
  else
    return __GCONV_NOCONV;

-#ifdef RESET_STATE
+#ifdef SAVE_RESET_STATE
  step->__stateful = 1;
 #else
  step->__stateful = 0;
@ -245,8 +250,8 @@ gconv_init (struct __gconv_step *step)


 /* The default destructor function does nothing in the moment and so
-   be define it at all.  But we still provide the macro just in case
-   we need it some day.  */
+   we don't define it at all.  But we still provide the macro just in
+   case we need it some day.  */
 #if DEFINE_FINI
 #endif

@ -339,7 +344,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
      /* If the function is used to implement the mb*towc*() or wc*tomb*()
 	 functions we must test whether any bytes from the last call are
 	 stored in the `state' object.  */
-      if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+      if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
+	   || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
 	   || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
 	  && consume_incomplete && (data->__statep->__count & 7) != 0)
 	{
@ -491,23 +497,44 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		      SAVE_RESET_STATE (0);
 # endif

-		      /* XXX Handle unaligned access here as well.  */
+		      if (__builtin_expect (!unaligned, 1))
+			{
 			  if (FROM_DIRECTION)
 			    /* Run the conversion loop.  */
-			nstatus = FROM_LOOP (step, data,
-					     (const unsigned char **) inptrp,
-					     (const unsigned char *) inend,
-					     (unsigned char **) &outbuf,
+			    nstatus = FROM_LOOP (step, data, inptrp, inend,
+						 &outbuf,
 						 (unsigned char *) outerr,
-					     lirreversiblep EXTRA_LOOP_ARGS);
+						 lirreversiblep
+						 EXTRA_LOOP_ARGS);
 			  else
 			    /* Run the conversion loop.  */
-			nstatus = TO_LOOP (step, data,
-					   (const unsigned char **) inptrp,
-					   (const unsigned char *) inend,
-					   (unsigned char **) &outbuf,
+			    nstatus = TO_LOOP (step, data, inptrp, inend,
+					       &outbuf,
 					       (unsigned char *) outerr,
-					   lirreversiblep EXTRA_LOOP_ARGS);
+					       lirreversiblep
+					       EXTRA_LOOP_ARGS);
+			}
+# if !defined _STRING_ARCH_unaligned \
+     && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
+     && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+		      else
+			{
+			  /* Unaligned buffers: rerun through the unaligned
+			     variants of the loops.  Pass the same arguments
+			     as the aligned calls in the other branch; the
+			     output pointer &outbuf must not be omitted.  */
+			  if (FROM_DIRECTION)
+			    /* Run the conversion loop.  */
+			    nstatus = GEN_unaligned (FROM_LOOP) (step, data,
+								 inptrp, inend,
+								 &outbuf,
+								 (unsigned char *) outerr,
+								 lirreversiblep
+								 EXTRA_LOOP_ARGS);
+			  else
+			    /* Run the conversion loop.  */
+			    nstatus = GEN_unaligned (TO_LOOP) (step, data,
+							       inptrp, inend,
+							       &outbuf,
+							       (unsigned char *) outerr,
+							       lirreversiblep
+							       EXTRA_LOOP_ARGS);
+			}
+# endif

 		      /* We must run out of output buffer space in this
 			 rerun.  */
@ -540,7 +567,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
      /* If we are supposed to consume all character store now all of the
 	 remaining characters in the `state' object.  */
 #if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
-      if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+      if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
+	   || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
 	   || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
 	  && __builtin_expect (consume_incomplete, 0)
 	  && status == __GCONV_INCOMPLETE_INPUT)
@ -580,7 +608,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 #undef EMIT_SHIFT_TO_INIT
 #undef FROM_LOOP
 #undef TO_LOOP
-#undef RESET_STATE
+#undef SAVE_RESET_STATE
 #undef RESET_INPUT_BUFFER
 #undef FUNCTION_NAME
 #undef PREPARE_LOOP
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@ -45,7 +45,8 @@ modules	:= ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5		 \
 	   INIS-CYRILLIC ISO_6937-2 ISO_2033 ISO_5427 ISO_5427-EXT	 \
 	   ISO_5428 ISO_10367-BOX MAC-IS MAC-UK NATS-DANO NATS-SEFI	 \
 	   SAMI-WS2 ISO-IR-197 TIS-620 KOI8-U GBK ISIRI-3342 GBGBK	 \
-	   ISO-2022-CN libISOIR165 UTF-16 UNICODE BIG5HKSCS GB18030
+	   ISO-2022-CN libISOIR165 UTF-16 UNICODE UTF-7 BIG5HKSCS	 \
+	   GB18030

 modules.so := $(addsuffix .so, $(modules))

@ -125,8 +126,8 @@ distribute := gconv-modules extra-module.mk gap.awk gaptab.awk		    \
 	      macintosh.c mac-is.c mac-uk.c nats-dano.c nats-sefi.c sjis.c  \
 	      t.61.c uhc.c sami-ws2.c iso-ir-197.c tis-620.c koi8-u.c	    \
 	      isiri-3342.c isiri-3342.h gbgbk.c iso-2022-cn.c cns11643l2.h  \
-	      iso8859-16.c utf-16.c unicode.c big5hkscs.c iso-ir-165.c	    \
-	      iso-ir-165.h gb18030.c
+	      iso8859-16.c utf-16.c unicode.c utf-7.c big5hkscs.c	    \
+	      iso-ir-165.c iso-ir-165.h gb18030.c

 # We build the transformation modules only when we build shared libs.
 ifeq (yes,$(build-shared))
--- a/iconvdata/TESTS
+++ b/iconvdata/TESTS
@ -81,3 +81,4 @@ EUC-KR			EUC-KR			Y	UTF8
 EUC-CN			EUC-CN			Y	UTF8
 GBK			GBK			Y	UTF8
 BIG5HKSCS		BIG5HKSCS		Y	UTF8
+UTF-7			UTF-7			N	UTF8
--- a/iconvdata/gconv-modules
+++ b/iconvdata/gconv-modules
@ -1194,6 +1194,10 @@ alias	CSUNICODE//		UNICODE//
 module	UNICODE//		INTERNAL		UNICODE		1
 module	INTERNAL		UNICODE//		UNICODE		1

+#	from			to			module		cost
+module	UTF-7//			INTERNAL		UTF-7		1
+module	INTERNAL		UTF-7//			UTF-7		1
+
 #	from			to			module		cost
 module	GB18030//		INTERNAL		GB18030		1
 module	INTERNAL		GB18030//		GB18030		1
--- a/iconvdata/run-iconv-test.sh
+++ b/iconvdata/run-iconv-test.sh
@ -100,7 +100,7 @@ while read from to subset targets; do
 	{ echo "/FAILED";
 	  failed=1; continue; }
    else
-      echo $ac_n "   suntzu: $from -> ASCII -> $to $ac_c"
+      echo $ac_n "   suntzu: ASCII -> $to -> ASCII $ac_c"
      $PROG -f ASCII -t $to testdata/suntzus |
      $PROG -f $to -t ASCII > $temp1 ||
        { if test $? -gt 128; then exit 1; fi
--- a/iconvdata/testdata/UTF-7
+++ b/iconvdata/testdata/UTF-7
@ -0,0 +1,25 @@
+EqASGxItEps       Amharic
+AQ0-esky      Czech
+Dansk      Danish
+English    English
+Suomi      Finnish
+Fran+AOc-ais   French
+Deutsch    German
+A5UDuwO7A7cDvQO5A7oDrA   Greek
+BeIF0QXoBdkF6g      Hebrew
+Italiano   Italian
+Norsk      Norwegian
+BCAEQwRBBEEEOgQ4BDk    Russian
+Espa+APE-ol    Spanish
+Svenska    Swedish
+DiAOMg4pDjIORA4XDiI    Thai
+T+APw-rk+AOc-e     Turkish
+Ti+Hr8-ng Vi+Hsc-t Vietnamese
+ZeVnLIqe     Japanese
+Ti1lhw       Chinese
+1VyuAA       Korean
+
+// The last line of this file is missing the end-of-line terminator
+// on purpose, in order to test that the conversion empties the bit buffer
+// and shifts back to the initial state at the end of the conversion.
+A+ImIDkQ-
--- a/iconvdata/testdata/UTF-7..UTF8
+++ b/iconvdata/testdata/UTF-7..UTF8
@ -0,0 +1,25 @@
+አማርኛ       Amharic
+česky      Czech
+Dansk      Danish
+English    English
+Suomi      Finnish
+Français   French
+Deutsch    German
+Ελληνικά   Greek
+עברית      Hebrew
+Italiano   Italian
+Norsk      Norwegian
+Русский    Russian
+Español    Spanish
+Svenska    Swedish
+ภาษาไทย    Thai
+Türkçe     Turkish
+Tiếng Việt Vietnamese
+日本語     Japanese
+中文       Chinese
+한글       Korean
+
+// The last line of this file is missing the end-of-line terminator
+// on purpose, in order to test that the conversion empties the bit buffer
+// and shifts back to the initial state at the end of the conversion.
+A≢Α
--- a/iconvdata/utf-7.c
+++ b/iconvdata/utf-7.c
@ -0,0 +1,559 @@
+/* Conversion module for UTF-7.
+   Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* UTF-7 is a legacy encoding used for transmitting Unicode within the
+   ASCII character set, used primarily by mail agents.  New programs
+   are encouraged to use UTF-8 instead.
+
+   UTF-7 is specified in RFC 2152 (and old RFC 1641, RFC 1642).  The
+   original Base64 encoding is defined in RFC 2045.  */
+
+#include <dlfcn.h>
+#include <gconv.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+
+/* Define this to 1 if you want the so-called "optional direct" characters
+      ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
+   to be encoded. Define to 0 if you want them to be passed straight
+   through, like the so-called "direct" characters.
+   We set this to 1 because it's safer.
+ */
+#define UTF7_ENCODE_OPTIONAL_CHARS 1
+
+
+/* Bitmap of the "direct characters", one bit per 7-bit code point
+   (code point C is bit C % 8 of byte C / 8):
+   A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
+*/
+
+static const unsigned char direct_tab[128/8] =
+  {
+    0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
+    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
+  };
+
+/* Return 1 if CH is a "direct character", 0 otherwise.  Values of 128
+   and above are never direct.  */
+static inline int
+isdirect (uint32_t ch)
+{
+  if (ch >= 128)
+    return 0;
+
+  return (direct_tab[ch / 8] >> (ch % 8)) & 1;
+}
+
+
+/* Bitmap of the "direct and optional direct characters", laid out like
+   a 128-bit set indexed by code point:
+   A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
+   ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
+*/
+
+static const unsigned char xdirect_tab[128/8] =
+  {
+    0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
+  };
+
+/* Return 1 if CH is a direct or optional direct character, else 0.  */
+static inline int
+isxdirect (uint32_t ch)
+{
+  return ch < 128 ? (xdirect_tab[ch / 8] >> (ch % 8)) & 1 : 0;
+}
+
+
+/* Bitmap of the "extended base64 characters", i.e. the base64 digits
+   plus the '-' terminator:
+   A-Z a-z 0-9 + / -
+*/
+
+static const unsigned char xbase64_tab[128/8] =
+  {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
+    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
+  };
+
+/* Return 1 if CH is an extended base64 character, else 0.  */
+static inline int
+isxbase64 (uint32_t ch)
+{
+  unsigned int mask;
+
+  if (ch >= 128)
+    return 0;
+
+  mask = 1u << (ch & 7);
+  return (xbase64_tab[ch >> 3] & mask) != 0;
+}
+
+
+/* Map a 6-bit value (0..63) to its base64 digit.  Any other argument is
+   a programming error and aborts.  */
+static inline unsigned char
+base64 (unsigned int i)
+{
+  static const char digits[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  /* sizeof counts the terminating NUL, so the valid indices are
+     0 .. sizeof (digits) - 2.  */
+  if (i >= sizeof (digits) - 1)
+    abort ();
+
+  return (unsigned char) digits[i];
+}
+
+
+/* Definitions used in the body of the `gconv' function.  */
+#define CHARSET_NAME		"UTF-7//"
+#define DEFINE_INIT		1
+#define DEFINE_FINI		1
+#define FROM_LOOP		from_utf7_loop
+#define TO_LOOP			to_utf7_loop
+/* On the UTF-7 side a character takes between 1 byte (a character
+   passed through directly) and 6 bytes ('+' followed by five base64
+   digits for a surrogate pair).  */
+#define MIN_NEEDED_FROM		1
+#define MAX_NEEDED_FROM		6
+/* The other side is always a 4-byte unit (read with get32, written with
+   put32).  */
+#define MIN_NEEDED_TO		4
+#define MAX_NEEDED_TO		4
+/* Locals made available in the conversion function: a backup copy of
+   the conversion state for SAVE_RESET_STATE and a shortcut to the live
+   state, which is also handed to both loops via EXTRA_LOOP_ARGS.  */
+#define PREPARE_LOOP \
+  mbstate_t saved_state;						      \
+  mbstate_t *statep = data->__statep;
+#define EXTRA_LOOP_ARGS		, statep
+
+
+/* Since we might have to reset input pointer we must be able to save
+   and restore the state.  With a nonzero argument the live state *statep
+   is copied into saved_state; with a zero argument saved_state is copied
+   back into *statep.  Both variables come from PREPARE_LOOP.
+   The expansion is a bare if/else without a trailing semicolon, so the
+   macro has to be used as a complete statement terminated by `;'.  */
+#define SAVE_RESET_STATE(Save) \
+  if (Save)								      \
+    saved_state = *statep;						      \
+  else									      \
+    *statep = saved_state
+
+
+/* First define the conversion function from UTF-7 to UCS4.
+   The state is structured as follows:
+     __count bit 2..0: zero
+     __count bit 8..3: shift
+     __wch: data
+   Precise meaning:
+     shift      data
+       0         --          not inside base64 encoding
+     1..32  XX..XX00..00     inside base64, (32 - shift) bits pending
+   This state layout is simpler than relying on STORE_REST/UNPACK_BYTES.
+
+   When shift = 0, __wch needs to store at most one lookahead byte (see
+   __GCONV_INCOMPLETE_INPUT below).
+*/
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
+#define LOOPFCT			FROM_LOOP
+/* Body of from_utf7_loop: consumes one UTF-7 input byte per pass and
+   writes complete characters as 4-byte units with put32.
+   Outside base64 mode, direct and optional direct characters are copied
+   through, "+-" yields a literal '+', and any other '+' switches into
+   base64 mode.  Inside base64 mode every digit contributes 6 bits to
+   the accumulator in __wch; completed UTF-16 units are emitted, with
+   surrogate pairs combined into one character.  Any non-base64 byte
+   ends base64 mode (a '-' terminator is swallowed).
+   Invalid input yields __GCONV_ILLEGAL_INPUT unless errors are ignored,
+   in which case the byte is skipped and *irreversible is incremented.  */
+#define BODY \
+  {									      \
+    uint_fast8_t ch = *inptr;						      \
+									      \
+    if ((statep->__count >> 3) == 0)					      \
+      {									      \
+	/* base64 encoding inactive.  */				      \
+	if (isxdirect (ch))						      \
+	  {								      \
+	    inptr++;							      \
+	    put32 (outptr, ch);						      \
+	    outptr += 4;						      \
+	  }								      \
+	else if (__builtin_expect (ch == '+', 1))			      \
+	  {								      \
+	    /* The byte after the '+' decides what it means, so one byte     \
+	       of lookahead is required.  It is missing only if fewer than   \
+	       two bytes remain; with exactly two bytes left inptr[1] is     \
+	       readable and the sequence must be processed now.  */	      \
+	    if (__builtin_expect (inptr + 2 > inend, 0))		      \
+	      {								      \
+		/* Not enough input available.  */			      \
+		result = __GCONV_INCOMPLETE_INPUT;			      \
+		break;							      \
+	      }								      \
+	    if (inptr[1] == '-')					      \
+	      {								      \
+		/* "+-" encodes a literal plus sign.  */		      \
+		inptr += 2;						      \
+		put32 (outptr, ch);					      \
+		outptr += 4;						      \
+	      }								      \
+	    else							      \
+	      {								      \
+		/* Switch into base64 mode.  */				      \
+		inptr++;						      \
+		statep->__count = (32 << 3);				      \
+		statep->__value.__wch = 0;				      \
+	      }								      \
+	  }								      \
+	else								      \
+	  {								      \
+	    /* The input is invalid.  */				      \
+	    if (! ignore_errors_p ())					      \
+	      {								      \
+		result = __GCONV_ILLEGAL_INPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    ++inptr;							      \
+	    ++*irreversible;						      \
+	  }								      \
+      }									      \
+    else								      \
+      {									      \
+	/* base64 encoding active.  */					      \
+	uint32_t i;							      \
+	int shift;							      \
+									      \
+	if (ch >= 'A' && ch <= 'Z')					      \
+	  i = ch - 'A';							      \
+	else if (ch >= 'a' && ch <= 'z')				      \
+	  i = ch - 'a' + 26;						      \
+	else if (ch >= '0' && ch <= '9')				      \
+	  i = ch - '0' + 52;						      \
+	else if (ch == '+')						      \
+	  i = 62;							      \
+	else if (ch == '/')						      \
+	  i = 63;							      \
+	else								      \
+	  {								      \
+	    /* Terminate base64 encoding.  */				      \
+									      \
+	    /* If accumulated data is nonzero, the input is invalid.  */      \
+	    /* Also, partial UTF-16 characters are invalid.  */		      \
+	    if (__builtin_expect (statep->__value.__wch != 0, 0)	      \
+		|| __builtin_expect ((statep->__count >> 3) <= 26, 0))	      \
+	      {								      \
+		if (! ignore_errors_p ())				      \
+		  {							      \
+		    result = __GCONV_ILLEGAL_INPUT;			      \
+		    break;						      \
+		  }							      \
+									      \
+		++inptr;						      \
+		++*irreversible;					      \
+		statep->__count = 0;					      \
+		continue;						      \
+	      }								      \
+									      \
+	    /* A '-' terminator is consumed; any other byte is left in	      \
+	       place and handled by the next pass in non-base64 mode.  */    \
+	    if (ch == '-')						      \
+	      inptr++;							      \
+									      \
+	    statep->__count = 0;					      \
+	    continue;							      \
+	  }								      \
+									      \
+	/* Concatenate the base64 integer i to the accumulator.  */	      \
+	shift = (statep->__count >> 3);					      \
+	if (shift > 6)							      \
+	  {								      \
+	    uint32_t wch;						      \
+									      \
+	    shift -= 6;							      \
+	    wch = statep->__value.__wch | (i << shift);			      \
+									      \
+	    if (shift <= 16 && shift > 10)				      \
+	      {								      \
+		/* An UTF-16 character has just been completed.  */	      \
+		uint32_t wc1 = wch >> 16;				      \
+									      \
+		/* UTF-16: When we see a High Surrogate, we must also decode  \
+		   the following Low Surrogate. */			      \
+		if (!(wc1 >= 0xd800 && wc1 < 0xdc00))			      \
+		  {							      \
+		    wch = wch << 16;					      \
+		    shift += 16;					      \
+		    put32 (outptr, wc1);				      \
+		    outptr += 4;					      \
+		  }							      \
+	      }								      \
+	    else if (shift <= 10 && shift > 4)				      \
+	      {								      \
+		/* After a High Surrogate, verify that the next 16 bit	      \
+		   indeed form a Low Surrogate.  */			      \
+		uint32_t wc2 = wch & 0xffff;				      \
+									      \
+		if (! __builtin_expect (wc2 >= 0xdc00 && wc2 < 0xe000, 1))    \
+		  {							      \
+		    if (! ignore_errors_p ())				      \
+		      {							      \
+			result = __GCONV_ILLEGAL_INPUT;			      \
+			break;						      \
+		      }							      \
+									      \
+		    ++inptr;						      \
+		    ++*irreversible;					      \
+		    statep->__count = 0;				      \
+		    continue;						      \
+		  }							      \
+	      }								      \
+									      \
+	    statep->__value.__wch = wch;				      \
+	  }								      \
+	else								      \
+	  {								      \
+	    /* An UTF-16 surrogate pair has just been completed.  */	      \
+	    uint32_t wc1 = (uint32_t) statep->__value.__wch >> 16;	      \
+	    uint32_t wc2 = ((uint32_t) statep->__value.__wch & 0xffff)	      \
+			   | (i >> (6 - shift));			      \
+									      \
+	    /* The bits of i not used by the pair start the next unit.  */    \
+	    statep->__value.__wch = (i << shift) << 26;			      \
+	    shift += 26;						      \
+									      \
+	    assert (wc1 >= 0xd800 && wc1 < 0xdc00);			      \
+	    assert (wc2 >= 0xdc00 && wc2 < 0xe000);			      \
+	    put32 (outptr,						      \
+		   0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00));	      \
+	    outptr += 4;						      \
+	  }								      \
+									      \
+	statep->__count = shift << 3;					      \
+									      \
+	/* Now that we digested the input increment the input pointer.  */    \
+	inptr++;							      \
+      }									      \
+  }
+#define LOOP_NEED_FLAGS
+#define EXTRA_LOOP_DECLS	, mbstate_t *statep
+#include <iconv/loop.c>
+
+
+/* Next, define the conversion from UCS4 to UTF-7.
+   The state is structured as follows:
+     __count bit 2..0: zero
+     __count bit 4..3: shift
+     __count bit 8..5: data
+   Precise meaning:
+     shift      data
+       0         0           not inside base64 encoding
+       1         0           inside base64, no pending bits
+       2       XX00          inside base64, 2 bits known for next byte
+       3       XXXX          inside base64, 4 bits known for next byte
+
+   __count bit 2..0 and __wch are always zero, because this direction
+   never returns __GCONV_INCOMPLETE_INPUT.
+*/
+#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
+#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
+#define LOOPFCT			TO_LOOP
+/* Body of to_utf7_loop: reads one 4-byte character with get32 per pass
+   and appends its UTF-7 form to the output.  Characters accepted by the
+   direct test are written as themselves (leaving base64 mode first if
+   it is active); '+' becomes "+-"; everything else below 0x110000 is
+   written as base64 digits of its UTF-16 form, with leftover bits kept
+   in statep->__count.  Values of 0x110000 and above go through
+   STANDARD_ERR_HANDLER.  */
+#define BODY \
+  {									      \
+    uint32_t ch = get32 (inptr);					      \
+									      \
+    if ((statep->__count & 0x18) == 0)					      \
+      {									      \
+	/* base64 encoding inactive */					      \
+	if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch))      \
+	  {								      \
+	    *outptr++ = (unsigned char) ch;				      \
+	  }								      \
+	else								      \
+	  {								      \
+	    size_t count;						      \
+									      \
+	    /* Output bytes needed: 2 for "+-", 3 for '+' and two digits     \
+	       of a 16-bit character, 6 for '+' and five digits of a	      \
+	       surrogate pair.  */					      \
+	    if (ch == '+')						      \
+	      count = 2;						      \
+	    else if (ch < 0x10000)					      \
+	      count = 3;						      \
+	    else if (ch < 0x110000)					      \
+	      count = 6;						      \
+	    else							      \
+	      STANDARD_ERR_HANDLER (4);					      \
+									      \
+	    if (__builtin_expect (outptr + count > outend, 0))		      \
+	      {								      \
+		result = __GCONV_FULL_OUTPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    *outptr++ = '+';						      \
+	    if (ch == '+')						      \
+	      *outptr++ = '-';						      \
+	    else if (ch < 0x10000)					      \
+	      {								      \
+		/* 12 bits go out now, the low 4 bits stay pending.  */	      \
+		*outptr++ = base64 (ch >> 10);				      \
+		*outptr++ = base64 ((ch >> 4) & 0x3f);			      \
+		statep->__count = ((ch & 15) << 5) | (3 << 3);		      \
+	      }								      \
+	    else if (ch < 0x110000)					      \
+	      {								      \
+		uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);		      \
+		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
+									      \
+		/* 30 of the pair's 32 bits go out now, 2 stay pending.  */   \
+		ch = (ch1 << 16) | ch2;					      \
+		*outptr++ = base64 (ch >> 26);				      \
+		*outptr++ = base64 ((ch >> 20) & 0x3f);			      \
+		*outptr++ = base64 ((ch >> 14) & 0x3f);			      \
+		*outptr++ = base64 ((ch >> 8) & 0x3f);			      \
+		*outptr++ = base64 ((ch >> 2) & 0x3f);			      \
+		statep->__count = ((ch & 3) << 7) | (2 << 3);		      \
+	      }								      \
+	    else							      \
+	      abort ();							      \
+	  }								      \
+      }									      \
+    else								      \
+      {									      \
+	/* base64 encoding active */					      \
+	if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch))      \
+	  {								      \
+	    /* deactivate base64 encoding */				      \
+	    size_t count;						      \
+									      \
+	    /* One byte for a digit carrying pending bits, if any, one	      \
+	       for a '-' if CH is itself an extended base64 character,	      \
+	       and one for CH.  */					      \
+	    count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1;  \
+	    if (__builtin_expect (outptr + count > outend, 0))		      \
+	      {								      \
+		result = __GCONV_FULL_OUTPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    if ((statep->__count & 0x18) >= 0x10)			      \
+	      *outptr++ = base64 ((statep->__count >> 3) & ~3);		      \
+	    if (isxbase64 (ch))						      \
+	      *outptr++ = '-';						      \
+	    *outptr++ = (unsigned char) ch;				      \
+	    statep->__count = 0;					      \
+	  }								      \
+	else								      \
+	  {								      \
+	    size_t count;						      \
+									      \
+	    /* Number of base64 digits this character completes, which	      \
+	       depends on how many bits are already pending.  */	      \
+	    if (ch < 0x10000)						      \
+	      count = ((statep->__count & 0x18) >= 0x10 ? 3 : 2);	      \
+	    else if (ch < 0x110000)					      \
+	      count = ((statep->__count & 0x18) >= 0x18 ? 6 : 5);	      \
+	    else							      \
+	      STANDARD_ERR_HANDLER (4);					      \
+									      \
+	    if (__builtin_expect (outptr + count > outend, 0))		      \
+	      {								      \
+		result = __GCONV_FULL_OUTPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    if (ch < 0x10000)						      \
+	      {								      \
+		/* Dispatch on the shift field: 1 = no pending bits,	      \
+		   2 = two pending bits, 3 = four pending bits.  */	      \
+		switch ((statep->__count >> 3) & 3)			      \
+		  {							      \
+		  case 1:						      \
+		    *outptr++ = base64 (ch >> 10);			      \
+		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
+		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
+		    break;						      \
+		  case 2:						      \
+		    *outptr++ =						      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12));    \
+		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
+		    *outptr++ = base64 (ch & 0x3f);			      \
+		    statep->__count = (1 << 3);				      \
+		    break;						      \
+		  case 3:						      \
+		    *outptr++ =						      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14));    \
+		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
+		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
+		    break;						      \
+		  default:						      \
+		    abort ();						      \
+		  }							      \
+	      }								      \
+	    else if (ch < 0x110000)					      \
+	      {								      \
+		uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);		      \
+		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
+									      \
+		/* From here on CH holds the surrogate pair, high	      \
+		   surrogate in the upper 16 bits.  */			      \
+		ch = (ch1 << 16) | ch2;					      \
+		switch ((statep->__count >> 3) & 3)			      \
+		  {							      \
+		  case 1:						      \
+		    *outptr++ = base64 (ch >> 26);			      \
+		    *outptr++ = base64 ((ch >> 20) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 14) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
+		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
+		    break;						      \
+		  case 2:						      \
+		    *outptr++ =						      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28));    \
+		    *outptr++ = base64 ((ch >> 22) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 16) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 10) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
+		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
+		    break;						      \
+		  case 3:						      \
+		    *outptr++ =						      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30));    \
+		    *outptr++ = base64 ((ch >> 24) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 18) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 12) & 0x3f);		      \
+		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
+		    *outptr++ = base64 (ch & 0x3f);			      \
+		    statep->__count = (1 << 3);				      \
+		    break;						      \
+		  default:						      \
+		    abort ();						      \
+		  }							      \
+	      }								      \
+	    else							      \
+	      abort ();							      \
+	  }								      \
+      }									      \
+									      \
+    /* Now that we wrote the output increment the input pointer.  */	      \
+    inptr += 4;								      \
+  }
+#define LOOP_NEED_FLAGS
+#define EXTRA_LOOP_DECLS	, mbstate_t *statep
+#include <iconv/loop.c>
+
+
+/* Since this is a stateful encoding we have to provide code which resets
+   the output state to the initial state.  This has to be done during the
+   flushing.
+   When decoding UTF-7 (FROM_DIRECTION) the state is simply cleared.
+   When producing UTF-7 and base64 mode is active (shift field, mask
+   0x18, nonzero), a final digit holding the pending bits is written if
+   there are any (shift >= 2), followed by '-'.  If the output buffer
+   cannot take these bytes, status is set to __GCONV_FULL_OUTPUT and the
+   state is left unchanged.  */
+#define EMIT_SHIFT_TO_INIT \
+  if (FROM_DIRECTION)							      \
+    /* Nothing to emit.  */						      \
+    memset (data->__statep, '\0', sizeof (mbstate_t));			      \
+  else									      \
+    {									      \
+      /* The "to UTF-7" direction.  Flush the remaining bits and terminate    \
+	 with a '-' byte.  This will guarantee correct decoding if more	      \
+	 UTF-7 encoded text is added afterwards.  */			      \
+      int state = data->__statep->__count;				      \
+									      \
+      if (state & 0x18)							      \
+	{								      \
+	  /* Deactivate base64 encoding.  */				      \
+	  unsigned char *outbuf = data->__outbuf;			      \
+	  size_t count = ((state & 0x18) >= 0x10) + 1;			      \
+									      \
+	  if (__builtin_expect (outbuf + count > data->__outbufend, 0))	      \
+	    /* We don't have enough room in the output buffer.  */	      \
+	    status = __GCONV_FULL_OUTPUT;				      \
+	  else								      \
+	    {								      \
+	      /* Write out the shift sequence.  */			      \
+	      if ((state & 0x18) >= 0x10)				      \
+		*outbuf++ = base64 ((state >> 3) & ~3);			      \
+	      *outbuf++ = '-';						      \
+									      \
+	      data->__outbuf = outbuf;					      \
+	      data->__statep->__count = 0;				      \
+	    }								      \
+	}								      \
+      else								      \
+	data->__statep->__count = 0;					      \
+    }
+
+
+/* Now define the toplevel functions.  */
+#include <iconv/skeleton.c>
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@ -65,7 +65,9 @@ struct element_t;
 /* Data type for list of strings.  */
 struct section_list
 {
+  /* Successor in the known_sections list.  */
  struct section_list *def_next;
+  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
@ -291,6 +293,7 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
  newp->next = next;
  newp->name = string;
  newp->first = NULL;
+  newp->last = NULL;

  return newp;
 }
@ -336,6 +339,10 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
  newp->used_in_level = 0;
  newp->is_character = is_character;

+  /* Will be assigned later.  XXX  */
+  newp->mbseqorder = 0;
+  newp->wcseqorder = 0;
+
  /* Will be allocated later.  */
  newp->weights = NULL;

@ -350,6 +357,9 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
  newp->mbnext = NULL;
  newp->mblast = NULL;

+  newp->wcnext = NULL;
+  newp->wclast = NULL;
+
  return newp;
 }

@ -619,7 +629,6 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 	  /* It's also no collation element.  So it is a character
 	     element defined later.  */
 	  result = new_element (collate, NULL, 0, NULL, str, len, 1);
-	  if (result != NULL)
 	  /* Insert it into the sequence table.  */
 	  insert_entry (&collate->seq_table, str, len, result);
 	}
@ -660,11 +669,11 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
  /* Initialize all the fields.  */
  elem->file = ldfile->fname;
  elem->line = ldfile->lineno;
+
  elem->last = collate->cursor;
  elem->next = collate->cursor ? collate->cursor->next : NULL;
  if (collate->cursor != NULL && collate->cursor->next != NULL)
    collate->cursor->next->last = elem;
-  elem->section = collate->current_section;
  if (collate->cursor != NULL)
    collate->cursor->next = elem;
  if (collate->start == NULL)
@ -672,9 +681,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
      assert (collate->cursor == NULL);
      collate->start = elem;
    }
-  elem->weights = (struct element_list_t *)
-    obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
-  memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
+
+  elem->section = collate->current_section;

  if (collate->current_section->first == NULL)
    collate->current_section->first = elem;
@ -683,6 +691,10 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,

  collate->cursor = elem;

+  elem->weights = (struct element_list_t *)
+    obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
+  memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
+
  weight_cnt = 0;

  arg = lr_token (ldfile, charmap, repertoire);
@ -839,8 +851,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 %s: weights must use the same ellipsis symbol as the name"),
 		      "LC_COLLATE");

-	  /* The weight for this level has to be ignored.  We use the
-	     null pointer to indicate this.  */
+	  /* The weight for this level will depend on the element
+	     iterating over the range.  Put a placeholder.  */
 	  elem->weights[weight_cnt].w = (struct element_t **)
 	    obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 	  elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
@ -988,8 +1000,7 @@ insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
    }

  /* Test whether this element is not already in the list.  */
-  if (elem->next != NULL || (collate->cursor != NULL
-			     && elem->next == collate->cursor))
+  if (elem->next != NULL || elem == collate->cursor)
    {
      lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
 		(int) symlen, symstr, elem->file, elem->line);
@ -1434,6 +1445,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
 	  collate->col_weight_max = -1;
 	}
      else
+	/* Reuse the copy_locale's data structures.  */
 	collate = locale->categories[LC_COLLATE].collate =
 	  copy_locale->categories[LC_COLLATE].collate;
    }
@ -1788,9 +1800,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
      while (sect != NULL && sect->rules == NULL);
    }
  while (sect != NULL);
-  /* We are currently not prepared for more than 256 rulesets.  But this
+  /* We are currently not prepared for more than 128 rulesets.  But this
     should never really be a problem.  */
-  assert (ruleidx <= 256);
+  assert (ruleidx <= 128);
 }


@ -2529,9 +2541,18 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
  struct token *now;
  struct token *arg = NULL;
  enum token_t nowtok;
-  int state = 0;
  enum token_t was_ellipsis = tok_none;
  struct localedef_t *copy_locale = NULL;
+  /* Parsing state:
+     0 - start
+     1 - between `order-start' and `order-end'
+     2 - after `order-end'
+     3 - after `reorder-after', waiting for `reorder-end'
+     4 - after `reorder-end'
+     5 - after `reorder-sections-after', waiting for `reorder-sections-end'
+     6 - after `reorder-sections-end'
+  */
+  int state = 0;

  /* Get the repertoire we have to use.  */
  if (repertoire_name != NULL)
@ -2828,9 +2849,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
 		    }
 		  else if (ellipsis == tok_none)
 		    {
-		      /* The name is already defined.  */
+		      /* A single symbol, no ellipsis.  */
 		      if (check_duplicate (ldfile, collate, charmap,
 					   repertoire, symbol, symbol_len))
+			/* The name is already defined.  */
 			goto col_sym_free;

 		      insert_entry (&collate->sym_table, symbol, symbol_len,
@ -2884,13 +2906,13 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
 			  /* Create the name.  */
 			  sprintf (symbuf,
 				   ellipsis == tok_ellipsis2
-				   ? "%.*s%.*lX" : "%.*s%.*lX",
+				   ? "%.*s%.*lX" : "%.*s%.*lu",
 				   (int) prefixlen, symbol,
 				   (int) (symbol_len - prefixlen), from);

-			  /* The name is already defined.  */
 			  if (check_duplicate (ldfile, collate, charmap,
 					       repertoire, symbuf, symbol_len))
+			    /* The name is already defined.  */
 			    goto col_sym_free;

 			  insert_entry (&collate->sym_table, symbuf,
@ -3021,8 +3043,8 @@ error while adding equivalent collating symbol"));
 		}

 	      runp = (struct section_list *) xcalloc (1, sizeof (*runp));
-	      name = strncpy (xmalloc (arg->val.str.lenmb + 1),
-			      arg->val.str.startmb, arg->val.str.lenmb);
+	      name = (char *) xmalloc (arg->val.str.lenmb + 1);
+	      memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
 	      name[arg->val.str.lenmb] = '\0';
 	      runp->name = name;

@ -3070,6 +3092,8 @@ error while adding equivalent collating symbol"));

 		  if (collate->error_section.first == NULL)
 		    {
+		      /* Insert &collate->error_section at the end of
+			 the collate->sections list.  */
 		      if (collate->sections == NULL)
 			collate->sections = &collate->error_section;
 		      else
@ -3078,9 +3102,9 @@ error while adding equivalent collating symbol"));
 			  while (sp->next != NULL)
 			    sp = sp->next;

-			  collate->error_section.next = NULL;
 			  sp->next = &collate->error_section;
 			}
+		      collate->error_section.next = NULL;
 		    }
 		}
 	      else
@ -3093,6 +3117,8 @@ error while adding equivalent collating symbol"));
 			      "LC_COLLATE", sp->name);
 		  else
 		    {
+		      /* Insert sp in the collate->sections list,
+			 right after collate->current_section.  */
 		      if (collate->current_section == NULL)
 			collate->current_section = sp;
 		      else
@ -3141,6 +3167,8 @@ error while adding equivalent collating symbol"));
 			  "LC_COLLATE");
 	      else
 		{
+		  /* Insert &collate->unnamed_section at the beginning of
+		     the collate->sections list.  */
 		  collate->unnamed_section.next = collate->sections;
 		  collate->sections = &collate->unnamed_section;
 		}
@ -3149,7 +3177,7 @@ error while adding equivalent collating symbol"));
 	  /* Now read the direction names.  */
 	  read_directions (ldfile, arg, charmap, repertoire, collate);

-	  /* From now be need the strings untranslated.  */
+	  /* From now we need the strings untranslated.  */
 	  ldfile->translate_strings = 0;
 	  break;

@ -3428,7 +3456,7 @@ error while adding equivalent collating symbol"));
 	      /* We are outside an `order_start' region.  This means
                 we must only accept definitions of values for
                 collation symbols since these are purely abstract
-                 values and don't need dorections associated.  */
+                 values and don't need directions associated.  */
 	      struct element_t *seqp;

 	      if (find_entry (&collate->seq_table, symstr, symlen,
@ -3510,7 +3538,7 @@ error while adding equivalent collating symbol"));
 		  if (seqp->section->first == seqp)
 		    {
 		      if (seqp->section->first == seqp->section->last)
-			/* This setion has no content anymore.  */
+			/* This section has no content anymore.  */
 			seqp->section->first = seqp->section->last = NULL;
 		      else
 			seqp->section->first = seqp->next;
@ -3616,8 +3644,7 @@ error while adding equivalent collating symbol"));

 	  /* See whether UNDEFINED already appeared somewhere.  */
 	  if (collate->undefined.next != NULL
-	      || (collate->cursor != NULL
-		  && collate->undefined.next == collate->cursor))
+	      || &collate->undefined == collate->cursor)
 	    {
 	      lr_error (ldfile,
 			_("%s: order for `%.*s' already defined at %s:%Zu"),
@ -3632,9 +3659,9 @@ error while adding equivalent collating symbol"));
 			     repertoire, collate, tok_none);
 	  break;

-	case tok_ellipsis2:
-	case tok_ellipsis3:
-	case tok_ellipsis4:
+	case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
+	case tok_ellipsis3: /* absolute ellipsis */
+	case tok_ellipsis4: /* symbolic decimal ellipsis */
 	  /* This is the symbolic (decimal or hexadecimal) or absolute
             ellipsis.  */
 	  if (was_ellipsis != tok_none)