ICU-206 Added UTF-32 converter

X-SVN-Rev: 2917
2000-11-16 17:20:03 +00:00 · 2000-11-16 17:20:03 +00:00 · 516103b627
commit 516103b627
parent 918ec01345
7 changed files with 578 additions and 29 deletions
--- a/icu4c/data/convrtrs.txt
+++ b/icu4c/data/convrtrs.txt
@ -63,11 +63,15 @@
 # be changed - or else code and/or file names must also be changed.

 # Algorithmic
-UTF8			 utf-8 {	 MIME } ibm-1208 cp1208
-UTF16_BigEndian		 utf-16be {	MIME }
-UTF16_LittleEndian { MIME }	 utf-16le {	MIME }
-UTF16_PlatformEndian { MIME }	 ISO-10646-UCS-2 { IANA	} csUnicode utf-16 { MIME } ibm-1200 cp1200 ucs-2
+UTF8			 utf-8 { MIME } ibm-1208 cp1208
+UTF16_BigEndian		 utf-16be { MIME }
+UTF16_LittleEndian	 utf-16le { MIME }
+UTF16_PlatformEndian	 ISO-10646-UCS-2 { IANA } csUnicode utf-16 { MIME } ibm-1200 cp1200 ucs-2
 UTF16_OppositeEndian
+UTF32_BigEndian		 utf-32be { MIME }
+UTF32_LittleEndian	 utf-32le { MIME }
+UTF32_PlatformEndian	 ISO-10646-UCS-4 { IANA	} csUCS4 utf-32 { MIME } ucs-4
+UTF32_OppositeEndian
 LATIN_1			 iso-8859-1	{ MIME } ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 cp367 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983   #!!!!! There's whole lot of names for this
 ISO_2022			 iso-2022 {	MIME	 } 2022 cp2022
 ISO_2022,locale=ja,version=0	 ISO_2022_JP, ISO-2022-JP, csISO2022JP, iso-2022-jp { MIME }
--- a/icu4c/source/common/ucnv_bld.c
+++ b/icu4c/source/common/ucnv_bld.c
@ -47,8 +47,8 @@ extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
 static const UConverterSharedData *
 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    &_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
-    &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData,
-    &_ISO2022Data, 
+    &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
+    &_EBCDICStatefulData, &_ISO2022Data, 
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
@ -68,6 +68,15 @@ static struct {
 #else
  { "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
  { "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
+#endif
+  { "UTF32_BigEndian", UCNV_UTF32_BigEndian },
+  { "UTF32_LittleEndian", UCNV_UTF32_LittleEndian },
+#if U_IS_BIG_ENDIAN
+  { "UTF32_PlatformEndian", UCNV_UTF32_BigEndian },
+  { "UTF32_OppositeEndian", UCNV_UTF32_LittleEndian },
+#else
+  { "UTF32_PlatformEndian", UCNV_UTF32_LittleEndian },
+  { "UTF32_OppositeEndian", UCNV_UTF32_BigEndian},
 #endif
  { "ISO_2022", UCNV_ISO_2022 },
  { "LMBCS-1", UCNV_LMBCS_1 },
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -205,8 +205,8 @@ struct UConverterImpl {

 extern const UConverterSharedData
    _SBCSData, _DBCSData, _MBCSData, _Latin1Data,
-    _UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData,
-    _ISO2022Data, 
+    _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
+    _EBCDICStatefulData, _ISO2022Data, 
    _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
    _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,_HZData;

--- a/icu4c/source/common/ucnv_utf.c
+++ b/icu4c/source/common/ucnv_utf.c
@ -16,6 +16,7 @@
 *   06/29/2000  helena      Major rewrite of the callback APIs.
 *   07/20/2000  george      Change the coding style to conform to the coding guidelines,
 *                           and a few miscellaneous bug fixes.
+*   11/15/2000  george      Added UTF-32
 */

 #include "cmemory.h"
@ -34,7 +35,7 @@
 */
 /*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
 static const uint32_t MAXIMUM_UCS2 = 0x0000FFFF;
-static const uint32_t MAXIMUM_UTF16 = 0x0010FFFF;
+static const uint32_t MAXIMUM_UTF = 0x0010FFFF;
 static const uint32_t MAXIMUM_UCS4 = 0x7FFFFFFF;
 static const int8_t HALF_SHIFT = 10;
 static const uint32_t HALF_BASE = 0x0010000;
@ -73,7 +74,8 @@ static const int8_t bytesFromUTF8[256] = {
 *
 * @returns true when callback fails
 */
-UBool T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args,
+static UBool
+T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args,
                                                  UErrorCode *err)
 {
    UConverter *converter = args->converter;
@ -99,7 +101,8 @@ UBool T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args
    return (UBool)U_FAILURE(*err);
 }

-UBool T_UConverter_toUnicode_InvalidChar_OffsetCallback(UConverterToUnicodeArgs * args,
+static UBool
+T_UConverter_toUnicode_InvalidChar_OffsetCallback(UConverterToUnicodeArgs * args,
                                                        int32_t currentOffset,
                                                        UErrorCode *err)
 {
@ -195,7 +198,7 @@ morebytes:
            /* Remove the acummulated high bits */
            ch -= offsetsFromUTF8[inBytes];

-            if (i == inBytes && ch <= MAXIMUM_UTF16)
+            if (i == inBytes && ch <= MAXIMUM_UTF)
            {
                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                if (ch <= MAXIMUM_UCS2) 
@ -345,7 +348,7 @@ morebytes:
            /* Remove the acummulated high bits */
            ch -= offsetsFromUTF8[inBytes];

-            if (i == inBytes && ch <= MAXIMUM_UTF16)
+            if (i == inBytes && ch <= MAXIMUM_UTF)
            {
                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                if (ch <= MAXIMUM_UCS2) 
@ -1030,7 +1033,7 @@ U_CFUNC void  T_UConverter_toUnicode_UTF16_LE (UConverterToUnicodeArgs * args,
    args->source += mySourceIndex;
 }

-U_CFUNC void   T_UConverter_fromUnicode_UTF16_LE (UConverterFromUnicodeArgs * args,
+U_CFUNC void T_UConverter_fromUnicode_UTF16_LE (UConverterFromUnicodeArgs * args,
                                          UErrorCode * err)
 {
    const UChar *mySource = args->source;
@ -1157,3 +1160,470 @@ const UConverterSharedData _UTF16LEData={
    NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl, 
    0
 };
+
+/* UTF-32BE ----------------------------------------------------------------- */
+/* Converts big-endian UTF-32 bytes from args->source to UTF-16 in args->target; an incomplete 4-byte unit is carried to the next call via toUnicodeStatus/toULength. */
+void T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
+                                     UErrorCode * err)
+{
+    const unsigned char *mySource = (unsigned char *) args->source;
+    UChar *myTarget = args->target;
+    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+    const UChar *targetLimit = args->targetLimit;
+    unsigned char *toUBytes = args->converter->toUBytes; /* raw bytes of the unit being assembled */
+    uint32_t ch, i; /* ch: value assembled so far; i: number of bytes of the current unit consumed */
+
+    /* Resume a 4-byte unit that the previous call left incomplete (only when the target has room). Original note: "UTF-8 returns here for only non-offset, this needs to change." */
+    if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+    {
+        i = args->converter->toULength;       /* restore # of bytes consumed */
+
+        ch = args->converter->toUnicodeStatus - 1;/* partial value saved by the previous call; it was stored with + 1 */
+        args->converter->toUnicodeStatus = 0;
+        goto morebytes;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        i = 0;
+        ch = 0;
+morebytes:
+        while (i < sizeof(uint32_t))
+        {
+            if (mySource < sourceLimit)
+            {
+                ch = (ch << 8) | (uint8_t)(*mySource); /* big-endian: the first byte read ends up most significant */
+                toUBytes[i++] = (char) *(mySource++);
+            }
+            else
+            {
+                if (args->flush)
+                {
+                    if (U_SUCCESS(*err))
+                    {
+                        *err = U_TRUNCATED_CHAR_FOUND; /* input ended in the middle of a 4-byte unit */
+                        args->converter->toUnicodeStatus = MAXIMUM_UCS4; /* NOTE(review): the UTF32_LE variant stores 0 here - confirm which is intended */
+                    }
+                }
+                else
+                {   /* stores a partially calculated target*/
+                    /* + 1 to make 0 a valid character */
+                    args->converter->toUnicodeStatus = ch + 1;
+                    args->converter->toULength = (int8_t) i;
+                }
+                goto donefornow;
+            }
+        }
+
+        if (ch <= MAXIMUM_UTF) /* NOTE(review): values 0xD800..0xDFFF also pass this check - confirm whether they should be rejected */
+        {
+            /* All four bytes were read and the value does not exceed MAXIMUM_UTF */
+            if (ch <= MAXIMUM_UCS2) 
+            {
+                /* fits in 16 bits */
+                *(myTarget++) = (UChar) ch;
+            }
+            else
+            {
+                /* write out the surrogates */
+                ch -= HALF_BASE;
+                *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
+                ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
+                if (myTarget < targetLimit)
+                {
+                    *(myTarget++) = (UChar)ch;
+                }
+                else
+                {
+                    /* Put in overflow buffer (not handled here) */
+                    args->converter->UCharErrorBuffer[0] = (UChar) ch;
+                    args->converter->UCharErrorBufferLength = 1;
+                    *err = U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        }
+        else
+        {
+            args->source = (const char *) mySource; /* publish current positions before calling the callback */
+            args->target = myTarget;
+            args->converter->invalidCharLength = (int8_t)i;
+            if (T_UConverter_toUnicode_InvalidChar_Callback(args, err))
+            {
+                /* Stop if the error wasn't handled */
+                break;
+            }
+            args->converter->invalidCharLength = 0;
+            mySource = (unsigned char *) args->source; /* re-read positions after the callback returns */
+            myTarget = args->target;
+        }
+    }
+
+donefornow:
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        /* End of target buffer */
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = myTarget;
+    args->source = (const char *) mySource;
+}
+/* Converts UTF-16 code units from args->source to big-endian UTF-32 bytes in args->target, combining surrogate pairs; a trailing lead surrogate is kept in fromUnicodeStatus when not flushing. */
+void T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
+                                       UErrorCode * err)
+{
+    const UChar *mySource = args->source;
+    unsigned char *myTarget = (unsigned char *) args->target;
+    const UChar *sourceLimit = args->sourceLimit;
+    const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+    UChar32 ch, ch2;
+    unsigned int indexToWrite;
+    unsigned char temp[sizeof(uint32_t)]; /* the four output bytes of one code point, in output order */
+
+    temp[0] = 0; /* set once: the loop below never writes the most significant byte */
+
+    if (args->converter->fromUnicodeStatus)
+    {
+        ch = args->converter->fromUnicodeStatus; /* lead surrogate saved by the previous call */
+        args->converter->fromUnicodeStatus = 0;
+        goto lowsurogate;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        ch = *(mySource++);
+
+        if (SURROGATE_HIGH_START <= ch && ch < SURROGATE_LOW_START)
+        {
+lowsurogate:
+            if (mySource < sourceLimit)
+            {
+                ch2 = *mySource;
+                if (SURROGATE_LOW_START <= ch2 && ch2 <= SURROGATE_LOW_END) /* otherwise the unpaired lead surrogate is encoded as-is */
+                {
+                    ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+                    mySource++;
+                }
+            }
+            else if (!args->flush)
+            {
+                /* ran out of source */
+                args->converter->fromUnicodeStatus = ch;
+                break;
+            }
+        }
+
+        /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+        /* Todo: Can the & part be left off implicitly? Does it really save time? */
+        temp[1] = (uint8_t) (ch >> 16 & 0x1F);
+        temp[2] = (uint8_t) (ch >> 8 & 0xFF);
+        temp[3] = (uint8_t) (ch & 0xFF);
+
+        for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
+        {
+            if (myTarget < targetLimit)
+            {
+                *(myTarget++) = temp[indexToWrite];
+            }
+            else
+            {   /* target full: the remaining bytes of this code point go to the converter's charErrorBuffer */
+                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
+                *err = U_BUFFER_OVERFLOW_ERROR; /* Todo: is this needed because of ending if */
+            }
+        }
+    }
+
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = (char *) myTarget;
+    args->source = mySource;
+}
+
+/*
+UChar32 T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
+                                                   UErrorCode* err)
+{
+    *err = U_UNSUPPORTED_ERROR;
+    return 0;
+}
+*/
+static const UConverterImpl _UTF32BEImpl = { /* implementation table for the UTF-32BE converter */
+    UCNV_UTF32_BigEndian,
+
+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
+    T_UConverter_toUnicode_UTF32_BE,
+    NULL,
+/*    T_UConverter_toUnicode_UTF32_BE_OFFSETS_LOGIC,    placeholder name for the NULL slot above; no such function in this file */
+    T_UConverter_fromUnicode_UTF32_BE,
+    NULL,
+/*    T_UConverter_fromUnicode_UTF32_BE_OFFSETS_LOGIC,  placeholder name for the NULL slot above; no such function in this file */
+    NULL,
+/*    T_UConverter_getNextUChar_UTF32_BE,               placeholder name for the NULL slot above; its definition is commented out */
+
+    NULL
+};
+
+/** Todo: These numbers are probably incorrect. */
+const UConverterStaticData _UTF32BEStaticData = {
+  sizeof(UConverterStaticData),
+"UTF32_BigEndian",
+    1200, UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, /* NOTE(review): 1200 also appears as the ibm-1200/cp1200 alias of UTF16_PlatformEndian in convrtrs.txt - confirm */
+    { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, /* the four bytes spell U+FFFD in big-endian order; presumably the substitution character - confirm */
+    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+};
+
+
+const UConverterSharedData _UTF32BEData = { /* pairs _UTF32BEStaticData with _UTF32BEImpl; listed in converterData[] in ucnv_bld.c */
+    sizeof(UConverterSharedData), ~((uint32_t) 0),
+    NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl, 
+    0
+};
+
+/* UTF-32LE ---------------------------------------------------------- */
+/* Converts little-endian UTF-32 bytes from args->source to UTF-16 in args->target; an incomplete 4-byte unit is carried to the next call via toUnicodeStatus/toULength. */
+void T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
+                                      UErrorCode * err)
+{
+    const unsigned char *mySource = (unsigned char *) args->source;
+    UChar *myTarget = args->target;
+    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+    const UChar *targetLimit = args->targetLimit;
+    unsigned char *toUBytes = args->converter->toUBytes; /* raw bytes of the unit being assembled */
+    uint32_t ch, i; /* ch: value assembled so far; i: number of bytes of the current unit consumed */
+
+    /* Resume a 4-byte unit that the previous call left incomplete (only when the target has room). */
+    if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+    {
+        i = args->converter->toULength;       /* restore # of bytes consumed */
+
+        ch = args->converter->toUnicodeStatus;/* partial value saved by the previous call */
+        if (ch == (uint32_t) -1)              /* all-ones marks a saved partial value of 0; explicit cast keeps the comparison unsigned */
+            ch = 0;
+        args->converter->toUnicodeStatus = 0;
+        goto morebytes;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        i = 0;
+        ch = 0;
+morebytes:
+        while (i < sizeof(uint32_t))
+        {
+            if (mySource < sourceLimit)
+            {
+                ch |= ((uint32_t)(*mySource)) << (i * 8); /* little-endian; widen before shifting so a byte >= 0x80 shifted by 24 cannot overflow a signed int */
+                toUBytes[i++] = (char) *(mySource++);
+            }
+            else
+            {
+                if (args->flush)
+                {
+                    if (U_SUCCESS(*err))
+                    {
+                        *err = U_TRUNCATED_CHAR_FOUND; /* input ended in the middle of a 4-byte unit */
+                        args->converter->toUnicodeStatus = 0;
+                    }
+                }
+                else
+                {    /* stores a partially calculated target*/
+                    if (ch == 0)
+                    {
+                        args->converter->toUnicodeStatus = -1; /* 0 would read as "nothing saved", so use all-ones instead */
+                    }
+                    else 
+                    {
+                        args->converter->toUnicodeStatus = ch;
+                    }
+                    args->converter->toULength = (int8_t) i;
+                }
+                goto donefornow;
+            }
+        }
+
+        if (ch <= MAXIMUM_UTF) /* NOTE(review): values 0xD800..0xDFFF also pass this check - confirm whether they should be rejected */
+        {
+            /* All four bytes were read and the value does not exceed MAXIMUM_UTF */
+            if (ch <= MAXIMUM_UCS2) 
+            {
+                /* fits in 16 bits */
+                *(myTarget++) = (UChar) ch;
+            }
+            else
+            {
+                /* write out the surrogates */
+                ch -= HALF_BASE;
+                *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
+                ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
+                if (myTarget < targetLimit)
+                {
+                    *(myTarget++) = (UChar)ch;
+                }
+                else
+                {
+                    /* Put in overflow buffer (not handled here) */
+                    args->converter->UCharErrorBuffer[0] = (UChar) ch;
+                    args->converter->UCharErrorBufferLength = 1;
+                    *err = U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        }
+        else
+        {
+            args->source = (const char *) mySource; /* publish current positions before calling the callback */
+            args->target = myTarget;
+            args->converter->invalidCharLength = (int8_t)i;
+            if (T_UConverter_toUnicode_InvalidChar_Callback(args, err))
+            {
+                /* Stop if the error wasn't handled */
+                break;
+            }
+            args->converter->invalidCharLength = 0;
+            mySource = (unsigned char *) args->source; /* re-read positions after the callback returns */
+            myTarget = args->target;
+        }
+    }
+
+donefornow:
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        /* End of target buffer */
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = myTarget;
+    args->source = (const char *) mySource;
+    /* *err = U_UNSUPPORTED_ERROR; -- leftover from the stub, kept disabled */
+}
+/* Converts UTF-16 code units from args->source to little-endian UTF-32 bytes in args->target, combining surrogate pairs; a trailing lead surrogate is kept in fromUnicodeStatus when not flushing. */
+void  T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
+                                         UErrorCode * err)
+{
+    const UChar *mySource = args->source;
+    unsigned char *myTarget = (unsigned char *) args->target;
+    const UChar *sourceLimit = args->sourceLimit;
+    const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+    UChar32 ch, ch2;
+    unsigned int indexToWrite;
+    unsigned char temp[sizeof(uint32_t)]; /* the four output bytes of one code point, in output order */
+
+    temp[3] = 0; /* set once: the loop below never writes the most significant byte */
+
+    if (args->converter->fromUnicodeStatus)
+    {
+        ch = args->converter->fromUnicodeStatus; /* lead surrogate saved by the previous call */
+        args->converter->fromUnicodeStatus = 0;
+        goto lowsurogate;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        ch = *(mySource++);
+
+        if (SURROGATE_HIGH_START <= ch && ch < SURROGATE_LOW_START)
+        {
+lowsurogate:
+            if (mySource < sourceLimit)
+            {
+                ch2 = *mySource;
+                if (SURROGATE_LOW_START <= ch2 && ch2 <= SURROGATE_LOW_END) /* otherwise the unpaired lead surrogate is encoded as-is */
+                {
+                    ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+                    mySource++;
+                }
+            }
+            else if (!args->flush)
+            {
+                /* ran out of source */
+                args->converter->fromUnicodeStatus = ch;
+                break;
+            }
+        }
+
+        /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+        /* Todo: Can the & part be left off implicitly? Does it really save time? */
+        temp[2] = (uint8_t) (ch >> 16 & 0x1F);
+        temp[1] = (uint8_t) (ch >> 8 & 0xFF);
+        temp[0] = (uint8_t) (ch & 0xFF);
+
+        for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
+        {
+            if (myTarget < targetLimit)
+            {
+                *(myTarget++) = temp[indexToWrite];
+            }
+            else
+            {   /* target full: the remaining bytes of this code point go to the converter's charErrorBuffer */
+                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
+                *err = U_BUFFER_OVERFLOW_ERROR; /* Todo: is this needed because of ending if */
+            }
+        }
+    }
+
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = (char *) myTarget;
+    args->source = mySource;
+/*    *err = U_UNSUPPORTED_ERROR;    leftover from the stub, kept disabled */
+}
+
+/*
+UChar32 T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
+                                                   UErrorCode* err)
+{
+    *err = U_UNSUPPORTED_ERROR;
+    return 0;
+}
+*/
+
+static const UConverterImpl _UTF32LEImpl = { /* implementation table for the UTF-32LE converter */
+    UCNV_UTF32_LittleEndian,
+
+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
+    T_UConverter_toUnicode_UTF32_LE,
+    NULL,
+/*    T_UConverter_toUnicode_UTF32_LE_OFFSETS_LOGIC,    placeholder name for the NULL slot above; no such function in this file */
+    T_UConverter_fromUnicode_UTF32_LE,
+    NULL,
+/*    T_UConverter_fromUnicode_UTF32_LE_OFFSETS_LOGIC,  placeholder name for the NULL slot above; no such function in this file */
+    NULL,
+/*    T_UConverter_getNextUChar_UTF32_LE,               placeholder name for the NULL slot above; its definition is commented out */
+
+    NULL
+};
+
+/** Todo: These numbers are probably incorrect. */
+const UConverterStaticData _UTF32LEStaticData = {
+  sizeof(UConverterStaticData),
+"UTF32_LittleEndian",
+    1200, UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, /* type now matches _UTF32LEImpl (was UCNV_UTF32_BigEndian, copied from the BE table); NOTE(review): 1200 also appears as the ibm-1200 alias of UTF16_PlatformEndian - confirm */
+    { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, /* the four bytes spell U+FFFD in little-endian order; presumably the substitution character - confirm */
+    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+};
+
+
+const UConverterSharedData _UTF32LEData = { /* pairs _UTF32LEStaticData with _UTF32LEImpl; listed in converterData[] in ucnv_bld.c */
+    sizeof(UConverterSharedData), ~((uint32_t) 0),
+    NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl, 
+    0
+};
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@ -50,10 +50,12 @@ typedef enum {
    UCNV_UTF8 = 4,
    UCNV_UTF16_BigEndian = 5,
    UCNV_UTF16_LittleEndian = 6,
-    UCNV_EBCDIC_STATEFUL = 7,
-    UCNV_ISO_2022 = 8,
+    UCNV_UTF32_BigEndian = 7,
+    UCNV_UTF32_LittleEndian = 8,
+    UCNV_EBCDIC_STATEFUL = 9,
+    UCNV_ISO_2022 = 10,

-    UCNV_LMBCS_1 = 9,
+    UCNV_LMBCS_1 = 11,
    UCNV_LMBCS_2, 
    UCNV_LMBCS_3,		
    UCNV_LMBCS_4,
--- a/icu4c/source/data/mappings/convrtrs.txt
+++ b/icu4c/source/data/mappings/convrtrs.txt
@ -63,11 +63,15 @@
 # be changed - or else code and/or file names must also be changed.

 # Algorithmic
-UTF8			 utf-8 {	 MIME } ibm-1208 cp1208
-UTF16_BigEndian		 utf-16be {	MIME }
-UTF16_LittleEndian { MIME }	 utf-16le {	MIME }
-UTF16_PlatformEndian { MIME }	 ISO-10646-UCS-2 { IANA	} csUnicode utf-16 { MIME } ibm-1200 cp1200 ucs-2
+UTF8			 utf-8 { MIME } ibm-1208 cp1208
+UTF16_BigEndian		 utf-16be { MIME }
+UTF16_LittleEndian	 utf-16le { MIME }
+UTF16_PlatformEndian	 ISO-10646-UCS-2 { IANA } csUnicode utf-16 { MIME } ibm-1200 cp1200 ucs-2
 UTF16_OppositeEndian
+UTF32_BigEndian		 utf-32be { MIME }
+UTF32_LittleEndian	 utf-32le { MIME }
+UTF32_PlatformEndian	 ISO-10646-UCS-4 { IANA	} csUCS4 utf-32 { MIME } ucs-4
+UTF32_OppositeEndian
 LATIN_1			 iso-8859-1	{ MIME } ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 cp367 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983   #!!!!! There's whole lot of names for this
 ISO_2022			 iso-2022 {	MIME	 } 2022 cp2022
 ISO_2022,locale=ja,version=0	 ISO_2022_JP, ISO-2022-JP, csISO2022JP, iso-2022-jp { MIME }
--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@ -583,21 +583,64 @@ void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
    
   
    /*  etc */
-    const uint8_t expectedUTF16LE[] = 
-     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
-    int32_t      toUTF16LEOffs[]=  
-     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07};
-    int32_t fmUTF16LEOffs[] = 
-     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e }; 
-
    const uint8_t expectedUTF16BE[] = 
     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
    int32_t      toUTF16BEOffs[]=  
     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
    int32_t fmUTF16BEOffs[] = 
     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e }; 
+
+    const uint8_t expectedUTF16LE[] = 
+     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
+    int32_t      toUTF16LEOffs[]=  
+     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07};
+    int32_t fmUTF16LEOffs[] = 
+     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e }; 
    
-  
+    const uint8_t expectedUTF32BE[] = 
+     { 0x00, 0x00, 0x00, 0x31,
+       0x00, 0x00, 0x00, 0x32,
+       0x00, 0x00, 0x00, 0x33,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x4e, 0x00,
+       0x00, 0x00, 0x4e, 0x8c,
+       0x00, 0x00, 0x4e, 0x09,
+       0x00, 0x00, 0x00, 0x2e };
+    int32_t      toUTF32BEOffs[]=  
+     { 0x00, 0x00, 0x00, 0x00,
+       0x01, 0x01, 0x01, 0x01,
+       0x02, 0x02, 0x02, 0x02,
+       0x03, 0x03, 0x03, 0x03,
+       0x04, 0x04, 0x04, 0x04,
+       0x05, 0x05, 0x05, 0x05,
+       0x06, 0x06, 0x06, 0x06,
+       0x07, 0x07, 0x07, 0x07,
+       0x08, 0x08, 0x08, 0x08 };
+    int32_t fmUTF32BEOffs[] = 
+     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c }; 
+
+    const uint8_t expectedUTF32LE[] = 
+     { 0x31, 0x00, 0x00, 0x00,
+       0x32, 0x00, 0x00, 0x00,
+       0x33, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x4e, 0x00, 0x00,
+       0x8c, 0x4e, 0x00, 0x00,
+       0x09, 0x4e, 0x00, 0x00,
+       0x2e, 0x00, 0x00, 0x00 };
+    int32_t      toUTF32LEOffs[]=  
+     { 0x00, 0x00, 0x00, 0x00,
+       0x01, 0x01, 0x01, 0x01,
+       0x02, 0x02, 0x02, 0x02,
+       0x03, 0x03, 0x03, 0x03,
+       0x04, 0x04, 0x04, 0x04,
+       0x05, 0x05, 0x05, 0x05,
+       0x06, 0x06, 0x06, 0x06,
+       0x07, 0x07, 0x07, 0x07,
+       0x08, 0x08, 0x08, 0x08 };
+    int32_t fmUTF32LEOffs[] = 
+     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c }; 
+
    


@ -651,6 +694,14 @@ void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
    if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
            expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs ))
        log_err("u-> utf-16be did not match.\n");
+    /*UTF32 LE*/
+    if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+            expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs ))
+        log_err("u-> utf-32le did not match.\n");
+    /*UTF32 BE*/
+    if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+            expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs ))
+        log_err("u-> utf-32be did not match.\n");
    /*LATIN_1*/
    if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
            expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs ))
@ -703,6 +754,14 @@ void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
    if(!testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs ))
      log_err("utf-16be -> u  did not match.\n");
+    /*UTF32 LE*/
+    if(!testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
+               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs ))
+      log_err("utf-32le -> u  did not match.\n");
+    /*UTF32 BE*/
+    if(!testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
+               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs ))
+      log_err("utf-32be -> u  did not match.\n");
    /*EBCDIC_STATEFUL*/
    if(!testConvertToU(expectedIBM930, sizeof(expectedIBM930),
               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs ))
@ -712,6 +771,7 @@ void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs ))
      log_err("ibm-943 -> u  did not match.\n");

+    /* Try it again to make sure it still works */
    if(!testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
               sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs ))
      log_err("utf-16le -> u  did not match.\n");