ICU-4548 Update Unicode CCSIDs from IBM.

X-SVN-Rev: 17751
This commit is contained in:
George Rhoten 2005-05-31 22:04:26 +00:00
parent 66840a241d
commit 86b1781bad
6 changed files with 73 additions and 29 deletions

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 2002-2004, International Business Machines * Copyright (C) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* file name: ucnv_u16.c * file name: ucnv_u16.c
@ -1328,7 +1328,7 @@ static const UConverterImpl _UTF16Impl = {
static const UConverterStaticData _UTF16StaticData = { static const UConverterStaticData _UTF16StaticData = {
sizeof(UConverterStaticData), sizeof(UConverterStaticData),
"UTF-16", "UTF-16",
0, /* ### TODO review correctness of all Unicode CCSIDs */ 1204, /* CCSID for BOM sensitive UTF-16 */
UCNV_IBM, UCNV_UTF16, 2, 2, UCNV_IBM, UCNV_UTF16, 2, 2,
#if U_IS_BIG_ENDIAN #if U_IS_BIG_ENDIAN
{ 0xff, 0xfd, 0, 0 }, 2, { 0xff, 0xfd, 0, 0 }, 2,

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 2002-2004, International Business Machines * Copyright (C) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* file name: ucnv_u32.c * file name: ucnv_u32.c
@ -1156,10 +1156,11 @@ static const UConverterImpl _UTF32Impl = {
ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet
}; };
/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */
static const UConverterStaticData _UTF32StaticData = { static const UConverterStaticData _UTF32StaticData = {
sizeof(UConverterStaticData), sizeof(UConverterStaticData),
"UTF-32", "UTF-32",
0, /* ### TODO review correctness of all Unicode CCSIDs */ 1236,
UCNV_IBM, UCNV_UTF32, 4, 4, UCNV_IBM, UCNV_UTF32, 4, 4,
#if U_IS_BIG_ENDIAN #if U_IS_BIG_ENDIAN
{ 0, 0, 0xff, 0xfd }, 4, { 0, 0, 0xff, 0xfd }, 4,

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 2002-2004, International Business Machines * Copyright (C) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* file name: ucnv_u8.c * file name: ucnv_u8.c
@ -798,7 +798,8 @@ static const UConverterImpl _CESU8Impl={
static const UConverterStaticData _CESU8StaticData={ static const UConverterStaticData _CESU8StaticData={
sizeof(UConverterStaticData), sizeof(UConverterStaticData),
"CESU-8", "CESU-8",
0, UCNV_UNKNOWN, UCNV_CESU8, 1, 3, 9400, /* CCSID for CESU-8 */
UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
0, 0,
0, 0,

View File

@ -1,7 +1,7 @@
/* /*
****************************************************************************** ******************************************************************************
* *
* Copyright (C) 2002-2004, International Business Machines * Copyright (C) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
* *
****************************************************************************** ******************************************************************************
@ -1374,7 +1374,7 @@ static const UConverterImpl _Bocu1Impl={
static const UConverterStaticData _Bocu1StaticData={ static const UConverterStaticData _Bocu1StaticData={
sizeof(UConverterStaticData), sizeof(UConverterStaticData),
"BOCU-1", "BOCU-1",
0, /* CCSID for BOCU-1 */ 1214, /* CCSID for BOCU-1 */
UCNV_IBM, UCNV_BOCU1, UCNV_IBM, UCNV_BOCU1,
1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */ 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */ { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */

View File

@ -1,7 +1,7 @@
/* /*
****************************************************************************** ******************************************************************************
* *
* Copyright (C) 2000-2004, International Business Machines * Copyright (C) 2000-2005, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
* *
****************************************************************************** ******************************************************************************
@ -2008,7 +2008,7 @@ static const UConverterImpl _SCSUImpl={
static const UConverterStaticData _SCSUStaticData={ static const UConverterStaticData _SCSUStaticData={
sizeof(UConverterStaticData), sizeof(UConverterStaticData),
"SCSU", "SCSU",
0, /* CCSID for SCSU */ 1212, /* CCSID for SCSU */
UCNV_IBM, UCNV_SCSU, UCNV_IBM, UCNV_SCSU,
1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
/* /*

View File

@ -134,36 +134,70 @@
# Fully algorithmic converters # Fully algorithmic converters
UTF-8 { IANA* MIME* JAVA* WINDOWS } UTF-8 { IANA* MIME* JAVA* WINDOWS }
ibm-1208 { IBM* } ibm-1208 { IBM* } # UTF-8 with IBM PUA
ibm-1209 { IBM } ibm-1209 { IBM } # UTF-8
ibm-5304 { IBM } ibm-5304 { IBM } # Unicode 2.0, UTF-8 with IBM PUA
ibm-5305 { IBM } ibm-5305 { IBM } # Unicode 2.0, UTF-8
ibm-13496 { IBM } # Unicode 3.0, UTF-8 with IBM PUA
ibm-13497 { IBM } # Unicode 3.0, UTF-8
ibm-17592 { IBM } # Unicode 4.0, UTF-8 with IBM PUA
ibm-17593 { IBM } # Unicode 4.0, UTF-8
windows-65001 { WINDOWS* } windows-65001 { WINDOWS* }
cp1208 cp1208
# The ICU 2.2 UTF-16/32 converters detect and write a BOM. # The ICU 2.2 UTF-16/32 converters detect and write a BOM.
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA } unicode csUnicode ucs-2 UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA }
ibm-1204 { IBM* } # UTF-16 with IBM PUA and BOM sensitive
ibm-1205 { IBM } # UTF-16 BOM sensitive
unicode
csUnicode
ucs-2
# The following IBM Unicode CCSIDs are not valid in ICU because they are
# pure DBCS encodings (exactly 2 bytes per character) that cover only
# a subset of Unicode. ICU does not support their encoding structures.
# 1400 1401 1402 1410 1414 1415 1446 1447 1448 1449 64770 64771 65520 5496 5497 5498 9592 13688
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA } UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
# iso-10646-ucs-2 { JAVA } # This is ambiguous ibm-1200 { IBM* } # UTF-16 BE with IBM PUA
ibm-1200 { IBM* } ibm-1201 { IBM } # UTF-16 BE
ibm-1201 { IBM } ibm-13488 { IBM } # Unicode 2.0, UTF-16 BE with IBM PUA
ibm-5297 { IBM } ibm-13489 { IBM } # Unicode 2.0, UTF-16 BE
ibm-13488 { IBM } ibm-17584 { IBM } # Unicode 3.0, UTF-16 BE with IBM PUA
ibm-17584 { IBM } ibm-17585 { IBM } # Unicode 3.0, UTF-16 BE
ibm-21680 { IBM } # Unicode 4.0, UTF-16 BE with IBM PUA
ibm-21681 { IBM } # Unicode 4.0, UTF-16 BE
ibm-61955 { IBM } # UTF-16BE with Gaidai University (Japan) PUA
ibm-61956 { IBM } # UTF-16BE with HKSCS-Big 5 PUA
windows-1201 { WINDOWS* } windows-1201 { WINDOWS* }
cp1200 cp1200
cp1201 cp1201
UTF16_BigEndian UTF16_BigEndian
# ibm-5297 { IBM } # Unicode 2.0, UTF-16 (BE) (reserved, never used)
# iso-10646-ucs-2 { JAVA } # This is ambiguous
# ibm-61952 is not a valid CCSID because it's Unicode 1.1
# ibm-61953 is not a valid CCSID because it's Unicode 1.0
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA } UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
ibm-1202 { IBM* } ibm-1202 { IBM* } # UTF-16 LE with IBM PUA
ibm-13490 { IBM } ibm-1203 { IBM } # UTF-16 LE
ibm-17586 { IBM } ibm-13490 { IBM } # Unicode 2.0, UTF-16 LE with IBM PUA
ibm-13491 { IBM } # Unicode 2.0, UTF-16 LE
ibm-17586 { IBM } # Unicode 3.0, UTF-16 LE with IBM PUA
ibm-17587 { IBM } # Unicode 3.0, UTF-16 LE
ibm-21682 { IBM } # Unicode 4.0, UTF-16 LE with IBM PUA
ibm-21683 { IBM } # Unicode 4.0, UTF-16 LE
UTF16_LittleEndian UTF16_LittleEndian
windows-1200 { WINDOWS* } windows-1200 { WINDOWS* }
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA }
UTF-32BE { IANA* } UTF32_BigEndian ibm-1232 { IBM* } ibm-1233 { IBM } ibm-1236 { IBM* } # UTF-32 with IBM PUA and BOM sensitive
UTF-32LE { IANA* } UTF32_LittleEndian ibm-1234 { IBM* } ibm-1237 { IBM } # UTF-32 BOM sensitive
csUCS4
ucs-4
UTF-32BE { IANA* } UTF32_BigEndian
ibm-1232 { IBM* } # UTF-32 BE with IBM PUA
ibm-1233 { IBM } # UTF-32 BE
UTF-32LE { IANA* } UTF32_LittleEndian
ibm-1234 { IBM* } # UTF-32 LE with IBM PUA
ibm-1235 { IBM } # UTF-32 LE
# ICU-specific names for special uses # ICU-specific names for special uses
UTF16_PlatformEndian UTF16_PlatformEndian
@ -185,6 +219,9 @@ UTF32_OppositeEndian
# For details about email headers see RFC 2047. # For details about email headers see RFC 2047.
UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* } UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
# UTF-EBCDIC doesn't exist in ICU, but the aliases are here for reference.
#UTF-EBCDIC ibm-1210 { IBM* } ibm-1211 { IBM }
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names. # IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
# It is a substantially modified UTF-7 encoding. See the specification in: # It is a substantially modified UTF-7 encoding. See the specification in:
# #
@ -194,11 +231,16 @@ UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
IMAP-mailbox-name IMAP-mailbox-name
SCSU { IANA* } SCSU { IANA* }
BOCU-1 { IANA* } csBOCU-1 { IANA } ibm-1212 { IBM* } # SCSU with IBM PUA
ibm-1213 { IBM } # SCSU
BOCU-1 { IANA* }
csBOCU-1 { IANA }
ibm-1214 { IBM* } # BOCU-1 with IBM PUA
ibm-1215 { IBM } # BOCU-1
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16 # See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
# The Unicode Consortium does not encourage the use of CESU-8 # The Unicode Consortium does not encourage the use of CESU-8
CESU-8 { IANA* } CESU-8 { IANA* } ibm-9400 { IBM* }
# Standard iso-8859-1, which does not have the Euro update. # Standard iso-8859-1, which does not have the Euro update.
# See iso-8859-15 (latin9) for the Euro update # See iso-8859-15 (latin9) for the Euro update