ICU-4548 Update Unicode CCSIDs from IBM.
X-SVN-Rev: 17751
This commit is contained in:
parent
66840a241d
commit
86b1781bad
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u16.c
|
||||
@ -1328,7 +1328,7 @@ static const UConverterImpl _UTF16Impl = {
|
||||
static const UConverterStaticData _UTF16StaticData = {
|
||||
sizeof(UConverterStaticData),
|
||||
"UTF-16",
|
||||
0, /* ### TODO review correctness of all Unicode CCSIDs */
|
||||
1204, /* CCSID for BOM sensitive UTF-16 */
|
||||
UCNV_IBM, UCNV_UTF16, 2, 2,
|
||||
#if U_IS_BIG_ENDIAN
|
||||
{ 0xff, 0xfd, 0, 0 }, 2,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u32.c
|
||||
@ -1156,10 +1156,11 @@ static const UConverterImpl _UTF32Impl = {
|
||||
ucnv_getCompleteUnicodeSet
|
||||
};
|
||||
|
||||
/* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
|
||||
static const UConverterStaticData _UTF32StaticData = {
|
||||
sizeof(UConverterStaticData),
|
||||
"UTF-32",
|
||||
0, /* ### TODO review correctness of all Unicode CCSIDs */
|
||||
1236,
|
||||
UCNV_IBM, UCNV_UTF32, 4, 4,
|
||||
#if U_IS_BIG_ENDIAN
|
||||
{ 0, 0, 0xff, 0xfd }, 4,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u8.c
|
||||
@ -798,7 +798,8 @@ static const UConverterImpl _CESU8Impl={
|
||||
static const UConverterStaticData _CESU8StaticData={
|
||||
sizeof(UConverterStaticData),
|
||||
"CESU-8",
|
||||
0, UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
|
||||
9400, /* CCSID for CESU-8 */
|
||||
UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
|
||||
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
|
||||
0,
|
||||
0,
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -1374,7 +1374,7 @@ static const UConverterImpl _Bocu1Impl={
|
||||
static const UConverterStaticData _Bocu1StaticData={
|
||||
sizeof(UConverterStaticData),
|
||||
"BOCU-1",
|
||||
0, /* CCSID for BOCU-1 */
|
||||
1214, /* CCSID for BOCU-1 */
|
||||
UCNV_IBM, UCNV_BOCU1,
|
||||
1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
|
||||
{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2004, International Business Machines
|
||||
* Copyright (C) 2000-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -2008,7 +2008,7 @@ static const UConverterImpl _SCSUImpl={
|
||||
static const UConverterStaticData _SCSUStaticData={
|
||||
sizeof(UConverterStaticData),
|
||||
"SCSU",
|
||||
0, /* CCSID for SCSU */
|
||||
1212, /* CCSID for SCSU */
|
||||
UCNV_IBM, UCNV_SCSU,
|
||||
1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
|
||||
/*
|
||||
|
@ -134,36 +134,70 @@
|
||||
# Fully algorithmic converters
|
||||
|
||||
UTF-8 { IANA* MIME* JAVA* WINDOWS }
|
||||
ibm-1208 { IBM* }
|
||||
ibm-1209 { IBM }
|
||||
ibm-5304 { IBM }
|
||||
ibm-5305 { IBM }
|
||||
ibm-1208 { IBM* } # UTF-8 with IBM PUA
|
||||
ibm-1209 { IBM } # UTF-8
|
||||
ibm-5304 { IBM } # Unicode 2.0, UTF-8 with IBM PUA
|
||||
ibm-5305 { IBM } # Unicode 2.0, UTF-8
|
||||
ibm-13496 { IBM } # Unicode 3.0, UTF-8 with IBM PUA
|
||||
ibm-13497 { IBM } # Unicode 3.0, UTF-8
|
||||
ibm-17592 { IBM } # Unicode 4.0, UTF-8 with IBM PUA
|
||||
ibm-17593 { IBM } # Unicode 4.0, UTF-8
|
||||
windows-65001 { WINDOWS* }
|
||||
cp1208
|
||||
|
||||
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
|
||||
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA } unicode csUnicode ucs-2
|
||||
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA }
|
||||
ibm-1204 { IBM* } # UTF-16 with IBM PUA and BOM sensitive
|
||||
ibm-1205 { IBM } # UTF-16 BOM sensitive
|
||||
unicode
|
||||
csUnicode
|
||||
ucs-2
|
||||
# The following Unicode CCSIDs (IBM) are not valid in ICU because they are
|
||||
# considered pure DBCS (exactly 2 bytes) of Unicode,
|
||||
# and they are a subset of Unicode. ICU does not support their encoding structures.
|
||||
# 1400 1401 1402 1410 1414 1415 1446 1447 1448 1449 64770 64771 65520 5496 5497 5498 9592 13688
|
||||
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
|
||||
# iso-10646-ucs-2 { JAVA } # This is ambiguous
|
||||
ibm-1200 { IBM* }
|
||||
ibm-1201 { IBM }
|
||||
ibm-5297 { IBM }
|
||||
ibm-13488 { IBM }
|
||||
ibm-17584 { IBM }
|
||||
ibm-1200 { IBM* } # UTF-16 BE with IBM PUA
|
||||
ibm-1201 { IBM } # UTF-16 BE
|
||||
ibm-13488 { IBM } # Unicode 2.0, UTF-16 BE with IBM PUA
|
||||
ibm-13489 { IBM } # Unicode 2.0, UTF-16 BE
|
||||
ibm-17584 { IBM } # Unicode 3.0, UTF-16 BE with IBM PUA
|
||||
ibm-17585 { IBM } # Unicode 3.0, UTF-16 BE
|
||||
ibm-21680 { IBM } # Unicode 4.0, UTF-16 BE with IBM PUA
|
||||
ibm-21681 { IBM } # Unicode 4.0, UTF-16 BE
|
||||
ibm-61955 { IBM } # UTF-16BE with Gaidai University (Japan) PUA
|
||||
ibm-61956 { IBM } # UTF-16BE with HKSCS-Big 5 PUA
|
||||
windows-1201 { WINDOWS* }
|
||||
cp1200
|
||||
cp1201
|
||||
UTF16_BigEndian
|
||||
# ibm-5297 { IBM } # Unicode 2.0, UTF-16 (BE) (reserved, never used)
|
||||
# iso-10646-ucs-2 { JAVA } # This is ambiguous
|
||||
# ibm-61952 is not a valid CCSID because it's Unicode 1.1
|
||||
# ibm-61953 is not a valid CCSID because it's Unicode 1.0
|
||||
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
|
||||
ibm-1202 { IBM* }
|
||||
ibm-13490 { IBM }
|
||||
ibm-17586 { IBM }
|
||||
ibm-1202 { IBM* } # UTF-16 LE with IBM PUA
|
||||
ibm-1203 { IBM } # UTF-16 LE
|
||||
ibm-13490 { IBM } # Unicode 2.0, UTF-16 LE with IBM PUA
|
||||
ibm-13491 { IBM } # Unicode 2.0, UTF-16 LE
|
||||
ibm-17586 { IBM } # Unicode 3.0, UTF-16 LE with IBM PUA
|
||||
ibm-17587 { IBM } # Unicode 3.0, UTF-16 LE
|
||||
ibm-21682 { IBM } # Unicode 4.0, UTF-16 LE with IBM PUA
|
||||
ibm-21683 { IBM } # Unicode 4.0, UTF-16 LE
|
||||
UTF16_LittleEndian
|
||||
windows-1200 { WINDOWS* }
|
||||
|
||||
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4
|
||||
UTF-32BE { IANA* } UTF32_BigEndian ibm-1232 { IBM* } ibm-1233 { IBM }
|
||||
UTF-32LE { IANA* } UTF32_LittleEndian ibm-1234 { IBM* }
|
||||
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA }
|
||||
ibm-1236 { IBM* } # UTF-32 with IBM PUA and BOM sensitive
|
||||
ibm-1237 { IBM } # UTF-32 BOM sensitive
|
||||
csUCS4
|
||||
ucs-4
|
||||
UTF-32BE { IANA* } UTF32_BigEndian
|
||||
ibm-1232 { IBM* } # UTF-32 BE with IBM PUA
|
||||
ibm-1233 { IBM } # UTF-32 BE
|
||||
UTF-32LE { IANA* } UTF32_LittleEndian
|
||||
ibm-1234 { IBM* } # UTF-32 LE with IBM PUA
|
||||
ibm-1235 { IBM } # UTF-32 LE
|
||||
|
||||
# ICU-specific names for special uses
|
||||
UTF16_PlatformEndian
|
||||
@ -185,6 +219,9 @@ UTF32_OppositeEndian
|
||||
# For details about email headers see RFC 2047.
|
||||
UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
|
||||
|
||||
# UTF-EBCDIC doesn't exist in ICU, but the aliases are here for reference.
|
||||
#UTF-EBCDIC ibm-1210 { IBM* } ibm-1211 { IBM }
|
||||
|
||||
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
|
||||
# It is a substantially modified UTF-7 encoding. See the specification in:
|
||||
#
|
||||
@ -194,11 +231,16 @@ UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
|
||||
IMAP-mailbox-name
|
||||
|
||||
SCSU { IANA* }
|
||||
BOCU-1 { IANA* } csBOCU-1 { IANA }
|
||||
ibm-1212 { IBM* } # SCSU with IBM PUA
|
||||
ibm-1213 { IBM } # SCSU
|
||||
BOCU-1 { IANA* }
|
||||
csBOCU-1 { IANA }
|
||||
ibm-1214 { IBM* } # BOCU-1 with IBM PUA
|
||||
ibm-1215 { IBM } # BOCU-1
|
||||
|
||||
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
|
||||
# The Unicode Consortium does not encourage the use of CESU-8
|
||||
CESU-8 { IANA* }
|
||||
CESU-8 { IANA* } ibm-9400 { IBM* }
|
||||
|
||||
# Standard iso-8859-1, which does not have the Euro update.
|
||||
# See iso-8859-15 (latin9) for the Euro update
|
||||
|
Loading…
Reference in New Issue
Block a user