ICU-4548 Update Unicode CCSIDs from IBM.
X-SVN-Rev: 17751
This commit is contained in:
parent
66840a241d
commit
86b1781bad
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* Copyright (C) 2002-2004, International Business Machines
|
* Copyright (C) 2002-2005, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* file name: ucnv_u16.c
|
* file name: ucnv_u16.c
|
||||||
@ -1328,7 +1328,7 @@ static const UConverterImpl _UTF16Impl = {
|
|||||||
static const UConverterStaticData _UTF16StaticData = {
|
static const UConverterStaticData _UTF16StaticData = {
|
||||||
sizeof(UConverterStaticData),
|
sizeof(UConverterStaticData),
|
||||||
"UTF-16",
|
"UTF-16",
|
||||||
0, /* ### TODO review correctness of all Unicode CCSIDs */
|
1204, /* CCSID for BOM sensitive UTF-16 */
|
||||||
UCNV_IBM, UCNV_UTF16, 2, 2,
|
UCNV_IBM, UCNV_UTF16, 2, 2,
|
||||||
#if U_IS_BIG_ENDIAN
|
#if U_IS_BIG_ENDIAN
|
||||||
{ 0xff, 0xfd, 0, 0 }, 2,
|
{ 0xff, 0xfd, 0, 0 }, 2,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* Copyright (C) 2002-2004, International Business Machines
|
* Copyright (C) 2002-2005, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* file name: ucnv_u32.c
|
* file name: ucnv_u32.c
|
||||||
@ -1156,10 +1156,11 @@ static const UConverterImpl _UTF32Impl = {
|
|||||||
ucnv_getCompleteUnicodeSet
|
ucnv_getCompleteUnicodeSet
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */
|
||||||
static const UConverterStaticData _UTF32StaticData = {
|
static const UConverterStaticData _UTF32StaticData = {
|
||||||
sizeof(UConverterStaticData),
|
sizeof(UConverterStaticData),
|
||||||
"UTF-32",
|
"UTF-32",
|
||||||
0, /* ### TODO review correctness of all Unicode CCSIDs */
|
1236,
|
||||||
UCNV_IBM, UCNV_UTF32, 4, 4,
|
UCNV_IBM, UCNV_UTF32, 4, 4,
|
||||||
#if U_IS_BIG_ENDIAN
|
#if U_IS_BIG_ENDIAN
|
||||||
{ 0, 0, 0xff, 0xfd }, 4,
|
{ 0, 0, 0xff, 0xfd }, 4,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* Copyright (C) 2002-2004, International Business Machines
|
* Copyright (C) 2002-2005, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* file name: ucnv_u8.c
|
* file name: ucnv_u8.c
|
||||||
@ -798,7 +798,8 @@ static const UConverterImpl _CESU8Impl={
|
|||||||
static const UConverterStaticData _CESU8StaticData={
|
static const UConverterStaticData _CESU8StaticData={
|
||||||
sizeof(UConverterStaticData),
|
sizeof(UConverterStaticData),
|
||||||
"CESU-8",
|
"CESU-8",
|
||||||
0, UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
|
9400, /* CCSID for CESU-8 */
|
||||||
|
UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
|
||||||
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
|
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*
|
*
|
||||||
* Copyright (C) 2002-2004, International Business Machines
|
* Copyright (C) 2002-2005, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
@ -1374,7 +1374,7 @@ static const UConverterImpl _Bocu1Impl={
|
|||||||
static const UConverterStaticData _Bocu1StaticData={
|
static const UConverterStaticData _Bocu1StaticData={
|
||||||
sizeof(UConverterStaticData),
|
sizeof(UConverterStaticData),
|
||||||
"BOCU-1",
|
"BOCU-1",
|
||||||
0, /* CCSID for BOCU-1 */
|
1214, /* CCSID for BOCU-1 */
|
||||||
UCNV_IBM, UCNV_BOCU1,
|
UCNV_IBM, UCNV_BOCU1,
|
||||||
1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
|
1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
|
||||||
{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
|
{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*
|
*
|
||||||
* Copyright (C) 2000-2004, International Business Machines
|
* Copyright (C) 2000-2005, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
@ -2008,7 +2008,7 @@ static const UConverterImpl _SCSUImpl={
|
|||||||
static const UConverterStaticData _SCSUStaticData={
|
static const UConverterStaticData _SCSUStaticData={
|
||||||
sizeof(UConverterStaticData),
|
sizeof(UConverterStaticData),
|
||||||
"SCSU",
|
"SCSU",
|
||||||
0, /* CCSID for SCSU */
|
1212, /* CCSID for SCSU */
|
||||||
UCNV_IBM, UCNV_SCSU,
|
UCNV_IBM, UCNV_SCSU,
|
||||||
1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
|
1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
|
||||||
/*
|
/*
|
||||||
|
@ -134,36 +134,70 @@
|
|||||||
# Fully algorithmic converters
|
# Fully algorithmic converters
|
||||||
|
|
||||||
UTF-8 { IANA* MIME* JAVA* WINDOWS }
|
UTF-8 { IANA* MIME* JAVA* WINDOWS }
|
||||||
ibm-1208 { IBM* }
|
ibm-1208 { IBM* } # UTF-8 with IBM PUA
|
||||||
ibm-1209 { IBM }
|
ibm-1209 { IBM } # UTF-8
|
||||||
ibm-5304 { IBM }
|
ibm-5304 { IBM } # Unicode 2.0, UTF-8 with IBM PUA
|
||||||
ibm-5305 { IBM }
|
ibm-5305 { IBM } # Unicode 2.0, UTF-8
|
||||||
|
ibm-13496 { IBM } # Unicode 3.0, UTF-8 with IBM PUA
|
||||||
|
ibm-13497 { IBM } # Unicode 3.0, UTF-8
|
||||||
|
ibm-17592 { IBM } # Unicode 4.0, UTF-8 with IBM PUA
|
||||||
|
ibm-17593 { IBM } # Unicode 4.0, UTF-8
|
||||||
windows-65001 { WINDOWS* }
|
windows-65001 { WINDOWS* }
|
||||||
cp1208
|
cp1208
|
||||||
|
|
||||||
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
|
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
|
||||||
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA } unicode csUnicode ucs-2
|
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA }
|
||||||
|
ibm-1204 { IBM* } # UTF-16 with IBM PUA and BOM sensitive
|
||||||
|
ibm-1205 { IBM } # UTF-16 BOM sensitive
|
||||||
|
unicode
|
||||||
|
csUnicode
|
||||||
|
ucs-2
|
||||||
|
# The following Unicode CCSIDs (IBM) are not valid in ICU because they are
|
||||||
|
# considered pure DBCS (exactly 2 bytes) of Unicode,
|
||||||
|
# and they are a subset of Unicode. ICU does not support their encoding structures.
|
||||||
|
# 1400 1401 1402 1410 1414 1415 1446 1447 1448 1449 64770 64771 65520 5496 5497 5498 9592 13688
|
||||||
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
|
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
|
||||||
# iso-10646-ucs-2 { JAVA } # This is ambiguous
|
ibm-1200 { IBM* } # UTF-16 BE with IBM PUA
|
||||||
ibm-1200 { IBM* }
|
ibm-1201 { IBM } # UTF-16 BE
|
||||||
ibm-1201 { IBM }
|
ibm-13488 { IBM } # Unicode 2.0, UTF-16 BE with IBM PUA
|
||||||
ibm-5297 { IBM }
|
ibm-13489 { IBM } # Unicode 2.0, UTF-16 BE
|
||||||
ibm-13488 { IBM }
|
ibm-17584 { IBM } # Unicode 3.0, UTF-16 BE with IBM PUA
|
||||||
ibm-17584 { IBM }
|
ibm-17585 { IBM } # Unicode 3.0, UTF-16 BE
|
||||||
|
ibm-21680 { IBM } # Unicode 4.0, UTF-16 BE with IBM PUA
|
||||||
|
ibm-21681 { IBM } # Unicode 4.0, UTF-16 BE
|
||||||
|
ibm-61955 { IBM } # UTF-16BE with Gaidai University (Japan) PUA
|
||||||
|
ibm-61956 { IBM } # UTF-16BE with HKSCS-Big 5 PUA
|
||||||
windows-1201 { WINDOWS* }
|
windows-1201 { WINDOWS* }
|
||||||
cp1200
|
cp1200
|
||||||
cp1201
|
cp1201
|
||||||
UTF16_BigEndian
|
UTF16_BigEndian
|
||||||
|
# ibm-5297 { IBM } # Unicode 2.0, UTF-16 (BE) (reserved, never used)
|
||||||
|
# iso-10646-ucs-2 { JAVA } # This is ambiguous
|
||||||
|
# ibm-61952 is not a valid CCSID because it's Unicode 1.1
|
||||||
|
# ibm-61953 is not a valid CCSID because it's Unicode 1.0
|
||||||
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
|
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
|
||||||
ibm-1202 { IBM* }
|
ibm-1202 { IBM* } # UTF-16 LE with IBM PUA
|
||||||
ibm-13490 { IBM }
|
ibm-1203 { IBM } # UTF-16 LE
|
||||||
ibm-17586 { IBM }
|
ibm-13490 { IBM } # Unicode 2.0, UTF-16 LE with IBM PUA
|
||||||
|
ibm-13491 { IBM } # Unicode 2.0, UTF-16 LE
|
||||||
|
ibm-17586 { IBM } # Unicode 3.0, UTF-16 LE with IBM PUA
|
||||||
|
ibm-17587 { IBM } # Unicode 3.0, UTF-16 LE
|
||||||
|
ibm-21682 { IBM } # Unicode 4.0, UTF-16 LE with IBM PUA
|
||||||
|
ibm-21683 { IBM } # Unicode 4.0, UTF-16 LE
|
||||||
UTF16_LittleEndian
|
UTF16_LittleEndian
|
||||||
windows-1200 { WINDOWS* }
|
windows-1200 { WINDOWS* }
|
||||||
|
|
||||||
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4
|
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA }
|
||||||
UTF-32BE { IANA* } UTF32_BigEndian ibm-1232 { IBM* } ibm-1233 { IBM }
|
ibm-1236 { IBM* } # UTF-32 with IBM PUA and BOM sensitive
|
||||||
UTF-32LE { IANA* } UTF32_LittleEndian ibm-1234 { IBM* }
|
ibm-1237 { IBM } # UTF-32 BOM sensitive
|
||||||
|
csUCS4
|
||||||
|
ucs-4
|
||||||
|
UTF-32BE { IANA* } UTF32_BigEndian
|
||||||
|
ibm-1232 { IBM* } # UTF-32 BE with IBM PUA
|
||||||
|
ibm-1233 { IBM } # UTF-32 BE
|
||||||
|
UTF-32LE { IANA* } UTF32_LittleEndian
|
||||||
|
ibm-1234 { IBM* } # UTF-32 LE with IBM PUA
|
||||||
|
ibm-1235 { IBM } # UTF-32 LE
|
||||||
|
|
||||||
# ICU-specific names for special uses
|
# ICU-specific names for special uses
|
||||||
UTF16_PlatformEndian
|
UTF16_PlatformEndian
|
||||||
@ -185,6 +219,9 @@ UTF32_OppositeEndian
|
|||||||
# For details about email headers see RFC 2047.
|
# For details about email headers see RFC 2047.
|
||||||
UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
|
UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
|
||||||
|
|
||||||
|
# UTF-EBCDIC doesn't exist in ICU, but the aliases are here for reference.
|
||||||
|
#UTF-EBCDIC ibm-1210 { IBM* } ibm-1211 { IBM }
|
||||||
|
|
||||||
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
|
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
|
||||||
# It is a substantially modified UTF-7 encoding. See the specification in:
|
# It is a substantially modified UTF-7 encoding. See the specification in:
|
||||||
#
|
#
|
||||||
@ -194,11 +231,16 @@ UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
|
|||||||
IMAP-mailbox-name
|
IMAP-mailbox-name
|
||||||
|
|
||||||
SCSU { IANA* }
|
SCSU { IANA* }
|
||||||
BOCU-1 { IANA* } csBOCU-1 { IANA }
|
ibm-1212 { IBM* } # SCSU with IBM PUA
|
||||||
|
ibm-1213 { IBM } # SCSU
|
||||||
|
BOCU-1 { IANA* }
|
||||||
|
csBOCU-1 { IANA }
|
||||||
|
ibm-1214 { IBM* } # BOCU-1 with IBM PUA
|
||||||
|
ibm-1215 { IBM } # BOCU-1
|
||||||
|
|
||||||
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
|
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
|
||||||
# The Unicode Consortium does not encourage the use of CESU-8
|
# The Unicode Consortium does not encourage the use of CESU-8
|
||||||
CESU-8 { IANA* }
|
CESU-8 { IANA* } ibm-9400 { IBM* }
|
||||||
|
|
||||||
# Standard iso-8859-1, which does not have the Euro update.
|
# Standard iso-8859-1, which does not have the Euro update.
|
||||||
# See iso-8859-15 (latin9) for the Euro update
|
# See iso-8859-15 (latin9) for the Euro update
|
||||||
|
Loading…
Reference in New Issue
Block a user