# ******************************************************************************* # * # * Copyright (C) 1995-2001, International Business Machines # * Corporation and others. All Rights Reserved. # * # ******************************************************************************* # IMPORTANT NOTE # # This file is not read directly by ICU. If you change it, you need to # run gencnval, and eventually pkgdata to update the representation that # ICU uses for aliases. # This is an alias file used by the character set converter. # # Format: # # Actual file name || Algorithm name alias1 alias2 ... # # except for column 1 (file names) case insensitive. Names are separated # by whitespace. # # All names can be tagged by including a space-separated list of tags in # curly braces, as in ISO_8859-1:1987{IANA} iso-8859-1 { MIME } or # some-charset{MIME IANA}. The order of tags does not matter, and # whitespace is allowed between the tagged name and the tags list. # # The tags can be used to get standard names using ucnv_getStandardName(). # # Here is a list of tags used in this file: # # IANA The IANA charset name, as documented in RFC 1700. # MIME The MIME charset name, used for content type tagging. # The world is getting more complicated... # Supporting XML parsers, HTML, MIME, and similar applications # that mark encodings with unique charset names, we are forced to # make this table much more static than before. # It means that a new encoding, one that differs from an # old one by changing a code point, e.g., to the Euro sign, # must not get an old alias, because it would mean that # old files with this alias would be interpreted differently. # If an encoding gets updated by assigning characters to previously # unassigned code points, then a new name is not necessary. # Also, some codepages map unassigned codepage byte values # to the same numbers in Unicode for roundtripping. 
It may be # industry practice to keep the encoding name in such a case, too # (example: Windows codepages). # Especially, the aliases listed in the list of character sets # that is maintained by the IANA (http://www.iana.org/) must # not be changed to mean encodings different from what this # list shows. # Currently, the IANA list is at # http://www.isi.edu/in-notes/iana/assignments/character-sets # Name matching is case-insensitive. Also, dashes '-', underscores '_' # and spaces ' ' are ignored in names (thus cs-iso-latin-1 and csisolatin1 # are the same). # However, the names in the left column are directly file names # or names of algorithmic converters, and their case must not # be changed - or else code and/or file names must also be changed. # Fully algorithmic converters UTF-8 { MIME } ibm-1208 cp1208 UTF-16BE { MIME } UTF16_BigEndian x-utf-16be UTF-16LE { MIME } UTF16_LittleEndian x-utf-16le # The ICU UTF-16 converter uses the current platform's endianness. # It does not autodetect endianness from a BOM. UTF-16 { MIME } UTF16_PlatformEndian ISO-10646-UCS-2 { IANA } csUnicode ibm-17584 ibm-13488 ibm-1200 cp1200 ucs-2 UTF16_OppositeEndian UTF-32BE UTF32_BigEndian UTF-32LE UTF32_LittleEndian # The ICU UTF-32 converter uses the current platform's endianness. # It does not autodetect endianness from a BOM. UTF-32 { MIME } UTF32_PlatformEndian ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 ibm-1232 UTF32_OppositeEndian UTF-7 { IANA MIME } # On UTF-7: # RFC 2152 (http://www.imc.org/rfc2152) allows to encode some US-ASCII # characters directly or in base64. Especially, the characters in set O # as defined in the RFC (!"#$%&*;<=>@[]^_`{|}) may be encoded directly but are not # allowed in, e.g., email headers. # By default, the ICU UTF-7 converter encodes set O directly. # By choosing the option "version=1", set O will be escaped instead. 
# For example: # utf7Converter=ucnv_open("UTF-7,version=1"); SCSU { IANA } BOCU-1 ISO-8859-1 { MIME } LATIN_1 ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983 819 #!!!!! There's whole lot of names for this US-ASCII { MIME } ascii ascii-7 ANSI_X3.4-1968 { IANA } ANSI_X3.4-1986 ISO_646.irv:1991 iso646-us us csASCII 646 iso-ir-6 cp367 # Partially algorithmic converters ISO_2022 ISO-2022 { MIME } 2022 cp2022 ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA MIME } csISO2022JP ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA } ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA MIME } csISO2022JP2 ISO_2022,locale=ja,version=3 JIS7 csJISEncoding ISO_2022,locale=ja,version=4 JIS8 ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA MIME } csISO2022KR ISO_2022,locale=ko,version=1 ibm-25546 ibm-25546_P100 25546 ISO_2022,locale=zh,version=0 ISO-2022-CN { MIME } csISO2022CN ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { MIME } HZ HZ-GB-2312 { MIME } LMBCS-1 lmbcs LMBCS-2 LMBCS-3 LMBCS-4 LMBCS-5 LMBCS-6 LMBCS-8 LMBCS-11 LMBCS-16 LMBCS-17 LMBCS-18 LMBCS-19 ISCII,version=0 iscii-dev x-iscii-de ISCII,version=1 iscii-bng x-iscii-as x-iscii-be ISCII,version=2 iscii-gur x-iscii-pa ISCII,version=3 iscii-guj x-iscii-gu ISCII,version=4 iscii-ori x-iscii-or ISCII,version=5 iscii-tml x-iscii-ta ISCII,version=6 iscii-tlg x-iscii-te ISCII,version=7 iscii-knd x-iscii-ka ISCII,version=8 iscii-mlm x-iscii-ma # Table-based ibm-367 # Special mapping for S/390 new line characters ebcdic-xml-us # Interchange codepages ibm-912 iso-8859-2 { MIME } cp912 latin2 8859-2 csisolatin2 iso-ir-101 ISO_8859-2:1987 { IANA } l2 912 # Central Europe ibm-913 iso-8859-3 { MIME } latin3 cp913 8859-3 csisolatin3 iso-ir-109 ISO_8859-3:1988 { IANA } l3 913 # Maltese Esperanto ibm-914 iso-8859-4 { MIME } latin4 cp914 8859-4 csisolatin4 iso-ir-110 ISO_8859-4:1988 { IANA } l4 914 # Baltic ibm-915 iso-8859-5 { MIME } cyrillic cp915 8859-5 csisolatincyrillic 
iso-ir-144 ISO_8859-5:1988 { IANA } 915 # Cyrillic ibm-1089 iso-8859-6 { MIME } arabic cp1089 8859-6 csisolatinarabic iso-ir-127 ISO_8859-6:1987 { IANA } ecma-114 asmo-708 1089 # Arabic ibm-4909 cp813 iso-8859-7 { MIME } greek greek8 elot_928 ecma-118 8859-7 csisolatingreek iso-ir-126 ISO_8859-7:1987 { IANA } 813 # ISO Greek (w/ euro update) ibm-813 # Same as 4909 (w/o euro update) ibm-916 iso-8859-8 { MIME } hebrew cp916 8859-8 csisolatinhebrew iso-ir-138 ISO_8859-8:1988 { IANA } 916 # hebrew iso-8859-8i - typo? ibm-920 iso-8859-9 { MIME } ECMA-128 latin5 cp920 8859-9 csisolatin5 iso-ir-148 ISO_8859-9:1989 { IANA } l5 920 # Turkish ibm-923 iso-8859-15 { MIME } latin9 cp923 8859-15 latin0 csisolatin0 iso8859_15_fdis csisolatin9 923 # Latin 9 ibm-1252 ibm-1004 cp1004 # Windows Latin 1 without Euro ibm-942_P120-2000 ibm-942_VASCII_VSUB_VPUA ibm-942 ibm-932 ibm-932_VASCII_VSUB_VPUA # Old s_jis ibm-932 added! ibm-942_P12A-2000 ibm-942_VSUB_VPUA shift_jis78 sjis78 ibm-932_VSUB_VPUA ibm-943_P130-2000 ibm-943_VASCII_VSUB_VPUA ibm-943 # japanese. 
Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe Iana says that Windows-31J is an extension to csshiftjis ibm-932 removed ibm-943_P14A-2000 ibm-943_VSUB_VPUA Shift_JIS { MIME } csWindows31J sjis cp943 cp932 pck ms_kanji csshiftjis windows-31j x-sjis 943 ibm-949_P110-2000 ibm-949_VASCII_VSUB_VPUA ibm-949 ibm-949_P11A-2000 ibm-949_VSUB_VPUA KS_C_5601-1987 { IANA } iso-ir-149 KS_C_5601-1989 csKSC56011987 KSC_5601 { MIME } johab ks_x_1001:1992 949 ksc5601_1992 ksc5601_1987 # KSC-5601-1992, korean ibm-1370 Big5 { MIME } csBig5 x-big5 cp950 950 # Taiwan Big-5 (w/ euro update) ibm-950 # Taiwan Big-5 (w/o euro update) ibm-1386 gbk cp936 zh_cn # Chinese GBK removed ibm-33722_P120-2000 ibm-33722_VASCII_VPUA ibm-33722 cp33722 33722 ibm-5050 # Japan EUC with \ <-> Yen mapping ibm-33722_P12A-2000 ibm-33722_VPUA EUC-JP { MIME } ibm-eucJP eucjis extended_unix_code_packed_format_for_japanese cseucpkdfmtjapanese X-EUC-JP # Japan EUC. x-euc-jp is a MIME name ibm-970 EUC-KR { IANA MIME } ibm-eucKR csEUCKR # Korean EUC. x-euc-kr is a MIME name ibm-964 EUC-TW ibm-eucTW cns11643 # Taiwan EUC. x-euc-tw is a MIME name ibm-1383_P110-2000 ibm-1383_VPUA ibm-1383 EUC-CN ibm-eucCN GB_2312-80 { IANA } chinese gb iso-ir-58 csISO58GB231280 GB2312 { MIME } gb2312-1980 cp1383 1383 csGB2312# China EUC. 
x-euc-cn is a MIME name ibm-1162 tis-620 cp874 windows-874 ms874 cp9066 874 # Thai (w/ euro update) ibm-874 ibm-1161 # Same as 1162 (w/o euro update) # Platform codepages ibm-437 cp437 csPC8CodePage437 437 # PC US # HSYS: ibm-850 IBM850 { IANA } cp850 { MIME } 850 csPC850Multilingual # PC latin1 ibm-851 IBM851 { IANA } cp851 { MIME } 851 csPC851 # PC DOS Greek (w/o euro) ibm-858 cp858 { MIME } IBM00858 { IANA } # PC latin1 with Euro cp850 removed ibm-9044 852 csPCp852 cp852 # PC latin2 (w/ euro update) cp852 is a MIME name for IBM-852 ibm-852 IBM852 { IANA } # PC latin2 (w/o euro update) ibm-872 855 csIBM855 cp855 csPCp855 # PC cyrillic (w/ euro update) cp855 is a MIME name for IBM-855 ibm-855 IBM855 { IANA } # PC cyrillic (w/o euro update) ibm-856 cp856 { MIME } 856 # PC Hebrew (old) ibm-9049 857 csIBM857 cp857 { MIME } # PC Latin 5 (Turkish) (w/ euro update) ibm-857 IBM857 { IANA } # PC Latin 5 (w/o euro update) ibm-859 cp859 { MIME } # PC Latin 9 (w/ euro update) ibm-860 IBM860 { IANA } cp860 { MIME } 860 csIBM860 # PC Portugal ibm-861 IBM861 { IANA } cp861 { MIME } 861 cp-is csIBM861 # PC Iceland ibm-867 cp867 862 cp862 { MIME } cspc862latinhebrew # PC Hebrew (w/ euro update) ibm-862 IBM862 { IANA } # PC Hebrew (w/o euro update) ibm-863 IBM863 { IANA } cp863 { MIME } 863 csIBM863 # PC Canadian French ibm-17248 cp864 { MIME } csIBM864 # PC Arabic (w/ euro update) ibm-864 IBM864 { IANA } # PC Arabic (w/o euro update) ibm-865 IBM865 { IANA } cp865 { MIME } 865 csIBM865 # PC Nordic ibm-808 cp866 { MIME } 866 csIBM866 # PC Russian (w/ euro update) ibm-866 # PC Russian (w/o euro update) ibm-868 IBM868 { IANA } cp868 { MIME } cp-ar csIBM868 868 # PC Urdu ibm-9061 cp869 { MIME } 869 cp-gr csIBM869 # PC Greek (w/ euro update) ibm-869 IBM869 { IANA } # PC Greek (w/o euro update) ibm-878 KOI8-R { IANA MIME } cp878 koi8 cskoi8r # Russian internet ibm-901 cp921 { MIME } 921 # PC Baltic (w/ euro update) ibm-921 # PC Baltic (w/o euro update) ibm-902 cp922 { MIME } 922 # PC 
Estonian (w/ euro update) ibm-922 # PC Estonian (w/o euro update) #ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208 # ibm-941 is not JISX 208 code page #ibm-1038 Adobe-Symbol-Encoding csHPPSMath symbol ibm-5346 windows-1250 { IANA } cp1250 # Windows Latin2 (w/ euro update) ibm-5347 windows-1251 { IANA } cp1251 # Windows Cyrillic (w/ euro update) ibm-5348 windows-1252 { IANA } cp1252 # Windows Latin1 (w/ euro update) ibm-5349 windows-1253 { IANA } cp1253 # Windows Greek (w/ euro update) ibm-5350 windows-1254 { IANA } cp1254 # Windows Turkish (w/ euro update) ibm-5351 windows-1255 { IANA } cp1255 # Windows Hebrew (w/ euro update) ibm-5352 windows-1256 { IANA } cp1256 # Windows Arabic (w/ euro update) ibm-5353 windows-1257 { IANA } cp1257 # Windows Baltic (w/ euro update) ibm-5354 windows-1258 { IANA } cp1258 # Windows Vietnamese (w/ euro update) ibm-1250 # Windows Latin2 (w/o euro update) ibm-1251 # Windows Cyrillic (w/o euro update) ibm-1253 # Windows Greek (w/o euro update) ibm-1254 # Windows Turkish (w/o euro update) ibm-1255 # Windows Hebrew (w/o euro update) ibm-1256 # Windows Arabic (w/o euro update) ibm-1257 # Windows Baltic (w/o euro update) ibm-1258 # Windows Vietnamese (w/o euro update) ibm-1275 macintosh { IANA } mac { MIME } csMacintosh # Apple latin 1 ibm-1276 Adobe-Standard-Encoding { IANA } csAdobeStandardEncoding # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276) ibm-1277 Adobe-Latin1-Encoding ibm-1280 macgr # Apple Greek ibm-1281 mactr # Apple Turkish ibm-1282 macce # Apple Central Europe ibm-1283 maccy # Apple Cyrillic ibm-1051 hp-roman8 { IANA } roman8 r8 csHPRoman8 # HP Latin1 ibm-806_P100-2000 ibm-806 ibm-806_VSUB # PC ISCII-91: Indian Script Code ibm-1006_P100-2000 ibm-1006 ibm-1006_VPUA # Urdu ibm-1006_X100-2000 ibm-1006_STD # Urdu ibm-1098_P100-2000 ibm-1098 ibm-1098_VSUB_VPUA # Farsi ibm-1098_X100-2000 ibm-1098_VSUB # Farsi ibm-1124_P100-2000 ibm-1124 ibm-1124_STD # ISO Cyrillic Ukraine ibm-1125_P100-2000 ibm-1125 ibm-1125_VSUB # Cyrillic 
Ukraine PC ibm-1129_P100-2000 ibm-1129 ibm-1129_STD # ISO Vietnamese ibm-1131_P100-2000 ibm-1131 ibm-1131_VSUB # Cyrillic Belarus PC ibm-1133_P100-2000 ibm-1133 ibm-1133_STD # ISO Lao ibm-1381_P110-2000 ibm-1381 ibm-1381_VSUB_VPUA # S-Ch PC Data mixed (IBM GB) ibm-9066_P100-2000 ibm-9066 ibm-9066_VSUB # Thai PC # Added for more euro support ibm-849 cp1131 # PC Belarus (w/ euro update) ibm-848 cp1125 # PC Ukraine (w/ euro update) ibm-5104 cp1008 # 8-bit Arabic (w/ euro update) ibm-9238 cp1046 # PC Arabic Extended (w/ euro update) ibm-1363_P110-2000 ibm-1363 ibm-1363_VASCII_VSUB_VPUA ibm-1362 # Korean KSC Korean Windows MBCS ibm-1363_P11B-2000 ibm-1363_VSUB_VPUA windows-949 cp949 cp1363 ksc korean ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update) ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update) # EBCDIC codepages according to the CDRA # without Euro ibm-37 IBM037 { IANA } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 cp037 037 # EBCDIC US ibm-273 IBM273 { IANA } csIBM273 ebcdic-de cp273 cpibm273 273 # EBCDIC Germany, Austria... ibm-277 IBM277 { IANA } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 277 # EBCDIC Denmark... 
ibm-278 IBM278 { IANA } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 278 # EBCDIC Sweden ibm-280 IBM280 { IANA } ebcdic-cp-it csIBM280 cp280 cpibm280 280 # EBCDIC Italy ibm-284 IBM284 { IANA } ebcdic-cp-es csIBM284 cp284 cpibm284 284 # EBCDIC Spain ibm-285 IBM285 { IANA } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 285 # EBCDIC UK Ireland ibm-290 IBM290 { IANA } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana) ibm-297 IBM297 { IANA } ebcdic-cp-fr csIBM297 cp297 cpibm297 297 # EBCDIC France ibm-420 IBM420 { IANA } ebcdic-cp-ar1 csIBM420 cp420 420 ibm-424 IBM424 { IANA } ebcdic-cp-he csIBM424 cp424 424 ibm-500 IBM500 { IANA } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch 500 # EBCDIC International Latin1 ibm-803 cp803 # Old EBCDIC Hebrew ibm-834 cp834 # Korean DBCS Host ibm-835 cp835 # DBCS T-Ch Host ibm-870_P100-2000 ibm-870 CP870 IBM870 { IANA } ibm-870_STD ebcdic-cp-roece ebcdic-cp-yu csIBM870 ibm-871 IBM871 { IANA } ebcdic-cp-is csIBM871 cpibm871 cp871 871 # EBCDIC Iceland ibm-875_P100-2000 ibm-875 cp875 ibm-875 875 ibm-875_STD ibm-918_P100-2000 ibm-918 CP918 IBM918 { IANA } ibm-918_VPUA ebcdic-cp-ar2 csIBM918 ibm-918_X100-2000 ibm-918_STD ibm-930 cp930 cpibm930 930 # Japan EBCDIC MIXED ibm-933 cp933 cpibm933 933 # Korea EBCDIC MIXED ibm-935 cp935 cpibm935 935 # China EBCDIC MIXED ibm-937 cp937 cpibm937 937 # Taiwan EBCDIC MIXED ibm-939 cp939 939 # Host MBCS (Latin-Kanji) EBCDIC ibm-1025_P100-2000 ibm-1025 ibm-1025_STD ibm-1026_P100-2000 ibm-1026 CP1026 IBM1026 { IANA } csIBM1026 ibm-1026_STD ibm-1047 cpibm1047 # EBCDIC Open systems Latin1 ibm-1097_P100-2000 ibm-1097 ibm-1097_VPUA ibm-1097_X100-2000 ibm-1097_STD ibm-1112_P100-2000 ibm-1112 cp1112 1112 ibm-1112_STD ibm-1122_P100-2000 ibm-1122 cp1122 ibm-1122 1122 ibm-1122_STD ibm-1130_P100-2000 ibm-1130 ibm-1130_STD ibm-1132_P100-2000 ibm-1132 ibm-1132_STD ibm-1137_P100-2000 ibm-1137 ibm-1137_STD ibm-1388_P103-2001 ibm-1388 # S-Ch DBCS-Host Data GBK mixed MBCS ibm-9030_P100-2000 ibm-9030 
ibm-9030_STD #ibm-1046 # PC Arabic without EURO # with Euro ibm-1123 cpibm1123 # EBCDIC Cyrillic Ukraine ibm-1140 cpibm1140 IBM01140 { IANA } # EBCDIC US... ibm-1141 cpibm1141 IBM01141 { IANA } # EBCDIC Germany, Austria... ibm-1142 cpibm1142 IBM01142 { IANA } # EBCDIC Denmark... ibm-1143 cpibm1143 IBM01143 { IANA } # EBCDIC Sweden ibm-1144 cpibm1144 # EBCDIC Italy ibm-1145 cpibm1145 # EBCDIC Spain ibm-1146 cpibm1146 # EBCDIC UK Ireland ibm-1147 cpibm1147 # EBCDIC France ibm-1148 cpibm1148 # EBCDIC International Latin1 ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland ibm-1153 cpibm1153 # EBCDIC latin 2 ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual ibm-1155 cpibm1155 # EBCDIC Turkey ibm-1156 cpibm1156 # EBCDIC Baltic Multilingual ibm-1157 cpibm1157 # EBCDIC Estonia ibm-1158 cp1123 cpibm1158 1123 # EBCDIC Cyrillic Ukraine ibm-1159 cp28709 # SBCS T-Ch Host ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam ibm-1364_P110-2000 ibm-1364_VPUA ibm-1364 cp1364 # Korean Host Mixed ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED ibm-1390 cpibm1390 # Japan EBCDIC MIXED ibm-1399 # Host MBCS (Latin-Kanji) ibm-4899 cpibm4899 # Old EBCDIC Hebrew ibm-4971 cpibm4971 # EBCDIC Greek ibm-5123 cp1027 # Host Roman Jis ibm-8482 # host SBCS (Katakana) ibm-9027 # DBCS T-Ch Host ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update) ibm-16684 cp300 # Jis + Roman Jis Host ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic # unsupported IANA names # ebcdic-it csEBCDICIT # ebcdic-es csEBCDICES # csEBCDICFR ebcdic-fr # ibm-274 IBM274 { IANA } cp274 csIBM274 ebcdic-be # ibm-870 IBM870 { IANA } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870 870 # EBCDIC codepages for S/390, with LF and NL codes swapped # without Euro ibm-37-s390 ibm037-s390 # EBCDIC US ibm-1047-s390 # EBCDIC for S/390 Open Edition # with Euro ibm-1140-s390 # EBCDIC US ibm-1142-s390 # EBCDIC Denmark ibm-1143-s390 # EBCDIC Sweden ibm-1144-s390 # EBCDIC Italy ibm-1145-s390 
# EBCDIC Spain ibm-1146-s390 # EBCDIC UK Ireland ibm-1147-s390 # EBCDIC France ibm-1148-s390 # EBCDIC International Latin1 ibm-1149-s390 # EBCDIC Iceland ibm-1153-s390 # EBCDIC latin 2 ibm-12712-s390 # EBCDIC Hebrew ibm-16804-s390 # EBCDIC Arabic # GB 18030 is partly algorithmic, using the MBCS converter gb18030 ibm-1392