c63531c7e1
Added alias support for ISO 2022-JP X-SVN-Rev: 2240
284 lines
14 KiB
Plaintext
284 lines
14 KiB
Plaintext
# *******************************************************************************
|
|
# *
|
|
# * Copyright (C) 1995-2000, International Business Machines
|
|
# * Corporation and others. All Rights Reserved.
|
|
# *
|
|
# *******************************************************************************
|
|
|
|
# IMPORTANT NOTE
|
|
#
|
|
# This file is not read directly by ICU. If you change it, you need to
|
|
# run gencnval, and eventually pkgdata to update the representation that
|
|
# ICU uses for aliases.
|
|
|
|
# This is an alias file used by the character set converter.
|
|
#
|
|
# Format:
|
|
#
|
|
# Actual file name || Algorithm name alias1 alias2 ...
|
|
#
|
|
# All names are case-insensitive, except for column 1 (file names).
|
|
#
|
|
# All names can be tagged by including a space-separated list of tags in
|
|
# curly braces, as in ISO_8859-1:1987{IANA} iso-8859-1 { MIME } or
|
|
# some-charset{MIME IANA}. The order of tags does not matter, and
|
|
# whitespace is allowed between the tagged name and the tags list.
|
|
#
|
|
# The tags can be used to get standard names using ucnv_getStandardName().
|
|
#
|
|
# Here is a list of tags used in this file:
|
|
#
|
|
# IANA The IANA charset name, as documented in RFC 1700.
|
|
# MIME The MIME charset name, used for content type tagging.
|
|
|
|
# The world is getting more complicated...
|
|
# Supporting XML parsers, HTML, MIME, and similar applications
|
|
# that mark encodings with unique charset names, we are forced to
|
|
# make this table much more static than before.
|
|
|
|
# It means that a new encoding, one that differs from an
|
|
# old one by changing a code point, e.g., to the Euro sign,
|
|
# must not get an old alias, because it would mean that
|
|
# old files with this alias would be interpreted differently.
|
|
|
|
# If an encoding gets updated by assigning characters to previously
|
|
# unassigned code points, then a new name is not necessary.
|
|
# Also, some codepages map unassigned codepage byte values
|
|
# to the same numbers in Unicode for roundtripping. It may be
|
|
# industry practice to keep the encoding name in such a case, too
|
|
# (example: Windows codepages).
|
|
|
|
# Especially, the aliases listed in the list of character sets
|
|
# that is maintained by the IANA (http://www.iana.org/) must
|
|
# not be changed to mean encodings different from what this
|
|
# list shows.
|
|
# Currently, the IANA list is at
|
|
# http://www.isi.edu/in-notes/iana/assignments/character-sets
|
|
|
|
# Name matching is case-insensitive. Also, dashes '-', underscores '_'
|
|
# and spaces ' ' are ignored in names (thus cs-iso-latin-1 and csisolatin1
|
|
# are the same).
|
|
# However, the names in the left column are directly file names
|
|
# or names of algorithmic converters, and their case must not
|
|
# be changed - or else code and/or file names must also be changed.
|
|
|
|
# Algorithmic
|
|
UTF8 utf-8 { MIME } ibm-1208 cp1208
|
|
UTF16_BigEndian utf-16be { MIME }
|
|
UTF16_LittleEndian { MIME } utf-16le { MIME }
|
|
UTF16_PlatformEndian { MIME } ISO-10646-UCS-2 { IANA } csUnicode utf-16 { MIME } ibm-1200 cp1200 ucs-2
|
|
UTF16_OppositeEndian
|
|
LATIN_1 iso-8859-1 { MIME } ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 cp367 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983 #!!!!! There's a whole lot of names for this
|
|
ISO_2022 iso-2022 { MIME } 2022 cp2022
|
|
ISO_2022,locale=jp { MIME } iso-2022-jp
|
|
LMBCS-1 lmbcs
|
|
LMBCS-2
|
|
LMBCS-3
|
|
LMBCS-4
|
|
LMBCS-5
|
|
LMBCS-6
|
|
LMBCS-8
|
|
LMBCS-11
|
|
LMBCS-16
|
|
LMBCS-17
|
|
LMBCS-18
|
|
LMBCS-19
|
|
|
|
# Table-based
|
|
|
|
ibm-367 us-ascii { MIME } ascii ascii-7 US-ASCII ANSI_X3.4-1968 { IANA } ANSI_X3.4-1986 ISO_646.irv:1991 iso646-us us csASCII 646
|
|
|
|
# Special mapping for S/390 new line characters
|
|
ebcdic-xml-us
|
|
|
|
# Interchange codepages
|
|
ibm-912 iso-8859-2 { MIME } cp912 latin2 8859-2 csisolatin2 iso-ir-101 ISO_8859-2:1987 { IANA } l2 # Central Europe
|
|
ibm-913 iso-8859-3 { MIME } latin3 cp913 8859-3 csisolatin3 iso-ir-109 ISO_8859-3:1988 { IANA } l3 # Maltese Esperanto
|
|
ibm-914 iso-8859-4 { MIME } latin4 cp914 8859-4 csisolatin4 iso-ir-110 ISO_8859-4:1988 { IANA } l4 # Baltic
|
|
ibm-915 iso-8859-5 { MIME } cyrillic cp915 8859-5 csisolatincyrillic iso-ir-144 ISO_8859-5:1988 { IANA } # Cyrillic
|
|
ibm-1089 iso-8859-6 { MIME } arabic cp1089 8859-6 csisolatinarabic iso-ir-127 ISO_8859-6:1987 { IANA } ecma-114 asmo-708 # Arabic
|
|
ibm-4909 cp813 iso-8859-7 { MIME } greek greek8 elot_928 ecma-118 8859-7 csisolatingreek iso-ir-126 ISO_8859-7:1987 { IANA } # ISO Greek (w/ euro update)
|
|
ibm-813 # Same as 4909 (w/o euro update)
|
|
ibm-916 iso-8859-8 { MIME } hebrew cp916 8859-8 csisolatinhebrew iso-ir-138 ISO_8859-8:1988 { IANA } # hebrew iso-8859-8i - typo?
|
|
ibm-920 iso-8859-9 { MIME } ECMA-128 latin5 cp920 8859-9 csisolatin5 iso-ir-148 ISO_8859-9:1989 { IANA } l5 # Turkish
|
|
ibm-923 iso-8859-15 { MIME } latin9 cp923 8859-15 latin0 csisolatin0 csisolatin9 # Latin 9
|
|
ibm-1252 windows-1252 { MIME } cp1252 ibm-1004 cp1004 # Windows Latin 1 We don't have an ibm-5348, so this is a best possible match
|
|
ibm-943 Shift_JIS { MIME } csWindows31J sjis cp943 cp932 ms_kanji csshiftjis windows-31j x-sjis # japanese. Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe Iana says that Windows-31J is an extension to csshiftjis ibm-932 removed
|
|
ibm-949 KS_C_5601-1987 { IANA } iso-ir-149 KS_C_5601-1989 csKSC56011987 ksc-5601 { MIME } johab ks_x_1001:1992 # KSC-5601-1992, korean
|
|
ibm-1370 big5 { MIME } csBig5 x-big5 cp950 # Taiwan Big-5 (w/ euro update)
|
|
ibm-950 # Taiwan Big-5 (w/o euro update)
|
|
ibm-1386 GB_2312-80 { IANA } iso-ir-58 csISO58GB231280 gbk chinese gb { MIME } gb2312 gb2312-1980 cp936 zh_cn # Chinese GBK removed
|
|
ibm-33722 EUC-JP { MIME } ibm-eucJP eucjis extended_unix_code_packed_format_for_japanese cseuckdfmtjapanese X-EUC-JP # Japan EUC
|
|
ibm-970 EUC-KR { IANA MIME } ibm-eucKR csEUCKR # Korean EUC
|
|
ibm-964 EUC-TW { MIME } ibm-eucTW cns11643 # Taiwan EUC
|
|
ibm-1383 EUC-CN { MIME } ibm-eucCN # China EUC
|
|
ibm-1162 tis-620 cp874 windows-874 { MIME } ms874 cp9066 # Thai (w/ euro update)
|
|
ibm-874 ibm-1161 # Same as 1162 (w/o euro update)
|
|
|
|
lmb-excp # Special exceptions list for LMBCS algorithm
|
|
|
|
# Platform codepages
|
|
ibm-437 cp437 csPC8CodePage437 437 # PC US
|
|
# HSYS:
|
|
ibm-850 IBM850 { IANA } cp850 { MIME } 850 csPC850Multilingual # PC latin1
|
|
ibm-851 IBM851 { IANA } cp851 { MIME } 851 csPC851 # PC DOS Greek (w/o euro)
|
|
ibm-858 cp858 { MIME } # PC latin1 with Euro cp850 removed
|
|
ibm-9044 852 csPCp852 cp852 { MIME } # PC latin2 (w/ euro update)
|
|
ibm-852 IBM852 { IANA } # PC latin2 (w/o euro update)
|
|
ibm-872 855 csIBM855 cp855 { MIME } # PC cyrillic (w/ euro update)
|
|
ibm-855 IBM855 { IANA } # PC cyrillic (w/o euro update)
|
|
ibm-856 cp856 { MIME } # PC Hebrew (old)
|
|
ibm-9049 857 csIBM857 cp857 { MIME } # PC Latin 5 (Turkish) (w/ euro update)
|
|
ibm-857 IBM857 { IANA } # PC Latin 5 (w/o euro update)
|
|
ibm-859 cp859 { MIME } # PC Latin 9 (w/ euro update)
|
|
ibm-860 IBM860 { IANA } cp860 { MIME } 860 csIBM860 # PC Portugal
|
|
ibm-861 IBM861 { IANA } cp861 { MIME } 861 cp-is csIBM861 # PC Iceland
|
|
ibm-867 cp867 862 cp862 { MIME } cspc862latinhebrew # PC Hebrew (w/ euro update)
|
|
ibm-862 IBM862 { IANA } # PC Hebrew (w/o euro update)
|
|
ibm-863 IBM863 { IANA } cp863 { MIME } 863 csIBM863 # PC Canadian French
|
|
ibm-17248 cp864 { MIME } csIBM864 # PC Arabic (w/ euro update)
|
|
ibm-864 IBM864 { IANA } # PC Arabic (w/o euro update)
|
|
ibm-865 IBM865 { IANA } cp865 { MIME } 865 csIBM865 # PC Nordic
|
|
ibm-808 cp866 { MIME } 866 csIBM866 # PC Russian (w/ euro update)
|
|
ibm-866 # PC Russian (w/o euro update)
|
|
ibm-868 IBM868 { IANA } cp868 { MIME } cp-ar csIBM868 # PC Urdu
|
|
ibm-9061 cp869 { MIME } 869 cp-gr csIBM869 # PC Greek (w/ euro update)
|
|
ibm-869 IBM869 { IANA } # PC Greek (w/o euro update)
|
|
ibm-878 KOI8-R { IANA MIME } cp878 koi8 cskoi8r # Russian internet
|
|
ibm-901 cp921 { MIME } # PC Baltic (w/ euro update)
|
|
ibm-921 # PC Baltic (w/o euro update)
|
|
ibm-902 cp922 { MIME } # PC Estonian (w/ euro update)
|
|
ibm-922 # PC Estonian (w/o euro update)
|
|
ibm-942 shift_jis78 sjis78 ibm-932 # Old s_jis ibm-932 added!
|
|
#ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208# ibm-941 is not JISX 208 code page
|
|
ibm-1038 Adobe-Symbol-Encoding csHPPSMath symbol
|
|
ibm-5346 windows-1250 { MIME } cp1250 # Windows Latin2 (w/ euro update)
|
|
ibm-5347 windows-1251 { MIME } cp1251 # Windows Cyrillic (w/ euro update)
|
|
ibm-5349 windows-1253 { MIME } cp1253 # Windows Greek (w/ euro update)
|
|
ibm-5350 windows-1254 { MIME } cp1254 # Windows Turkish (w/ euro update)
|
|
ibm-5351 windows-1255 { MIME } cp1255 # Windows Hebrew (w/ euro update)
|
|
ibm-5352 windows-1256 { MIME } cp1256 # Windows Arabic (w/ euro update)
|
|
ibm-5353 windows-1257 { MIME } cp1257 # Windows Baltic (w/ euro update)
|
|
ibm-5354 windows-1258 { MIME } cp1258 # Windows Vietnamese (w/ euro update)
|
|
ibm-1250 # Windows Latin2 (w/o euro update)
|
|
ibm-1251 # Windows Cyrillic (w/o euro update)
|
|
ibm-1253 # Windows Greek (w/o euro update)
|
|
ibm-1254 # Windows Turkish (w/o euro update)
|
|
ibm-1255 # Windows Hebrew (w/o euro update)
|
|
ibm-1256 # Windows Arabic (w/o euro update)
|
|
ibm-1257 # Windows Baltic (w/o euro update)
|
|
ibm-1258 # Windows Vietnamese (w/o euro update)
|
|
|
|
ibm-1275 macintosh { IANA } mac { MIME } csMacintosh # Apple latin 1
|
|
ibm-1276 Adobe-Standard-Encoding csAdobeStandardEncoding
|
|
ibm-1277 Adobe-Latin1-Encoding
|
|
ibm-1280 macgr # Apple Greek
|
|
ibm-1281 mactr # Apple Turkish
|
|
ibm-1282 macce # Apple Central Europe
|
|
ibm-1283 maccy # Apple Cyrillic
|
|
|
|
ibm-1051 hp-roman8 roman8 r8 csHPRoman8 # HP Latin1
|
|
|
|
ibm-1388 #S-Ch DBCS-Host Data GBK mixed MBCS
|
|
|
|
# Added for more euro support
|
|
|
|
ibm-849 cp1131 # PC Belarus (w/ euro update)
|
|
ibm-848 cp1125 # PC Ukraine (w/ euro update)
|
|
ibm-5104 cp1008 # 8-bit Arabic (w/ euro update)
|
|
ibm-9238 cp1046 # PC Arabic Extended (w/ euro update)
|
|
ibm-1362 cp1362 # Korean Windows DBCS (w/ euro update)
|
|
ibm-1363 cp1363 cp949 ksc korean # Korean KSC Korean Windows MBCS (w/ euro update)
|
|
ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update)
|
|
ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update)
|
|
|
|
# EBCDIC codepages according to the CDRA
|
|
|
|
# without Euro
|
|
ibm-37 IBM037 { IANA } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 ebcdic-cp-us cp037 # EBCDIC US
|
|
ibm-273 IBM273 { IANA } csIBM273 ebcdic-de cp273 cpibm273 # EBCDIC Germany, Austria...
|
|
ibm-277 IBM277 { IANA } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 # EBCDIC Denmark...
|
|
ibm-278 IBM278 { IANA } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 # EBCDIC Sweden
|
|
ibm-280 IBM280 { IANA } ebcdic-cp-it csIBM280 cp280 cpibm280 # EBCDIC Italy
|
|
ibm-284 IBM284 { IANA } ebcdic-cp-es csIBM284 cp284 cpibm284 # EBCDIC Spain
|
|
ibm-285 IBM285 { IANA } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 # EBCDIC UK Ireland
|
|
ibm-290 IBM290 { IANA } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana)
|
|
ibm-297 IBM297 { IANA } ebcdic-cp-fr csIBM297 cp297 cpibm297 # EBCDIC France
|
|
ibm-420 IBM420 { IANA } ebcdic-cp-ar1 csIBM420 cp420
|
|
ibm-424 IBM424 { IANA } ebcdic-cp-he csIBM424 cp424
|
|
ibm-500 IBM500 { IANA } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch # EBCDIC International Latin1
|
|
ibm-803 cp803 # Old EBCDIC Hebrew
|
|
ibm-834 cp834 # Korean DBCS Host
|
|
ibm-835 cp835 # DBCS T-Ch Host
|
|
ibm-871 IBM871 { IANA } ebcdic-cp-is csIBM871 cpibm871 cp871 # EBCDIC Iceland
|
|
ibm-930 cp930 cpibm930 # Japan EBCDIC MIXED
|
|
ibm-933 cp933 cpibm933 # Korea EBCDIC MIXED
|
|
ibm-935 cp935 cpibm935 # China EBCDIC MIXED
|
|
ibm-937 cp937 cpibm937 # Taiwan EBCDIC MIXED
|
|
ibm-939 cp939 # Host MBCS (Latin-Kanji)
|
|
#ibm-1046 # PC Arabic without EURO
|
|
# with Euro
|
|
ibm-1390 cpibm1390 # Japan EBCDIC MIXED
|
|
ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED
|
|
ibm-1047 cpibm1047 # EBCDIC Open systems Latin1
|
|
ibm-1123 cpibm1123 # Cyrillic Ukraine EBCDIC
|
|
ibm-1140 cpibm1140 # EBCDIC US...
|
|
ibm-1141 cpibm1141 # EBCDIC Germany, Austria...
|
|
ibm-1142 cpibm1142 # EBCDIC Denmark...
|
|
ibm-1143 cpibm1143 # EBCDIC Sweden
|
|
ibm-1144 cpibm1144 # EBCDIC Italy
|
|
ibm-1145 cpibm1145 # EBCDIC Spain
|
|
ibm-1146 cpibm1146 # EBCDIC UK Ireland
|
|
ibm-1147 cpibm1147 # EBCDIC France
|
|
ibm-1148 cpibm1148 # EBCDIC International Latin1
|
|
ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland
|
|
ibm-1153 cpibm1153 # EBCDIC latin 2
|
|
ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual
|
|
ibm-1155 IBM1026 { IANA } csIBM1026 cp1026 cpibm1155 # EBCDIC Turkey
|
|
ibm-1156 cp1112 cpibm1156 # EBCDIC Baltic Multilingual
|
|
ibm-1157 cp1122 cpibm1157 # EBCDIC Estonia
|
|
ibm-1158 cp1123 cpibm1158 # Cyrillic Ukraine EBCDIC
|
|
ibm-1159 cp28709 # SBCS T-Ch Host
|
|
ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand
|
|
ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam
|
|
|
|
ibm-1399 # Host MBCS (Latin-Kanji)
|
|
ibm-4930 cp4930 # Korean DBCS Host
|
|
ibm-1364 cp1364 # Korean Host Mixed
|
|
ibm-8482 # host SBCS (Katakana)
|
|
ibm-4899 cpibm4899 # Old EBCDIC Hebrew
|
|
ibm-4971 cp875 cpibm4971 # EBCDIC Greek
|
|
ibm-9027 # DBCS T-Ch Host
|
|
ibm-5123 cp1027 # Host Roman Jis
|
|
ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update)
|
|
ibm-16684 cp300 # Jis + Roman Jis Host
|
|
ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic
|
|
|
|
# unsupported IANA names
|
|
# ebcdic-it csEBCDICIT
|
|
# ebcdic-es csEBCDICES
|
|
# csEBCDICFR ebcdic-fr
|
|
# ibm-274 IBM274 { IANA } cp274 csIBM274 ebcdic-be
|
|
# ibm-870 IBM870 { IANA } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870
|
|
|
|
# EBCDIC codepages for S/390, with LF and NL codes swapped
|
|
|
|
# without Euro
|
|
ibm-37-s390 ibm037-s390 # EBCDIC US
|
|
ibm-1047-s390 # EBCDIC for S/390 Open Edition
|
|
|
|
# with Euro
|
|
ibm-1140-s390 # EBCDIC US
|
|
ibm-1142-s390 # EBCDIC Denmark
|
|
ibm-1143-s390 # EBCDIC Sweden
|
|
ibm-1144-s390 # EBCDIC Italy
|
|
ibm-1145-s390 # EBCDIC Spain
|
|
ibm-1146-s390 # EBCDIC UK Ireland
|
|
ibm-1147-s390 # EBCDIC France
|
|
ibm-1148-s390 # EBCDIC International Latin1
|
|
ibm-1149-s390 # EBCDIC Iceland
|
|
ibm-1153-s390 # EBCDIC latin 2
|
|
ibm-12712-s390 # EBCDIC Hebrew
|
|
ibm-16804-s390 # EBCDIC Arabic
|
|
|