ICU-868 New alias scheme.

X-SVN-Rev: 8975
This commit is contained in:
George Rhoten 2002-06-28 23:13:30 +00:00
parent a79775fe45
commit 70debd215f
11 changed files with 1339 additions and 661 deletions

View File

@ -1724,7 +1724,9 @@ _uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
"U_USING_DEFAULT_WARNING",
"U_SAFECLONE_ALLOCATED_WARNING",
"U_STATE_OLD_WARNING",
"U_STRING_NOT_TERMINATED_WARNING"
"U_STRING_NOT_TERMINATED_WARNING",
"U_SORT_KEY_TOO_SHORT_WARNING",
"U_AMBIGUOUS_ALIAS_WARNING"
};
static const char * const

View File

@ -283,8 +283,7 @@ ucnv_countAvailable ()
/*
 * Public API: report how many aliases are listed for the converter that
 * `alias` names; errors are reported through pErrorCode by the callee.
 *
 * NOTE: this is a diff rendering without +/- markers. The declaration of `p`
 * and the first return appear to be the superseded implementation (the count
 * was the return value of the old three-argument ucnv_io_getAliases); the
 * second return is its replacement, which forwards to ucnv_io_countAliases().
 */
U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
{
const char *p;
return ucnv_io_getAliases(alias, &p, pErrorCode);
return ucnv_io_countAliases(alias, pErrorCode);
}
@ -297,14 +296,7 @@ ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
/*
 * Public API: fill the caller-supplied array `aliases` with pointers to the
 * alias names of the converter that `alias` names.
 *
 * NOTE: this is a diff rendering without +/- markers. The lines from the
 * declaration of `p` through the while loop appear to be the superseded
 * implementation, which walked a run of consecutive NUL-terminated strings.
 * The final call is the replacement: it delegates to the four-argument
 * ucnv_io_getAliases(), passing 0 as the start index.
 */
U_CAPI void U_EXPORT2
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
{
const char *p;
uint16_t count=ucnv_io_getAliases(alias, &p, pErrorCode);
while(count>0) {
*aliases++=p;
/* skip a name, first the canonical converter name */
p+=uprv_strlen(p)+1;
--count;
}
/* replacement: let ucnv_io_getAliases() write the pointers, starting at index 0 */
ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
}
U_CAPI uint16_t U_EXPORT2

View File

@ -31,67 +31,140 @@
#include "unicode/udata.h"
#include "ucln_cmn.h"
/* Format of cnvalias.dat ------------------------------------------------------
/* Format of cnvalias.icu -----------------------------------------------------
*
* cnvalias.dat is a binary, memory-mappable form of convrtrs.txt .
* It contains two sorted tables and a block of zero-terminated strings.
* Each table is preceded by the number of table entries.
* cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
* This binary form contains several tables. All indexes are to uint16_t
* units, and not to the bytes (uint8_t units). Addressing everything on
* 16-bit boundaries allows us to store more information with small index
* numbers, which are also 16-bit in size. The majority of the table (except
* the string table) are 16-bit numbers.
*
* The first table maps from aliases to converter indexes.
* The converter names themselves are listed as aliases in this table.
* Each entry in this table has an offset to the alias and
* an index of the converter in the converter table.
* First there is the size of the Table of Contents (TOC). The TOC
* entries contain the size of each section. In order to find the offset
* of a section you just need to sum up the sizes of the previous sections.
*
* The second table lists only the converters themselves.
* Each entry in this table has an offset to the converter name and
* the number of aliases, including the converter itself.
* A count of 1 means that there is no alias, only the converter name.
* 1) This section contains a list of converters. This list contains indexes
* into the string table for the converter name. The index of this list is
* also used by other sections, which are mentioned later on.
*
* In the block of strings after the tables, each converter name is directly
* followed by its aliases. All offsets to strings are offsets from the
* beginning of the data.
* 2) This section contains a list of tags. This list contains indexes
* into the string table for the tag name. The index of this list is
* also used by other sections, which are mentioned later on.
*
* More formal file data structure (data format 2.1):
* 3) This section contains a sorted list of unique aliases. This
* list contains indexes into the string table for the alias name. The
* index of this list is also used by other sections, which are mentioned
* later on.
*
* uint16_t aliasCount;
* uint16_t aliasOffsets[aliasCount];
* uint16_t converterIndexes[aliasCount];
* 4) This section contains a list of mapped converter names. Consider this
* as a table that maps the 3rd section to the 1st section. This list contains
* indexes into the 1st section. The index of this list is the same index in
* the 3rd section. There is also some extra information in the high bits of
* each converter index in this table. Currently it's only used to say that
* an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
* and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
* the predigested form of the 5th section so that an alias lookup can be fast.
*
* 5) This section contains a 2D array with indexes to the 6th section. This
* section is the full form of all alias mappings. The column index is the
* index into the converter list (column header). The row index is the index
* into the tag list (row header). This 2D array is the top part of a 3D array.
* The third dimension is in the 6th section.
*
* uint16_t converterCount;
* struct {
* uint16_t converterOffset;
* uint16_t aliasCount;
* } converters[converterCount];
* 6) This is a blob of variable-length arrays. Each array starts with a size,
* and is followed by indexes to alias names in the string table. This is
* the third dimension of section 5. No other section should be referencing
* this section.
*
* uint16_t tagCount;
* uint16_t taggedAliasesOffsets[tagCount][converterCount];
* char tags[] = { "Tag0\Tag1\0..." };
* 7) Reserved at this time (There is no information). This _usually_ has a
* size of 0. Future versions may add more information here.
*
* char strings[]={
* "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\Alias1\0..."
* };
* 8) This is the string table. All strings are indexed on an even address.
* There are two reasons for this. First many chip architectures locate strings
* faster on even address boundaries. Second, since all indexes are 16-bit
* numbers, this string table can be 128KB in size instead of 64KB when we
* only have strings starting on an even address.
*
* The code included here can read versions 2 and 2.1 of the data format.
* Version 2 does not have tag information, but since the code never refers
* to strings[] by its base offset, it's okay.
*
* Here is the concept of section 5 and 6. It's a 3D cube. Each tag
* has a unique alias among all converters. That same alias can
* be mentioned in other standards on different converters,
* but only one alias per tag can be unique.
*
*
* Converter Names (Usually in TR22 form)
* -------------------------------------------.
* T / /|
* a / / |
* g / / |
* s / / |
* / / |
* ------------------------------------------/ |
* A | | |
* l | | |
* i | | /
* a | | /
* s | | /
* e | | /
* s | |/
* -------------------------------------------
*
*
*
* Here is what it really looks like. It's like swiss cheese.
* There are holes. Some converters aren't recognized by
* a standard, or they are really old converters that the
* standard doesn't recognize anymore.
*
* Converter Names (Usually in TR22 form)
* -------------------------------------------.
* T /##########################################/|
* a / # # /#
* g / # ## ## ### # ### ### ### #/
* s / # ##### #### ## ## #/#
* / ### # # ## # # # ### # # #/##
* ------------------------------------------/# #
* A |### # # ## # # # ### # # #|# #
* l |# # # # # ## # #|# #
* i |# # # # # # #|#
* a |# #|#
* s | #|#
* e
* s
*
*/
static const char DATA_NAME[] = "cnvalias";
static const char DATA_TYPE[] = "dat";
static const char DATA_TYPE[] = "icu";
static UDataMemory *aliasData=NULL;
static const uint16_t *aliasTable=NULL;
static const uint16_t *converterList = NULL;
static const uint16_t *tagList = NULL;
static const uint16_t *aliasList = NULL;
static const uint16_t *untaggedConvArray = NULL;
static const uint16_t *taggedAliasArray = NULL;
static const uint16_t *taggedAliasLists = NULL;
static const uint16_t *stringTable = NULL;
static uint32_t converterListNum;
static uint32_t tagListNum;
static uint32_t aliasListNum;
static uint32_t untaggedConvArraySize;
static uint32_t taggedAliasArraySize;
static uint32_t taggedAliasListsSize;
static uint32_t stringTableSize;
static const char **availableConverters = NULL;
static uint16_t availableConverterCount = 0;
static const uint16_t *converterTable = NULL;
static const uint16_t *tagTable = NULL;
static char defaultConverterNameBuffer[100];
static char defaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *defaultConverterName = NULL;
#define GET_STRING(idx) (const char *)(stringTable + (idx))
#define NUM_RESERVED_TAGS 2
static UBool
isAcceptable(void *context,
const char *type, const char *name,
@ -104,7 +177,7 @@ isAcceptable(void *context,
pInfo->dataFormat[1]==0x76 &&
pInfo->dataFormat[2]==0x41 &&
pInfo->dataFormat[3]==0x6c &&
pInfo->formatVersion[0]==2);
pInfo->formatVersion[0]==3);
}
static UBool
@ -115,32 +188,64 @@ haveAliasData(UErrorCode *pErrorCode) {
/* load converter alias data from file if necessary */
if(aliasData==NULL) {
UDataMemory *data;
UDataInfo info;
const uint16_t *table=NULL;
UDataMemory *data = NULL;
const uint16_t *table = NULL;
uint32_t tableStart;
uint32_t currOffset;
uint32_t reservedSize1;
/* open the data outside the mutex block */
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return FALSE;
}
table=(const uint16_t *)udata_getMemory(data);
info.size=sizeof(UDataInfo);
udata_getInfo(data, &info);
table = (const uint16_t *)udata_getMemory(data);
tableStart = ((const uint32_t *)(table))[0];
if (tableStart < 8) {
*pErrorCode = U_INVALID_FORMAT_ERROR;
return FALSE;
}
/* in the mutex block, set the data for this process */
umtx_lock(NULL);
if(aliasData==NULL) {
aliasData=data;
aliasData = data;
data=NULL;
aliasTable=table;
table=NULL;
converterTable = aliasTable + 1 + 2 * *aliasTable;
if (info.formatVersion[0] == 2 && info.formatVersion[1] > 0) {
tagTable = converterTable + 1 + 2 * *converterTable;
}
converterListNum = ((const uint32_t *)(table))[1];
tagListNum = ((const uint32_t *)(table))[2];
aliasListNum = ((const uint32_t *)(table))[3];
untaggedConvArraySize = ((const uint32_t *)(table))[4];
taggedAliasArraySize = ((const uint32_t *)(table))[5];
taggedAliasListsSize = ((const uint32_t *)(table))[6];
reservedSize1 = ((const uint32_t *)(table))[7]; /* reserved */
stringTableSize = ((const uint32_t *)(table))[8];
currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
converterList = table + currOffset;
currOffset += converterListNum;
tagList = table + currOffset;
currOffset += tagListNum;
aliasList = table + currOffset;
currOffset += aliasListNum;
untaggedConvArray = table + currOffset;
currOffset += untaggedConvArraySize;
taggedAliasArray = table + currOffset;
/* aliasLists is a 1's based array, but it has a padding character */
currOffset += taggedAliasArraySize;
taggedAliasLists = table + currOffset;
currOffset += taggedAliasListsSize;
/* reserved */
currOffset += reservedSize1;
stringTable = table + currOffset;
}
umtx_unlock(NULL);
@ -175,49 +280,40 @@ ucnv_io_cleanup()
ucnv_io_flushAvailableConverterCache();
aliasData = NULL;
aliasTable = NULL;
converterListNum = 0;
tagListNum = 0;
aliasListNum = 0;
untaggedConvArraySize = 0;
taggedAliasArraySize = 0;
taggedAliasListsSize = 0;
stringTableSize = 0;
converterTable = NULL;
tagTable = NULL;
converterList = NULL;
tagList = NULL;
aliasList = NULL;
untaggedConvArray = NULL;
taggedAliasArray = NULL;
taggedAliasLists = NULL;
stringTable = NULL;
defaultConverterName = NULL;
defaultConverterNameBuffer[0] = 0;
return TRUE; /* Everything was cleaned up */
}
/*
 * Look up a tag (standard) name and return its index.
 *
 * NOTE: this is a diff rendering without +/- markers, so two versions are
 * interleaved below:
 *  - the int16_t version walks the packed tag strings that follow tagTable
 *    and returns -1 when nothing matches;
 *  - the uint32_t version scans tagList[0..tagListNum), resolves each entry
 *    through GET_STRING(), compares it to `tagname` with uprv_stricmp(), and
 *    returns UINT32_MAX when tagList is NULL or nothing matches.
 */
static int16_t getTagNumber(const char *tagname) {
if (tagTable) {
int16_t tag, count = (int16_t) *tagTable;
const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
#if 0
char name[100];
int i;
/* convert the tag name to lowercase to do case-insensitive comparisons */
for(i = 0; i < sizeof(name) - 1 && *tagname; ++i) {
name[i] = (char)uprv_tolower(*tagname++);
}
name[i] = 0;
#else
const char *name = tagname;
#endif
for (tag = 0; count--; ++tag) {
if (!uprv_stricmp(name, tags)) {
return tag;
static uint32_t getTagNumber(const char *tagname) {
if (tagList) {
uint32_t tagNum;
/* linear scan; the returned value is the position in tagList */
for (tagNum = 0; tagNum < tagListNum; tagNum++) {
if (!uprv_stricmp(GET_STRING(tagList[tagNum]), tagname)) {
return tagNum;
}
tags += strlen(tags) + 1;
}
}
return -1;
/* not found: callers in this file test the result with `< tagListNum - NUM_RESERVED_TAGS` */
return UINT32_MAX;
}
/**
@ -240,14 +336,16 @@ static int16_t getTagNumber(const char *tagname) {
U_CAPI int U_EXPORT2
ucnv_compareNames(const char *name1, const char *name2) {
int rc;
unsigned char c1, c2;
char c1, c2;
for (;;) {
/* Ignore delimiters '-', '_', and ' ' */
while ((c1 = (unsigned char)*name1) == '-'
|| c1 == '_' || c1 == ' ') ++name1;
while ((c2 = (unsigned char)*name2) == '-'
|| c2 == '_' || c2 == ' ') ++name2;
while ((c1 = *name1) == '-' || c1 == '_' || c1 == ' ') {
++name1;
}
while ((c2 = *name2) == '-' || c2 == '_' || c2 == ' ') {
++name2;
}
/* If we reach the ends of both strings then they match */
if ((c1|c2)==0) {
@ -257,7 +355,7 @@ ucnv_compareNames(const char *name1, const char *name2) {
/* Case-insensitive comparison */
rc = (int)(unsigned char)uprv_tolower(c1) -
(int)(unsigned char)uprv_tolower(c2);
if (rc!=0) {
if (rc != 0) {
return rc;
}
++name1;
@ -267,69 +365,87 @@ ucnv_compareNames(const char *name1, const char *name2) {
/*
* search for an alias
* return NULL or a pointer to the converter table entry
* return the converter number index for converterList
*/
/*
 * Binary-search the alias list for `alias` and return the index of the
 * matching converter in converterList, or UINT32_MAX when it is not found.
 *
 * NOTE: this is a diff rendering without +/- markers. The lines belonging to
 * findAlias() (the `name[100]` lowercase copy, the search over aliasTable and
 * the "did we really find it?" tail) appear to be the superseded
 * implementation; findConverter() is its replacement.
 *
 * findConverter() details:
 *  - names are compared with ucnv_compareNames(), on the caller's string as is;
 *  - aliasList[mid] and untaggedConvArray[mid] are indexed with the same
 *    `mid`, and the search range is bounded by untaggedConvArraySize;
 *  - on a match, *pErrorCode is set to U_AMBIGUOUS_ALIAS_WARNING when the
 *    entry has UCNV_AMBIGUOUS_ALIAS_MAP_BIT set, and the entry masked with
 *    UCNV_CONVERTER_INDEX_MASK is returned.
 *
 * NOTE(review): when untaggedConvArraySize == 1, `mid` starts at 0, so the
 * `mid != 0` guard ends the loop before element 0 is ever compared. When it
 * is 0, `limit` wraps to UINT32_MAX, `start + limit + 1` wraps to 0 and
 * aliasList[0] is read. Confirm that the data tool never produces such tables.
 */
static const uint16_t *
findAlias(const char *alias) {
char name[100];
const uint16_t *p=aliasTable;
uint16_t i, start, limit;
limit=*p++;
if(limit==0) {
/* there are no aliases */
return NULL;
}
/* convert the alias name to lowercase to do case-insensitive comparisons */
for(i=0; i<sizeof(name)-1 && *alias!=0; ++i) {
name[i]=(char)uprv_tolower(*alias++);
}
name[i]=0;
static uint32_t
findConverter(const char *alias, UErrorCode *pErrorCode) {
uint32_t mid, start, limit;
int result;
/* do a binary search for the alias */
start=0;
while(start<limit-1) {
i=(uint16_t)((start+limit)/2);
if(ucnv_compareNames(name, (const char *)aliasTable+p[i])<0) {
limit=i;
/* [start, limit] is an inclusive range of candidate indexes */
start = 0;
limit = untaggedConvArraySize - 1;
mid = limit;
/* Once mid == 0 we've already checked the 0'th element and we can stop */
while (start <= limit && mid != 0) {
mid = (uint32_t)((start + limit + 1) / 2); /* +1 is to round properly */
result = ucnv_compareNames(alias, GET_STRING(aliasList[mid]));
if (result < 0) {
limit = mid-1;
} else if (result > 0) {
start = mid+1;
} else {
start=i;
/* Since the gencnval tool folds duplicates into one entry,
* this alias in aliasList is unique, but different standards
* may map an alias to different converters.
*/
if (untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
*pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
}
/* strip the flag bits so that only the converter index is returned */
return untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
}
}
/* did we really find it? */
if(ucnv_compareNames(name, (const char *)aliasTable+p[start])==0) {
limit=*(p-1); /* aliasCount */
p+=limit; /* advance to the second column of the alias table */
i=p[start]; /* converter index */
return
p+limit+ /* beginning of converter table */
1+ /* skip its count */
2*i; /* go to this converter's entry and return a pointer to it */
} else {
return NULL;
}
/* not found: callers test the result with `< converterListNum` */
return UINT32_MAX;
}
/*
 * Map an alias to the name stored for its converter in converterList.
 *
 * Returns a pointer into the string table, or NULL when haveAliasData() or
 * isAlias() fails or when findConverter() yields an index that is not below
 * converterListNum. pErrorCode is passed on to findConverter(), so a
 * U_AMBIGUOUS_ALIAS_WARNING set there reaches the caller.
 *
 * NOTE: this is a diff rendering without +/- markers; the three lines that
 * use findAlias() and aliasTable appear to be the superseded implementation.
 */
U_CFUNC const char *
ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
const uint16_t *p=findAlias(alias);
if(p!=NULL) {
return (const char *)aliasTable+*p;
uint32_t convNum = findConverter(alias, pErrorCode);
if (convNum < converterListNum) {
return GET_STRING(converterList[convNum]);
}
}
return NULL;
}
/*
 * Return the number of aliases listed for the converter that `alias` names,
 * or 0 when the data is unavailable, the alias is not found, or the converter
 * has no list under the ALL tag.
 *
 * The count is the first element of the converter's list in taggedAliasLists;
 * the list is located through the last row (tagListNum - 1, the ALL tag) of
 * taggedAliasArray. A listOffset of 0 is treated as "no list".
 *
 * NOTE: this is a diff rendering without +/- markers; the three-argument
 * ucnv_io_getAliases signature and the findAlias()-based lines appear to be
 * the superseded implementation.
 */
U_CFUNC uint16_t
ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) {
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
const uint16_t *p=findAlias(alias);
if(p!=NULL) {
*aliases=(const char *)aliasTable+*p;
return *(p+1);
uint32_t convNum = findConverter(alias, pErrorCode);
if (convNum < converterListNum) {
/* tagListNum - 1 is the ALL tag */
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
if (listOffset) {
/* the element at listOffset is the list's size */
return taggedAliasLists[listOffset];
}
}
}
return 0;
}
U_CFUNC uint16_t
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
uint32_t currAlias;
uint32_t convNum = findConverter(alias, pErrorCode);
if (convNum < converterListNum) {
/* tagListNum - 1 is the ALL tag */
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
if (listOffset) {
uint32_t listCount = taggedAliasLists[listOffset];
/* +1 to skip listCount */
const uint16_t *currList = taggedAliasLists + listOffset + 1;
for (currAlias = start; currAlias < listCount; currAlias++) {
aliases[currAlias] = GET_STRING(currList[currAlias]);
}
}
}
}
return 0;
@ -338,17 +454,20 @@ ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCo
U_CFUNC const char *
ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
const uint16_t *p=findAlias(alias);
if(p!=NULL) {
uint16_t count=*(p+1);
if(n<count) {
const char *aliases=(const char *)aliasTable+*p;
while(n>0) {
/* skip a name, first the canonical converter name */
aliases+=uprv_strlen(aliases)+1;
--n;
uint32_t convNum = findConverter(alias, pErrorCode);
if (convNum < converterListNum) {
/* tagListNum - 1 is the ALL tag */
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
if (listOffset) {
uint32_t listCount = taggedAliasLists[listOffset];
/* +1 to skip listCount */
const uint16_t *currList = taggedAliasLists + listOffset + 1;
if (n < listCount) {
return GET_STRING(currList[n]);
}
return aliases;
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
@ -358,12 +477,8 @@ ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
U_CFUNC uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode) {
if (haveAliasData(pErrorCode)) {
if (!tagTable) {
*pErrorCode = U_INVALID_FORMAT_ERROR;
return 0;
}
return *tagTable;
/* Don't include the empty list */
return (uint16_t)(tagListNum - NUM_RESERVED_TAGS);
}
return 0;
@ -371,15 +486,11 @@ ucnv_io_countStandards(UErrorCode *pErrorCode) {
U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
if (haveAliasData(pErrorCode) && tagTable) {
int16_t count = (int16_t) *tagTable;
const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
while (n-- && count--) {
tags += strlen(tags) + 1;
if (haveAliasData(pErrorCode)) {
if (n < tagListNum - NUM_RESERVED_TAGS) {
return GET_STRING(tagList[n]);
}
return count ? tags : NULL;
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
}
return NULL;
@ -388,18 +499,56 @@ ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
/*
 * Return the name that the given standard (tag) lists first for the converter
 * that `alias` names, or NULL when there is none.
 *
 * Steps of the replacement implementation:
 *  1. Resolve `standard` with getTagNumber() and `alias` with findConverter();
 *     the lookup status goes into the local myErr, not into *pErrorCode.
 *  2. Continue only if the tag is below tagListNum - NUM_RESERVED_TAGS and
 *     the converter index is below converterListNum.
 *  3. If the alias was flagged ambiguous, scan every converter's list for
 *     this tag and, on the first list that contains the alias, return that
 *     list's first entry (NULL if that entry is 0).
 *     If no list contains it, set *pErrorCode to U_AMBIGUOUS_ALIAS_WARNING
 *     and fall through to step 4.
 *  4. Return the first entry of the list at (tagNum, convNum) if it exists and
 *     is non-zero.
 *
 * NOTE: this is a diff rendering without +/- markers; the lines that use
 * findAlias(), `tag` and tagTable appear to be the superseded implementation,
 * as does one of the two trailing `return NULL;` lines.
 */
U_CFUNC const char * U_EXPORT2
ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
const uint16_t *p = findAlias(alias);
if(p != NULL) {
int16_t tag = getTagNumber(standard);
uint32_t idx;
uint32_t listOffset;
uint32_t convNum;
uint32_t tagNum = getTagNumber(standard);
UErrorCode myErr = U_ZERO_ERROR;
if (tag > -1) {
uint16_t offset = tagTable[1 + tag * *converterTable + (p - converterTable) / 2];
return offset ? (const char *) aliasTable + offset : NULL;
/* Make a quick guess. Hopefully they used a TR22 canonical alias. */
convNum = findConverter(alias, &myErr);
if (tagNum < (tagListNum - NUM_RESERVED_TAGS) && convNum < converterListNum) {
if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
/* Uh Oh! They used an ambiguous alias.
Hopefully the standard knows the alias.
This may take a while.
*/
for (idx = 0; idx < converterListNum; idx++) {
/* row = tag, column = converter; 0 means this cell has no list */
listOffset = taggedAliasArray[tagNum*converterListNum + idx];
if (listOffset) {
uint32_t currAlias;
uint32_t listCount = taggedAliasLists[listOffset];
/* +1 to skip listCount */
const uint16_t *currList = taggedAliasLists + listOffset + 1;
for (currAlias = 0; currAlias < listCount; currAlias++) {
/* entries equal to 0 are skipped */
if (currList[currAlias]
&& ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
{
if (currList[0]) {
return GET_STRING(currList[0]);
}
else {
/* Someone screwed up the alias table. */
return NULL;
}
}
}
}
}
/* The standard doesn't know about the alias */
*pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
}
/* default path: first entry of the list for this tag and the converter found above */
listOffset = taggedAliasArray[tagNum*converterListNum + convNum];
if (listOffset && taggedAliasLists[listOffset + 1]) {
return GET_STRING(taggedAliasLists[listOffset + 1]);
}
/* else no default name */
}
/* else converter or tag not found */
}
return NULL;
return NULL;
}
void
@ -413,41 +562,52 @@ ucnv_io_flushAvailableConverterCache() {
availableConverterCount = 0;
}
/*
 * Build, once, the list of converters that can actually be opened.
 *
 * NOTE: this is a diff rendering without +/- markers. The lines belonging to
 * ucnv_io_loadAvailableConverterList() (void, no error reporting, unchecked
 * uprv_malloc) appear to be the superseded implementation;
 * haveAvailableConverterList() is its replacement.
 *
 * haveAvailableConverterList():
 *  - does nothing but return TRUE when availableConverters is already set;
 *  - returns FALSE when haveAliasData() fails, or when the allocation fails
 *    (setting *pErrorCode to U_MEMORY_ALLOCATION_ERROR);
 *  - otherwise tries ucnv_open()/ucnv_close() on every name in converterList
 *    and keeps the names for which the status is a success;
 *  - publishes the list under umtx_lock(NULL); if availableConverters was set
 *    in the meantime, the local list is freed instead.
 *
 * NOTE(review): the first `availableConverters == NULL` test is made without
 * holding the mutex - confirm that this is acceptable on all platforms.
 */
static void ucnv_io_loadAvailableConverterList(void) {
uint16_t idx = 0;
uint16_t localConverterCount = 0;
UErrorCode status;
char *converterName;
/* We can't have more than "*converterTable" converters to open */
char **localConverterList = (char **) uprv_malloc(*converterTable * sizeof(char*));
for (; idx < *converterTable; idx++) {
status = U_ZERO_ERROR;
converterName = (char *)aliasTable+converterTable[1+2*idx];
ucnv_close(ucnv_open(converterName, &status));
if (U_SUCCESS(status)) {
localConverterList[localConverterCount++] = converterName;
}
}
umtx_lock(NULL);
static UBool haveAvailableConverterList(UErrorCode *pErrorCode) {
if (availableConverters == NULL) {
availableConverters = (const char **)localConverterList;
availableConverterCount = localConverterCount;
uint16_t idx;
uint16_t localConverterCount;
UErrorCode status;
const char *converterName;
const char **localConverterList;
if (!haveAliasData(pErrorCode)) {
return FALSE;
}
/* We can't have more than converterListNum converters to open */
localConverterList = (const char **) uprv_malloc(converterListNum * sizeof(char*));
if (!localConverterList) {
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
localConverterCount = 0;
for (idx = 0; idx < converterListNum; idx++) {
/* a fresh status per converter: one failure must not affect the next one */
status = U_ZERO_ERROR;
converterName = GET_STRING(converterList[idx]);
ucnv_close(ucnv_open(converterName, &status));
if (U_SUCCESS(status)) {
localConverterList[localConverterCount++] = converterName;
}
}
umtx_lock(NULL);
/* re-check under the lock; only the first finished list is kept */
if (availableConverters == NULL) {
availableConverters = localConverterList;
availableConverterCount = localConverterCount;
}
else {
uprv_free((char **)localConverterList);
}
umtx_unlock(NULL);
}
else {
uprv_free(localConverterList);
}
umtx_unlock(NULL);
return TRUE;
}
U_CFUNC uint16_t
ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
if (availableConverters == NULL) {
ucnv_io_loadAvailableConverterList();
}
if (haveAvailableConverterList(pErrorCode)) {
return availableConverterCount;
}
return 0;
@ -455,20 +615,18 @@ ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
/*
 * Return the n-th entry of the available-converter list, or NULL.
 *
 * In the replacement implementation the list is obtained through
 * haveAvailableConverterList(); when that succeeds but n is not below
 * availableConverterCount, *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * NOTE: this is a diff rendering without +/- markers; the lines that call
 * haveAliasData() and ucnv_io_loadAvailableConverterList() appear to be the
 * superseded implementation.
 */
U_CFUNC const char *
ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
if (availableConverters == NULL) {
ucnv_io_loadAvailableConverterList();
}
if(n < availableConverterCount) {
if (haveAvailableConverterList(pErrorCode)) {
if (n < availableConverterCount) {
return availableConverters[n];
}
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
}
return NULL;
}
U_CFUNC void
ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
if (haveAvailableConverterList(pErrorCode)) {
uint16_t count = 0;
while (count < availableConverterCount) {
*aliases++=availableConverters[count++];
@ -478,42 +636,12 @@ ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
/*
 * Return the number of entries in the alias list, or 0 when the alias data
 * cannot be loaded.
 *
 * The replacement returns aliasListNum, a uint32_t, cast down to uint16_t.
 *
 * NOTE: this is a diff rendering without +/- markers; the first `if` and the
 * `return *aliasTable;` line appear to be the superseded implementation.
 */
U_CFUNC uint16_t
ucnv_io_countAvailableAliases(UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
return *aliasTable;
if (haveAliasData(pErrorCode)) {
return (uint16_t)aliasListNum;
}
return 0;
}
#if 0
/*
* We are not currently using these functions, so I am commenting them out
* to reduce the binary file size and improve the code coverage;
* I do not currently want to remove this entirely because it may be useful
* in the future and also serves to some degree as another piece of
* documentation of the data structure.
*/
U_CFUNC const char *
ucnv_io_getAvailableAlias(uint16_t n, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && n<*aliasTable) {
return (const char *)aliasTable+*(aliasTable+1+n);
}
return NULL;
}
U_CFUNC void
ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
const uint16_t *p=aliasTable;
uint16_t count=*p++;
while(count>0) {
*aliases++=(const char *)aliasTable+*p;
++p;
--count;
}
}
}
#endif
/* default converter name --------------------------------------------------- */
/*
@ -529,10 +657,7 @@ ucnv_io_getDefaultConverterName() {
/* local variable to be thread-safe */
const char *name=defaultConverterName;
if(name==NULL) {
const char *codepage=0;
umtx_lock(NULL);
codepage = uprv_getDefaultCodepage();
umtx_unlock(NULL);
const char *codepage = uprv_getDefaultCodepage();
if(codepage!=NULL) {
UErrorCode errorCode=U_ZERO_ERROR;
name=ucnv_io_getConverterName(codepage, &errorCode);
@ -543,26 +668,27 @@ ucnv_io_getDefaultConverterName() {
/* if the name is there, test it out */
if(name != NULL) {
UErrorCode errorCode = U_ZERO_ERROR;
UConverter *cnv;
cnv = ucnv_open(name, &errorCode);
if(U_FAILURE(errorCode) || (cnv == NULL)) {
/* Panic time, let's use a fallback. */
UErrorCode errorCode = U_ZERO_ERROR;
UConverter *cnv = ucnv_open(name, &errorCode);
if(U_FAILURE(errorCode) || (cnv == NULL)) {
/* Panic time, let's use a fallback. */
#if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
name = "US-ASCII";
/* there is no 'algorithmic' converter for EBCDIC */
name = "US-ASCII";
/* there is no 'algorithmic' converter for EBCDIC */
#elif defined(OS390)
name = "ibm-1047-s390";
name = "ibm-1047-s390";
#else
name = "ibm-37";
name = "ibm-37";
#endif
}
ucnv_close(cnv);
}
ucnv_close(cnv);
}
if(name != NULL) {
/* Did find a name. And it works.*/
defaultConverterName=name;
umtx_lock(NULL);
/* Did find a name. And it works.*/
defaultConverterName=name;
umtx_unlock(NULL);
}
}

View File

@ -15,6 +15,9 @@
#include "unicode/utypes.h"
#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
#define UCNV_CONVERTER_INDEX_MASK 0x7FF
/**
* Map a converter alias name to a canonical converter name.
* The alias is searched for case-insensitively, the converter name
@ -24,6 +27,12 @@
U_CFUNC const char *
ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode);
/**
* The count for ucnv_io_getAliases and ucnv_io_getAlias
*/
U_CFUNC uint16_t
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode);
/**
* Search case-insensitively for a converter alias and set aliases to
* a pointer to the list of aliases for the actual converter.
@ -34,7 +43,7 @@ ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode);
* or 0 if the alias is not found.
*/
U_CFUNC uint16_t
ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode);
/**
* Search case-insensitively for a converter alias and return
@ -85,21 +94,6 @@ ucnv_io_flushAvailableConverterCache(void);
U_CFUNC uint16_t
ucnv_io_countAvailableAliases(UErrorCode *pErrorCode);
/**
* Return the (n)th alias or converter name in mixed case, or NULL
* if there is none (typically, if the data cannot be loaded).
* 0<=index<ucnv_io_countAvailableAliases().
*/
U_CFUNC const char *
ucnv_io_getAvailableAlias(uint16_t n, UErrorCode *pErrorCode);
/**
* Fill an array const char *aliases[ucnv_io_countAvailableAliases()]
* with pointers to all aliases and converter names in mixed-case.
*/
U_CFUNC void
ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode);
/**
* Get the name of the default converter.
* This name is already resolved by <code>ucnv_io_getConverterName()</code>.

View File

@ -40,6 +40,7 @@ U_CDECL_BEGIN
/* maximum length of the converter names */
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
/* maximum length of the converter name including path */
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */
@ -979,7 +980,6 @@ ucnv_getAvailableName (int32_t n);
/**
* Gives the number of aliases for a given converter or alias name.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param pErrorCode error status
@ -991,7 +991,6 @@ ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
/**
* Gives the name of the alias at given index of alias list.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param n index in alias list
@ -1005,7 +1004,6 @@ ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
/**
* Fill-up the list of alias names for the given alias.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param aliases fill-in list, aliases is a pointer to an array of

View File

@ -379,8 +379,11 @@ enum UErrorCode {
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
U_SORT_KEY_TOO_SHORT_WARNING = -123,
U_AMBIGUOUS_ALIAS_WARNING = -122,
U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
/** @deprecated use the enum that ends in _WARNING */
@ -476,18 +479,18 @@ enum UErrorCode {
/*
* the error code range 0x10200 0x10300 are reserved for Break Iterator related error
*/
U_BRK_ERROR_START=0x10200,
U_BRK_INTERNAL_ERROR,
U_BRK_HEX_DIGITS_EXPECTED,
U_BRK_SEMICOLON_EXPECTED,
U_BRK_RULE_SYNTAX,
U_BRK_UNCLOSED_SET,
U_BRK_ASSIGN_ERROR,
U_BRK_VARIABLE_REDFINITION,
U_BRK_MISMATCHED_PAREN,
U_BRK_NEW_LINE_IN_QUOTED_STRING,
U_BRK_UNDEFINED_VARIABLE,
U_BRK_ERROR_LIMIT,
U_BRK_ERROR_START=0x10200,
U_BRK_INTERNAL_ERROR,
U_BRK_HEX_DIGITS_EXPECTED,
U_BRK_SEMICOLON_EXPECTED,
U_BRK_RULE_SYNTAX,
U_BRK_UNCLOSED_SET,
U_BRK_ASSIGN_ERROR,
U_BRK_VARIABLE_REDFINITION,
U_BRK_MISMATCHED_PAREN,
U_BRK_NEW_LINE_IN_QUOTED_STRING,
U_BRK_UNDEFINED_VARIABLE,
U_BRK_ERROR_LIMIT,
U_ERROR_LIMIT=U_BRK_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
};

View File

@ -35,6 +35,8 @@ ICUOUT=$(ICUMAKE)\out
#
ICUP=$(ICUMAKE)\..\..
ICUP=$(ICUP:\source\data\..\..=)
# In case the first one didn't do it, try this one. .NET would do the second one.
ICUP=$(ICUP:\source\data\\..\..=)
!MESSAGE ICU root path is $(ICUP)
@ -238,14 +240,14 @@ BRK_FILES = "$(ICUBLD)\sent.brk" "$(ICUBLD)\char.brk" "$(ICUBLD)\line.brk" "$(IC
# move the .dll and .lib files to their final destination afterwards.
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
#
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\uprops.dat" "$(ICUBLD)\unames.dat" "$(ICUBLD)\unorm.dat" "$(ICUBLD)\cnvalias.dat" "$(ICUBLD)\tz.dat" "$(ICUBLD)\ucadata.dat" "$(ICUBLD)\invuca.dat" $(ALL_RES) "$(ICUBLD)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\uprops.dat" "$(ICUBLD)\unames.dat" "$(ICUBLD)\unorm.dat" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\tz.dat" "$(ICUBLD)\ucadata.dat" "$(ICUBLD)\invuca.dat" $(ALL_RES) "$(ICUBLD)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
@echo Building icu data
@cd "$(ICUBLD)"
"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -e $(U_ICUDATA_NAME) -v -m dll -c -p $(U_ICUDATA_NAME) -O "$(PKGOPT)" -d "$(ICUBLD)" -s . <<pkgdatain.txt
uprops.dat
unames.dat
unorm.dat
cnvalias.dat
cnvalias.icu
tz.dat
ucadata.dat
invuca.dat
@ -390,7 +392,7 @@ res_index {
@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -s "$(ICUUNIDATA)"
# Targets for converters
"$(ICUBLD)\cnvalias.dat" : {"$(ICUSRCDATA)\$(ICUUCM)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
"$(ICUBLD)\cnvalias.icu" : {"$(ICUSRCDATA)\$(ICUUCM)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
@echo Creating data file for Converter Aliases
@set ICU_DATA=$(ICUBLD)
@"$(ICUTOOLS)\gencnval\$(CFG)\gencnval" "$(ICUSRCDATA)\$(ICUUCM)\convrtrs.txt"

View File

@ -11,6 +11,9 @@
# run gencnval, and eventually pkgdata to update the representation that
# ICU uses for aliases.
# Please be friendly to the rest of us that edit this table by
# keeping this table free of tabs.
# This is an alias file used by the character set converter.
#
# Format:
@ -21,8 +24,8 @@
# by whitespace.
#
# All names can be tagged by including a space-separated list of tags in
# curly braces, as in ISO_8859-1:1987{IANA} iso-8859-1 { MIME } or
# some-charset{MIME IANA}. The order of tags does not matter, and
# curly braces, as in ISO_8859-1:1987{IANA*} iso-8859-1 { MIME* } or
# some-charset{MIME* IANA*}. The order of tags does not matter, and
# whitespace is allowed between the tagged name and the tags list.
#
# The tags can be used to get standard names using ucnv_getStandardName().
@ -31,6 +34,10 @@
#
# IANA The IANA charset name, as documented in RFC 1700.
# MIME The MIME charset name, used for content type tagging.
#
# The * after the standard tag denotes that the previous alias is the
# preferred (default) charset name for that standard. There can only
# be one of these default charset names per converter.
# The world is getting more complicated...
# Supporting XML parsers, HTML, MIME, and similar applications
@ -63,13 +70,17 @@
# or names of algorithmic converters, and their case must not
# be changed - or else code and/or file names must also be changed.
# List of supported standard tags
{ IANA MIME
# This is the list of supported standard tags.
# When multiple converters have the same alias under different standards,
# the standard nearest to the top of this list with that alias will
# be the first converter that will be opened.
{ IANA # Source: http://www.iana.org/assignments/character-sets
MIME # Source: http://www.iana.org/assignments/character-sets
#ICU # Can also use ICU_FEATURE ICU_CANONICAL
#IBM AIX DB2
#WINDOWS MSIE # MSIE is Internet Explorer, which is different from Windows
#GLIBC
#JAVA
JAVA # Source: Sun JDK. Preferred name must be an exact match. Alias name case is ignored, but dashes are not ignored.
#SOLARIS
#APPLE
#HPUX
@ -80,20 +91,20 @@
# Fully algorithmic converters
UTF-8 { IANA MIME } ibm-1208 cp1208
UTF-8 { IANA* MIME* } ibm-1208 cp1208
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
UTF-16 { IANA MIME } ISO-10646-UCS-2 { IANA } csUnicode ibm-17584 ibm-13488 ibm-1200 cp1200 ucs-2
UTF-16BE { IANA MIME } UTF16_BigEndian x-utf-16be
UTF-16LE { IANA MIME } UTF16_LittleEndian x-utf-16le
UTF-16 { IANA* MIME* } ISO-10646-UCS-2 { IANA } csUnicode ibm-17584 ibm-13488 ibm-1200 cp1200 ucs-2
UTF-16BE { IANA* MIME* } UTF16_BigEndian x-utf-16be
UTF-16LE { IANA* MIME* } UTF16_LittleEndian x-utf-16le
# ICU-specific names for special uses
UTF16_PlatformEndian
UTF16_OppositeEndian
UTF-32 { IANA MIME } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 ibm-1232
UTF-32BE { IANA } UTF32_BigEndian
UTF-32LE { IANA } UTF32_LittleEndian
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 ibm-1232
UTF-32BE { IANA* } UTF32_BigEndian
UTF-32LE { IANA* } UTF32_LittleEndian
# ICU-specific names for special uses
UTF32_PlatformEndian
@ -108,31 +119,57 @@ UTF32_OppositeEndian
# By choosing the option "version=1", set O will be escaped instead.
# For example:
# utf7Converter=ucnv_open("UTF-7,version=1");
UTF-7 { IANA MIME }
UTF-7 { IANA* MIME* }
SCSU { IANA }
SCSU { IANA* }
BOCU-1
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
# The Unicode Consortium does not encourage the use of CESU-8
CESU-8 { IANA }
CESU-8 { IANA* }
ISO-8859-1 { MIME } LATIN_1 ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983 819 #!!!!! There's whole lot of names for this
US-ASCII { MIME } ascii ascii-7 ANSI_X3.4-1968 { IANA } ANSI_X3.4-1986 ISO_646.irv:1991 iso646-us us csASCII 646 iso-ir-6 cp367
ISO-8859-1 { MIME* IANA }
LATIN_1 # Old ICU name
ibm-819
IBM819 { IANA }
cp819 { IANA }
latin1 { IANA }
8859-1
csISOLatin1 { IANA }
iso-ir-100 { IANA }
ISO_8859-1:1987 { IANA* }
l1 { IANA }
819
# ANSI_X3.110-1983 # This is for a different IANA alias. This isn't iso-8859-1.
US-ASCII { MIME* IANA }
ASCII { JAVA* IANA }
ascii-7 { JAVA }
ANSI_X3.4-1968 { IANA* }
ANSI_X3.4-1986 { IANA }
ISO_646.irv:1991 { IANA }
iso_646.irv:1983 { JAVA }
ISO646-US { JAVA IANA }
us { IANA }
csASCII { IANA }
646 { JAVA }
iso-ir-6 { IANA }
cp367 { IANA }
# Java says "default" too, but that makes no sense.
# Partially algorithmic converters
ISO_2022 ISO-2022 { MIME } 2022 cp2022
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA MIME } csISO2022JP
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA }
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA MIME } csISO2022JP2
ISO_2022 ISO-2022 { MIME* } 2022 cp2022
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA* MIME* } csISO2022JP
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA* }
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA* MIME* } csISO2022JP2
ISO_2022,locale=ja,version=3 JIS7 csJISEncoding
ISO_2022,locale=ja,version=4 JIS8
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA MIME } csISO2022KR
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA* MIME* } csISO2022KR
ISO_2022,locale=ko,version=1 ibm-25546 ibm-25546_P100 25546
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA MIME } csISO2022CN
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA MIME }
HZ HZ-GB-2312 { IANA MIME }
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA* MIME* } # csISO2022CN
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA* MIME* }
HZ HZ-GB-2312 { IANA* MIME* }
LMBCS-1 lmbcs
LMBCS-2
LMBCS-3
@ -155,82 +192,210 @@ ISCII,version=6 iscii-tlg x-iscii-te
ISCII,version=7 iscii-knd x-iscii-ka
ISCII,version=8 iscii-mlm x-iscii-ma
# Table-based
# Table-based interchange codepages
ibm-367
ibm-367 IBM367 { IANA* } # This is ASCII, but it has fallbacks
# Central Europe
# Standard iso-8859-1, which does not have the Euro update.
# See iso-8859-15 (latin9) for the Euro update
ibm-912 iso-8859-2 { MIME* IANA }
latin2 { IANA }
# ISO8859_2 { JAVA* } # This is really the default for Java and many others.
8859-2
csISOLatin2 { IANA }
iso-ir-101 { IANA }
ISO_8859-2:1987 { IANA* }
l2 { IANA }
cp912
912
# Maltese Esperanto
ibm-913 iso-8859-3 { MIME* IANA }
latin3 { IANA }
8859-3
csISOLatin3 { IANA }
iso-ir-109
ISO_8859-3:1988 { IANA* }
l3 { IANA }
cp913
913
# Baltic
ibm-914 iso-8859-4 { MIME* IANA }
latin4 { IANA }
8859-4
csISOLatin4 { IANA }
iso-ir-110 { IANA }
ISO_8859-4:1988 { IANA* }
l4 { IANA }
cp914
914
# Cyrillic
ibm-915 iso-8859-5 { MIME* IANA }
cyrillic { IANA }
8859-5
csISOLatinCyrillic { IANA }
iso-ir-144 { IANA }
ISO_8859-5:1988 { IANA* }
cp915
915
# Arabic
# ISO_8859-6-E and ISO_8859-6-I are similar to this charset, but they are not the same
# -E means explicit. -I means implicit. However those aliases are rarely used.
ibm-1089 iso-8859-6 { MIME* IANA }
arabic { IANA }
8859-6
csISOLatinArabic { IANA }
iso-ir-127 { IANA }
ISO_8859-6:1987 { IANA* }
ecma-114 { IANA }
asmo-708 { IANA }
cp1089
1089
# ISO Greek (w/ euro update)
ibm-4909 iso-8859-7 { MIME* IANA }
greek { IANA }
greek8 { IANA }
elot_928 { IANA }
ecma-118 { IANA }
8859-7
csISOLatinGreek { IANA }
iso-ir-126 { IANA }
ISO_8859-7:1987 { IANA* }
cp813
813
ibm-813 # Same as 4909 above but without the euro update
# hebrew
# ISO_8859-8-E and ISO_8859-8-I are similar to this charset, but they are not the same
# -E means explicit. -I means implicit.
ibm-916 iso-8859-8 { MIME* IANA }
hebrew { IANA }
8859-8
csISOLatinHebrew { IANA }
iso-ir-138 { IANA }
ISO_8859-8:1988 { IANA* }
cp916
916
# Turkish
ibm-920 iso-8859-9 { MIME* IANA }
ECMA-128 # IANA doesn't have this alias 6/24/2002
latin5 { IANA }
8859-9
csISOLatin5 { IANA }
iso-ir-148 { IANA }
ISO_8859-9:1989 { IANA* }
l5 { IANA }
cp920
920
# Latin 9
ibm-923 iso-8859-15 { IANA* MIME* } # IANA only has iso-8859-15 (6/24/2002)
# ISO8859_15 { JAVA* } # This is really the default for Java and many others.
8859-15
latin9
latin0
csisolatin0
csisolatin9
iso8859_15_fdis
cp923
923
# Interchange codepages
ibm-912 iso-8859-2 { MIME } latin2 cp912 8859-2 csisolatin2 iso-ir-101 ISO_8859-2:1987 { IANA } l2 912 # Central Europe
ibm-913 iso-8859-3 { MIME } latin3 cp913 8859-3 csisolatin3 iso-ir-109 ISO_8859-3:1988 { IANA } l3 913 # Maltese Esperanto
ibm-914 iso-8859-4 { MIME } latin4 cp914 8859-4 csisolatin4 iso-ir-110 ISO_8859-4:1988 { IANA } l4 914 # Baltic
ibm-915 iso-8859-5 { MIME } cyrillic cp915 8859-5 csisolatincyrillic iso-ir-144 ISO_8859-5:1988 { IANA } 915 # Cyrillic
ibm-1089 iso-8859-6 { MIME } arabic cp1089 8859-6 csisolatinarabic iso-ir-127 ISO_8859-6:1987 { IANA } ecma-114 asmo-708 1089 # Arabic
ibm-4909 iso-8859-7 { MIME } greek cp813 greek8 elot_928 ecma-118 8859-7 csisolatingreek iso-ir-126 ISO_8859-7:1987 { IANA } 813 # ISO Greek (w/ euro update)
ibm-813 # Same as 4909 (w/o euro update)
ibm-916 iso-8859-8 { MIME } hebrew cp916 8859-8 csisolatinhebrew iso-ir-138 ISO_8859-8:1988 { IANA } 916 # hebrew iso-8859-8i - typo?
ibm-920 iso-8859-9 { MIME } ECMA-128 latin5 cp920 8859-9 csisolatin5 iso-ir-148 ISO_8859-9:1989 { IANA } l5 920 # Turkish
ibm-923 iso-8859-15 { IANA MIME } latin9 cp923 8859-15 latin0 csisolatin0 iso8859_15_fdis csisolatin9 923 # Latin 9
ibm-1252 ibm-1004 cp1004 # Windows Latin 1 without Euro
ibm-942_P120-2000 ibm-942_VASCII_VSUB_VPUA ibm-942 ibm-932 ibm-932_VASCII_VSUB_VPUA # Old s_jis ibm-932 added!
ibm-942_P12A-2000 ibm-942_VSUB_VPUA shift_jis78 sjis78 ibm-932_VSUB_VPUA
ibm-943_P130-2000 ibm-943_VASCII_VSUB_VPUA ibm-943 # japanese. Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe Iana says that Windows-31J is an extension to csshiftjis ibm-932 removed
ibm-943_P14A-2000 ibm-943_VSUB_VPUA Shift_JIS { MIME } csWindows31J sjis cp943 cp932 pck ms_kanji csshiftjis windows-31j x-sjis 943
ibm-943_P14A-2000 ibm-943_VSUB_VPUA Shift_JIS { MIME* } csWindows31J sjis cp943 cp932 pck ms_kanji csshiftjis windows-31j x-sjis 943
ibm-949_P110-2000 ibm-949_VASCII_VSUB_VPUA ibm-949
ibm-949_P11A-2000 ibm-949_VSUB_VPUA KS_C_5601-1987 { IANA } iso-ir-149 KS_C_5601-1989 csKSC56011987 KSC_5601 { MIME } johab ks_x_1001:1992 949 ksc5601_1992 ksc5601_1987 # KSC-5601-1992, korean
ibm-1370 Big5 { IANA MIME } csBig5 x-big5 cp950 950 # Taiwan Big-5 (w/ euro update)
ibm-949_P11A-2000
ibm-949_VSUB_VPUA
KS_C_5601-1987 { IANA* }
iso-ir-149 { IANA }
KS_C_5601-1989 { IANA }
csKSC56011987 { IANA }
KSC_5601 { MIME* IANA }
johab
ks_x_1001:1992
949
korean { IANA }
ksc5601_1992 # KSC-5601-1992
ksc5601_1987 # Needed by Java
ibm-1370 Big5 { IANA* MIME* } csBig5 x-big5 cp950 950 # Taiwan Big-5 (w/ euro update)
ibm-950 # Taiwan Big-5 (w/o euro update)
ibm-1386 gbk { IANA } cp936 windows-936 ms936 zh_cn # Chinese GBK removed
ibm-1386 gbk { IANA* } cp936 windows-936 ms936 zh_cn # Chinese GBK removed
ibm-33722_P120-2000 ibm-33722_VASCII_VPUA ibm-33722 cp33722 33722 ibm-5050 # Japan EUC with \ <-> Yen mapping
ibm-33722_P12A-2000 ibm-33722_VPUA EUC-JP { MIME } ibm-eucJP eucjis Extended_UNIX_Code_Packed_Format_for_Japanese { IANA } cseucpkdfmtjapanese X-EUC-JP # Japan EUC. x-euc-jp is a MIME name
ibm-970 EUC-KR { IANA MIME } ibm-eucKR csEUCKR # Korean EUC. x-euc-kr is a MIME name
ibm-33722_P12A-2000 ibm-33722_VPUA EUC-JP { MIME* } ibm-eucJP eucjis Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* } cseucpkdfmtjapanese X-EUC-JP # Japan EUC. x-euc-jp is a MIME name
ibm-970 EUC-KR { IANA* MIME* } ibm-eucKR csEUCKR # Korean EUC. x-euc-kr is a MIME name
ibm-964 EUC-TW ibm-eucTW cns11643 # Taiwan EUC. x-euc-tw is a MIME name
ibm-1383_P110-2000 ibm-1383_VPUA ibm-1383 EUC-CN ibm-eucCN GB_2312-80 { IANA } chinese gb iso-ir-58 csISO58GB231280 GB2312 { MIME } gb2312-1980 cp1383 1383 csGB2312# China EUC. x-euc-cn is a MIME name
ibm-1162 tis-620 { IANA } cp874 windows-874 ms874 cp9066 874 # Thai (w/ euro update)
ibm-1383_P110-2000 ibm-1383_VPUA
ibm-1383
EUC-CN
ibm-eucCN
GB_2312-80 { IANA* }
chinese { IANA }
gb # This is not an IANA name. gb in IANA means Great Britain.
iso-ir-58 { IANA }
csISO58GB231280 { IANA }
GB2312 { MIME* }
gb2312-1980
cp1383
1383
csGB2312 # China EUC. x-euc-cn is a MIME name
ibm-1162 tis-620 { IANA* } cp874 windows-874 ms874 cp9066 874 # Thai (w/ euro update)
ibm-874 ibm-1161 # Same as 1162 (w/o euro update)
# Platform codepages
ibm-437 cp437 csPC8CodePage437 437 # PC US
# HSYS:
ibm-850 IBM850 { IANA } cp850 { MIME } 850 csPC850Multilingual # PC latin1
ibm-851 IBM851 { IANA } cp851 { MIME } 851 csPC851 # PC DOS Greek (w/o euro)
ibm-858 cp858 { MIME } IBM00858 { IANA } # PC latin1 with Euro cp850 removed
ibm-850 IBM850 { IANA* } cp850 { MIME* } 850 csPC850Multilingual # PC latin1
ibm-851 IBM851 { IANA* } cp851 { MIME* } 851 csPC851 # PC DOS Greek (w/o euro)
ibm-858 cp858 { MIME* } IBM00858 { IANA* } # PC latin1 with Euro cp850 removed
ibm-9044 852 csPCp852 cp852 # PC latin2 (w/ euro update) cp852 is a MIME name for IBM-852
ibm-852 IBM852 { IANA } # PC latin2 (w/o euro update)
ibm-852 IBM852 { IANA* } # PC latin2 (w/o euro update)
ibm-872 855 csIBM855 cp855 csPCp855 # PC cyrillic (w/ euro update) cp855 is a MIME name for IBM-855
ibm-855 IBM855 { IANA } # PC cyrillic (w/o euro update)
ibm-856 cp856 { MIME } 856 # PC Hebrew (old)
ibm-9049 857 csIBM857 cp857 { MIME } # PC Latin 5 (Turkish) (w/ euro update)
ibm-857 IBM857 { IANA } # PC Latin 5 (w/o euro update)
ibm-859 cp859 { MIME } # PC Latin 9 (w/ euro update)
ibm-860 IBM860 { IANA } cp860 { MIME } 860 csIBM860 # PC Portugal
ibm-861 IBM861 { IANA } cp861 { MIME } 861 cp-is csIBM861 # PC Iceland
ibm-867 cp867 862 cp862 { MIME } cspc862latinhebrew # PC Hebrew (w/ euro update)
ibm-862 IBM862 { IANA } # PC Hebrew (w/o euro update)
ibm-863 IBM863 { IANA } cp863 { MIME } 863 csIBM863 # PC Canadian French
ibm-17248 cp864 { MIME } csIBM864 # PC Arabic (w/ euro update)
ibm-864 IBM864 { IANA } # PC Arabic (w/o euro update)
ibm-865 IBM865 { IANA } cp865 { MIME } 865 csIBM865 # PC Nordic
ibm-808 cp866 { MIME } 866 csIBM866 # PC Russian (w/ euro update)
ibm-855 IBM855 { IANA* } # PC cyrillic (w/o euro update)
ibm-856 cp856 { MIME* } 856 # PC Hebrew (old)
ibm-9049 857 csIBM857 cp857 { MIME* } # PC Latin 5 (Turkish) (w/ euro update)
ibm-857 IBM857 { IANA* } # PC Latin 5 (w/o euro update)
ibm-859 cp859 { MIME* } # PC Latin 9 (w/ euro update)
ibm-860 IBM860 { IANA* } cp860 { MIME* } 860 csIBM860 # PC Portugal
ibm-861 IBM861 { IANA* } cp861 { MIME* } 861 cp-is csIBM861 # PC Iceland
ibm-867 cp867 862 cp862 { MIME* } cspc862latinhebrew # PC Hebrew (w/ euro update)
ibm-862 IBM862 { IANA* } # PC Hebrew (w/o euro update)
ibm-863 IBM863 { IANA* } cp863 { MIME* } 863 csIBM863 # PC Canadian French
ibm-17248 cp864 { MIME* } csIBM864 # PC Arabic (w/ euro update)
ibm-864 IBM864 { IANA* } # PC Arabic (w/o euro update)
ibm-865 IBM865 { IANA* } cp865 { MIME* } 865 csIBM865 # PC Nordic
ibm-808 cp866 { MIME* } 866 csIBM866 # PC Russian (w/ euro update)
ibm-866 # PC Russian (w/o euro update)
ibm-868 IBM868 { IANA } cp868 { MIME } cp-ar csIBM868 868 # PC Urdu
ibm-9061 cp869 { MIME } 869 cp-gr csIBM869 # PC Greek (w/ euro update)
ibm-869 IBM869 { IANA } # PC Greek (w/o euro update)
ibm-878 KOI8-R { IANA MIME } cp878 koi8 cskoi8r # Russian internet
ibm-901 cp921 { MIME } 921 # PC Baltic (w/ euro update)
ibm-868 IBM868 { IANA* } cp868 { MIME* } cp-ar csIBM868 868 # PC Urdu
ibm-9061 cp869 { MIME* } 869 cp-gr csIBM869 # PC Greek (w/ euro update)
ibm-869 IBM869 { IANA* } # PC Greek (w/o euro update)
ibm-878 KOI8-R { IANA* MIME* } cp878 koi8 cskoi8r # Russian internet
ibm-901 cp921 { MIME* } 921 # PC Baltic (w/ euro update)
ibm-921 # PC Baltic (w/o euro update)
ibm-902 cp922 { MIME } 922 # PC Estonian (w/ euro update)
ibm-902 cp922 { MIME* } 922 # PC Estonian (w/ euro update)
ibm-922 # PC Estonian (w/o euro update)
#ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208 # ibm-941 is not JISX 208 code page
#ibm-1038 Adobe-Symbol-Encoding csHPPSMath symbol
ibm-5346 windows-1250 { IANA } cp1250 # Windows Latin2 (w/ euro update)
ibm-5347 windows-1251 { IANA } cp1251 # Windows Cyrillic (w/ euro update)
ibm-5348 windows-1252 { IANA } cp1252 # Windows Latin1 (w/ euro update)
ibm-5349 windows-1253 { IANA } cp1253 # Windows Greek (w/ euro update)
ibm-5350 windows-1254 { IANA } cp1254 # Windows Turkish (w/ euro update)
ibm-5351 windows-1255 { IANA } cp1255 # Windows Hebrew (w/ euro update)
ibm-5352 windows-1256 { IANA } cp1256 # Windows Arabic (w/ euro update)
ibm-5353 windows-1257 { IANA } cp1257 # Windows Baltic (w/ euro update)
ibm-5354 windows-1258 { IANA } cp1258 # Windows Vietnamese (w/ euro update)
ibm-5346 windows-1250 { IANA* } cp1250 # Windows Latin2 (w/ euro update)
ibm-5347 windows-1251 { IANA* } cp1251 # Windows Cyrillic (w/ euro update)
ibm-5348 windows-1252 { IANA* } cp1252 # Windows Latin1 (w/ euro update)
ibm-5349 windows-1253 { IANA* } cp1253 # Windows Greek (w/ euro update)
ibm-5350 windows-1254 { IANA* } cp1254 # Windows Turkish (w/ euro update)
ibm-5351 windows-1255 { IANA* } cp1255 # Windows Hebrew (w/ euro update)
ibm-5352 windows-1256 { IANA* } cp1256 # Windows Arabic (w/ euro update)
ibm-5353 windows-1257 { IANA* } cp1257 # Windows Baltic (w/ euro update)
ibm-5354 windows-1258 { IANA* } cp1258 # Windows Vietnamese (w/ euro update)
ibm-1250 # Windows Latin2 (w/o euro update)
ibm-1251 # Windows Cyrillic (w/o euro update)
ibm-1253 # Windows Greek (w/o euro update)
@ -240,15 +405,15 @@ ibm-1256 # Windows Arabic (w/o euro update)
ibm-1257 # Windows Baltic (w/o euro update)
ibm-1258 # Windows Vietnamese (w/o euro update)
ibm-1275 macintosh { IANA } mac { MIME } csMacintosh # Apple latin 1
ibm-1276 Adobe-Standard-Encoding { IANA } csAdobeStandardEncoding # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276)
ibm-1275 macintosh { IANA* } mac { MIME* } csMacintosh # Apple latin 1
ibm-1276 Adobe-Standard-Encoding { IANA* } csAdobeStandardEncoding # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276)
ibm-1277 Adobe-Latin1-Encoding
ibm-1280 macgr # Apple Greek
ibm-1281 mactr # Apple Turkish
ibm-1282 macce # Apple Central Europe
ibm-1283 maccy # Apple Cyrillic
ibm-1051 hp-roman8 { IANA } roman8 r8 csHPRoman8 # HP Latin1
ibm-1051 hp-roman8 { IANA* } roman8 r8 csHPRoman8 # HP Latin1
ibm-806_P100-2000 ibm-806 ibm-806_VSUB # PC ISCII-91: Indian Script Code
ibm-1006_P100-2000 ibm-1006 ibm-1006_VPUA # Urdu
@ -265,120 +430,127 @@ ibm-9066_P100-2000 ibm-9066 ibm-9066_VSUB # Thai PC
# Added for more euro support
ibm-849 cp1131 # PC Belarus (w/ euro update)
ibm-848 cp1125 # PC Ukraine (w/ euro update)
ibm-5104 cp1008 # 8-bit Arabic (w/ euro update)
ibm-9238 cp1046 # PC Arabic Extended (w/ euro update)
ibm-1363_P110-2000 ibm-1363 ibm-1363_VASCII_VSUB_VPUA ibm-1362 # Korean KSC Korean Windows MBCS
ibm-1363_P11B-2000 ibm-1363_VSUB_VPUA windows-949 cp949 cp1363 ksc korean
ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update)
ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update)
ibm-849 cp1131 # PC Belarus (w/ euro update)
ibm-848 cp1125 # PC Ukraine (w/ euro update)
ibm-5104 cp1008 # 8-bit Arabic (w/ euro update)
ibm-9238 cp1046 # PC Arabic Extended (w/ euro update)
ibm-1363_P110-2000 ibm-1363 ibm-1363_VASCII_VSUB_VPUA ibm-1362 # Korean KSC Korean Windows MBCS
ibm-1363_P11B-2000 ibm-1363_VSUB_VPUA
windows-949
cp949
cp1363
ksc
# korean # The korean alias from IANA goes to ibm-949_P11A-2000
ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update)
ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update)
# EBCDIC codepages according to the CDRA
# without Euro
ibm-37 IBM037 { IANA } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 cp037 037 # EBCDIC US
ibm-273 IBM273 { IANA } csIBM273 ebcdic-de cp273 cpibm273 273 # EBCDIC Germanay, Austria...
ibm-277 IBM277 { IANA } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 277 # EBCDIC Denmark...
ibm-278 IBM278 { IANA } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 278 # EBCDIC Sweden
ibm-280 IBM280 { IANA } ebcdic-cp-it csIBM280 cp280 cpibm280 280 # EBCDIC Italy
ibm-284 IBM284 { IANA } ebcdic-cp-es csIBM284 cp284 cpibm284 284 # EBCDIC Spain
ibm-285 IBM285 { IANA } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 285 # EBCDIC UK Ireland
ibm-290 IBM290 { IANA } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana)
ibm-297 IBM297 { IANA } ebcdic-cp-fr csIBM297 cp297 cpibm297 297 # EBCDIC France
ibm-420 IBM420 { IANA } ebcdic-cp-ar1 csIBM420 cp420 420
ibm-424 IBM424 { IANA } ebcdic-cp-he csIBM424 cp424 424
ibm-500 IBM500 { IANA } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch 500 # EBCDIC International Latin1
ibm-803 cp803 # Old EBCDIC Hebrew
ibm-834 cp834 # Korean DBCS Host
ibm-835 cp835 # DBCS T-Ch Host
ibm-870_P100-2000 IBM870 { IANA } ibm-870 CP870 ibm-870_STD ebcdic-cp-roece ebcdic-cp-yu csIBM870
ibm-871 IBM871 { IANA } ebcdic-cp-is csIBM871 cpibm871 cp871 871 # EBCDIC Iceland
ibm-875_P100-2000 ibm-875 cp875 ibm-875 875 ibm-875_STD
ibm-918_P100-2000 IBM918 { IANA } ibm-918 CP918 ibm-918_VPUA ebcdic-cp-ar2 csIBM918
ibm-918_X100-2000 ibm-918_STD
ibm-930 cp930 cpibm930 930 # Japan EBCDIC MIXED
ibm-933 cp933 cpibm933 933 # Korea EBCDIC MIXED
ibm-935 cp935 cpibm935 935 # China EBCDIC MIXED
ibm-937 cp937 cpibm937 937 # Taiwan EBCDIC MIXED
ibm-939 cp939 939 # Host MBCS (Latin-Kanji) EBCDIC
ibm-1025_P100-2000 ibm-1025 ibm-1025_STD
ibm-1026_P100-2000 IBM1026 { IANA } ibm-1026 CP1026 csIBM1026 ibm-1026_STD
ibm-1047 cpibm1047 # EBCDIC Open systems Latin1
ibm-1097_P100-2000 ibm-1097 ibm-1097_VPUA
ibm-1097_X100-2000 ibm-1097_STD
ibm-1112_P100-2000 ibm-1112 cp1112 1112 ibm-1112_STD
ibm-1122_P100-2000 ibm-1122 cp1122 ibm-1122 1122 ibm-1122_STD
ibm-1130_P100-2000 ibm-1130 ibm-1130_STD
ibm-1132_P100-2000 ibm-1132 ibm-1132_STD
ibm-1137_P100-2000 ibm-1137 ibm-1137_STD
ibm-1388_P103-2001 ibm-1388 # S-Ch DBCS-Host Data GBK mixed MBCS
ibm-9030_P100-2000 ibm-9030 ibm-9030_STD
ibm-37 IBM037 { IANA* } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 cp037 037 # EBCDIC US
ibm-273 IBM273 { IANA* } csIBM273 ebcdic-de cp273 cpibm273 273 # EBCDIC Germany, Austria...
ibm-277 IBM277 { IANA* } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 277 # EBCDIC Denmark...
ibm-278 IBM278 { IANA* } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 278 # EBCDIC Sweden
ibm-280 IBM280 { IANA* } ebcdic-cp-it csIBM280 cp280 cpibm280 280 # EBCDIC Italy
ibm-284 IBM284 { IANA* } ebcdic-cp-es csIBM284 cp284 cpibm284 284 # EBCDIC Spain
ibm-285 IBM285 { IANA* } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 285 # EBCDIC UK Ireland
ibm-290 IBM290 { IANA* } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana)
ibm-297 IBM297 { IANA* } ebcdic-cp-fr csIBM297 cp297 cpibm297 297 # EBCDIC France
ibm-420 IBM420 { IANA* } ebcdic-cp-ar1 csIBM420 cp420 420
ibm-424 IBM424 { IANA* } ebcdic-cp-he csIBM424 cp424 424
ibm-500 IBM500 { IANA* } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch 500 # EBCDIC International Latin1
ibm-803 cp803 # Old EBCDIC Hebrew
ibm-834 cp834 # Korean DBCS Host
ibm-835 cp835 # DBCS T-Ch Host
ibm-870_P100-2000 IBM870 { IANA* } ibm-870 CP870 ibm-870_STD ebcdic-cp-roece ebcdic-cp-yu csIBM870
ibm-871 IBM871 { IANA* } ebcdic-cp-is csIBM871 cpibm871 cp871 871 # EBCDIC Iceland
ibm-875_P100-2000 ibm-875 cp875 875 ibm-875_STD
ibm-918_P100-2000 IBM918 { IANA* } ibm-918 CP918 ibm-918_VPUA ebcdic-cp-ar2 csIBM918
ibm-918_X100-2000 ibm-918_STD
ibm-930 cp930 cpibm930 930 # Japan EBCDIC MIXED
ibm-933 cp933 cpibm933 933 # Korea EBCDIC MIXED
ibm-935 cp935 cpibm935 935 # China EBCDIC MIXED
ibm-937 cp937 cpibm937 937 # Taiwan EBCDIC MIXED
ibm-939 cp939 939 # Host MBCS (Latin-Kanji) EBCDIC
ibm-1025_P100-2000 ibm-1025 ibm-1025_STD
ibm-1026_P100-2000 IBM1026 { IANA* } ibm-1026 CP1026 csIBM1026 ibm-1026_STD
ibm-1047 cpibm1047 # EBCDIC Open systems Latin1
ibm-1097_P100-2000 ibm-1097 ibm-1097_VPUA
ibm-1097_X100-2000 ibm-1097_STD
ibm-1112_P100-2000 ibm-1112 cp1112 1112 ibm-1112_STD
ibm-1122_P100-2000 ibm-1122 cp1122 1122 ibm-1122_STD
ibm-1130_P100-2000 ibm-1130 ibm-1130_STD
ibm-1132_P100-2000 ibm-1132 ibm-1132_STD
ibm-1137_P100-2000 ibm-1137 ibm-1137_STD
ibm-1388_P103-2001 ibm-1388 # S-Ch DBCS-Host Data GBK mixed MBCS
ibm-9030_P100-2000 ibm-9030 ibm-9030_STD
#ibm-1046 # PC Arabic without EURO
#ibm-1046 # PC Arabic without EURO
# with Euro
ibm-1123 cpibm1123 # EBCDIC Cyrillic Ukraine
ibm-1140 cpibm1140 IBM01140 { IANA } # EBCDIC US...
ibm-1141 cpibm1141 IBM01141 { IANA } # EBCDIC Germanay, Austria...
ibm-1142 cpibm1142 IBM01142 { IANA } # EBCDIC Denmark...
ibm-1143 cpibm1143 IBM01143 { IANA } # EBCDIC Sweden
ibm-1144 cpibm1144 # EBCDIC Italy
ibm-1145 cpibm1145 # EBCDIC Spain
ibm-1146 cpibm1146 # EBCDIC UK Ireland
ibm-1147 cpibm1147 # EBCDIC France
ibm-1148 cpibm1148 # EBCDIC International Latin1
ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland
ibm-1153 cpibm1153 # EBCDIC latin 2
ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual
ibm-1155 cpibm1155 # EBCDIC Turkey
ibm-1156 cpibm1156 # EBCDIC Baltic Multilingual
ibm-1157 cpibm1157 # EBCDIC Estonia
ibm-1158 cp1123 cpibm1158 1123 # EBCDIC Cyrillic Ukraine
ibm-1159 cp28709 # SBCS T-Ch Host
ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand
ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam
ibm-1364_P110-2000 ibm-1364_VPUA ibm-1364 cp1364 # Korean Host Mixed
ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED
ibm-1390 cpibm1390 # Japan EBCDIC MIXED
ibm-1399 # Host MBCS (Latin-Kanji)
ibm-4899 cpibm4899 # Old EBCDIC Hebrew
ibm-4971 cpibm4971 # EBCDIC Greek
ibm-5123 cp1027 # Host Roman Jis
ibm-8482 # host SBCS (Katakana)
ibm-9027 # DBCS T-Ch Host
ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control charaters update)
ibm-16684 cp300 # Jis + Roman Jis Host
ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic
ibm-1123 cpibm1123 # EBCDIC Cyrillic Ukraine
ibm-1140 cpibm1140 IBM01140 { IANA* } # EBCDIC US...
ibm-1141 cpibm1141 IBM01141 { IANA* } # EBCDIC Germany, Austria...
ibm-1142 cpibm1142 IBM01142 { IANA* } # EBCDIC Denmark...
ibm-1143 cpibm1143 IBM01143 { IANA* } # EBCDIC Sweden
ibm-1144 cpibm1144 # EBCDIC Italy
ibm-1145 cpibm1145 # EBCDIC Spain
ibm-1146 cpibm1146 # EBCDIC UK Ireland
ibm-1147 cpibm1147 # EBCDIC France
ibm-1148 cpibm1148 # EBCDIC International Latin1
ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland
ibm-1153 cpibm1153 # EBCDIC latin 2
ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual
ibm-1155 cpibm1155 # EBCDIC Turkey
ibm-1156 cpibm1156 # EBCDIC Baltic Multilingual
ibm-1157 cpibm1157 # EBCDIC Estonia
ibm-1158 cp1123 cpibm1158 1123 # EBCDIC Cyrillic Ukraine
ibm-1159 cp28709 # SBCS T-Ch Host
ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand
ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam
ibm-1364_P110-2000 ibm-1364_VPUA ibm-1364 cp1364 # Korean Host Mixed
ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED
ibm-1390 cpibm1390 # Japan EBCDIC MIXED
ibm-1399 # Host MBCS (Latin-Kanji)
ibm-4899 cpibm4899 # Old EBCDIC Hebrew
ibm-4971 cpibm4971 # EBCDIC Greek
ibm-5123 cp1027 # Host Roman Jis
ibm-8482 # host SBCS (Katakana)
ibm-9027 # DBCS T-Ch Host
ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update)
ibm-16684 cp300 # Jis + Roman Jis Host
ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic
# unsupported IANA names
# ebcdic-it csEBCDICIT
# ebcdic-es csEBCDICES
# csEBCDICFR ebcdic-fr
# ibm-274 IBM274 { IANA } cp274 csIBM274 ebcdic-be
# ibm-870 IBM870 { IANA } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870 870
# ibm-274 IBM274 { IANA* } cp274 csIBM274 ebcdic-be
# ibm-870 IBM870 { IANA* } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870 870
# EBCDIC codepages for S/390, with LF and NL codes swapped
ebcdic-xml-us
# without Euro
ibm-37-s390 ibm037-s390 # EBCDIC US
ibm-1047-s390 # EBCDIC for S/390 Open Edition
ibm-37-s390 ibm037-s390 # EBCDIC US
ibm-1047-s390 # EBCDIC for S/390 Open Edition
# with Euro
ibm-1140-s390 # EBCDIC US
ibm-1142-s390 # EBCDIC Denmark
ibm-1143-s390 # EBCDIC Sweden
ibm-1144-s390 # EBCDIC Italy
ibm-1145-s390 # EBCDIC Spain
ibm-1146-s390 # EBCDIC UK Ireland
ibm-1147-s390 # EBCDIC France
ibm-1148-s390 # EBCDIC International Latin1
ibm-1149-s390 # EBCDIC Iceland
ibm-1153-s390 # EBCDIC latin 2
ibm-12712-s390 # EBCDIC Hebrew
ibm-16804-s390 # EBCDIC Arabic
ibm-1140-s390 # EBCDIC US
ibm-1142-s390 # EBCDIC Denmark
ibm-1143-s390 # EBCDIC Sweden
ibm-1144-s390 # EBCDIC Italy
ibm-1145-s390 # EBCDIC Spain
ibm-1146-s390 # EBCDIC UK Ireland
ibm-1147-s390 # EBCDIC France
ibm-1148-s390 # EBCDIC International Latin1
ibm-1149-s390 # EBCDIC Iceland
ibm-1153-s390 # EBCDIC latin 2
ibm-12712-s390 # EBCDIC Hebrew
ibm-16804-s390 # EBCDIC Arabic
# GB 18030 is partly algorithmic, using the MBCS converter
gb18030 { IANA } ibm-1392
gb18030 { IANA* } ibm-1392

View File

@ -240,6 +240,8 @@ static void TestConvert()
/*Testing ucnv_openU()*/
{
UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
UChar illegalName[100];
UConverter *converter=NULL;
@ -269,8 +271,20 @@ static void TestConvert()
if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
}
err=U_ZERO_ERROR;
ucnv_openU(firstSortedName, &err);
if(err!=U_FILE_ACCESS_ERROR){
log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
}
err=U_ZERO_ERROR;
ucnv_openU(lastSortedName, &err);
if(err!=U_FILE_ACCESS_ERROR){
log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
}
err=U_ZERO_ERROR;
}
log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
{
@ -455,6 +469,11 @@ static void TestConvert()
char* index = NULL;
strcpy(ucs_file_name, loadTestData(&err));
if(U_FAILURE(err)){
log_err("Couldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
return;
}
index=strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
@ -462,11 +481,6 @@ static void TestConvert()
}
strcat(ucs_file_name,".."U_FILE_SEP_STRING);
if(U_FAILURE(err)){
log_err("Couldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
return;
}
strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
ucs_file_in = fopen(ucs_file_name,"rb");

View File

@ -67,7 +67,7 @@ static void TestUDataOpen(){
UErrorCode status=U_ZERO_ERROR;
const char* memMap[][2]={
{"tz", "dat"},
{"cnvalias", "dat"},
{"cnvalias", "icu"},
{"unames", "dat"},
{"ibm-1141", "cnv"}
};
@ -388,7 +388,7 @@ isAcceptable1(void *context,
pInfo->dataFormat[1]==0x76 &&
pInfo->dataFormat[2]==0x41 &&
pInfo->dataFormat[3]==0x6c &&
pInfo->formatVersion[0]==2 )
pInfo->formatVersion[0]==3 )
{
log_verbose("The data from \"%s.%s\" IS acceptable using the verifing function isAcceptable1()\n", name, type);
return TRUE;
@ -473,7 +473,7 @@ static void TestUDataOpenChoiceDemo1() {
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
result=udata_openChoice(NULL, type, name[0], isAcceptable1, NULL, &status);
result=udata_openChoice(NULL, "icu", name[0], isAcceptable1, NULL, &status);
if(U_FAILURE(status)){
log_err("FAIL: udata_openChoice() failed name=%s, type=%s, \n errorcode=%s\n", name[0], type, myErrorName(status));
} else {
@ -624,7 +624,7 @@ static void TestUDataGetInfo() {
log_verbose("Testing udata_getInfo() for cnvalias.dat\n");
result=udata_open(NULL, type, name, &status);
result=udata_open(NULL, "icu", name, &status);
if(U_FAILURE(status)){
log_err("FAIL: udata_open() failed for path = NULL, name=%s, type=%s, \n errorcode=%s\n", path, name, type, myErrorName(status));
return;
@ -677,32 +677,34 @@ static void TestUDataGetInfo() {
static void TestUDataGetMemory() {
UDataMemory *result;
const uint16_t *table=NULL;
const int32_t *table=NULL;
uint16_t* intValue=0;
UErrorCode status=U_ZERO_ERROR;
const char* name="cnvalias";
const char* type="dat";
const char* type;
const char* name2="test";
char* testPath=(char*)malloc(sizeof(char) * (strlen(u_getDataDirectory()) + strlen("testdata") +1 ) );
char* testPath=(char*)malloc(sizeof(char) * (strlen(u_getDataDirectory()) + strlen("testdata") +1 ) );
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
type="icu";
log_verbose("Testing udata_getMemory for \"cnvalias.dat()\"\n");
result=udata_openChoice(NULL, type, name, isAcceptable1, NULL, &status);
if(U_FAILURE(status)){
log_err("FAIL: udata_openChoice() failed for name=%s, type=%s, \n errorcode=%s\n", name, type, myErrorName(status));
return;
}
table=(const uint16_t *)udata_getMemory(result);
table=(const uint32_t *)udata_getMemory(result);
/* The alias table may list more converters than what's actually available now. [grhoten] */
if(ucnv_countAvailable() > table[1+2*(*table)]) /*???*/
if(ucnv_countAvailable() > table[1]) /*???*/
log_err("FAIL: udata_getMemory() failed ucnv_countAvailable returned = %d, expected = %d\n", ucnv_countAvailable(), table[1+2*(*table)]);
udata_close(result);
type="dat";
log_verbose("Testing udata_getMemory for \"test.dat\"()\n");
result=udata_openChoice(testPath, type, name2, isAcceptable3, NULL, &status);
if(U_FAILURE(status)){

File diff suppressed because it is too large Load Diff