ICU-868 New alias scheme.
X-SVN-Rev: 8975
This commit is contained in:
parent
a79775fe45
commit
70debd215f
@ -1724,7 +1724,9 @@ _uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
|
||||
"U_USING_DEFAULT_WARNING",
|
||||
"U_SAFECLONE_ALLOCATED_WARNING",
|
||||
"U_STATE_OLD_WARNING",
|
||||
"U_STRING_NOT_TERMINATED_WARNING"
|
||||
"U_STRING_NOT_TERMINATED_WARNING",
|
||||
"U_SORT_KEY_TOO_SHORT_WARNING",
|
||||
"U_AMBIGUOUS_ALIAS_WARNING"
|
||||
};
|
||||
|
||||
static const char * const
|
||||
|
@ -283,8 +283,7 @@ ucnv_countAvailable ()
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
|
||||
{
|
||||
const char *p;
|
||||
return ucnv_io_getAliases(alias, &p, pErrorCode);
|
||||
return ucnv_io_countAliases(alias, pErrorCode);
|
||||
}
|
||||
|
||||
|
||||
@ -297,14 +296,7 @@ ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
|
||||
{
|
||||
const char *p;
|
||||
uint16_t count=ucnv_io_getAliases(alias, &p, pErrorCode);
|
||||
while(count>0) {
|
||||
*aliases++=p;
|
||||
/* skip a name, first the canonical converter name */
|
||||
p+=uprv_strlen(p)+1;
|
||||
--count;
|
||||
}
|
||||
ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
|
@ -31,67 +31,140 @@
|
||||
#include "unicode/udata.h"
|
||||
#include "ucln_cmn.h"
|
||||
|
||||
/* Format of cnvalias.dat ------------------------------------------------------
|
||||
/* Format of cnvalias.icu -----------------------------------------------------
|
||||
*
|
||||
* cnvalias.dat is a binary, memory-mappable form of convrtrs.txt .
|
||||
* It contains two sorted tables and a block of zero-terminated strings.
|
||||
* Each table is preceded by the number of table entries.
|
||||
* cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
|
||||
* This binary form contains several tables. All indexes are to uint16_t
|
||||
* units, and not to the bytes (uint8_t units). Addressing everything on
|
||||
* 16-bit boundaries allows us to store more information with small index
|
||||
* numbers, which are also 16-bit in size. The majority of the table (except
|
||||
* the string table) are 16-bit numbers.
|
||||
*
|
||||
* The first table maps from aliases to converter indexes.
|
||||
* The converter names themselves are listed as aliases in this table.
|
||||
* Each entry in this table has an offset to the alias and
|
||||
* an index of the converter in the converter table.
|
||||
* First there is the size of the Table of Contents (TOC). The TOC
|
||||
* entries contain the size of each section. In order to find the offset
|
||||
* of a section you just need to sum up the sizes of the sections before it.
|
||||
*
|
||||
* The second table lists only the converters themselves.
|
||||
* Each entry in this table has an offset to the converter name and
|
||||
* the number of aliases, including the converter itself.
|
||||
* A count of 1 means that there is no alias, only the converter name.
|
||||
* 1) This section contains a list of converters. This list contains indexes
|
||||
* into the string table for the converter name. The index of this list is
|
||||
* also used by other sections, which are mentioned later on.
|
||||
*
|
||||
* In the block of strings after the tables, each converter name is directly
|
||||
* followed by its aliases. All offsets to strings are offsets from the
|
||||
* beginning of the data.
|
||||
* 2) This section contains a list of tags. This list contains indexes
|
||||
* into the string table for the tag name. The index of this list is
|
||||
* also used by other sections, which are mentioned later on.
|
||||
*
|
||||
* More formal file data structure (data format 2.1):
|
||||
* 3) This section contains a sorted list of unique aliases. This
|
||||
* list contains indexes into the string table for the alias name. The
|
||||
* index of this list is also used by other sections, which are mentioned
|
||||
* later on.
|
||||
*
|
||||
* uint16_t aliasCount;
|
||||
* uint16_t aliasOffsets[aliasCount];
|
||||
* uint16_t converterIndexes[aliasCount];
|
||||
* 4) This section contains a list of mapped converter names. Consider this
|
||||
* as a table that maps the 3rd section to the 1st section. This list contains
|
||||
* indexes into the 1st section. The index of this list is the same index in
|
||||
* the 3rd section. There is also some extra information in the high bits of
|
||||
* each converter index in this table. Currently it's only used to say that
|
||||
* an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
|
||||
* and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
|
||||
* the predigested form of the 5th section so that an alias lookup can be fast.
|
||||
*
|
||||
* 5) This section contains a 2D array with indexes to the 6th section. This
|
||||
* section is the full form of all alias mappings. The column index is the
|
||||
* index into the converter list (column header). The row index is the index
|
||||
* to tag list (row header). This 2D array is the top part of a 3D array. The
|
||||
* third dimension is in the 6th section.
|
||||
*
|
||||
* uint16_t converterCount;
|
||||
* struct {
|
||||
* uint16_t converterOffset;
|
||||
* uint16_t aliasCount;
|
||||
* } converters[converterCount];
|
||||
* 6) This is a blob of variable-length arrays. Each array starts with a size,
|
||||
* and is followed by indexes to alias names in the string table. This is
|
||||
* the third dimension to the section 5. No other section should be referencing
|
||||
* this section.
|
||||
*
|
||||
* uint16_t tagCount;
|
||||
* uint16_t taggedAliasesOffsets[tagCount][converterCount];
|
||||
* char tags[] = { "Tag0\Tag1\0..." };
|
||||
* 7) Reserved at this time (There is no information). This _usually_ has a
|
||||
* size of 0. Future versions may add more information here.
|
||||
*
|
||||
* char strings[]={
|
||||
* "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\Alias1\0..."
|
||||
* };
|
||||
* 8) This is the string table. All strings are indexed on an even address.
|
||||
* There are two reasons for this. First many chip architectures locate strings
|
||||
* faster on even address boundaries. Second, since all indexes are 16-bit
|
||||
* numbers, this string table can be 128KB in size instead of 64KB when we
|
||||
* only have strings starting on an even address.
|
||||
*
|
||||
* The code included here can read versions 2 and 2.1 of the data format.
|
||||
* Version 2 does not have tag information, but since the code never refers
|
||||
* to strings[] by its base offset, it's okay.
|
||||
*
|
||||
* Here is the concept of section 5 and 6. It's a 3D cube. Each tag
|
||||
* has a unique alias among all converters. That same alias can
|
||||
* be mentioned in other standards on different converters,
|
||||
* but only one alias per tag can be unique.
|
||||
*
|
||||
*
|
||||
* Converter Names (Usually in TR22 form)
|
||||
* -------------------------------------------.
|
||||
* T / /|
|
||||
* a / / |
|
||||
* g / / |
|
||||
* s / / |
|
||||
* / / |
|
||||
* ------------------------------------------/ |
|
||||
* A | | |
|
||||
* l | | |
|
||||
* i | | /
|
||||
* a | | /
|
||||
* s | | /
|
||||
* e | | /
|
||||
* s | |/
|
||||
* -------------------------------------------
|
||||
*
|
||||
*
|
||||
*
|
||||
* Here is what it really looks like. It's like swiss cheese.
|
||||
* There are holes. Some converters aren't recognized by
|
||||
* a standard, or they are really old converters that the
|
||||
* standard doesn't recognize anymore.
|
||||
*
|
||||
* Converter Names (Usually in TR22 form)
|
||||
* -------------------------------------------.
|
||||
* T /##########################################/|
|
||||
* a / # # /#
|
||||
* g / # ## ## ### # ### ### ### #/
|
||||
* s / # ##### #### ## ## #/#
|
||||
* / ### # # ## # # # ### # # #/##
|
||||
* ------------------------------------------/# #
|
||||
* A |### # # ## # # # ### # # #|# #
|
||||
* l |# # # # # ## # #|# #
|
||||
* i |# # # # # # #|#
|
||||
* a |# #|#
|
||||
* s | #|#
|
||||
* e
|
||||
* s
|
||||
*
|
||||
*/
|
||||
|
||||
static const char DATA_NAME[] = "cnvalias";
|
||||
static const char DATA_TYPE[] = "dat";
|
||||
static const char DATA_TYPE[] = "icu";
|
||||
|
||||
static UDataMemory *aliasData=NULL;
|
||||
static const uint16_t *aliasTable=NULL;
|
||||
|
||||
static const uint16_t *converterList = NULL;
|
||||
static const uint16_t *tagList = NULL;
|
||||
static const uint16_t *aliasList = NULL;
|
||||
static const uint16_t *untaggedConvArray = NULL;
|
||||
static const uint16_t *taggedAliasArray = NULL;
|
||||
static const uint16_t *taggedAliasLists = NULL;
|
||||
static const uint16_t *stringTable = NULL;
|
||||
|
||||
static uint32_t converterListNum;
|
||||
static uint32_t tagListNum;
|
||||
static uint32_t aliasListNum;
|
||||
static uint32_t untaggedConvArraySize;
|
||||
static uint32_t taggedAliasArraySize;
|
||||
static uint32_t taggedAliasListsSize;
|
||||
static uint32_t stringTableSize;
|
||||
|
||||
static const char **availableConverters = NULL;
|
||||
static uint16_t availableConverterCount = 0;
|
||||
|
||||
static const uint16_t *converterTable = NULL;
|
||||
static const uint16_t *tagTable = NULL;
|
||||
|
||||
static char defaultConverterNameBuffer[100];
|
||||
static char defaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
|
||||
static const char *defaultConverterName = NULL;
|
||||
|
||||
#define GET_STRING(idx) (const char *)(stringTable + (idx))
|
||||
#define NUM_RESERVED_TAGS 2
|
||||
|
||||
static UBool
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
@ -104,7 +177,7 @@ isAcceptable(void *context,
|
||||
pInfo->dataFormat[1]==0x76 &&
|
||||
pInfo->dataFormat[2]==0x41 &&
|
||||
pInfo->dataFormat[3]==0x6c &&
|
||||
pInfo->formatVersion[0]==2);
|
||||
pInfo->formatVersion[0]==3);
|
||||
}
|
||||
|
||||
static UBool
|
||||
@ -115,32 +188,64 @@ haveAliasData(UErrorCode *pErrorCode) {
|
||||
|
||||
/* load converter alias data from file if necessary */
|
||||
if(aliasData==NULL) {
|
||||
UDataMemory *data;
|
||||
UDataInfo info;
|
||||
const uint16_t *table=NULL;
|
||||
UDataMemory *data = NULL;
|
||||
const uint16_t *table = NULL;
|
||||
uint32_t tableStart;
|
||||
uint32_t currOffset;
|
||||
uint32_t reservedSize1;
|
||||
|
||||
/* open the data outside the mutex block */
|
||||
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
|
||||
data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
table=(const uint16_t *)udata_getMemory(data);
|
||||
info.size=sizeof(UDataInfo);
|
||||
udata_getInfo(data, &info);
|
||||
table = (const uint16_t *)udata_getMemory(data);
|
||||
|
||||
tableStart = ((const uint32_t *)(table))[0];
|
||||
if (tableStart < 8) {
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* in the mutex block, set the data for this process */
|
||||
umtx_lock(NULL);
|
||||
if(aliasData==NULL) {
|
||||
aliasData=data;
|
||||
aliasData = data;
|
||||
data=NULL;
|
||||
aliasTable=table;
|
||||
table=NULL;
|
||||
converterTable = aliasTable + 1 + 2 * *aliasTable;
|
||||
|
||||
if (info.formatVersion[0] == 2 && info.formatVersion[1] > 0) {
|
||||
tagTable = converterTable + 1 + 2 * *converterTable;
|
||||
}
|
||||
converterListNum = ((const uint32_t *)(table))[1];
|
||||
tagListNum = ((const uint32_t *)(table))[2];
|
||||
aliasListNum = ((const uint32_t *)(table))[3];
|
||||
untaggedConvArraySize = ((const uint32_t *)(table))[4];
|
||||
taggedAliasArraySize = ((const uint32_t *)(table))[5];
|
||||
taggedAliasListsSize = ((const uint32_t *)(table))[6];
|
||||
reservedSize1 = ((const uint32_t *)(table))[7]; /* reserved */
|
||||
stringTableSize = ((const uint32_t *)(table))[8];
|
||||
|
||||
currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
|
||||
converterList = table + currOffset;
|
||||
|
||||
currOffset += converterListNum;
|
||||
tagList = table + currOffset;
|
||||
|
||||
currOffset += tagListNum;
|
||||
aliasList = table + currOffset;
|
||||
|
||||
currOffset += aliasListNum;
|
||||
untaggedConvArray = table + currOffset;
|
||||
|
||||
currOffset += untaggedConvArraySize;
|
||||
taggedAliasArray = table + currOffset;
|
||||
|
||||
/* aliasLists is a 1's based array, but it has a padding character */
|
||||
currOffset += taggedAliasArraySize;
|
||||
taggedAliasLists = table + currOffset;
|
||||
|
||||
currOffset += taggedAliasListsSize;
|
||||
/* reserved */
|
||||
|
||||
currOffset += reservedSize1;
|
||||
stringTable = table + currOffset;
|
||||
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
@ -175,49 +280,40 @@ ucnv_io_cleanup()
|
||||
|
||||
ucnv_io_flushAvailableConverterCache();
|
||||
|
||||
aliasData = NULL;
|
||||
aliasTable = NULL;
|
||||
converterListNum = 0;
|
||||
tagListNum = 0;
|
||||
aliasListNum = 0;
|
||||
untaggedConvArraySize = 0;
|
||||
taggedAliasArraySize = 0;
|
||||
taggedAliasListsSize = 0;
|
||||
stringTableSize = 0;
|
||||
|
||||
converterTable = NULL;
|
||||
tagTable = NULL;
|
||||
converterList = NULL;
|
||||
tagList = NULL;
|
||||
aliasList = NULL;
|
||||
untaggedConvArray = NULL;
|
||||
taggedAliasArray = NULL;
|
||||
taggedAliasLists = NULL;
|
||||
stringTable = NULL;
|
||||
|
||||
defaultConverterName = NULL;
|
||||
defaultConverterNameBuffer[0] = 0;
|
||||
|
||||
return TRUE; /* Everything was cleaned up */
|
||||
}
|
||||
|
||||
|
||||
static int16_t getTagNumber(const char *tagname) {
|
||||
if (tagTable) {
|
||||
int16_t tag, count = (int16_t) *tagTable;
|
||||
const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
|
||||
|
||||
#if 0
|
||||
|
||||
char name[100];
|
||||
int i;
|
||||
|
||||
/* convert the tag name to lowercase to do case-insensitive comparisons */
|
||||
for(i = 0; i < sizeof(name) - 1 && *tagname; ++i) {
|
||||
name[i] = (char)uprv_tolower(*tagname++);
|
||||
}
|
||||
name[i] = 0;
|
||||
|
||||
#else
|
||||
|
||||
const char *name = tagname;
|
||||
|
||||
#endif
|
||||
|
||||
for (tag = 0; count--; ++tag) {
|
||||
if (!uprv_stricmp(name, tags)) {
|
||||
return tag;
|
||||
static uint32_t getTagNumber(const char *tagname) {
|
||||
if (tagList) {
|
||||
uint32_t tagNum;
|
||||
for (tagNum = 0; tagNum < tagListNum; tagNum++) {
|
||||
if (!uprv_stricmp(GET_STRING(tagList[tagNum]), tagname)) {
|
||||
return tagNum;
|
||||
}
|
||||
tags += strlen(tags) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -240,14 +336,16 @@ static int16_t getTagNumber(const char *tagname) {
|
||||
U_CAPI int U_EXPORT2
|
||||
ucnv_compareNames(const char *name1, const char *name2) {
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
char c1, c2;
|
||||
|
||||
for (;;) {
|
||||
/* Ignore delimiters '-', '_', and ' ' */
|
||||
while ((c1 = (unsigned char)*name1) == '-'
|
||||
|| c1 == '_' || c1 == ' ') ++name1;
|
||||
while ((c2 = (unsigned char)*name2) == '-'
|
||||
|| c2 == '_' || c2 == ' ') ++name2;
|
||||
while ((c1 = *name1) == '-' || c1 == '_' || c1 == ' ') {
|
||||
++name1;
|
||||
}
|
||||
while ((c2 = *name2) == '-' || c2 == '_' || c2 == ' ') {
|
||||
++name2;
|
||||
}
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if ((c1|c2)==0) {
|
||||
@ -257,7 +355,7 @@ ucnv_compareNames(const char *name1, const char *name2) {
|
||||
/* Case-insensitive comparison */
|
||||
rc = (int)(unsigned char)uprv_tolower(c1) -
|
||||
(int)(unsigned char)uprv_tolower(c2);
|
||||
if (rc!=0) {
|
||||
if (rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
++name1;
|
||||
@ -267,69 +365,87 @@ ucnv_compareNames(const char *name1, const char *name2) {
|
||||
|
||||
/*
|
||||
* search for an alias
|
||||
* return NULL or a pointer to the converter table entry
|
||||
* return the converter number index for converterList
|
||||
*/
|
||||
static const uint16_t *
|
||||
findAlias(const char *alias) {
|
||||
char name[100];
|
||||
const uint16_t *p=aliasTable;
|
||||
uint16_t i, start, limit;
|
||||
|
||||
limit=*p++;
|
||||
if(limit==0) {
|
||||
/* there are no aliases */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* convert the alias name to lowercase to do case-insensitive comparisons */
|
||||
for(i=0; i<sizeof(name)-1 && *alias!=0; ++i) {
|
||||
name[i]=(char)uprv_tolower(*alias++);
|
||||
}
|
||||
name[i]=0;
|
||||
static uint32_t
|
||||
findConverter(const char *alias, UErrorCode *pErrorCode) {
|
||||
uint32_t mid, start, limit;
|
||||
int result;
|
||||
|
||||
/* do a binary search for the alias */
|
||||
start=0;
|
||||
while(start<limit-1) {
|
||||
i=(uint16_t)((start+limit)/2);
|
||||
if(ucnv_compareNames(name, (const char *)aliasTable+p[i])<0) {
|
||||
limit=i;
|
||||
start = 0;
|
||||
limit = untaggedConvArraySize - 1;
|
||||
mid = limit;
|
||||
|
||||
/* Once mid == 0 we've already checked the 0'th element and we can stop */
|
||||
while (start <= limit && mid != 0) {
|
||||
mid = (uint32_t)((start + limit + 1) / 2); /* +1 is to round properly */
|
||||
result = ucnv_compareNames(alias, GET_STRING(aliasList[mid]));
|
||||
|
||||
if (result < 0) {
|
||||
limit = mid-1;
|
||||
} else if (result > 0) {
|
||||
start = mid+1;
|
||||
} else {
|
||||
start=i;
|
||||
/* Since the gencnval tool folds duplicates into one entry,
|
||||
* this alias in aliasList is unique, but different standards
|
||||
* may map an alias to different converters.
|
||||
*/
|
||||
if (untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
|
||||
*pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
|
||||
}
|
||||
return untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
/* did we really find it? */
|
||||
if(ucnv_compareNames(name, (const char *)aliasTable+p[start])==0) {
|
||||
limit=*(p-1); /* aliasCount */
|
||||
p+=limit; /* advance to the second column of the alias table */
|
||||
i=p[start]; /* converter index */
|
||||
return
|
||||
p+limit+ /* beginning of converter table */
|
||||
1+ /* skip its count */
|
||||
2*i; /* go to this converter's entry and return a pointer to it */
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
|
||||
const uint16_t *p=findAlias(alias);
|
||||
if(p!=NULL) {
|
||||
return (const char *)aliasTable+*p;
|
||||
uint32_t convNum = findConverter(alias, pErrorCode);
|
||||
if (convNum < converterListNum) {
|
||||
return GET_STRING(converterList[convNum]);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) {
|
||||
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
|
||||
const uint16_t *p=findAlias(alias);
|
||||
if(p!=NULL) {
|
||||
*aliases=(const char *)aliasTable+*p;
|
||||
return *(p+1);
|
||||
uint32_t convNum = findConverter(alias, pErrorCode);
|
||||
if (convNum < converterListNum) {
|
||||
/* tagListNum - 1 is the ALL tag */
|
||||
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
|
||||
|
||||
if (listOffset) {
|
||||
return taggedAliasLists[listOffset];
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
|
||||
uint32_t currAlias;
|
||||
uint32_t convNum = findConverter(alias, pErrorCode);
|
||||
if (convNum < converterListNum) {
|
||||
/* tagListNum - 1 is the ALL tag */
|
||||
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
|
||||
|
||||
if (listOffset) {
|
||||
uint32_t listCount = taggedAliasLists[listOffset];
|
||||
/* +1 to skip listCount */
|
||||
const uint16_t *currList = taggedAliasLists + listOffset + 1;
|
||||
|
||||
for (currAlias = start; currAlias < listCount; currAlias++) {
|
||||
aliases[currAlias] = GET_STRING(currList[currAlias]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -338,17 +454,20 @@ ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCo
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
|
||||
const uint16_t *p=findAlias(alias);
|
||||
if(p!=NULL) {
|
||||
uint16_t count=*(p+1);
|
||||
if(n<count) {
|
||||
const char *aliases=(const char *)aliasTable+*p;
|
||||
while(n>0) {
|
||||
/* skip a name, first the canonical converter name */
|
||||
aliases+=uprv_strlen(aliases)+1;
|
||||
--n;
|
||||
uint32_t convNum = findConverter(alias, pErrorCode);
|
||||
if (convNum < converterListNum) {
|
||||
/* tagListNum - 1 is the ALL tag */
|
||||
int32_t listOffset = taggedAliasArray[(tagListNum - 1)*converterListNum + convNum];
|
||||
|
||||
if (listOffset) {
|
||||
uint32_t listCount = taggedAliasLists[listOffset];
|
||||
/* +1 to skip listCount */
|
||||
const uint16_t *currList = taggedAliasLists + listOffset + 1;
|
||||
|
||||
if (n < listCount) {
|
||||
return GET_STRING(currList[n]);
|
||||
}
|
||||
return aliases;
|
||||
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -358,12 +477,8 @@ ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_countStandards(UErrorCode *pErrorCode) {
|
||||
if (haveAliasData(pErrorCode)) {
|
||||
if (!tagTable) {
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return *tagTable;
|
||||
/* Don't include the empty list */
|
||||
return (uint16_t)(tagListNum - NUM_RESERVED_TAGS);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -371,15 +486,11 @@ ucnv_io_countStandards(UErrorCode *pErrorCode) {
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
|
||||
if (haveAliasData(pErrorCode) && tagTable) {
|
||||
int16_t count = (int16_t) *tagTable;
|
||||
const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
|
||||
|
||||
while (n-- && count--) {
|
||||
tags += strlen(tags) + 1;
|
||||
if (haveAliasData(pErrorCode)) {
|
||||
if (n < tagListNum - NUM_RESERVED_TAGS) {
|
||||
return GET_STRING(tagList[n]);
|
||||
}
|
||||
|
||||
return count ? tags : NULL;
|
||||
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -388,18 +499,56 @@ ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
|
||||
U_CFUNC const char * U_EXPORT2
|
||||
ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
|
||||
if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
|
||||
const uint16_t *p = findAlias(alias);
|
||||
if(p != NULL) {
|
||||
int16_t tag = getTagNumber(standard);
|
||||
uint32_t idx;
|
||||
uint32_t listOffset;
|
||||
uint32_t convNum;
|
||||
uint32_t tagNum = getTagNumber(standard);
|
||||
UErrorCode myErr = U_ZERO_ERROR;
|
||||
|
||||
if (tag > -1) {
|
||||
uint16_t offset = tagTable[1 + tag * *converterTable + (p - converterTable) / 2];
|
||||
return offset ? (const char *) aliasTable + offset : NULL;
|
||||
/* Make a quick guess. Hopefully they used a TR22 canonical alias. */
|
||||
convNum = findConverter(alias, &myErr);
|
||||
|
||||
if (tagNum < (tagListNum - NUM_RESERVED_TAGS) && convNum < converterListNum) {
|
||||
if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
|
||||
/* Uh Oh! They used an ambiguous alias.
|
||||
Hopefully the standard knows the alias.
|
||||
This may take a while.
|
||||
*/
|
||||
for (idx = 0; idx < converterListNum; idx++) {
|
||||
listOffset = taggedAliasArray[tagNum*converterListNum + idx];
|
||||
if (listOffset) {
|
||||
uint32_t currAlias;
|
||||
uint32_t listCount = taggedAliasLists[listOffset];
|
||||
/* +1 to skip listCount */
|
||||
const uint16_t *currList = taggedAliasLists + listOffset + 1;
|
||||
for (currAlias = 0; currAlias < listCount; currAlias++) {
|
||||
if (currList[currAlias]
|
||||
&& ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
|
||||
{
|
||||
if (currList[0]) {
|
||||
return GET_STRING(currList[0]);
|
||||
}
|
||||
else {
|
||||
/* Someone screwed up the alias table. */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* The standard doesn't know about the alias */
|
||||
*pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
|
||||
}
|
||||
listOffset = taggedAliasArray[tagNum*converterListNum + convNum];
|
||||
if (listOffset && taggedAliasLists[listOffset + 1]) {
|
||||
return GET_STRING(taggedAliasLists[listOffset + 1]);
|
||||
}
|
||||
/* else no default name */
|
||||
}
|
||||
/* else converter or tag not found */
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
@ -413,41 +562,52 @@ ucnv_io_flushAvailableConverterCache() {
|
||||
availableConverterCount = 0;
|
||||
}
|
||||
|
||||
static void ucnv_io_loadAvailableConverterList(void) {
|
||||
uint16_t idx = 0;
|
||||
uint16_t localConverterCount = 0;
|
||||
UErrorCode status;
|
||||
char *converterName;
|
||||
|
||||
/* We can't have more than "*converterTable" converters to open */
|
||||
char **localConverterList = (char **) uprv_malloc(*converterTable * sizeof(char*));
|
||||
|
||||
for (; idx < *converterTable; idx++) {
|
||||
status = U_ZERO_ERROR;
|
||||
converterName = (char *)aliasTable+converterTable[1+2*idx];
|
||||
ucnv_close(ucnv_open(converterName, &status));
|
||||
if (U_SUCCESS(status)) {
|
||||
localConverterList[localConverterCount++] = converterName;
|
||||
}
|
||||
}
|
||||
|
||||
umtx_lock(NULL);
|
||||
static UBool haveAvailableConverterList(UErrorCode *pErrorCode) {
|
||||
if (availableConverters == NULL) {
|
||||
availableConverters = (const char **)localConverterList;
|
||||
availableConverterCount = localConverterCount;
|
||||
uint16_t idx;
|
||||
uint16_t localConverterCount;
|
||||
UErrorCode status;
|
||||
const char *converterName;
|
||||
const char **localConverterList;
|
||||
|
||||
if (!haveAliasData(pErrorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* We can't have more than "converterListNum" converters to open */
|
||||
localConverterList = (const char **) uprv_malloc(converterListNum * sizeof(char*));
|
||||
if (!localConverterList) {
|
||||
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
localConverterCount = 0;
|
||||
|
||||
for (idx = 0; idx < converterListNum; idx++) {
|
||||
status = U_ZERO_ERROR;
|
||||
converterName = GET_STRING(converterList[idx]);
|
||||
ucnv_close(ucnv_open(converterName, &status));
|
||||
if (U_SUCCESS(status)) {
|
||||
localConverterList[localConverterCount++] = converterName;
|
||||
}
|
||||
}
|
||||
|
||||
umtx_lock(NULL);
|
||||
if (availableConverters == NULL) {
|
||||
availableConverters = localConverterList;
|
||||
availableConverterCount = localConverterCount;
|
||||
}
|
||||
else {
|
||||
uprv_free((char **)localConverterList);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
else {
|
||||
uprv_free(localConverterList);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode)) {
|
||||
if (availableConverters == NULL) {
|
||||
ucnv_io_loadAvailableConverterList();
|
||||
}
|
||||
if (haveAvailableConverterList(pErrorCode)) {
|
||||
return availableConverterCount;
|
||||
}
|
||||
return 0;
|
||||
@ -455,20 +615,18 @@ ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
|
||||
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode)) {
|
||||
if (availableConverters == NULL) {
|
||||
ucnv_io_loadAvailableConverterList();
|
||||
}
|
||||
if(n < availableConverterCount) {
|
||||
if (haveAvailableConverterList(pErrorCode)) {
|
||||
if (n < availableConverterCount) {
|
||||
return availableConverters[n];
|
||||
}
|
||||
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode)) {
|
||||
if (haveAvailableConverterList(pErrorCode)) {
|
||||
uint16_t count = 0;
|
||||
while (count < availableConverterCount) {
|
||||
*aliases++=availableConverters[count++];
|
||||
@ -478,42 +636,12 @@ ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
|
||||
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_countAvailableAliases(UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode)) {
|
||||
return *aliasTable;
|
||||
if (haveAliasData(pErrorCode)) {
|
||||
return (uint16_t)aliasListNum;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* We are not currently using these functions, so I am commenting them out
|
||||
* to reduce the binary file size and improve the code coverage;
|
||||
* I do not currently want to remove this entirely because it may be useful
|
||||
* in the future and also serves to some degree as another piece of
|
||||
* documentation of the data structure.
|
||||
*/
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getAvailableAlias(uint16_t n, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode) && n<*aliasTable) {
|
||||
return (const char *)aliasTable+*(aliasTable+1+n);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode) {
|
||||
if(haveAliasData(pErrorCode)) {
|
||||
const uint16_t *p=aliasTable;
|
||||
uint16_t count=*p++;
|
||||
while(count>0) {
|
||||
*aliases++=(const char *)aliasTable+*p;
|
||||
++p;
|
||||
--count;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* default converter name --------------------------------------------------- */
|
||||
|
||||
/*
|
||||
@ -529,10 +657,7 @@ ucnv_io_getDefaultConverterName() {
|
||||
/* local variable to be thread-safe */
|
||||
const char *name=defaultConverterName;
|
||||
if(name==NULL) {
|
||||
const char *codepage=0;
|
||||
umtx_lock(NULL);
|
||||
codepage = uprv_getDefaultCodepage();
|
||||
umtx_unlock(NULL);
|
||||
const char *codepage = uprv_getDefaultCodepage();
|
||||
if(codepage!=NULL) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
name=ucnv_io_getConverterName(codepage, &errorCode);
|
||||
@ -543,26 +668,27 @@ ucnv_io_getDefaultConverterName() {
|
||||
|
||||
/* if the name is there, test it out */
|
||||
if(name != NULL) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
UConverter *cnv;
|
||||
cnv = ucnv_open(name, &errorCode);
|
||||
if(U_FAILURE(errorCode) || (cnv == NULL)) {
|
||||
/* Panic time, let's use a fallback. */
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
UConverter *cnv = ucnv_open(name, &errorCode);
|
||||
if(U_FAILURE(errorCode) || (cnv == NULL)) {
|
||||
/* Panic time, let's use a fallback. */
|
||||
#if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
|
||||
name = "US-ASCII";
|
||||
/* there is no 'algorithmic' converter for EBCDIC */
|
||||
name = "US-ASCII";
|
||||
/* there is no 'algorithmic' converter for EBCDIC */
|
||||
#elif defined(OS390)
|
||||
name = "ibm-1047-s390";
|
||||
name = "ibm-1047-s390";
|
||||
#else
|
||||
name = "ibm-37";
|
||||
name = "ibm-37";
|
||||
#endif
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
if(name != NULL) {
|
||||
/* Did find a name. And it works.*/
|
||||
defaultConverterName=name;
|
||||
umtx_lock(NULL);
|
||||
/* Did find a name. And it works.*/
|
||||
defaultConverterName=name;
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,9 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
|
||||
#define UCNV_CONVERTER_INDEX_MASK 0x7FF
|
||||
|
||||
/**
|
||||
* Map a converter alias name to a canonical converter name.
|
||||
* The alias is searched for case-insensitively, the converter name
|
||||
@ -24,6 +27,12 @@
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* The count for ucnv_io_getAliases and ucnv_io_getAlias
|
||||
*/
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Search case-insensitively for a converter alias and set aliases to
|
||||
* a pointer to the list of aliases for the actual converter.
|
||||
@ -34,7 +43,7 @@ ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode);
|
||||
* or 0 if the alias is not found.
|
||||
*/
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
|
||||
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Search case-insensitively for a converter alias and return
|
||||
@ -85,21 +94,6 @@ ucnv_io_flushAvailableConverterCache(void);
|
||||
U_CFUNC uint16_t
|
||||
ucnv_io_countAvailableAliases(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Return the (n)th alias or converter name in mixed case, or NULL
|
||||
* if there is none (typically, if the data cannot be loaded).
|
||||
* 0<=index<ucnv_io_countAvailableAliases().
|
||||
*/
|
||||
U_CFUNC const char *
|
||||
ucnv_io_getAvailableAlias(uint16_t n, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Fill an array const char *aliases[ucnv_io_countAvailableAliases()]
|
||||
* with pointers to all aliases and converter names in mixed-case.
|
||||
*/
|
||||
U_CFUNC void
|
||||
ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the name of the default converter.
|
||||
* This name is already resolved by <code>ucnv_io_getConverterName()</code>.
|
||||
|
@ -40,6 +40,7 @@ U_CDECL_BEGIN
|
||||
|
||||
/* maximum length of the converter names */
|
||||
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
|
||||
/* maximum length of the converter name including path */
|
||||
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
|
||||
|
||||
#define UCNV_SI 0x0F /*Shift in for EBCDIC_STATEFUL and iso2022 states */
|
||||
@ -979,7 +980,6 @@ ucnv_getAvailableName (int32_t n);
|
||||
|
||||
/**
|
||||
* Gives the number of aliases for a given converter or alias name.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param pErrorCode error status
|
||||
@ -991,7 +991,6 @@ ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gives the name of the alias at given index of alias list.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param n index in alias list
|
||||
@ -1005,7 +1004,6 @@ ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Fill-up the list of alias names for the given alias.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param aliases fill-in list, aliases is a pointer to an array of
|
||||
|
@ -379,8 +379,11 @@ enum UErrorCode {
|
||||
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
|
||||
|
||||
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
|
||||
|
||||
U_SORT_KEY_TOO_SHORT_WARNING = -123,
|
||||
|
||||
U_AMBIGUOUS_ALIAS_WARNING = -122,
|
||||
|
||||
U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
|
||||
|
||||
/** @deprecated use the enum that ends in _WARNING */
|
||||
@ -476,18 +479,18 @@ enum UErrorCode {
|
||||
/*
|
||||
* The error code range 0x10200-0x10300 is reserved for Break Iterator related errors
|
||||
*/
|
||||
U_BRK_ERROR_START=0x10200,
|
||||
U_BRK_INTERNAL_ERROR,
|
||||
U_BRK_HEX_DIGITS_EXPECTED,
|
||||
U_BRK_SEMICOLON_EXPECTED,
|
||||
U_BRK_RULE_SYNTAX,
|
||||
U_BRK_UNCLOSED_SET,
|
||||
U_BRK_ASSIGN_ERROR,
|
||||
U_BRK_VARIABLE_REDFINITION,
|
||||
U_BRK_MISMATCHED_PAREN,
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING,
|
||||
U_BRK_UNDEFINED_VARIABLE,
|
||||
U_BRK_ERROR_LIMIT,
|
||||
U_BRK_ERROR_START=0x10200,
|
||||
U_BRK_INTERNAL_ERROR,
|
||||
U_BRK_HEX_DIGITS_EXPECTED,
|
||||
U_BRK_SEMICOLON_EXPECTED,
|
||||
U_BRK_RULE_SYNTAX,
|
||||
U_BRK_UNCLOSED_SET,
|
||||
U_BRK_ASSIGN_ERROR,
|
||||
U_BRK_VARIABLE_REDFINITION,
|
||||
U_BRK_MISMATCHED_PAREN,
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING,
|
||||
U_BRK_UNDEFINED_VARIABLE,
|
||||
U_BRK_ERROR_LIMIT,
|
||||
|
||||
U_ERROR_LIMIT=U_BRK_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
};
|
||||
|
@ -35,6 +35,8 @@ ICUOUT=$(ICUMAKE)\out
|
||||
#
|
||||
ICUP=$(ICUMAKE)\..\..
|
||||
ICUP=$(ICUP:\source\data\..\..=)
|
||||
# In case the first one didn't do it, try this one. .NET would do the second one.
|
||||
ICUP=$(ICUP:\source\data\\..\..=)
|
||||
!MESSAGE ICU root path is $(ICUP)
|
||||
|
||||
|
||||
@ -238,14 +240,14 @@ BRK_FILES = "$(ICUBLD)\sent.brk" "$(ICUBLD)\char.brk" "$(ICUBLD)\line.brk" "$(IC
|
||||
# move the .dll and .lib files to their final destination afterwards.
|
||||
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
|
||||
#
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\uprops.dat" "$(ICUBLD)\unames.dat" "$(ICUBLD)\unorm.dat" "$(ICUBLD)\cnvalias.dat" "$(ICUBLD)\tz.dat" "$(ICUBLD)\ucadata.dat" "$(ICUBLD)\invuca.dat" $(ALL_RES) "$(ICUBLD)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\uprops.dat" "$(ICUBLD)\unames.dat" "$(ICUBLD)\unorm.dat" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\tz.dat" "$(ICUBLD)\ucadata.dat" "$(ICUBLD)\invuca.dat" $(ALL_RES) "$(ICUBLD)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
@echo Building icu data
|
||||
@cd "$(ICUBLD)"
|
||||
"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -e $(U_ICUDATA_NAME) -v -m dll -c -p $(U_ICUDATA_NAME) -O "$(PKGOPT)" -d "$(ICUBLD)" -s . <<pkgdatain.txt
|
||||
uprops.dat
|
||||
unames.dat
|
||||
unorm.dat
|
||||
cnvalias.dat
|
||||
cnvalias.icu
|
||||
tz.dat
|
||||
ucadata.dat
|
||||
invuca.dat
|
||||
@ -390,7 +392,7 @@ res_index {
|
||||
@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -s "$(ICUUNIDATA)"
|
||||
|
||||
# Targets for converters
|
||||
"$(ICUBLD)\cnvalias.dat" : {"$(ICUSRCDATA)\$(ICUUCM)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
|
||||
"$(ICUBLD)\cnvalias.icu" : {"$(ICUSRCDATA)\$(ICUUCM)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
|
||||
@echo Creating data file for Converter Aliases
|
||||
@set ICU_DATA=$(ICUBLD)
|
||||
@"$(ICUTOOLS)\gencnval\$(CFG)\gencnval" "$(ICUSRCDATA)\$(ICUUCM)\convrtrs.txt"
|
||||
|
@ -11,6 +11,9 @@
|
||||
# run gencnval, and eventually pkgdata to update the representation that
|
||||
# ICU uses for aliases.
|
||||
|
||||
# Please be friendly to the rest of us who edit this table by
|
||||
# keeping this table free of tabs.
|
||||
|
||||
# This is an alias file used by the character set converter.
|
||||
#
|
||||
# Format:
|
||||
@ -21,8 +24,8 @@
|
||||
# by whitespace.
|
||||
#
|
||||
# All names can be tagged by including a space-separated list of tags in
|
||||
# curly braces, as in ISO_8859-1:1987{IANA} iso-8859-1 { MIME } or
|
||||
# some-charset{MIME IANA}. The order of tags does not matter, and
|
||||
# curly braces, as in ISO_8859-1:1987{IANA*} iso-8859-1 { MIME* } or
|
||||
# some-charset{MIME* IANA*}. The order of tags does not matter, and
|
||||
# whitespace is allowed between the tagged name and the tags list.
|
||||
#
|
||||
# The tags can be used to get standard names using ucnv_getStandardName().
|
||||
@ -31,6 +34,10 @@
|
||||
#
|
||||
# IANA The IANA charset name, as documented in RFC 1700.
|
||||
# MIME The MIME charset name, used for content type tagging.
|
||||
#
|
||||
# The * after the standard tag denotes that the previous alias is the
|
||||
# preferred (default) charset name for that standard. There can only
|
||||
# be one of these default charset names per standard for each converter.
|
||||
|
||||
# The world is getting more complicated...
|
||||
# Supporting XML parsers, HTML, MIME, and similar applications
|
||||
@ -63,13 +70,17 @@
|
||||
# or names of algorithmic converters, and their case must not
|
||||
# be changed - or else code and/or file names must also be changed.
|
||||
|
||||
# List of supported standard tags
|
||||
{ IANA MIME
|
||||
# This is the list of supported standard tags.
|
||||
# When multiple converters have the same alias under different standards,
|
||||
# the standard nearest to the top of this list with that alias will
|
||||
# be the first converter that will be opened.
|
||||
{ IANA # Source: http://www.iana.org/assignments/character-sets
|
||||
MIME # Source: http://www.iana.org/assignments/character-sets
|
||||
#ICU # Can also use ICU_FEATURE ICU_CANONICAL
|
||||
#IBM AIX DB2
|
||||
#WINDOWS MSIE # MSIE is Internet Explorer, which is different from Windows
|
||||
#GLIBC
|
||||
#JAVA
|
||||
JAVA # Source: Sun JDK. Preferred name must be an exact match. Alias name case is ignored, but dashes are not ignored.
|
||||
#SOLARIS
|
||||
#APPLE
|
||||
#HPUX
|
||||
@ -80,20 +91,20 @@
|
||||
|
||||
# Fully algorithmic converters
|
||||
|
||||
UTF-8 { IANA MIME } ibm-1208 cp1208
|
||||
UTF-8 { IANA* MIME* } ibm-1208 cp1208
|
||||
|
||||
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
|
||||
UTF-16 { IANA MIME } ISO-10646-UCS-2 { IANA } csUnicode ibm-17584 ibm-13488 ibm-1200 cp1200 ucs-2
|
||||
UTF-16BE { IANA MIME } UTF16_BigEndian x-utf-16be
|
||||
UTF-16LE { IANA MIME } UTF16_LittleEndian x-utf-16le
|
||||
UTF-16 { IANA* MIME* } ISO-10646-UCS-2 { IANA } csUnicode ibm-17584 ibm-13488 ibm-1200 cp1200 ucs-2
|
||||
UTF-16BE { IANA* MIME* } UTF16_BigEndian x-utf-16be
|
||||
UTF-16LE { IANA* MIME* } UTF16_LittleEndian x-utf-16le
|
||||
|
||||
# ICU-specific names for special uses
|
||||
UTF16_PlatformEndian
|
||||
UTF16_OppositeEndian
|
||||
|
||||
UTF-32 { IANA MIME } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 ibm-1232
|
||||
UTF-32BE { IANA } UTF32_BigEndian
|
||||
UTF-32LE { IANA } UTF32_LittleEndian
|
||||
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA } csUCS4 ucs-4 ibm-1232
|
||||
UTF-32BE { IANA* } UTF32_BigEndian
|
||||
UTF-32LE { IANA* } UTF32_LittleEndian
|
||||
|
||||
# ICU-specific names for special uses
|
||||
UTF32_PlatformEndian
|
||||
@ -108,31 +119,57 @@ UTF32_OppositeEndian
|
||||
# By choosing the option "version=1", set O will be escaped instead.
|
||||
# For example:
|
||||
# utf7Converter=ucnv_open("UTF-7,version=1");
|
||||
UTF-7 { IANA MIME }
|
||||
UTF-7 { IANA* MIME* }
|
||||
|
||||
SCSU { IANA }
|
||||
SCSU { IANA* }
|
||||
BOCU-1
|
||||
|
||||
# See http://www.unicode.org/unicode/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
|
||||
# The Unicode Consortium does not encourage the use of CESU-8
|
||||
CESU-8 { IANA }
|
||||
CESU-8 { IANA* }
|
||||
|
||||
ISO-8859-1 { MIME } LATIN_1 ibm-819 cp819 latin1 8859-1 csisolatin1 iso-ir-100 ISO_8859-1:1987 { IANA } l1 ANSI_X3.110-1983 819 #!!!!! There's whole lot of names for this
|
||||
US-ASCII { MIME } ascii ascii-7 ANSI_X3.4-1968 { IANA } ANSI_X3.4-1986 ISO_646.irv:1991 iso646-us us csASCII 646 iso-ir-6 cp367
|
||||
ISO-8859-1 { MIME* IANA }
|
||||
LATIN_1 # Old ICU name
|
||||
ibm-819
|
||||
IBM819 { IANA }
|
||||
cp819 { IANA }
|
||||
latin1 { IANA }
|
||||
8859-1
|
||||
csISOLatin1 { IANA }
|
||||
iso-ir-100 { IANA }
|
||||
ISO_8859-1:1987 { IANA* }
|
||||
l1 { IANA }
|
||||
819
|
||||
# ANSI_X3.110-1983 # This is for a different IANA alias. This isn't iso-8859-1.
|
||||
|
||||
US-ASCII { MIME* IANA }
|
||||
ASCII { JAVA* IANA }
|
||||
ascii-7 { JAVA }
|
||||
ANSI_X3.4-1968 { IANA* }
|
||||
ANSI_X3.4-1986 { IANA }
|
||||
ISO_646.irv:1991 { IANA }
|
||||
iso_646.irv:1983 { JAVA }
|
||||
ISO646-US { JAVA IANA }
|
||||
us { IANA }
|
||||
csASCII { IANA }
|
||||
646 { JAVA }
|
||||
iso-ir-6 { IANA }
|
||||
cp367 { IANA }
|
||||
# Java says "default" too, but that makes no sense.
|
||||
|
||||
# Partially algorithmic converters
|
||||
|
||||
ISO_2022 ISO-2022 { MIME } 2022 cp2022
|
||||
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA MIME } csISO2022JP
|
||||
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA }
|
||||
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA MIME } csISO2022JP2
|
||||
ISO_2022 ISO-2022 { MIME* } 2022 cp2022
|
||||
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA* MIME* } csISO2022JP
|
||||
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 JIS JIS_Encoding { IANA* }
|
||||
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA* MIME* } csISO2022JP2
|
||||
ISO_2022,locale=ja,version=3 JIS7 csJISEncoding
|
||||
ISO_2022,locale=ja,version=4 JIS8
|
||||
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA MIME } csISO2022KR
|
||||
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA* MIME* } csISO2022KR
|
||||
ISO_2022,locale=ko,version=1 ibm-25546 ibm-25546_P100 25546
|
||||
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA MIME } csISO2022CN
|
||||
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA MIME }
|
||||
HZ HZ-GB-2312 { IANA MIME }
|
||||
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA* MIME* } # csISO2022CN
|
||||
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA* MIME* }
|
||||
HZ HZ-GB-2312 { IANA* MIME* }
|
||||
LMBCS-1 lmbcs
|
||||
LMBCS-2
|
||||
LMBCS-3
|
||||
@ -155,82 +192,210 @@ ISCII,version=6 iscii-tlg x-iscii-te
|
||||
ISCII,version=7 iscii-knd x-iscii-ka
|
||||
ISCII,version=8 iscii-mlm x-iscii-ma
|
||||
|
||||
# Table-based
|
||||
# Table-based interchange codepages
|
||||
|
||||
ibm-367
|
||||
ibm-367 IBM367 { IANA* } # This is ASCII, but it has fallbacks
|
||||
|
||||
# Central Europe
|
||||
# Standard iso-8859-1, which does not have the Euro update.
|
||||
# See iso-8859-15 (latin9) for the Euro update
|
||||
ibm-912 iso-8859-2 { MIME* IANA }
|
||||
latin2 { IANA }
|
||||
# ISO8859_2 { JAVA* } # This is really the default for Java and many others.
|
||||
8859-2
|
||||
csISOLatin2 { IANA }
|
||||
iso-ir-101 { IANA }
|
||||
ISO_8859-2:1987 { IANA* }
|
||||
l2 { IANA }
|
||||
cp912
|
||||
912
|
||||
|
||||
# Maltese Esperanto
|
||||
ibm-913 iso-8859-3 { MIME* IANA }
|
||||
latin3 { IANA }
|
||||
8859-3
|
||||
csISOLatin3 { IANA }
|
||||
iso-ir-109
|
||||
ISO_8859-3:1988 { IANA* }
|
||||
l3 { IANA }
|
||||
cp913
|
||||
913
|
||||
|
||||
# Baltic
|
||||
ibm-914 iso-8859-4 { MIME* IANA }
|
||||
latin4 { IANA }
|
||||
8859-4
|
||||
csISOLatin4 { IANA }
|
||||
iso-ir-110 { IANA }
|
||||
ISO_8859-4:1988 { IANA* }
|
||||
l4 { IANA }
|
||||
cp914
|
||||
914
|
||||
|
||||
# Cyrillic
|
||||
ibm-915 iso-8859-5 { MIME* IANA }
|
||||
cyrillic { IANA }
|
||||
8859-5
|
||||
csISOLatinCyrillic { IANA }
|
||||
iso-ir-144 { IANA }
|
||||
ISO_8859-5:1988 { IANA* }
|
||||
cp915
|
||||
915
|
||||
|
||||
# Arabic
|
||||
# ISO_8859-6-E and ISO_8859-6-I are similar to this charset, but they are not the same
|
||||
# -E means explicit. -I means implicit. However those aliases are rarely used.
|
||||
ibm-1089 iso-8859-6 { MIME* IANA }
|
||||
arabic { IANA }
|
||||
8859-6
|
||||
csISOLatinArabic { IANA }
|
||||
iso-ir-127 { IANA }
|
||||
ISO_8859-6:1987 { IANA* }
|
||||
ecma-114 { IANA }
|
||||
asmo-708 { IANA }
|
||||
cp1089
|
||||
1089
|
||||
|
||||
# ISO Greek (w/ euro update)
|
||||
ibm-4909 iso-8859-7 { MIME* IANA }
|
||||
greek { IANA }
|
||||
greek8 { IANA }
|
||||
elot_928 { IANA }
|
||||
ecma-118 { IANA }
|
||||
8859-7
|
||||
csISOLatinGreek { IANA }
|
||||
iso-ir-126 { IANA }
|
||||
ISO_8859-7:1987 { IANA* }
|
||||
cp813
|
||||
813
|
||||
ibm-813 # Same as 4909 above but without the euro update
|
||||
|
||||
# hebrew
|
||||
# ISO_8859-8-E and ISO_8859-8-I are similar to this charset, but they are not the same
|
||||
# -E means explicit. -I means implicit.
|
||||
ibm-916 iso-8859-8 { MIME* IANA }
|
||||
hebrew { IANA }
|
||||
8859-8
|
||||
csISOLatinHebrew { IANA }
|
||||
iso-ir-138 { IANA }
|
||||
ISO_8859-8:1988 { IANA* }
|
||||
cp916
|
||||
916
|
||||
|
||||
# Turkish
|
||||
ibm-920 iso-8859-9 { MIME* IANA }
|
||||
ECMA-128 # IANA doesn't have this alias 6/24/2002
|
||||
latin5 { IANA }
|
||||
8859-9
|
||||
csISOLatin5 { IANA }
|
||||
iso-ir-148 { IANA }
|
||||
ISO_8859-9:1989 { IANA* }
|
||||
l5 { IANA }
|
||||
cp920
|
||||
920
|
||||
|
||||
# Latin 9
|
||||
ibm-923 iso-8859-15 { IANA* MIME* } # IANA only has iso-8859-15 (6/24/2002)
|
||||
# ISO8859_15 { JAVA* } # This is really the default for Java and many others.
|
||||
8859-15
|
||||
latin9
|
||||
latin0
|
||||
csisolatin0
|
||||
csisolatin9
|
||||
iso8859_15_fdis
|
||||
cp923
|
||||
923
|
||||
|
||||
# Interchange codepages
|
||||
ibm-912 iso-8859-2 { MIME } latin2 cp912 8859-2 csisolatin2 iso-ir-101 ISO_8859-2:1987 { IANA } l2 912 # Central Europe
|
||||
ibm-913 iso-8859-3 { MIME } latin3 cp913 8859-3 csisolatin3 iso-ir-109 ISO_8859-3:1988 { IANA } l3 913 # Maltese Esperanto
|
||||
ibm-914 iso-8859-4 { MIME } latin4 cp914 8859-4 csisolatin4 iso-ir-110 ISO_8859-4:1988 { IANA } l4 914 # Baltic
|
||||
ibm-915 iso-8859-5 { MIME } cyrillic cp915 8859-5 csisolatincyrillic iso-ir-144 ISO_8859-5:1988 { IANA } 915 # Cyrillic
|
||||
ibm-1089 iso-8859-6 { MIME } arabic cp1089 8859-6 csisolatinarabic iso-ir-127 ISO_8859-6:1987 { IANA } ecma-114 asmo-708 1089 # Arabic
|
||||
ibm-4909 iso-8859-7 { MIME } greek cp813 greek8 elot_928 ecma-118 8859-7 csisolatingreek iso-ir-126 ISO_8859-7:1987 { IANA } 813 # ISO Greek (w/ euro update)
|
||||
ibm-813 # Same as 4909 (w/o euro update)
|
||||
ibm-916 iso-8859-8 { MIME } hebrew cp916 8859-8 csisolatinhebrew iso-ir-138 ISO_8859-8:1988 { IANA } 916 # hebrew iso-8859-8i - typo?
|
||||
ibm-920 iso-8859-9 { MIME } ECMA-128 latin5 cp920 8859-9 csisolatin5 iso-ir-148 ISO_8859-9:1989 { IANA } l5 920 # Turkish
|
||||
ibm-923 iso-8859-15 { IANA MIME } latin9 cp923 8859-15 latin0 csisolatin0 iso8859_15_fdis csisolatin9 923 # Latin 9
|
||||
ibm-1252 ibm-1004 cp1004 # Windows Latin 1 without Euro
|
||||
ibm-942_P120-2000 ibm-942_VASCII_VSUB_VPUA ibm-942 ibm-932 ibm-932_VASCII_VSUB_VPUA # Old s_jis ibm-932 added!
|
||||
ibm-942_P12A-2000 ibm-942_VSUB_VPUA shift_jis78 sjis78 ibm-932_VSUB_VPUA
|
||||
ibm-943_P130-2000 ibm-943_VASCII_VSUB_VPUA ibm-943 # japanese. Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe Iana says that Windows-31J is an extension to csshiftjis ibm-932 removed
|
||||
ibm-943_P14A-2000 ibm-943_VSUB_VPUA Shift_JIS { MIME } csWindows31J sjis cp943 cp932 pck ms_kanji csshiftjis windows-31j x-sjis 943
|
||||
ibm-943_P14A-2000 ibm-943_VSUB_VPUA Shift_JIS { MIME* } csWindows31J sjis cp943 cp932 pck ms_kanji csshiftjis windows-31j x-sjis 943
|
||||
ibm-949_P110-2000 ibm-949_VASCII_VSUB_VPUA ibm-949
|
||||
ibm-949_P11A-2000 ibm-949_VSUB_VPUA KS_C_5601-1987 { IANA } iso-ir-149 KS_C_5601-1989 csKSC56011987 KSC_5601 { MIME } johab ks_x_1001:1992 949 ksc5601_1992 ksc5601_1987 # KSC-5601-1992, korean
|
||||
ibm-1370 Big5 { IANA MIME } csBig5 x-big5 cp950 950 # Taiwan Big-5 (w/ euro update)
|
||||
|
||||
ibm-949_P11A-2000
|
||||
ibm-949_VSUB_VPUA
|
||||
KS_C_5601-1987 { IANA* }
|
||||
iso-ir-149 { IANA }
|
||||
KS_C_5601-1989 { IANA }
|
||||
csKSC56011987 { IANA }
|
||||
KSC_5601 { MIME* IANA }
|
||||
johab
|
||||
ks_x_1001:1992
|
||||
949
|
||||
korean { IANA }
|
||||
ksc5601_1992 # KSC-5601-1992
|
||||
ksc5601_1987 # Needed by Java
|
||||
|
||||
ibm-1370 Big5 { IANA* MIME* } csBig5 x-big5 cp950 950 # Taiwan Big-5 (w/ euro update)
|
||||
ibm-950 # Taiwan Big-5 (w/o euro update)
|
||||
ibm-1386 gbk { IANA } cp936 windows-936 ms936 zh_cn # Chinese GBK removed
|
||||
ibm-1386 gbk { IANA* } cp936 windows-936 ms936 zh_cn # Chinese GBK removed
|
||||
ibm-33722_P120-2000 ibm-33722_VASCII_VPUA ibm-33722 cp33722 33722 ibm-5050 # Japan EUC with \ <-> Yen mapping
|
||||
ibm-33722_P12A-2000 ibm-33722_VPUA EUC-JP { MIME } ibm-eucJP eucjis Extended_UNIX_Code_Packed_Format_for_Japanese { IANA } cseucpkdfmtjapanese X-EUC-JP # Japan EUC. x-euc-jp is a MIME name
|
||||
ibm-970 EUC-KR { IANA MIME } ibm-eucKR csEUCKR # Korean EUC. x-euc-kr is a MIME name
|
||||
ibm-33722_P12A-2000 ibm-33722_VPUA EUC-JP { MIME* } ibm-eucJP eucjis Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* } cseucpkdfmtjapanese X-EUC-JP # Japan EUC. x-euc-jp is a MIME name
|
||||
ibm-970 EUC-KR { IANA* MIME* } ibm-eucKR csEUCKR # Korean EUC. x-euc-kr is a MIME name
|
||||
ibm-964 EUC-TW ibm-eucTW cns11643 # Taiwan EUC. x-euc-tw is a MIME name
|
||||
ibm-1383_P110-2000 ibm-1383_VPUA ibm-1383 EUC-CN ibm-eucCN GB_2312-80 { IANA } chinese gb iso-ir-58 csISO58GB231280 GB2312 { MIME } gb2312-1980 cp1383 1383 csGB2312# China EUC. x-euc-cn is a MIME name
|
||||
ibm-1162 tis-620 { IANA } cp874 windows-874 ms874 cp9066 874 # Thai (w/ euro update)
|
||||
|
||||
ibm-1383_P110-2000 ibm-1383_VPUA
|
||||
ibm-1383
|
||||
EUC-CN
|
||||
ibm-eucCN
|
||||
GB_2312-80 { IANA* }
|
||||
chinese { IANA }
|
||||
gb # This is not an IANA name. gb in IANA means Great Britain.
|
||||
iso-ir-58 { IANA }
|
||||
csISO58GB231280 { IANA }
|
||||
GB2312 { MIME* }
|
||||
gb2312-1980
|
||||
cp1383
|
||||
1383
|
||||
csGB2312 # China EUC. x-euc-cn is a MIME name
|
||||
|
||||
ibm-1162 tis-620 { IANA* } cp874 windows-874 ms874 cp9066 874 # Thai (w/ euro update)
|
||||
ibm-874 ibm-1161 # Same as 1162 (w/o euro update)
|
||||
|
||||
# Platform codepages
|
||||
ibm-437 cp437 csPC8CodePage437 437 # PC US
|
||||
# HSYS:
|
||||
ibm-850 IBM850 { IANA } cp850 { MIME } 850 csPC850Multilingual # PC latin1
|
||||
ibm-851 IBM851 { IANA } cp851 { MIME } 851 csPC851 # PC DOS Greek (w/o euro)
|
||||
ibm-858 cp858 { MIME } IBM00858 { IANA } # PC latin1 with Euro cp850 removed
|
||||
ibm-850 IBM850 { IANA* } cp850 { MIME* } 850 csPC850Multilingual # PC latin1
|
||||
ibm-851 IBM851 { IANA* } cp851 { MIME* } 851 csPC851 # PC DOS Greek (w/o euro)
|
||||
ibm-858 cp858 { MIME* } IBM00858 { IANA* } # PC latin1 with Euro cp850 removed
|
||||
ibm-9044 852 csPCp852 cp852 # PC latin2 (w/ euro update) cp852 is a MIME name for IBM-852
|
||||
ibm-852 IBM852 { IANA } # PC latin2 (w/o euro update)
|
||||
ibm-852 IBM852 { IANA* } # PC latin2 (w/o euro update)
|
||||
ibm-872 855 csIBM855 cp855 csPCp855 # PC cyrillic (w/ euro update) cp855 is a MIME name for IBM-855
|
||||
ibm-855 IBM855 { IANA } # PC cyrillic (w/o euro update)
|
||||
ibm-856 cp856 { MIME } 856 # PC Hebrew (old)
|
||||
ibm-9049 857 csIBM857 cp857 { MIME } # PC Latin 5 (Turkish) (w/ euro update)
|
||||
ibm-857 IBM857 { IANA } # PC Latin 5 (w/o euro update)
|
||||
ibm-859 cp859 { MIME } # PC Latin 9 (w/ euro update)
|
||||
ibm-860 IBM860 { IANA } cp860 { MIME } 860 csIBM860 # PC Portugal
|
||||
ibm-861 IBM861 { IANA } cp861 { MIME } 861 cp-is csIBM861 # PC Iceland
|
||||
ibm-867 cp867 862 cp862 { MIME } cspc862latinhebrew # PC Hebrew (w/ euro update)
|
||||
ibm-862 IBM862 { IANA } # PC Hebrew (w/o euro update)
|
||||
ibm-863 IBM863 { IANA } cp863 { MIME } 863 csIBM863 # PC Canadian French
|
||||
ibm-17248 cp864 { MIME } csIBM864 # PC Arabic (w/ euro update)
|
||||
ibm-864 IBM864 { IANA } # PC Arabic (w/o euro update)
|
||||
ibm-865 IBM865 { IANA } cp865 { MIME } 865 csIBM865 # PC Nordic
|
||||
ibm-808 cp866 { MIME } 866 csIBM866 # PC Russian (w/ euro update)
|
||||
ibm-855 IBM855 { IANA* } # PC cyrillic (w/o euro update)
|
||||
ibm-856 cp856 { MIME* } 856 # PC Hebrew (old)
|
||||
ibm-9049 857 csIBM857 cp857 { MIME* } # PC Latin 5 (Turkish) (w/ euro update)
|
||||
ibm-857 IBM857 { IANA* } # PC Latin 5 (w/o euro update)
|
||||
ibm-859 cp859 { MIME* } # PC Latin 9 (w/ euro update)
|
||||
ibm-860 IBM860 { IANA* } cp860 { MIME* } 860 csIBM860 # PC Portugal
|
||||
ibm-861 IBM861 { IANA* } cp861 { MIME* } 861 cp-is csIBM861 # PC Iceland
|
||||
ibm-867 cp867 862 cp862 { MIME* } cspc862latinhebrew # PC Hebrew (w/ euro update)
|
||||
ibm-862 IBM862 { IANA* } # PC Hebrew (w/o euro update)
|
||||
ibm-863 IBM863 { IANA* } cp863 { MIME* } 863 csIBM863 # PC Canadian French
|
||||
ibm-17248 cp864 { MIME* } csIBM864 # PC Arabic (w/ euro update)
|
||||
ibm-864 IBM864 { IANA* } # PC Arabic (w/o euro update)
|
||||
ibm-865 IBM865 { IANA* } cp865 { MIME* } 865 csIBM865 # PC Nordic
|
||||
ibm-808 cp866 { MIME* } 866 csIBM866 # PC Russian (w/ euro update)
|
||||
ibm-866 # PC Russian (w/o euro update)
|
||||
ibm-868 IBM868 { IANA } cp868 { MIME } cp-ar csIBM868 868 # PC Urdu
|
||||
ibm-9061 cp869 { MIME } 869 cp-gr csIBM869 # PC Greek (w/ euro update)
|
||||
ibm-869 IBM869 { IANA } # PC Greek (w/o euro update)
|
||||
ibm-878 KOI8-R { IANA MIME } cp878 koi8 cskoi8r # Russian internet
|
||||
ibm-901 cp921 { MIME } 921 # PC Baltic (w/ euro update)
|
||||
ibm-868 IBM868 { IANA* } cp868 { MIME* } cp-ar csIBM868 868 # PC Urdu
|
||||
ibm-9061 cp869 { MIME* } 869 cp-gr csIBM869 # PC Greek (w/ euro update)
|
||||
ibm-869 IBM869 { IANA* } # PC Greek (w/o euro update)
|
||||
ibm-878 KOI8-R { IANA* MIME* } cp878 koi8 cskoi8r # Russian internet
|
||||
ibm-901 cp921 { MIME* } 921 # PC Baltic (w/ euro update)
|
||||
ibm-921 # PC Baltic (w/o euro update)
|
||||
ibm-902 cp922 { MIME } 922 # PC Estonian (w/ euro update)
|
||||
ibm-902 cp922 { MIME* } 922 # PC Estonian (w/ euro update)
|
||||
ibm-922 # PC Estonian (w/o euro update)
|
||||
#ibm-941 jis-208 jisx-208 # Pure DBCS jisx-208 # ibm-941 is not JISX 208 code page
|
||||
#ibm-1038 Adobe-Symbol-Encoding csHPPSMath symbol
|
||||
ibm-5346 windows-1250 { IANA } cp1250 # Windows Latin2 (w/ euro update)
|
||||
ibm-5347 windows-1251 { IANA } cp1251 # Windows Cyrillic (w/ euro update)
|
||||
ibm-5348 windows-1252 { IANA } cp1252 # Windows Latin1 (w/ euro update)
|
||||
ibm-5349 windows-1253 { IANA } cp1253 # Windows Greek (w/ euro update)
|
||||
ibm-5350 windows-1254 { IANA } cp1254 # Windows Turkish (w/ euro update)
|
||||
ibm-5351 windows-1255 { IANA } cp1255 # Windows Hebrew (w/ euro update)
|
||||
ibm-5352 windows-1256 { IANA } cp1256 # Windows Arabic (w/ euro update)
|
||||
ibm-5353 windows-1257 { IANA } cp1257 # Windows Baltic (w/ euro update)
|
||||
ibm-5354 windows-1258 { IANA } cp1258 # Windows Vietnamese (w/ euro update)
|
||||
ibm-5346 windows-1250 { IANA* } cp1250 # Windows Latin2 (w/ euro update)
|
||||
ibm-5347 windows-1251 { IANA* } cp1251 # Windows Cyrillic (w/ euro update)
|
||||
ibm-5348 windows-1252 { IANA* } cp1252 # Windows Latin1 (w/ euro update)
|
||||
ibm-5349 windows-1253 { IANA* } cp1253 # Windows Greek (w/ euro update)
|
||||
ibm-5350 windows-1254 { IANA* } cp1254 # Windows Turkish (w/ euro update)
|
||||
ibm-5351 windows-1255 { IANA* } cp1255 # Windows Hebrew (w/ euro update)
|
||||
ibm-5352 windows-1256 { IANA* } cp1256 # Windows Arabic (w/ euro update)
|
||||
ibm-5353 windows-1257 { IANA* } cp1257 # Windows Baltic (w/ euro update)
|
||||
ibm-5354 windows-1258 { IANA* } cp1258 # Windows Vietnamese (w/ euro update)
|
||||
ibm-1250 # Windows Latin2 (w/o euro update)
|
||||
ibm-1251 # Windows Cyrillic (w/o euro update)
|
||||
ibm-1253 # Windows Greek (w/o euro update)
|
||||
@ -240,15 +405,15 @@ ibm-1256 # Windows Arabic (w/o euro update)
|
||||
ibm-1257 # Windows Baltic (w/o euro update)
|
||||
ibm-1258 # Windows Vietnamese (w/o euro update)
|
||||
|
||||
ibm-1275 macintosh { IANA } mac { MIME } csMacintosh # Apple latin 1
|
||||
ibm-1276 Adobe-Standard-Encoding { IANA } csAdobeStandardEncoding # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276)
|
||||
ibm-1275 macintosh { IANA* } mac { MIME* } csMacintosh # Apple latin 1
|
||||
ibm-1276 Adobe-Standard-Encoding { IANA* } csAdobeStandardEncoding # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276)
|
||||
ibm-1277 Adobe-Latin1-Encoding
|
||||
ibm-1280 macgr # Apple Greek
|
||||
ibm-1281 mactr # Apple Turkish
|
||||
ibm-1282 macce # Apple Central Europe
|
||||
ibm-1283 maccy # Apple Cyrillic
|
||||
|
||||
ibm-1051 hp-roman8 { IANA } roman8 r8 csHPRoman8 # HP Latin1
|
||||
ibm-1051 hp-roman8 { IANA* } roman8 r8 csHPRoman8 # HP Latin1
|
||||
|
||||
ibm-806_P100-2000 ibm-806 ibm-806_VSUB # PC ISCII-91: Indian Script Code
|
||||
ibm-1006_P100-2000 ibm-1006 ibm-1006_VPUA # Urdu
|
||||
@ -265,120 +430,127 @@ ibm-9066_P100-2000 ibm-9066 ibm-9066_VSUB # Thai PC
|
||||
|
||||
# Added for more euro support
|
||||
|
||||
ibm-849 cp1131 # PC Belarus (w/ euro update)
|
||||
ibm-848 cp1125 # PC Ukraine (w/ euro update)
|
||||
ibm-5104 cp1008 # 8-bit Arabic (w/ euro update)
|
||||
ibm-9238 cp1046 # PC Arabic Extended (w/ euro update)
|
||||
ibm-1363_P110-2000 ibm-1363 ibm-1363_VASCII_VSUB_VPUA ibm-1362 # Korean KSC Korean Windows MBCS
|
||||
ibm-1363_P11B-2000 ibm-1363_VSUB_VPUA windows-949 cp949 cp1363 ksc korean
|
||||
ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update)
|
||||
ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update)
|
||||
ibm-849 cp1131 # PC Belarus (w/ euro update)
|
||||
ibm-848 cp1125 # PC Ukraine (w/ euro update)
|
||||
ibm-5104 cp1008 # 8-bit Arabic (w/ euro update)
|
||||
ibm-9238 cp1046 # PC Arabic Extended (w/ euro update)
|
||||
ibm-1363_P110-2000 ibm-1363 ibm-1363_VASCII_VSUB_VPUA ibm-1362 # Korean KSC Korean Windows MBCS
|
||||
|
||||
ibm-1363_P11B-2000 ibm-1363_VSUB_VPUA
|
||||
windows-949
|
||||
cp949
|
||||
cp1363
|
||||
ksc
|
||||
# korean # The korean alias from IANA goes to ibm-949_P11A-2000
|
||||
|
||||
ibm-5210 cp1114 # PC SBCS Big-5 (w/ euro update)
|
||||
ibm-21427 cp947 # PC DBCS Big-5 (w/ euro update)
|
||||
|
||||
# EBCDIC codepages according to the CDRA
|
||||
|
||||
# without Euro
|
||||
ibm-37 IBM037 { IANA } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 cp037 037 # EBCDIC US
|
||||
ibm-273 IBM273 { IANA } csIBM273 ebcdic-de cp273 cpibm273 273 # EBCDIC Germany, Austria...
|
||||
ibm-277 IBM277 { IANA } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 277 # EBCDIC Denmark...
|
||||
ibm-278 IBM278 { IANA } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 278 # EBCDIC Sweden
|
||||
ibm-280 IBM280 { IANA } ebcdic-cp-it csIBM280 cp280 cpibm280 280 # EBCDIC Italy
|
||||
ibm-284 IBM284 { IANA } ebcdic-cp-es csIBM284 cp284 cpibm284 284 # EBCDIC Spain
|
||||
ibm-285 IBM285 { IANA } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 285 # EBCDIC UK Ireland
|
||||
ibm-290 IBM290 { IANA } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana)
|
||||
ibm-297 IBM297 { IANA } ebcdic-cp-fr csIBM297 cp297 cpibm297 297 # EBCDIC France
|
||||
ibm-420 IBM420 { IANA } ebcdic-cp-ar1 csIBM420 cp420 420
|
||||
ibm-424 IBM424 { IANA } ebcdic-cp-he csIBM424 cp424 424
|
||||
ibm-500 IBM500 { IANA } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch 500 # EBCDIC International Latin1
|
||||
ibm-803 cp803 # Old EBCDIC Hebrew
|
||||
ibm-834 cp834 # Korean DBCS Host
|
||||
ibm-835 cp835 # DBCS T-Ch Host
|
||||
ibm-870_P100-2000 IBM870 { IANA } ibm-870 CP870 ibm-870_STD ebcdic-cp-roece ebcdic-cp-yu csIBM870
|
||||
ibm-871 IBM871 { IANA } ebcdic-cp-is csIBM871 cpibm871 cp871 871 # EBCDIC Iceland
|
||||
ibm-875_P100-2000 ibm-875 cp875 ibm-875 875 ibm-875_STD
|
||||
ibm-918_P100-2000 IBM918 { IANA } ibm-918 CP918 ibm-918_VPUA ebcdic-cp-ar2 csIBM918
|
||||
ibm-918_X100-2000 ibm-918_STD
|
||||
ibm-930 cp930 cpibm930 930 # Japan EBCDIC MIXED
|
||||
ibm-933 cp933 cpibm933 933 # Korea EBCDIC MIXED
|
||||
ibm-935 cp935 cpibm935 935 # China EBCDIC MIXED
|
||||
ibm-937 cp937 cpibm937 937 # Taiwan EBCDIC MIXED
|
||||
ibm-939 cp939 939 # Host MBCS (Latin-Kanji) EBCDIC
|
||||
ibm-1025_P100-2000 ibm-1025 ibm-1025_STD
|
||||
ibm-1026_P100-2000 IBM1026 { IANA } ibm-1026 CP1026 csIBM1026 ibm-1026_STD
|
||||
ibm-1047 cpibm1047 # EBCDIC Open systems Latin1
|
||||
ibm-1097_P100-2000 ibm-1097 ibm-1097_VPUA
|
||||
ibm-1097_X100-2000 ibm-1097_STD
|
||||
ibm-1112_P100-2000 ibm-1112 cp1112 1112 ibm-1112_STD
|
||||
ibm-1122_P100-2000 ibm-1122 cp1122 ibm-1122 1122 ibm-1122_STD
|
||||
ibm-1130_P100-2000 ibm-1130 ibm-1130_STD
|
||||
ibm-1132_P100-2000 ibm-1132 ibm-1132_STD
|
||||
ibm-1137_P100-2000 ibm-1137 ibm-1137_STD
|
||||
ibm-1388_P103-2001 ibm-1388 # S-Ch DBCS-Host Data GBK mixed MBCS
|
||||
ibm-9030_P100-2000 ibm-9030 ibm-9030_STD
|
||||
ibm-37 IBM037 { IANA* } ibm-037 cpibm37 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp37 cp037 037 # EBCDIC US
|
||||
ibm-273 IBM273 { IANA* } csIBM273 ebcdic-de cp273 cpibm273 273 # EBCDIC Germany, Austria...
|
||||
ibm-277 IBM277 { IANA* } EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 ebcdic-dk cp277 cpibm277 277 # EBCDIC Denmark...
|
||||
ibm-278 IBM278 { IANA* } ebcdic-cp-fi ebcdic-cp-se csIBM278 ebcdic-sv cp278 cpibm278 278 # EBCDIC Sweden
|
||||
ibm-280 IBM280 { IANA* } ebcdic-cp-it csIBM280 cp280 cpibm280 280 # EBCDIC Italy
|
||||
ibm-284 IBM284 { IANA* } ebcdic-cp-es csIBM284 cp284 cpibm284 284 # EBCDIC Spain
|
||||
ibm-285 IBM285 { IANA* } ebcdic-cp-gb csIBM285 ebcdic-gb cp285 cpibm285 285 # EBCDIC UK Ireland
|
||||
ibm-290 IBM290 { IANA* } EBCDIC-JP-kana csIBM290 cp290 # host SBCS (Katakana)
|
||||
ibm-297 IBM297 { IANA* } ebcdic-cp-fr csIBM297 cp297 cpibm297 297 # EBCDIC France
|
||||
ibm-420 IBM420 { IANA* } ebcdic-cp-ar1 csIBM420 cp420 420
|
||||
ibm-424 IBM424 { IANA* } ebcdic-cp-he csIBM424 cp424 424
|
||||
ibm-500 IBM500 { IANA* } cpibm500 csIBM500 cp500 ebcdic-cp-be ebcdic-cp-ch 500 # EBCDIC International Latin1
|
||||
ibm-803 cp803 # Old EBCDIC Hebrew
|
||||
ibm-834 cp834 # Korean DBCS Host
|
||||
ibm-835 cp835 # DBCS T-Ch Host
|
||||
ibm-870_P100-2000 IBM870 { IANA* } ibm-870 CP870 ibm-870_STD ebcdic-cp-roece ebcdic-cp-yu csIBM870
|
||||
ibm-871 IBM871 { IANA* } ebcdic-cp-is csIBM871 cpibm871 cp871 871 # EBCDIC Iceland
|
||||
ibm-875_P100-2000 ibm-875 cp875 875 ibm-875_STD
|
||||
ibm-918_P100-2000 IBM918 { IANA* } ibm-918 CP918 ibm-918_VPUA ebcdic-cp-ar2 csIBM918
|
||||
ibm-918_X100-2000 ibm-918_STD
|
||||
ibm-930 cp930 cpibm930 930 # Japan EBCDIC MIXED
|
||||
ibm-933 cp933 cpibm933 933 # Korea EBCDIC MIXED
|
||||
ibm-935 cp935 cpibm935 935 # China EBCDIC MIXED
|
||||
ibm-937 cp937 cpibm937 937 # Taiwan EBCDIC MIXED
|
||||
ibm-939 cp939 939 # Host MBCS (Latin-Kanji) EBCDIC
|
||||
ibm-1025_P100-2000 ibm-1025 ibm-1025_STD
|
||||
ibm-1026_P100-2000 IBM1026 { IANA* } ibm-1026 CP1026 csIBM1026 ibm-1026_STD
|
||||
ibm-1047 cpibm1047 # EBCDIC Open systems Latin1
|
||||
ibm-1097_P100-2000 ibm-1097 ibm-1097_VPUA
|
||||
ibm-1097_X100-2000 ibm-1097_STD
|
||||
ibm-1112_P100-2000 ibm-1112 cp1112 1112 ibm-1112_STD
|
||||
ibm-1122_P100-2000 ibm-1122 cp1122 1122 ibm-1122_STD
|
||||
ibm-1130_P100-2000 ibm-1130 ibm-1130_STD
|
||||
ibm-1132_P100-2000 ibm-1132 ibm-1132_STD
|
||||
ibm-1137_P100-2000 ibm-1137 ibm-1137_STD
|
||||
ibm-1388_P103-2001 ibm-1388 # S-Ch DBCS-Host Data GBK mixed MBCS
|
||||
ibm-9030_P100-2000 ibm-9030 ibm-9030_STD
|
||||
|
||||
#ibm-1046 # PC Arabic without EURO
|
||||
#ibm-1046 # PC Arabic without EURO
|
||||
# with Euro
|
||||
ibm-1123 cpibm1123 # EBCDIC Cyrillic Ukraine
|
||||
ibm-1140 cpibm1140 IBM01140 { IANA } # EBCDIC US...
|
||||
ibm-1141 cpibm1141 IBM01141 { IANA } # EBCDIC Germany, Austria...
|
||||
ibm-1142 cpibm1142 IBM01142 { IANA } # EBCDIC Denmark...
|
||||
ibm-1143 cpibm1143 IBM01143 { IANA } # EBCDIC Sweden
|
||||
ibm-1144 cpibm1144 # EBCDIC Italy
|
||||
ibm-1145 cpibm1145 # EBCDIC Spain
|
||||
ibm-1146 cpibm1146 # EBCDIC UK Ireland
|
||||
ibm-1147 cpibm1147 # EBCDIC France
|
||||
ibm-1148 cpibm1148 # EBCDIC International Latin1
|
||||
ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland
|
||||
ibm-1153 cpibm1153 # EBCDIC latin 2
|
||||
ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual
|
||||
ibm-1155 cpibm1155 # EBCDIC Turkey
|
||||
ibm-1156 cpibm1156 # EBCDIC Baltic Multilingual
|
||||
ibm-1157 cpibm1157 # EBCDIC Estonia
|
||||
ibm-1158 cp1123 cpibm1158 1123 # EBCDIC Cyrillic Ukraine
|
||||
ibm-1159 cp28709 # SBCS T-Ch Host
|
||||
ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand
|
||||
ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam
|
||||
ibm-1364_P110-2000 ibm-1364_VPUA ibm-1364 cp1364 # Korean Host Mixed
|
||||
ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED
|
||||
ibm-1390 cpibm1390 # Japan EBCDIC MIXED
|
||||
ibm-1399 # Host MBCS (Latin-Kanji)
|
||||
ibm-4899 cpibm4899 # Old EBCDIC Hebrew
|
||||
ibm-4971 cpibm4971 # EBCDIC Greek
|
||||
ibm-5123 cp1027 # Host Roman Jis
|
||||
ibm-8482 # host SBCS (Katakana)
|
||||
ibm-9027 # DBCS T-Ch Host
|
||||
ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update)
|
||||
ibm-16684 cp300 # Jis + Roman Jis Host
|
||||
ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic
|
||||
ibm-1123 cpibm1123 # EBCDIC Cyrillic Ukraine
|
||||
ibm-1140 cpibm1140 IBM01140 { IANA* } # EBCDIC US...
|
||||
ibm-1141 cpibm1141 IBM01141 { IANA* } # EBCDIC Germany, Austria...
|
||||
ibm-1142 cpibm1142 IBM01142 { IANA* } # EBCDIC Denmark...
|
||||
ibm-1143 cpibm1143 IBM01143 { IANA* } # EBCDIC Sweden
|
||||
ibm-1144 cpibm1144 # EBCDIC Italy
|
||||
ibm-1145 cpibm1145 # EBCDIC Spain
|
||||
ibm-1146 cpibm1146 # EBCDIC UK Ireland
|
||||
ibm-1147 cpibm1147 # EBCDIC France
|
||||
ibm-1148 cpibm1148 # EBCDIC International Latin1
|
||||
ibm-1149 cpibm1149 ebcdic-is # EBCDIC Iceland
|
||||
ibm-1153 cpibm1153 # EBCDIC latin 2
|
||||
ibm-1154 cp1025 cpibm1154 # EBCDIC Cyrillic Multilingual
|
||||
ibm-1155 cpibm1155 # EBCDIC Turkey
|
||||
ibm-1156 cpibm1156 # EBCDIC Baltic Multilingual
|
||||
ibm-1157 cpibm1157 # EBCDIC Estonia
|
||||
ibm-1158 cp1123 cpibm1158 1123 # EBCDIC Cyrillic Ukraine
|
||||
ibm-1159 cp28709 # SBCS T-Ch Host
|
||||
ibm-1160 cp9030 cpibm1160 # EBCDIC Thailand
|
||||
ibm-1164 cp1130 cpibm1164 # EBCDIC Viet Nam
|
||||
ibm-1364_P110-2000 ibm-1364_VPUA ibm-1364 cp1364 # Korean Host Mixed
|
||||
ibm-1371 cpibm1371 # Taiwan EBCDIC MIXED
|
||||
ibm-1390 cpibm1390 # Japan EBCDIC MIXED
|
||||
ibm-1399 # Host MBCS (Latin-Kanji)
|
||||
ibm-4899 cpibm4899 # Old EBCDIC Hebrew
|
||||
ibm-4971 cpibm4971 # EBCDIC Greek
|
||||
ibm-5123 cp1027 # Host Roman Jis
|
||||
ibm-8482 # host SBCS (Katakana)
|
||||
ibm-9027 # DBCS T-Ch Host
|
||||
ibm-12712 cpibm12712 ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update)
|
||||
ibm-16684 cp300 # Jis + Roman Jis Host
|
||||
ibm-16804 cpibm16804 ebcdic-ar # EBCDIC Arabic
|
||||
|
||||
# unsupported IANA names
|
||||
# ebcdic-it csEBCDICIT
|
||||
# ebcdic-es csEBCDICES
|
||||
# csEBCDICFR ebcdic-fr
|
||||
# ibm-274 IBM274 { IANA } cp274 csIBM274 ebcdic-be
|
||||
# ibm-870 IBM870 { IANA } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870 870
|
||||
# ibm-274 IBM274 { IANA* } cp274 csIBM274 ebcdic-be
|
||||
# ibm-870 IBM870 { IANA* } ebcdic-cp-roece ebcdic-cp-yu csIBM870 cp870 870
|
||||
|
||||
# EBCDIC codepages for S/390, with LF and NL codes swapped
|
||||
|
||||
ebcdic-xml-us
|
||||
|
||||
# without Euro
|
||||
ibm-37-s390 ibm037-s390 # EBCDIC US
|
||||
ibm-1047-s390 # EBCDIC for S/390 Open Edition
|
||||
ibm-37-s390 ibm037-s390 # EBCDIC US
|
||||
ibm-1047-s390 # EBCDIC for S/390 Open Edition
|
||||
|
||||
# with Euro
|
||||
ibm-1140-s390 # EBCDIC US
|
||||
ibm-1142-s390 # EBCDIC Denmark
|
||||
ibm-1143-s390 # EBCDIC Sweden
|
||||
ibm-1144-s390 # EBCDIC Italy
|
||||
ibm-1145-s390 # EBCDIC Spain
|
||||
ibm-1146-s390 # EBCDIC UK Ireland
|
||||
ibm-1147-s390 # EBCDIC France
|
||||
ibm-1148-s390 # EBCDIC International Latin1
|
||||
ibm-1149-s390 # EBCDIC Iceland
|
||||
ibm-1153-s390 # EBCDIC latin 2
|
||||
ibm-12712-s390 # EBCDIC Hebrew
|
||||
ibm-16804-s390 # EBCDIC Arabic
|
||||
ibm-1140-s390 # EBCDIC US
|
||||
ibm-1142-s390 # EBCDIC Denmark
|
||||
ibm-1143-s390 # EBCDIC Sweden
|
||||
ibm-1144-s390 # EBCDIC Italy
|
||||
ibm-1145-s390 # EBCDIC Spain
|
||||
ibm-1146-s390 # EBCDIC UK Ireland
|
||||
ibm-1147-s390 # EBCDIC France
|
||||
ibm-1148-s390 # EBCDIC International Latin1
|
||||
ibm-1149-s390 # EBCDIC Iceland
|
||||
ibm-1153-s390 # EBCDIC latin 2
|
||||
ibm-12712-s390 # EBCDIC Hebrew
|
||||
ibm-16804-s390 # EBCDIC Arabic
|
||||
|
||||
# GB 18030 is partly algorithmic, using the MBCS converter
|
||||
gb18030 { IANA } ibm-1392
|
||||
gb18030 { IANA* } ibm-1392
|
||||
|
||||
|
@ -240,6 +240,8 @@ static void TestConvert()
|
||||
/*Testing ucnv_openU()*/
|
||||
{
|
||||
UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
|
||||
UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
|
||||
UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
|
||||
const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
|
||||
UChar illegalName[100];
|
||||
UConverter *converter=NULL;
|
||||
@ -269,8 +271,20 @@ static void TestConvert()
|
||||
if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
|
||||
log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
|
||||
}
|
||||
|
||||
err=U_ZERO_ERROR;
|
||||
ucnv_openU(firstSortedName, &err);
|
||||
if(err!=U_FILE_ACCESS_ERROR){
|
||||
log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
|
||||
}
|
||||
|
||||
err=U_ZERO_ERROR;
|
||||
ucnv_openU(lastSortedName, &err);
|
||||
if(err!=U_FILE_ACCESS_ERROR){
|
||||
log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
|
||||
}
|
||||
|
||||
err=U_ZERO_ERROR;
|
||||
|
||||
}
|
||||
log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
|
||||
{
|
||||
@ -455,6 +469,11 @@ static void TestConvert()
|
||||
char* index = NULL;
|
||||
strcpy(ucs_file_name, loadTestData(&err));
|
||||
|
||||
if(U_FAILURE(err)){
|
||||
log_err("Couldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
index=strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
|
||||
|
||||
if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
|
||||
@ -462,11 +481,6 @@ static void TestConvert()
|
||||
}
|
||||
|
||||
strcat(ucs_file_name,".."U_FILE_SEP_STRING);
|
||||
|
||||
if(U_FAILURE(err)){
|
||||
log_err("Couldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
|
||||
return;
|
||||
}
|
||||
strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
|
||||
|
||||
ucs_file_in = fopen(ucs_file_name,"rb");
|
||||
|
@ -67,7 +67,7 @@ static void TestUDataOpen(){
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
const char* memMap[][2]={
|
||||
{"tz", "dat"},
|
||||
{"cnvalias", "dat"},
|
||||
{"cnvalias", "icu"},
|
||||
{"unames", "dat"},
|
||||
{"ibm-1141", "cnv"}
|
||||
};
|
||||
@ -388,7 +388,7 @@ isAcceptable1(void *context,
|
||||
pInfo->dataFormat[1]==0x76 &&
|
||||
pInfo->dataFormat[2]==0x41 &&
|
||||
pInfo->dataFormat[3]==0x6c &&
|
||||
pInfo->formatVersion[0]==2 )
|
||||
pInfo->formatVersion[0]==3 )
|
||||
{
|
||||
log_verbose("The data from \"%s.%s\" IS acceptable using the verifing function isAcceptable1()\n", name, type);
|
||||
return TRUE;
|
||||
@ -473,7 +473,7 @@ static void TestUDataOpenChoiceDemo1() {
|
||||
|
||||
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
|
||||
|
||||
result=udata_openChoice(NULL, type, name[0], isAcceptable1, NULL, &status);
|
||||
result=udata_openChoice(NULL, "icu", name[0], isAcceptable1, NULL, &status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err("FAIL: udata_openChoice() failed name=%s, type=%s, \n errorcode=%s\n", name[0], type, myErrorName(status));
|
||||
} else {
|
||||
@ -624,7 +624,7 @@ static void TestUDataGetInfo() {
|
||||
|
||||
|
||||
log_verbose("Testing udata_getInfo() for cnvalias.dat\n");
|
||||
result=udata_open(NULL, type, name, &status);
|
||||
result=udata_open(NULL, "icu", name, &status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err("FAIL: udata_open() failed for path = NULL, name=%s, type=%s, \n errorcode=%s\n", path, name, type, myErrorName(status));
|
||||
return;
|
||||
@ -677,32 +677,34 @@ static void TestUDataGetInfo() {
|
||||
static void TestUDataGetMemory() {
|
||||
|
||||
UDataMemory *result;
|
||||
const uint16_t *table=NULL;
|
||||
const int32_t *table=NULL;
|
||||
uint16_t* intValue=0;
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
const char* name="cnvalias";
|
||||
const char* type="dat";
|
||||
const char* type;
|
||||
|
||||
const char* name2="test";
|
||||
|
||||
char* testPath=(char*)malloc(sizeof(char) * (strlen(u_getDataDirectory()) + strlen("testdata") +1 ) );
|
||||
char* testPath=(char*)malloc(sizeof(char) * (strlen(u_getDataDirectory()) + strlen("testdata") +1 ) );
|
||||
|
||||
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
|
||||
strcat(strcpy(testPath, u_getDataDirectory()), "testdata");
|
||||
|
||||
type="icu";
|
||||
log_verbose("Testing udata_getMemory for \"cnvalias.dat()\"\n");
|
||||
result=udata_openChoice(NULL, type, name, isAcceptable1, NULL, &status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err("FAIL: udata_openChoice() failed for name=%s, type=%s, \n errorcode=%s\n", name, type, myErrorName(status));
|
||||
return;
|
||||
}
|
||||
table=(const uint16_t *)udata_getMemory(result);
|
||||
table=(const uint32_t *)udata_getMemory(result);
|
||||
|
||||
/* The alias table may list more converters than what's actually available now. [grhoten] */
|
||||
if(ucnv_countAvailable() > table[1+2*(*table)]) /*???*/
|
||||
if(ucnv_countAvailable() > table[1]) /*???*/
|
||||
log_err("FAIL: udata_getMemory() failed ucnv_countAvailable returned = %d, expected = %d\n", ucnv_countAvailable(), table[1+2*(*table)]);
|
||||
|
||||
udata_close(result);
|
||||
|
||||
type="dat";
|
||||
log_verbose("Testing udata_getMemory for \"test.dat\"()\n");
|
||||
result=udata_openChoice(testPath, type, name2, isAcceptable3, NULL, &status);
|
||||
if(U_FAILURE(status)){
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user