ICU-322 support for standard names, which are tagged converter

aliases.

X-SVN-Rev: 2161
This commit is contained in:
Yves Arrouye 2000-08-10 01:32:29 +00:00
parent 563d62bc75
commit 921a9787cc
4 changed files with 183 additions and 13 deletions

View File

@ -243,13 +243,20 @@ ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
}
}
U_CAPI const char *ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode) {
if (!pErrorCode || U_FAILURE(*pErrorCode)) {
return NULL;
}
/*
 * Public entry point: number of standards known to the converter alias data.
 * This function has no error parameter, so the status of the underlying
 * lookup is kept in a local and not reported to the caller.
 */
U_CAPI uint16_t
ucnv_countStandards(void) {
    UErrorCode status = U_ZERO_ERROR;
    uint16_t standardCount = ucnv_io_countStandards(&status);

    return standardCount;
}
*pErrorCode = U_UNSUPPORTED_ERROR;
return NULL;
/*
 * Public entry point: name of the n-th standard.
 * Forwards directly to ucnv_io_getStandard(); the index and the error code
 * are passed through unchanged.
 */
U_CAPI const char *
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    const char *standardName = ucnv_io_getStandard(n, pErrorCode);

    return standardName;
}
/*
 * Public entry point: name of a converter according to a given standard.
 * Forwards directly to ucnv_io_getStandardName(); all arguments are passed
 * through unchanged.
 */
U_CAPI const char *
ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode) {
    const char *standardName = ucnv_io_getStandardName(name, standard, pErrorCode);

    return standardName;
}
void ucnv_getSubstChars (const UConverter * converter,
@ -1258,3 +1265,13 @@ ucnv_getInvalidUChars (const UConverter * converter,
uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
}
}
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View File

@ -48,7 +48,7 @@
* followed by its aliases. All offsets to strings are offsets from the
* beginning of the data.
*
* More formal file data structure (data format 2.0):
* More formal file data structure (data format 2.1):
*
* uint16_t aliasCount;
* uint16_t aliasOffsets[aliasCount];
@ -60,9 +60,18 @@
* uint16_t aliasCount;
* } converters[converterCount];
*
* uint16_t tagCount;
* uint16_t taggedAliasesOffsets[tagCount][converterCount];
* char tags[] = { "Tag0\0Tag1\0..." };
*
* char strings[]={
* "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\0Alias1\0..."
* };
*
* The code included here can read versions 2 and 2.1 of the data format.
* Version 2 does not have tag information, but since the code never refers
* to strings[] by its base offset, it's okay.
*
*/
#define DATA_NAME "cnvalias"
@ -71,6 +80,9 @@
static UDataMemory *aliasData=NULL;
static const uint16_t *aliasTable=NULL;
static const uint16_t *converterTable = NULL;
static const uint16_t *tagTable = NULL;
static UBool
isAcceptable(void *context,
const char *type, const char *name,
@ -83,7 +95,7 @@ isAcceptable(void *context,
pInfo->dataFormat[1]==0x76 &&
pInfo->dataFormat[2]==0x41 &&
pInfo->dataFormat[3]==0x6c &&
pInfo->formatVersion[0]==2;
pInfo->formatVersion[0]>1;
}
static UBool
@ -95,6 +107,7 @@ haveAliasData(UErrorCode *pErrorCode) {
/* load converter alias data from file if necessary */
if(aliasData==NULL) {
UDataMemory *data;
UDataInfo info;
const uint16_t *table=NULL;
/* open the data outside the mutex block */
@ -104,6 +117,7 @@ haveAliasData(UErrorCode *pErrorCode) {
}
table=(const uint16_t *)udata_getMemory(data);
udata_getInfo(data, &info);
/* in the mutex block, set the data for this process */
umtx_lock(NULL);
@ -112,6 +126,11 @@ haveAliasData(UErrorCode *pErrorCode) {
data=NULL;
aliasTable=table;
table=NULL;
converterTable = aliasTable + 1 + 2 * *aliasTable;
if (info.formatVersion[0] > 2) {
tagTable = converterTable + 1 + 2 * *converterTable;
}
}
umtx_unlock(NULL);
@ -136,6 +155,31 @@ isAlias(const char *alias, UErrorCode *pErrorCode) {
}
}
/**
 * Look up the index of a tag (standard) name in the tag table.
 *
 * The tag names are stored as consecutive NUL-terminated strings located
 * right after the tagCount * converterCount matrix of offsets.
 *
 * @param tagname NUL-terminated tag name to search for; only its first
 *                sizeof(name) - 1 characters are used for the comparison
 * @return the zero-based index of the matching tag, or -1 if no tag table
 *         is available or no tag matches
 */
static int16_t getTagNumber(const char *tagname) {
    if (tagTable) {
        int16_t tag, count = (int16_t) *tagTable;
        const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
        char name[100];
        size_t i;

        /* convert the tag name to lowercase to do case-insensitive comparisons */
        for (i = 0; i < sizeof(name) - 1 && *tagname; ++i) {
            name[i] = uprv_tolower(*tagname++);
        }
        name[i] = 0;

        for (tag = 0; tag < count; ++tag) {
            if (!charsetNameCmp(name, tags)) {
                return tag;
            }
            /*
             * Step over this tag AND its terminating NUL; without the + 1
             * the pointer would stay on the NUL and every later comparison
             * would be made against an empty string.
             */
            tags += strlen(tags) + 1;
        }
    }

    return -1;
}
/**
* Do a fuzzy compare of two converter/alias names. The comparison
* is case-insensitive. It also ignores the characters '-', '_', and
@ -271,10 +315,57 @@ ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
return NULL;
}
/*
 * Return the number of standards (tags) stored in the alias data.
 *
 * Returns 0 when the alias data is not available. When the data is loaded
 * but carries no tag table, *pErrorCode is set to U_INVALID_FORMAT_ERROR
 * and 0 is returned.
 */
U_CFUNC uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return 0;
    }

    if (tagTable == NULL) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* the first entry of the tag table is the tag count */
    return tagTable[0];
}
/*
 * Return the name of the n-th standard (tag), or NULL if the alias data is
 * not available, has no tag table, or n is not a valid index
 * (valid indexes are 0 <= n < tag count).
 *
 * The tag names are consecutive NUL-terminated strings stored right after
 * the tagCount * converterCount matrix of uint16_t offsets.
 */
U_CFUNC const char *
ucnv_io_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    if (haveAliasData(pErrorCode) && tagTable) {
        uint16_t count = *tagTable;

        /* reject out-of-range indexes, including n == count */
        if (n < count) {
            /*
             * Do the offset arithmetic in uint16_t units first and cast
             * afterwards; casting tagTable before adding would advance by
             * bytes and land inside the offset matrix.
             */
            const char *p = (const char *) (tagTable + 1 + count * *converterTable);

            for (; n > 0; --n) {
                /* skip one tag name together with its terminating NUL */
                p += strlen(p) + 1;
            }
            return p;
        }
    }

    return NULL;
}
/*
 * Return the name that the given standard assigns to the converter
 * designated by alias.
 *
 * Returns NULL when the alias data is not available, the alias is rejected
 * by isAlias() or not found by findAlias(), the standard is unknown, or
 * the stored offset for that standard/converter pair is 0.
 */
U_CFUNC const char *
ucnv_io_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    const uint16_t *entry;
    int16_t tagIndex;
    uint16_t nameOffset;

    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return NULL;
    }

    entry = findAlias(alias);
    if (entry == NULL) {
        return NULL;
    }

    tagIndex = getTagNumber(standard);
    if (tagIndex < 0) {
        return NULL;
    }

    /*
     * Row = tag, column = (entry - converterTable) / 2.
     * NOTE(review): the column is presumably the converter's index; this
     * depends on what findAlias() returns, which is not visible here.
     */
    nameOffset = tagTable[1 + tagIndex * *converterTable + (entry - converterTable) / 2];
    if (nameOffset == 0) {
        return NULL;
    }

    /* string offsets are relative to the start of the alias data */
    return (const char *) aliasTable + nameOffset;
}
/*
 * Return the converter count stored in the alias data, or 0 if the alias
 * data cannot be loaded.
 */
U_CFUNC uint16_t
ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
/* the converter count sits right after the aliasCount alias/converter offset pairs */
return aliasTable[1+2*(*aliasTable)];
/* NOTE(review): unreachable as shown; reads the same slot through converterTable — confirm which line is meant to remain */
return *converterTable;
}
return 0;
}
@ -282,7 +373,7 @@ ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
U_CFUNC const char *
ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
const uint16_t *p=aliasTable+1+2*(*aliasTable);
const uint16_t *p=converterTable;
if(n<*p) {
return (const char *)aliasTable+p[1+2*n];
}
@ -293,7 +384,7 @@ ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
U_CFUNC void
ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode)) {
const uint16_t *p=aliasTable+1+2*(*aliasTable);
const uint16_t *p=converterTable;
uint16_t count=*p++;
while(count>0) {
*aliases++=(const char *)aliasTable+*p;
@ -407,3 +498,13 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
}
}
}
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View File

@ -44,6 +44,31 @@ ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCo
U_CFUNC const char *
ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
/**
* Return the number of all standard names.
*/
U_CFUNC uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode);
/**
* Return the (n)th standard name in mixed case, or NULL
* if there is none (typically, if the data cannot be loaded).
* 0 <= n < ucnv_io_countStandards().
*/
U_CFUNC const char *
ucnv_io_getStandard(uint16_t n, UErrorCode *pErrorCode);
/**
* Map a converter alias name to a canonical converter name according to
* a given standard.
* The alias and standard are searched for case-insensitively, the converter
* name is returned in mixed-case.
* Returns NULL if the alias is not found, the standard does not exist, or
* the standard does not specify a name for the alias.
*/
U_CFUNC const char *
ucnv_io_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode);
/**
* Return the number of all converter names.
*/
@ -107,3 +132,12 @@ U_CFUNC int
charsetNameCmp(const char *name1, const char *name2);
#endif /* _UCNV_IO */
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View File

@ -760,7 +760,25 @@ U_CAPI void
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
/**
* returns a standard name for a given converter name.
* Gives the number of standards associated with converter names.
* @return number of standards
* @stable
*/
U_CAPI uint16_t
ucnv_countStandards(void);
/**
* Gives the name of the standard at given index of standard list.
* @param n index in standard list
* @param pErrorCode result of operation
* @return returns the name of the standard at given index
* @stable
*/
U_CAPI const char *
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
/**
* Returns a standard name for a given converter name.
*
* @param name original converter name
* @param standard name of the standard governing the names; MIME and IANA
@ -768,7 +786,7 @@ ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
* @return returns the standard converter name;
* if a standard converter name cannot be determined,
* then <code>NULL</code> is returned
*
* @stable
*/
U_CAPI const char *ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);