From 921a9787cc86d582a1c89f085aae4e47b4c1942a Mon Sep 17 00:00:00 2001
From: Yves Arrouye
Date: Thu, 10 Aug 2000 01:32:29 +0000
Subject: [PATCH] ICU-322 support for standard names, which are tagged converter aliases.

X-SVN-Rev: 2161
---
 icu4c/source/common/ucnv.c         |  29 ++++++--
 icu4c/source/common/ucnv_io.c      | 111 +++++++++++++++++++++++++++--
 icu4c/source/common/ucnv_io.h      |  34 +++++++++
 icu4c/source/common/unicode/ucnv.h |  22 +++++-
 4 files changed, 183 insertions(+), 13 deletions(-)

diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c
index 21f8cae1f4..62df44ba5e 100644
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@@ -243,13 +243,20 @@ ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
   }
 }
 
-U_CAPI const char *ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode) {
-  if (!pErrorCode || U_FAILURE(*pErrorCode)) {
-    return NULL;
-  }
+U_CAPI uint16_t
+ucnv_countStandards(void) {
+  UErrorCode err = U_ZERO_ERROR; /* local status: this entry point takes no error parameter */
+  return ucnv_io_countStandards(&err); /* err is not inspected afterwards, only the count is returned */
+}
 
-  *pErrorCode = U_UNSUPPORTED_ERROR;
-  return NULL;
+U_CAPI const char *
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
+  return ucnv_io_getStandard(n, pErrorCode); /* forwards both arguments unchanged to the ucnv_io layer */
+}
+
+U_CAPI const char *
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode) {
+  return ucnv_io_getStandardName(name, standard, pErrorCode); /* replaces the former U_UNSUPPORTED_ERROR stub */
 }
 
 void ucnv_getSubstChars (const UConverter * converter,
@@ -1258,3 +1265,13 @@ ucnv_getInvalidUChars (const UConverter * converter,
       uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
     }
 }
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
+
diff --git a/icu4c/source/common/ucnv_io.c b/icu4c/source/common/ucnv_io.c
index 2039d0c839..7a0de46917 100644
--- a/icu4c/source/common/ucnv_io.c
+++ b/icu4c/source/common/ucnv_io.c
@@ -48,7 +48,7 @@
  * followed by its aliases.
All offsets to strings are offsets from the
  * beginning of the data.
  *
- * More formal file data structure (data format 2.0):
+ * More formal file data structure (data format 2.1):
  *
  * uint16_t aliasCount;
  * uint16_t aliasOffsets[aliasCount];
@@ -60,9 +60,18 @@
  *     uint16_t aliasCount;
  * } converters[converterCount];
  *
+ * uint16_t tagCount;
+ * uint16_t taggedAliasesOffsets[tagCount][converterCount];
+ * char tags[] = { "Tag0\0Tag1\0..." };
+ *
  * char strings[]={
  *     "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\Alias1\0..."
  * };
+ *
+ * The code included here can read versions 2 and 2.1 of the data format.
+ * Version 2 does not have tag information, but since the code never refers
+ * to strings[] by its base offset, it's okay.
+ *
  */
 
 #define DATA_NAME "cnvalias"
@@ -71,6 +80,9 @@
 static UDataMemory *aliasData=NULL;
 static const uint16_t *aliasTable=NULL;
 
+static const uint16_t *converterTable = NULL;
+static const uint16_t *tagTable = NULL;
+
 static UBool
 isAcceptable(void *context,
              const char *type, const char *name,
@@ -83,7 +95,7 @@ isAcceptable(void *context,
         pInfo->dataFormat[1]==0x76 &&
         pInfo->dataFormat[2]==0x41 &&
         pInfo->dataFormat[3]==0x6c &&
-        pInfo->formatVersion[0]==2;
+        pInfo->formatVersion[0]>1;
 }
 
 static UBool
@@ -95,6 +107,7 @@ haveAliasData(UErrorCode *pErrorCode) {
     /* load converter alias data from file if necessary */
     if(aliasData==NULL) {
         UDataMemory *data;
+        UDataInfo info;
         const uint16_t *table=NULL;
 
         /* open the data outside the mutex block */
@@ -104,6 +117,8 @@ haveAliasData(UErrorCode *pErrorCode) {
         }
 
         table=(const uint16_t *)udata_getMemory(data);
+        info.size=sizeof(UDataInfo); /* udata_getInfo() expects the caller to set size first */
+        udata_getInfo(data, &info);
 
         /* in the mutex block, set the data for this process */
         umtx_lock(NULL);
@@ -112,6 +127,13 @@ haveAliasData(UErrorCode *pErrorCode) {
             data=NULL;
             aliasTable=table;
             table=NULL;
+            converterTable = aliasTable + 1 + 2 * *aliasTable;
+
+            /* tags exist from data format 2.1 on, i.e. major version 2 with minor >= 1 */
+            if (info.formatVersion[0] > 2 ||
+                (info.formatVersion[0] == 2 && info.formatVersion[1] >= 1)) {
+                tagTable = converterTable + 1 + 2 * *converterTable;
+            }
         }
         umtx_unlock(NULL);
 
@@ -136,6 +158,27 @@ isAlias(const char *alias, UErrorCode *pErrorCode) {
     }
 }
 
+/*
+ * Return the index of the standard (tag) called tagname, or -1 if there is
+ * no tag table, tagname is NULL, or no tag matches.  charsetNameCmp() is
+ * already case-insensitive, so the name is compared as given.
+ */
+static int16_t getTagNumber(const char *tagname) {
+    if (tagTable && tagname) {
+        int16_t tag, count = (int16_t) *tagTable;
+        const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
+
+        for (tag = 0; tag < count; ++tag) {
+            if (!charsetNameCmp(tagname, tags)) {
+                return tag;
+            }
+            tags += strlen(tags) + 1; /* step over the terminating NUL as well */
+        }
+    }
+
+    return -1;
+}
+
 /**
  * Do a fuzzy compare of a two converter/alias names. The comparison
  * is case-insensitive. It also ignores the characters '-', '_', and
@@ -271,10 +314,63 @@ ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
     return NULL;
 }
 
+U_CFUNC uint16_t
+ucnv_io_countStandards(UErrorCode *pErrorCode) {
+    if (haveAliasData(pErrorCode)) {
+        if (!tagTable) {
+            /* the loaded data has no tag table (format older than 2.1) */
+            *pErrorCode = U_INVALID_FORMAT_ERROR;
+            return 0;
+        }
+
+        return *tagTable;
+    }
+
+    return 0;
+}
+
+U_CFUNC const char *
+ucnv_io_getStandard(uint16_t n, UErrorCode *pErrorCode) {
+    if (haveAliasData(pErrorCode) && tagTable && n < *tagTable) {
+        /* tags[] follows taggedAliasesOffsets[tagCount][converterCount] */
+        const char *p = (const char *) (tagTable + 1 + *tagTable * *converterTable);
+        uint16_t i;
+
+        /* step over the n preceding NUL-terminated tag names */
+        for (i = 0; i < n; ++i) {
+            p += strlen(p) + 1;
+        }
+
+        return p;
+    }
+
+    return NULL;
+}
+
+U_CFUNC const char *
+ucnv_io_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
+    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
+        const uint16_t *p = findAlias(alias);
+        if(p != NULL) {
+            int16_t tag = getTagNumber(standard);
+
+            if (tag > -1) {
+                /* NOTE(review): assumes findAlias() returns a pointer into the
+                   converter table, so that (p - converterTable) / 2 is the
+                   converter's index -- confirm against findAlias() */
+                uint16_t offset = tagTable[1 + tag * *converterTable + (p - converterTable) / 2];
+                return offset ? (const char *) aliasTable + offset : NULL;
+            }
+        }
+    }
+
+    return NULL;
+}
+
 U_CFUNC uint16_t
 ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
     if(haveAliasData(pErrorCode)) {
-        return aliasTable[1+2*(*aliasTable)];
+        return *converterTable;
     }
     return 0;
 }
@@ -282,7 +378,7 @@ ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
 U_CFUNC const char *
 ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
     if(haveAliasData(pErrorCode)) {
-        const uint16_t *p=aliasTable+1+2*(*aliasTable);
+        const uint16_t *p=converterTable;
         if(n<*p) {
             return (const char *)aliasTable+p[1+2*n];
         }
@@ -293,7 +389,7 @@ ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
 U_CFUNC void
 ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
     if(haveAliasData(pErrorCode)) {
-        const uint16_t *p=aliasTable+1+2*(*aliasTable);
+        const uint16_t *p=converterTable;
         uint16_t count=*p++;
         while(count>0) {
             *aliases++=(const char *)aliasTable+*p;
@@ -407,3 +503,13 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
         }
     }
 }
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
+
diff --git a/icu4c/source/common/ucnv_io.h b/icu4c/source/common/ucnv_io.h
index b83bc27eb9..8921e74f69 100644
--- a/icu4c/source/common/ucnv_io.h
+++ b/icu4c/source/common/ucnv_io.h
@@ -44,6 +44,31 @@ ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCo
 U_CFUNC const char *
 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
 
+/**
+ * Return the number of all standard names.
+ */
+U_CFUNC uint16_t
+ucnv_io_countStandards(UErrorCode *pErrorCode);
+
+/**
+ * Return the (n)th standard name in mixed case, or NULL
+ * if there is none (typically, if the data cannot be loaded).
+ * 0 <= index < ucnv_io_countStandards().
+ */
+U_CFUNC const char *
+ucnv_io_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Map a converter alias name to a canonical converter name according to
+ * a given standard.
+ * The alias and the standard are matched case-insensitively; the converter
+ * name is returned in mixed case.
+ * Returns NULL if the alias is not found, the standard does not exist, or
+ * the standard does not specify a name for the alias.
+ */
+U_CFUNC const char *
+ucnv_io_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
 /**
  * Return the number of all converter names.
  */
@@ -107,3 +132,12 @@ U_CFUNC int charsetNameCmp(const char *name1, const char *name2);
 
 #endif /* _UCNV_IO */
 
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h
index 2175064ab2..a5ae551692 100644
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@@ -760,7 +760,25 @@ U_CAPI void
 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
 
 /**
- * returns a standard name for a given converter name.
+ * Gives the number of standards associated with converter names.
+ * @return number of standards, or 0 if the alias data is unavailable or has no standards
+ * @stable
+ */
+U_CAPI uint16_t
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at the given index of the standard list.
+ * @param n index in the standard list, 0 <= n < ucnv_countStandards()
+ * @param pErrorCode result of operation
+ * @return the name of the standard at the given index, or NULL if there is none
+ * @stable
+ */
+U_CAPI const char *
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
  *
  * @param name original converter name
  * @param standard name of the standard governing the names; MIME and IANA
@@ -768,7 +786,7 @@ ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
  * @return returns the standard converter name;
  * if a standard converter name cannot be determined,
  * then NULL is returned
- *
+ * @stable
  */
 U_CAPI const char *ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
 