ICU-1977 add additional Converter Alias functions

get all converters
get the canonical names from a tagged alias

X-SVN-Rev: 10121
This commit is contained in:
George Rhoten 2002-10-30 22:52:08 +00:00
parent 7efc9a43f1
commit 31ebf42323
5 changed files with 260 additions and 29 deletions

View File

@ -405,7 +405,7 @@ ucnv_compareNames(const char *name1, const char *name2) {
* search for an alias
* return the converter number index for gConverterList
*/
static uint32_t
static U_INLINE uint32_t
findConverter(const char *alias, UErrorCode *pErrorCode) {
uint32_t mid, start, limit;
uint32_t lastMid;
@ -444,6 +444,28 @@ findConverter(const char *alias, UErrorCode *pErrorCode) {
return UINT32_MAX;
}
/*
 * Report whether an alias is a member of one tagged alias list.
 *
 * alias      - the name to look for; must be non-NULL.
 * listOffset - offset of the list inside gTaggedAliasLists; an offset of 0
 *              is treated as "no list" and never matches.
 *
 * Returns TRUE when ucnv_compareNames() reports a match against any
 * non-zero entry of the list, FALSE otherwise.
 */
static U_INLINE UBool
isAliasInList(const char *alias, uint32_t listOffset) {
    const uint16_t *entries;
    uint32_t entryCount;
    uint32_t i;

    if (listOffset == 0) {
        return FALSE;
    }

    /* The value stored at listOffset is the element count; the entries follow it. */
    entryCount = gTaggedAliasLists[listOffset];
    entries = gTaggedAliasLists + listOffset + 1;

    for (i = 0; i < entryCount; ++i) {
        uint16_t stringIdx = entries[i];

        /* Zero entries are skipped without being compared. */
        if (stringIdx == 0) {
            continue;
        }
        if (ucnv_compareNames(alias, GET_STRING(stringIdx)) == 0) {
            return TRUE;
        }
    }
    return FALSE;
}
/*
* Search for a standard name of an alias (what is the default name
* that this standard uses?)
@ -464,7 +486,7 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *
*pErrorCode = myErr;
}
if (tagNum < (gTagListSize - UCNV_NUM_RESERVED_TAGS) && convNum < gConverterListSize) {
if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) {
listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum];
if (listOffset && gTaggedAliasLists[listOffset + 1]) {
return listOffset;
@ -477,28 +499,17 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *
*/
for (idx = 0; idx < gTaggedAliasArraySize; idx++) {
listOffset = gTaggedAliasArray[idx];
if (listOffset) {
uint32_t currAlias;
uint32_t listCount = gTaggedAliasLists[listOffset];
/* +1 to skip listCount */
const uint16_t *currList = gTaggedAliasLists + listOffset + 1;
for (currAlias = 0; currAlias < listCount; currAlias++) {
if (currList[currAlias]
&& ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
{
/*return listOffset;*/
uint32_t currTagNum = idx/gConverterListSize;
uint32_t currConvNum = (idx - currTagNum*gConverterListSize);
uint32_t tempListOffset = gTaggedAliasArray[tagNum*gConverterListSize + currConvNum];
if (tempListOffset && gTaggedAliasLists[tempListOffset + 1]) {
return tempListOffset;
}
/* else keep on looking */
/* We could speed this up by starting on the next row
because an alias is unique per row, right now.
This would change if alias versioning appears. */
}
if (listOffset && isAliasInList(alias, listOffset)) {
uint32_t currTagNum = idx/gConverterListSize;
uint32_t currConvNum = (idx - currTagNum*gConverterListSize);
uint32_t tempListOffset = gTaggedAliasArray[tagNum*gConverterListSize + currConvNum];
if (tempListOffset && gTaggedAliasLists[tempListOffset + 1]) {
return tempListOffset;
}
/* else keep on looking */
/* We could speed this up by starting on the next row
because an alias is unique per row, right now.
This would change if alias versioning appears. */
}
}
/* The standard doesn't know about the alias */
@ -511,6 +522,51 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *
return UINT32_MAX;
}
/*
 * Find the converter whose alias list for the given standard (tag)
 * contains the alias.
 *
 * alias      - the alias to look up.
 * standard   - the name of the standard (tag), resolved with getTagNumber().
 * pErrorCode - receives any status other than U_ZERO_ERROR reported by
 *              findConverter() (for example U_AMBIGUOUS_ALIAS_WARNING).
 *
 * Returns the converter number (an index into gConverterList), or
 * UINT32_MAX when the tag or the alias is unknown, or when no alias list
 * under this tag contains the alias.
 */
static uint32_t
findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    uint32_t idx;
    uint32_t listOffset;
    uint32_t convNum;
    UErrorCode myErr = U_ZERO_ERROR;
    uint32_t tagNum = getTagNumber(standard);

    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    convNum = findConverter(alias, &myErr);
    if (myErr != U_ZERO_ERROR) {
        *pErrorCode = myErr;
    }

    if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) {
        listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum];
        if (listOffset && isAliasInList(alias, listOffset)) {
            return convNum;
        }
        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
            /* Uh Oh! They used an ambiguous alias.
               We have to search one slice of the swiss cheese.
               We search only in the requested tag, not the whole thing.
               This may take a while.
            */
            uint32_t convStart = (tagNum)*gConverterListSize;
            uint32_t convLimit = (tagNum+1)*gConverterListSize;
            for (idx = convStart; idx < convLimit; idx++) {
                listOffset = gTaggedAliasArray[idx];
                if (listOffset && isAliasInList(alias, listOffset)) {
                    /* gTaggedAliasArray is indexed as tagNum*gConverterListSize + convNum,
                       so the converter that owns the matching list is the column
                       within this tag's row, not the initial guess in convNum
                       (which was already checked above and did not match). */
                    return idx - convStart;
                }
            }
            /* The standard doesn't know about the alias */
        }
        /* else no canonical name */
    }
    /* else converter or tag not found */

    return UINT32_MAX;
}
U_CFUNC const char *
ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) {
if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
@ -609,9 +665,9 @@ ucnv_openStandardNames(const char *convName,
uprv_free(myEnum);
return NULL;
}
myEnum->context = myContext;
myContext->listOffset = listOffset;
myContext->listIdx = 0;
myEnum->context = myContext;
}
/* else converter or tag not found */
}
@ -690,7 +746,7 @@ U_CFUNC uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode) {
if (haveAliasData(pErrorCode)) {
/* Don't include the empty list */
return (uint16_t)(gTagListSize - UCNV_NUM_RESERVED_TAGS);
return (uint16_t)(gTagListSize - UCNV_NUM_HIDDEN_TAGS);
}
return 0;
@ -699,7 +755,7 @@ ucnv_io_countStandards(UErrorCode *pErrorCode) {
U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
if (haveAliasData(pErrorCode)) {
if (n < gTagListSize - UCNV_NUM_RESERVED_TAGS) {
if (n < gTagListSize - UCNV_NUM_HIDDEN_TAGS) {
return GET_STRING(gTagList[n]);
}
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
@ -728,6 +784,19 @@ ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pError
return NULL;
}
/*
 * Map an alias that is tagged with the given standard to the canonical
 * converter name. Returns NULL when the alias data is unavailable, the
 * alias is rejected by isAlias(), or no converter is found for the
 * alias/standard pair.
 */
U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    uint32_t converterIdx;

    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return NULL;
    }

    converterIdx = findTaggedConverterNum(alias, standard, pErrorCode);
    if (converterIdx >= gConverterListSize) {
        /* Not found: findTaggedConverterNum() reports this as UINT32_MAX. */
        return NULL;
    }
    return GET_STRING(gConverterList[converterIdx]);
}
void
ucnv_io_flushAvailableConverterCache() {
if (gAvailableConverters) {
@ -801,6 +870,69 @@ ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
return NULL;
}
/*
 * UEnumeration count callback for the "all converters" enumeration:
 * reports gConverterListSize. Neither argument is consulted.
 */
static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
    const int32_t converterCount = gConverterListSize;

    return converterCount;
}
/*
 * UEnumeration next callback for the "all converters" enumeration.
 *
 * enumerator   - its context points at a uint16_t holding the index of the
 *                next entry of gConverterList to return.
 * resultLength - optional; receives the length of the returned name, or 0
 *                when there is nothing left to return.
 * pErrorCode   - set to U_INDEX_OUTOFBOUNDS_ERROR once the index has run
 *                past the end of the list.
 *
 * Returns the next converter name and advances the index, or NULL when the
 * list is exhausted.
 */
static const char* U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration *enumerator,
                          int32_t* resultLength,
                          UErrorCode *pErrorCode)
{
    uint16_t *myContext = (uint16_t *)(enumerator->context);

    if (*myContext < gConverterListSize) {
        const char *myStr = GET_STRING(gConverterList[(*myContext)++]);
        if (resultLength) {
            /* uprv_strlen() yields a size_t-style length; narrow it explicitly. */
            *resultLength = (int32_t)uprv_strlen(myStr);
        }
        return myStr;
    }
    /* Either we accessed a zero length list, or we enumerated too far. */
    if (resultLength) {
        /* Do not leave the caller's length variable holding a stale value. */
        *resultLength = 0;
    }
    /* NOTE(review): reaching the end is reported as an error here; confirm
       this is what uenum_next() callers expect at normal end of iteration. */
    *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    return NULL;
}
/*
 * UEnumeration reset callback for the "all converters" enumeration:
 * rewinds the uint16_t position stored in the enumeration's context to 0.
 * pErrorCode is not touched.
 */
static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
    uint16_t *position = (uint16_t *)(enumerator->context);

    *position = 0;
}
/*
 * Template for the "all converters" enumeration. ucnv_openAllNames() copies
 * this structure into a freshly allocated UEnumeration and then stores its
 * own position counter in the copy's context field.
 */
static const UEnumeration gEnumAllConverters = {
/* The two leading members start out NULL; presumably one of them is the
   context field that ucnv_openAllNames() fills in - confirm against the
   UEnumeration declaration. */
NULL,
NULL,
ucnv_io_closeUEnumeration, /* close callback */
ucnv_io_countAllConverters, /* count callback: reports gConverterListSize */
uenum_unextDefault, /* default implementation for the UChar-returning next */
ucnv_io_nextAllConverters, /* next callback: returns the next converter name */
ucnv_io_resetAllConverters /* reset callback: rewinds the position to 0 */
};
/*
 * Create an enumeration over every converter name in gConverterList.
 *
 * The enumeration is a heap copy of gEnumAllConverters whose context is a
 * heap-allocated uint16_t position counter starting at 0.
 *
 * Returns NULL when haveAliasData() fails, or when either allocation fails
 * (in which case *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR).
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode *pErrorCode) {
    UEnumeration *result;
    uint16_t *position;

    if (!haveAliasData(pErrorCode)) {
        return NULL;
    }

    result = uprv_malloc(sizeof(UEnumeration));
    if (result == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gEnumAllConverters, sizeof(UEnumeration));

    position = uprv_malloc(sizeof(uint16_t));
    if (position == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        /* Do not leak the half-built enumeration. */
        uprv_free(result);
        return NULL;
    }

    *position = 0;
    result->context = position;
    return result;
}
U_CFUNC void
ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
if (haveAvailableConverterList(pErrorCode)) {

View File

@ -18,6 +18,7 @@
#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
#define UCNV_CONVERTER_INDEX_MASK 0xFFF
#define UCNV_NUM_RESERVED_TAGS 2
#define UCNV_NUM_HIDDEN_TAGS 1
/**
* Remove the underscores, dashes and spaces from the name, and convert

View File

@ -1059,6 +1059,20 @@ ucnv_countAvailable (void);
U_CAPI const char* U_EXPORT2
ucnv_getAvailableName (int32_t n);
/**
* Returns a UEnumeration to enumerate all of the canonical converter
* names, as per the alias file, regardless of the ability to open each
* converter.
*
* @param pErrorCode Receives the error status; it is set to
* U_MEMORY_ALLOCATION_ERROR if the enumeration
* cannot be allocated.
* @return A UEnumeration object for getting all the recognized canonical
* converter names, or <code>NULL</code> if the alias data is not
* available or memory could not be allocated. Close it with
* uenum_close when it is no longer needed.
* @see ucnv_getAvailableName
* @see uenum_close
* @see uenum_next
*/
U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode *pErrorCode);
/**
* Gives the number of aliases for a given converter or alias name.
* If the alias is ambiguous, then the preferred converter is used
@ -1164,6 +1178,20 @@ ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
/**
* This function will return the internal canonical converter name of the
* tagged alias. This is the opposite of ucnv_openStandardNames, which
* returns the tagged alias given the canonical name.
*
* @param alias The alias name to look up.
* @param standard The name of the standard (tag) under which the alias is
* expected to be listed, for example "IANA" or "MIME".
* @param pErrorCode Receives the error or warning status of the lookup.
* @return returns the canonical converter name;
* if a standard or alias name cannot be determined,
* then <code>NULL</code> is returned. The returned string is
* owned by the library.
* @see ucnv_getStandardName
*/
U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
/**
* returns the current default converter name.
*

View File

@ -162,6 +162,8 @@ static void TestConvert()
UConverterToUCallback oldToUAction = NULL;
const void* oldFromUContext = NULL;
const void* oldToUContext = NULL;
UEnumeration *allNamesEnum = NULL;
int32_t allNamesCount = 0;
/* Allocate memory */
mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
@ -179,12 +181,28 @@ static void TestConvert()
/*Calling all the UnicodeConverterCPP API and checking functionality*/
log_verbose("Testing ucnv_openAllNames()...");
allNamesEnum = ucnv_openAllNames(&err);
if(U_FAILURE(err)) {
log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
}
else {
const char *string = NULL;
int32_t len = 0;
allNamesCount = uenum_count(allNamesEnum, &err);
while ((string = uenum_next(allNamesEnum, &len, &err))) {
log_verbose("read \"%s\", length %i\n", string, len);
}
}
uenum_close(allNamesEnum);
err = U_ZERO_ERROR;
/*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
log_verbose("Testing ucnv_countAvailable()...");
testLong1=ucnv_countAvailable();
log_info("Number of available Codepages: %d\n", testLong1);
log_info("Number of available Codepages: %d/%d\n", testLong1, allNamesCount);
log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */

View File

@ -22,6 +22,7 @@
static void TestStandardName(void);
static void TestStandardNames(void);
static void TestCanonicalName(void);
void addStandardNamesTest(TestNode** root);
@ -31,6 +32,7 @@ addStandardNamesTest(TestNode** root)
{
addTest(root, &TestStandardName, "stdnmtst/TestStandardName");
addTest(root, &TestStandardNames, "stdnmtst/TestStandardNames");
addTest(root, &TestCanonicalName, "stdnmtst/TestCanonicalName");
}
static int dotestname(const char *name, const char *standard, const char *expected) {
@ -61,7 +63,7 @@ static void TestStandardName()
/* Iterate over all standards. */
for (i = 0, count = ucnv_countStandards(); i < count; ++i) {
for (i = 0, count = ucnv_countStandards(); i < count-1; ++i) {
const char *standard;
err = U_ZERO_ERROR;
@ -76,7 +78,13 @@ static void TestStandardName()
}
}
err = U_ZERO_ERROR;
if (ucnv_getStandard(i, &err)) {
/* "" must be last */
if (*ucnv_getStandard((uint16_t)(count-1), &err) != 0) {
log_err("FAIL: ucnv_getStandard(%d) should return ""\n", count-1);
res = 0;
}
err = U_ZERO_ERROR;
if (ucnv_getStandard(++i, &err)) {
log_err("FAIL: ucnv_getStandard(%d) should return NULL\n", i);
res = 0;
}
@ -102,6 +110,50 @@ static void TestStandardName()
}
}
/*
 * Check one ucnv_getCanonicalName() lookup.
 *
 * name     - the alias passed to ucnv_getCanonicalName().
 * standard - the standard (tag) passed to ucnv_getCanonicalName().
 * expected - the canonical name that must come back, or NULL when the
 *            lookup is expected to find nothing.
 *
 * Returns 1 when the result matches the expectation; otherwise logs an
 * error and returns 0.
 */
static int dotestconv(const char *name, const char *standard, const char *expected) {
    int res = 1;

    UErrorCode error;
    const char *tag;

    error = U_ZERO_ERROR;
    tag = ucnv_getCanonicalName(name, standard, &error);
    if (tag && !expected) {
        log_err("FAIL: Unexpectedly found %s canonical name for %s, got %s\n", standard, name, tag);
        res = 0;
    } else if (!tag && expected) {
        /* Print the standard itself; fall back to a quoted empty string only
           when it is NULL or empty. (The operands used to be swapped, which
           hid the standard's name and handed NULL to %s.) */
        log_err("FAIL: could not find %s canonical name for %s\n", ((standard && *standard) ? standard : "\"\""), name);
        res = 0;
    } else if (expected && (name == tag || uprv_strcmp(expected, tag))) {
        /* name == tag: the result must be the library's own string, not the
           caller's pointer handed back. */
        log_err("FAIL: expected %s for %s canonical name for %s, got %s\n", expected, standard, name, tag);
        res = 0;
    }

    return res;
}
/*
 * Run a fixed set of ucnv_getCanonicalName() expectations through
 * dotestconv(), stopping at the first failure. The PASS message is logged
 * only when every case succeeds.
 */
static void TestCanonicalName()
{
    /* Test for some expected results. */
    static const struct {
        const char *name;
        const char *standard;
        const char *expected;
    } cases[] = {
        { "UTF-8", "IANA", "UTF-8" },       /* default name */
        { "UTF-8", "MIME", "UTF-8" },       /* default name */
        { "ibm-1208", "IBM", "UTF-8" },     /* default name */
        { "ibm-5305", "IBM", "UTF-8" },     /* non-default name */
        { "ibm-5305", "MIME", NULL },       /* mapping does not exist */
        { "ascii", "MIME", NULL },          /* mapping does not exist */
        { "ibm-1208", "IANA", NULL },       /* mapping does not exist */
        { "ibm-5305", "IANA", NULL },       /* mapping does not exist */
        { "cp1208", "", "UTF-8" },          /* default name due to ordering */
        { "cp65001", "", "UTF-8" },         /* non-default name due to ordering */
        { "ISO-2022", "MIME", "ISO_2022" }, /* default name */
        { "crazy", "MIME", NULL },
        { "ASCII", "crazy", NULL }
    };
    const int caseCount = (int)(sizeof(cases) / sizeof(cases[0]));
    int i;

    for (i = 0; i < caseCount; ++i) {
        if (!dotestconv(cases[i].name, cases[i].standard, cases[i].expected)) {
            /* Later cases are not attempted after a failure. */
            return;
        }
    }
    log_verbose("PASS: getting IANA and MIME canonical names works\n");
}
static UBool doTestNames(const char *name, const char *standard, const char **expected, int32_t size) {
UErrorCode err = U_ZERO_ERROR;