Transliterator: normalize IDs before building display names.

Summary of the hunks below:
  * translit.cpp: getDisplayName() initializes the registry on demand, splits
    the incoming id with the new IDtoSTV(), returns an empty result when the
    id has no target, always passes source and target to the message format,
    and appends the variant to the formatted name. createInstance() also
    initializes the registry on demand. The source/target/variant parsing
    that was inlined in parseID() moves into IDtoSTV().
  * translit.h: declares the static IDtoSTV().
  * transtst.cpp, transtst.h: add TestDisplayName as test case 63.

NOTE(review): this copy of the patch had lost its line breaks and leading
whitespace, and the for-loop header in TestDisplayName was truncated. The
loop header was restored from the parallel reverse-name check in the same
loop and from the hunk's line count. Indentation and the position of blank
context lines are best-effort reconstructions; apply with a
whitespace-tolerant option or regenerate the patch from the original commit.

diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp
index 9f6feff8a5..ec77aeb645 100644
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@@ -696,15 +696,31 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
  * localized.
  * @see java.text.MessageFormat
  */
-UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
+UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
                                               const Locale& inLocale,
                                               UnicodeString& result) {
+    if (registry == 0) {
+        initializeRegistry();
+    }
+
     UErrorCode status = U_ZERO_ERROR;
 
     ResourceBundle bundle(u_getDataDirectory(), inLocale, status);
 
     // Suspend checking status until later...
 
+    result.truncate(0);
+
+    // Normalize the ID
+    UnicodeString source, target, variant;
+    IDtoSTV(id, source, target, variant);
+    if (target.length() < 1) {
+        // No target; malformed id
+        return result;
+    }
+    UnicodeString ID(source);
+    ID.append(ID_SEP).append(target).append(variant);
+
     // build the char* key
     char key[200];
     uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
@@ -733,26 +749,16 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
 
         // We pass either 2 or 3 Formattable objects to msg.
         Formattable args[3];
-        int32_t i = ID.indexOf(ID_SEP);
         int32_t nargs;
-        if (i < 0) {
-            args[0].setLong(1); // # of args to follow
-            args[1].setString(ID);
-            nargs = 2;
-        } else {
-            UnicodeString left, right;
-            ID.extractBetween(0, i, left);
-            ID.extractBetween(i+1, ID.length(), right);
-            args[0].setLong(2); // # of args to follow
-            args[1].setString(left);
-            args[2].setString(right);
-            nargs = 3;
-        }
+        args[0].setLong(2); // # of args to follow
+        args[1].setString(source);
+        args[2].setString(target);
+        nargs = 3;
 
         // Use display names for the scripts, if they exist
         UnicodeString s;
         length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
-        for (int j=1; j<=((i<0)?1:2); ++j) {
+        for (int j=1; j<=2; ++j) {
             status = U_ZERO_ERROR;
             uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
             args[j].getString(s);
@@ -769,6 +775,7 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
         FieldPosition pos; // ignored by msg
         msg.format(args, nargs, result, pos, status);
         if (U_SUCCESS(status)) {
+            result.append(variant);
             return result;
         }
     }
@@ -882,6 +889,9 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
     if (U_FAILURE(status)) {
         return 0;
     }
+    if (registry == 0) {
+        initializeRegistry();
+    }
 
     UVector list(status);
     int32_t ignored;
@@ -1112,6 +1122,55 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
     }
 }
 
+/**
+ * Parse an ID into pieces. Take IDs of the form T, T/V, S-T,
+ * S-T/V, or S/V-T. If the source is missing, return a source of
+ * ANY.
+ * @param id the id string, in any of several forms
+ * @param source fill-in for the source; if the source is not
+ * present, ANY will be given as the source, and FALSE will be
+ * returned. Otherwise TRUE will be returned
+ * @param target fill-in for the target, which may be empty if the
+ * id is not well-formed.
+ * @param variant fill-in for the variant, which may be empty; if
+ * it is not, it will contain a leading '/'
+ * @return TRUE if the source was present
+ */
+UBool Transliterator::IDtoSTV(const UnicodeString& id,
+                              UnicodeString& source, UnicodeString& target,
+                              UnicodeString& variant) {
+    source = ANY;
+    int32_t sep = id.indexOf(ID_SEP);
+    int32_t var = id.indexOf(VARIANT_SEP);
+    if (var < 0) {
+        var = id.length();
+    }
+    UBool isSourcePresent = FALSE;
+
+    if (sep < 0) {
+        // Form: T/V or T (or /V)
+        id.extractBetween(0, var, target);
+        id.extractBetween(var, 0x7FFFFFFF, variant);
+    } else if (sep < var) {
+        // Form: S-T/V or S-T (or -T/V or -T)
+        if (sep > 0) {
+            id.extractBetween(0, sep, source);
+            isSourcePresent = TRUE;
+        }
+        id.extractBetween(++sep, var, target);
+        id.extractBetween(var, 0x7FFFFFFF, variant);
+    } else {
+        // Form: (S/V-T or /V-T)
+        if (var > 0) {
+            id.extractBetween(0, var, source);
+            isSourcePresent = TRUE;
+        }
+        id.extractBetween(var, sep++, variant);
+        id.extractBetween(sep, 0x7FFFFFFF, target);
+    }
+    return isSourcePresent;
+}
+
 /**
  * Parse a single ID, possibly including an inline filter, and return
  * the resultant transliterator object. NOTE: If 'create' is FALSE,
@@ -1255,38 +1314,11 @@ Transliterator* Transliterator::parseID(const UnicodeString& ID,
     // produces T-S/V, with a default S of "Any". If the ID has a special
     // non-canonical inverse, look it up (e.g., NFC -> NFD, Null -> Null).
     if (id.length() > 0) { // We handle empty IDs below
-        UnicodeString source(ANY);
+        UnicodeString source;
         UnicodeString target;
         UnicodeString variant; // Variant INCLUDING "/"
+        UBool isSourcePresent = IDtoSTV(id, source, target, variant);
 
-        int32_t sep = id.indexOf(ID_SEP);
-        int32_t var = id.indexOf(VARIANT_SEP);
-        if (var < 0) {
-            var = id.length();
-        }
-        UBool isSourcePresent = FALSE;
-
-        if (sep < 0) {
-            // Form: T/V or T (or /V)
-            id.extractBetween(0, var, target);
-            id.extractBetween(var, 0x7FFFFFFF, variant);
-        } else if (sep < var) {
-            // Form: S-T/V or S-T (or -T/V or -T)
-            if (sep > 0) {
-                id.extractBetween(0, sep, source);
-                isSourcePresent = TRUE;
-            }
-            id.extractBetween(++sep, var, target);
-            id.extractBetween(var, 0x7FFFFFFF, variant);
-        } else {
-            // Form: (S/V-T or /V-T)
-            if (var > 0) {
-                id.extractBetween(0, var, source);
-                isSourcePresent = TRUE;
-            }
-            id.extractBetween(var, sep++, variant);
-            id.extractBetween(sep, 0x7FFFFFFF, target);
-        }
         id.truncate(0);
         // Source and variant may be empty, but target may not be.
         if (target.length() == 0) {
diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h
index 0ac6cccdd9..7c7a9047e3 100644
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@@ -323,6 +323,25 @@ protected:
                            UnicodeSet*& compoundFilter,
                            UParseError& parseError,
                            UErrorCode& status);
+
+    /**
+     * Parse an ID into pieces. Take IDs of the form T, T/V, S-T,
+     * S-T/V, or S/V-T. If the source is missing, return a source of
+     * ANY.
+     * @param id the id string, in any of several forms
+     * @param source fill-in for the source; if the source is not
+     * present, ANY will be given as the source, and FALSE will be
+     * returned. Otherwise TRUE will be returned
+     * @param target fill-in for the target, which may be empty if the
+     * id is not well-formed.
+     * @param variant fill-in for the variant, which may be empty; if
+     * it is not, it will contain a leading '/'
+     * @return TRUE if the source was present
+     */
+    static UBool IDtoSTV(const UnicodeString& id,
+                         UnicodeString& source, UnicodeString& target,
+                         UnicodeString& variant);
+
     /**
      * Internal parsing method for subclasses.
      */
diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp
index cc583895b5..b1a033322c 100644
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@@ -142,6 +142,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
         TESTCASE(60,TestToRulesMark);
         TESTCASE(61,TestEscape);
         TESTCASE(62,TestAnchorMasking);
+        TESTCASE(63,TestDisplayName);
         default: name = ""; break;
     }
 }
@@ -2910,6 +2911,58 @@ void TransliteratorTest::TestAnchorMasking(){
               ", rules: " + prettify(rule, TRUE));
     }
 }
+
+/**
+ * Make sure display names of variants look reasonable.
+ */
+void TransliteratorTest::TestDisplayName() {
+    static const char* DATA[] = {
+        // ID, forward name, reverse name
+        // Update the text as necessary -- the important thing is
+        // not the text itself, but how various cases are handled.
+
+        // Basic test
+        "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
+
+        // Variants
+        "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
+
+        // Target-only IDs
+        "NFC", "Any to NFC", "Any to NFD",
+    };
+
+    int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
+
+    Locale US("en", "US");
+
+    for (int32_t i=0; i<DATA_length; i+=3) {
+        UnicodeString name;
+        Transliterator::getDisplayName(DATA[i], US, name);
+        if (name != DATA[i+1]) {
+            errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
+                  name + ", expected " + DATA[i+1]);
+        } else {
+            logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
+        }
+        UErrorCode ec = U_ZERO_ERROR;
+        UParseError pe;
+        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
+        if (U_FAILURE(ec)) {
+            delete t;
+            errln("FAIL: createInstance failed");
+            continue;
+        }
+        name = Transliterator::getDisplayName(t->getID(), US, name);
+        if (name != DATA[i+2]) {
+            errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
+                  name + ", expected " + DATA[i+2]);
+        } else {
+            logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
+        }
+        delete t;
+    }
+}
+
 //======================================================================
 // Support methods
 //======================================================================
diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h
index 83c6711a64..50aeb3c006 100644
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@@ -293,6 +293,12 @@ class TransliteratorTest : public IntlTest {
     void TestEscape();
 
     void TestAnchorMasking();
+
+    /**
+     * Make sure display names of variants look reasonable.
+     */
+    void TestDisplayName();
+
     //======================================================================
     // Support methods
     //======================================================================