parent
dd50e38f45
commit
6ea0fc7713
@ -38,6 +38,7 @@
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
|
||||
#include "bytesinkutil.h"
|
||||
#include "charstr.h"
|
||||
@ -509,6 +510,36 @@ Locale::operator==( const Locale& other) const
|
||||
|
||||
#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Appends an ICU-style locale ID assembled from individual subtags to `out`.
 *
 * The pieces are written in the order language, script, country, variants,
 * extension. Script, country and variants are each preceded by '_' and are
 * skipped entirely when null or empty.
 *
 * @param out       buffer that receives the text; existing content is kept.
 * @param language  language subtag; appended unconditionally, must not be null.
 * @param script    script subtag, or null/empty for none.
 * @param country   country/region subtag, or null/empty for none.
 * @param variants  one or more '_'-separated variants, or null/empty for none.
 * @param extension text appended verbatim after the variants with no separator
 *                  added; the callers in this file pass the '@'-prefixed
 *                  keyword tail of fullName. May be null/empty.
 * @param status    error code handed to every CharString::append call.
 * @return `out`, to allow chaining.
 */
CharString& AppendLSCVE(CharString& out, const char* language, const char* script,
                        const char* country, const char* variants, const char* extension,
                        UErrorCode& status) {
    const bool hasScript = script != nullptr && script[0] != '\0';
    const bool hasCountry = country != nullptr && country[0] != '\0';
    const bool hasVariants = variants != nullptr && variants[0] != '\0';
    const bool hasExtension = extension != nullptr && extension[0] != '\0';

    out.append(language, status);
    if (hasScript) {
        out.append('_', status);
        out.append(script, status);
    }
    if (hasCountry) {
        out.append('_', status);
        out.append(country, status);
    }
    if (hasVariants) {
        // The country slot must stay visibly empty whenever no country is
        // present, whether or not a script was written. Otherwise a short
        // variant such as "NY" in "xx_Scrp_NY" is indistinguishable from a
        // country when the ID is parsed again.
        if (!hasCountry) {
            out.append('_', status);
        }
        out.append('_', status);
        out.append(variants, status);
    }
    if (hasExtension) {
        out.append(extension, status);
    }
    return out;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*This function initializes a Locale from a C locale ID*/
|
||||
Locale& Locale::init(const char* localeID, UBool canonicalize)
|
||||
{
|
||||
@ -632,6 +663,195 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
||||
break;
|
||||
}
|
||||
|
||||
if (canonicalize) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// TODO: Try to use ResourceDataValue and ures_getValueWithFallback() etc.
|
||||
LocalUResourceBundlePointer metadata(ures_openDirect(NULL, "metadata", &status));
|
||||
LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(), "alias", NULL, &status));
|
||||
// Look up the metadata:alias:language:$key:replacement entries
|
||||
// key could be one of the following:
|
||||
// language
|
||||
// language_Script_REGION
|
||||
// language_REGION
|
||||
// language_variant
|
||||
do {
|
||||
// The resource structure looks like
|
||||
// metadata {
|
||||
// alias {
|
||||
// language {
|
||||
// art_lojban {
|
||||
// replacement{"jbo"}
|
||||
// }
|
||||
// ...
|
||||
// ks_Arab_IN {
|
||||
// replacement{"ks_IN"}
|
||||
// }
|
||||
// ...
|
||||
// no {
|
||||
// replacement{"nb"}
|
||||
// }
|
||||
// ....
|
||||
// zh_CN {
|
||||
// replacement{"zh_Hans_CN"}
|
||||
// }
|
||||
// }
|
||||
// ...
|
||||
// }
|
||||
// }
|
||||
LocalUResourceBundlePointer languageAlias(ures_getByKey(metadataAlias.getAlias(), "language", NULL, &status));
|
||||
if (U_FAILURE(status))
|
||||
break;
|
||||
CharString temp;
|
||||
// Handle cases of key pattern "language _ variant"
|
||||
// ex: Map "art_lojban" to "jbo"
|
||||
const char* variants = getVariant();
|
||||
if (variants != nullptr && variants[0] != '\0') {
|
||||
const char* begin = variants;
|
||||
const char* end = begin;
|
||||
// We may have multiple variants, need to look at each of
|
||||
// them.
|
||||
do {
|
||||
status = U_ZERO_ERROR;
|
||||
end = uprv_strchr(begin, '_');
|
||||
int32_t len = (end == nullptr) ? int32_t(uprv_strlen(begin)) : int32_t(end - begin);
|
||||
temp.clear().append(getLanguage(), status).append("_", status).append(begin, len, status);
|
||||
LocalUResourceBundlePointer languageVariantAlias(
|
||||
ures_getByKey(languageAlias.getAlias(),
|
||||
temp.data(),
|
||||
NULL, &status));
|
||||
temp.clear().appendInvariantChars(
|
||||
UnicodeString(ures_getStringByKey(languageVariantAlias.getAlias(), "replacement", nullptr, &status)), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
CharString newVar;
|
||||
if (begin != variants) {
|
||||
newVar.append(variants, begin - variants - 1, status);
|
||||
}
|
||||
if (end != nullptr) {
|
||||
if (begin != variants) {
|
||||
newVar.append("_", status);
|
||||
}
|
||||
newVar.append(end + 1, status);
|
||||
}
|
||||
Locale l(temp.data());
|
||||
init(AppendLSCVE(temp.clear(),
|
||||
l.getLanguage(),
|
||||
(getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
|
||||
(getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
|
||||
newVar.data(),
|
||||
uprv_strchr(fullName, '@'), status).data(), false);
|
||||
break;
|
||||
}
|
||||
begin = end + 1;
|
||||
} while (end != nullptr);
|
||||
} // End of handle language _ variant
|
||||
// Handle cases of key pattern "language _ Script _ REGION"
|
||||
// ex: Map "ks_Arab_IN" to "ks_IN"
|
||||
if (getScript() != nullptr && getScript()[0] != '\0' &&
|
||||
getCountry() != nullptr && getCountry()[0] != '\0') {
|
||||
status = U_ZERO_ERROR;
|
||||
LocalUResourceBundlePointer replacedAlias(
|
||||
ures_getByKey(languageAlias.getAlias(),
|
||||
AppendLSCVE(temp.clear(), getLanguage(), getScript(), getCountry(),
|
||||
nullptr, nullptr, status).data(), NULL, &status));
|
||||
temp.clear().appendInvariantChars(
|
||||
UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
Locale l(temp.data());
|
||||
init(AppendLSCVE(temp.clear(),
|
||||
l.getLanguage(),
|
||||
l.getScript(),
|
||||
l.getCountry(),
|
||||
getVariant(),
|
||||
uprv_strchr(fullName, '@'), status).data(), false);
|
||||
}
|
||||
} // End of handle language _ Script _ REGION
|
||||
// Handle cases of key pattern "language _ REGION"
|
||||
// ex: Map "zh_CN" to "zh_Hans_CN"
|
||||
if (getCountry() != nullptr && getCountry()[0] != '\0') {
|
||||
status = U_ZERO_ERROR;
|
||||
LocalUResourceBundlePointer replacedAlias(
|
||||
ures_getByKey(languageAlias.getAlias(),
|
||||
AppendLSCVE(temp.clear(), getLanguage(), nullptr, getCountry(),
|
||||
nullptr, nullptr, status).data(), NULL, &status));
|
||||
temp.clear().appendInvariantChars(
|
||||
UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
Locale l(temp.data());
|
||||
init(AppendLSCVE(temp.clear(),
|
||||
l.getLanguage(),
|
||||
(getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
|
||||
l.getCountry(),
|
||||
getVariant(),
|
||||
uprv_strchr(fullName, '@'), status).data(), false);
|
||||
}
|
||||
} // End of handle "language _ REGION"
|
||||
// Handle cases of key pattern "language"
|
||||
// ex: Map "no" to "nb"
|
||||
{
|
||||
status = U_ZERO_ERROR;
|
||||
LocalUResourceBundlePointer replaceLanguageAlias(ures_getByKey(languageAlias.getAlias(), getLanguage(), NULL, &status));
|
||||
temp.clear().appendInvariantChars(
|
||||
UnicodeString(ures_getStringByKey(replaceLanguageAlias.getAlias(), "replacement", nullptr, &status)), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
Locale l(temp.data());
|
||||
init(AppendLSCVE(temp.clear(),
|
||||
l.getLanguage(),
|
||||
(getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(),
|
||||
(getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(),
|
||||
getVariant(),
|
||||
uprv_strchr(fullName, '@'), status).data(), false);
|
||||
}
|
||||
} // End of handle "language"
|
||||
|
||||
// Look up the metadata:alias:territory:$key:replacement entries
|
||||
// key is region code.
|
||||
if (getCountry() != nullptr) {
|
||||
status = U_ZERO_ERROR;
|
||||
// The resource structure looks like
|
||||
// metadata {
|
||||
// alias {
|
||||
// ...
|
||||
// territory: {
|
||||
// 172 {
|
||||
// replacement{"RU AM AZ BY GE KG KZ MD TJ TM UA UZ"}
|
||||
// }
|
||||
// ...
|
||||
// 554 {
|
||||
// replacement{"NZ"}
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(), "territory", NULL, &status));
|
||||
LocalUResourceBundlePointer countryAlias(ures_getByKey(territoryAlias.getAlias(), getCountry(), NULL, &status));
|
||||
UnicodeString replacements(
|
||||
ures_getStringByKey(countryAlias.getAlias(), "replacement", nullptr, &status));
|
||||
if (U_SUCCESS(status)) {
|
||||
CharString replacedCountry;
|
||||
int32_t delPos = replacements.indexOf(' ');
|
||||
if (delPos == -1) {
|
||||
replacedCountry.appendInvariantChars(replacements, status);
|
||||
} else {
|
||||
Locale l(AppendLSCVE(temp.clear(), getLanguage(), nullptr, getScript(),
|
||||
nullptr, nullptr, status).data());
|
||||
l.addLikelySubtags(status);
|
||||
if (replacements.indexOf(UnicodeString(l.getCountry())) != -1) {
|
||||
replacedCountry.append(l.getCountry(), status);
|
||||
} else {
|
||||
replacedCountry.appendInvariantChars(replacements.getBuffer(), delPos, status);
|
||||
}
|
||||
}
|
||||
init(AppendLSCVE(temp.clear(),
|
||||
getLanguage(),
|
||||
getScript(),
|
||||
replacedCountry.data(),
|
||||
getVariant(),
|
||||
uprv_strchr(fullName, '@'), status).data(), false);
|
||||
}
|
||||
} // End of handle REGION
|
||||
} while (0);
|
||||
} // if (canonicalize) {
|
||||
|
||||
// successful end of init()
|
||||
return *this;
|
||||
} while(0); /*loop doesn't iterate*/
|
||||
@ -778,6 +998,25 @@ Locale::minimizeSubtags(UErrorCode& status) {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Locale::canonicalize(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (isBogus()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
CharString uncanonicalized(fullName, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
init(uncanonicalized.data(), /*canonicalize=*/TRUE);
|
||||
if (isBogus()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
Locale U_EXPORT2
|
||||
Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
|
||||
{
|
||||
|
@ -1681,7 +1681,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
|
||||
const char *pKey = NULL; /* LDML key */
|
||||
const char *pType = NULL; /* LDML type */
|
||||
|
||||
char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
|
||||
char bcpKeyBuf[3]; /* BCP key length is always 2 for now */
|
||||
|
||||
U_ASSERT(pBcpKey != NULL);
|
||||
|
||||
@ -1690,6 +1690,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
U_ASSERT(bcpKeyLen <= 2);
|
||||
|
||||
uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
|
||||
bcpKeyBuf[bcpKeyLen] = 0;
|
||||
|
@ -448,7 +448,7 @@ public:
|
||||
|
||||
/**
|
||||
* Creates a locale from the given string after canonicalizing
|
||||
* the string by calling uloc_canonicalize().
|
||||
* the string according to CLDR by calling uloc_canonicalize().
|
||||
* @param name the locale ID to create from. Must not be NULL.
|
||||
* @return a new locale object corresponding to the given name
|
||||
* @stable ICU 3.0
|
||||
@ -567,6 +567,16 @@ public:
|
||||
*/
|
||||
void minimizeSubtags(UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Canonicalize the locale ID of this object according to CLDR.
|
||||
* @param status the status code
|
||||
* @draft ICU 67
|
||||
* @see createCanonical
|
||||
*/
|
||||
void canonicalize(UErrorCode& status);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
*
|
||||
|
@ -6,6 +6,7 @@
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
@ -275,6 +276,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
||||
TESTCASE_AUTO(TestCapturingTagConvertingIterator);
|
||||
TESTCASE_AUTO(TestSetUnicodeKeywordValueInLongLocale);
|
||||
TESTCASE_AUTO(TestSetUnicodeKeywordValueNullInLongLocale);
|
||||
TESTCASE_AUTO(TestCanonicalize);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -2595,13 +2597,13 @@ void LocaleTest::TestCanonicalization(void)
|
||||
"ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE",
|
||||
"ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
|
||||
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
|
||||
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
|
||||
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
|
||||
{ "no_NO_NY", "no_NO_NY", "no_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
{ "no@ny", "no@ny", "no__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no-no.utf32@B", "no_NO.utf32@B", "no_NO_B" /* not: "nb_NO_B" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
{ "no@ny", "no@ny", "nb__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no-no.utf32@B", "no_NO.utf32@B", "nb_NO_B" /* not: "nb_NO_B" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "qz-qz@Euro", "qz_QZ@Euro", "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */
|
||||
// NOTE: uloc_getName() works on en-BOONT, but Locale() parser considers it BOGUS
|
||||
// TODO: unify this behavior
|
||||
@ -2615,7 +2617,7 @@ void LocaleTest::TestCanonicalization(void)
|
||||
{ "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" },
|
||||
{ "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" },
|
||||
{ "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "no_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
|
||||
/* fleshing out canonicalization */
|
||||
/* trim space and sort keywords, ';' is separator so not present at end in canonical form */
|
||||
@ -2623,7 +2625,7 @@ void LocaleTest::TestCanonicalization(void)
|
||||
/* already-canonical ids are not changed */
|
||||
{ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
/* norwegian is just too weird, if we handle things in their full generality */
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "nb_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
|
||||
/* test cases reflecting internal resource bundle usage */
|
||||
{ "root@kw=foo", "root@kw=foo", "root@kw=foo" },
|
||||
@ -2662,13 +2664,13 @@ void LocaleTest::TestCanonicalization(void)
|
||||
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
|
||||
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
|
||||
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
|
||||
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
|
||||
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_YU_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
|
||||
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
|
||||
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
|
||||
/* PRE_EURO and EURO conversions don't affect other keywords */
|
||||
@ -2699,6 +2701,91 @@ void LocaleTest::TestCanonicalization(void)
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleTest::TestCanonicalize(void)
|
||||
{
|
||||
static const struct {
|
||||
const char *localeID; /* input */
|
||||
const char *canonicalID; /* expected canonicalize() result */
|
||||
} testCases[] = {
|
||||
// language _ variant -> language
|
||||
{ "no-BOKMAL", "nb" },
|
||||
// also test with script, country and extensions
|
||||
{ "no-Cyrl-ID-BOKMAL-u-ca-japanese", "nb-Cyrl-ID-u-ca-japanese" },
|
||||
{ "no-Cyrl-ID-1901-BOKMAL-xsistemo-u-ca-japanese", "nb-Cyrl-ID-1901-xsistemo-u-ca-japanese" },
|
||||
{ "no-Cyrl-ID-1901-BOKMAL-u-ca-japanese", "nb-Cyrl-ID-1901-u-ca-japanese" },
|
||||
{ "no-Cyrl-ID-BOKMAL-xsistemo-u-ca-japanese", "nb-Cyrl-ID-xsistemo-u-ca-japanese" },
|
||||
{ "no-NYNORSK", "nn" },
|
||||
{ "no-Cyrl-ID-NYNORSK-u-ca-japanese", "nn-Cyrl-ID-u-ca-japanese" },
|
||||
{ "aa-SAAHO", "ssy" },
|
||||
// also test with script, country and extensions
|
||||
{ "aa-Deva-IN-SAAHO-u-ca-japanese", "ssy-Deva-IN-u-ca-japanese" },
|
||||
|
||||
// language -> language
|
||||
{ "aam", "aas" },
|
||||
// also test with script, country, variants and extensions
|
||||
{ "aam-Cyrl-ID-3456-u-ca-japanese", "aas-Cyrl-ID-3456-u-ca-japanese" },
|
||||
|
||||
// language -> language _ Script
|
||||
{ "sh", "sr-Latn" },
|
||||
// also test with script
|
||||
{ "sh-Cyrl", "sr-Cyrl" },
|
||||
// also test with country, variants and extensions
|
||||
{ "sh-ID-3456-u-ca-roc", "sr-Latn-ID-3456-u-ca-roc" },
|
||||
|
||||
// language -> language _ country
|
||||
{ "prs", "fa-AF" },
|
||||
// also test with country
|
||||
{ "prs-RU", "fa-RU" },
|
||||
// also test with script, variants and extensions
|
||||
{ "prs-Cyrl-1009-u-ca-roc", "fa-Cyrl-AF-1009-u-ca-roc" },
|
||||
|
||||
// language _ country -> language _ script _ country
|
||||
{ "pa-IN", "pa-Guru-IN" },
|
||||
// also test with script
|
||||
{ "pa-Latn-IN", "pa-Latn-IN" },
|
||||
// also test with variants and extensions
|
||||
{ "pa-IN-5678-u-ca-hindi", "pa-Guru-IN-5678-u-ca-hindi" },
|
||||
|
||||
// language _ script _ country -> language _ country
|
||||
{ "ky-Cyrl-KG", "ky-KG" },
|
||||
// also test with variants and extensions
|
||||
{ "ky-Cyrl-KG-3456-u-ca-roc", "ky-KG-3456-u-ca-roc" },
|
||||
|
||||
// Test replacement of territoryAlias
|
||||
// 554 has one replacement
|
||||
{ "en-554", "en-NZ" },
|
||||
{ "en-554-u-nu-arab", "en-NZ-u-nu-arab" },
|
||||
// 172 has multiple replacements
|
||||
// also test with variants
|
||||
{ "ru-172-1234", "ru-RU-1234" },
|
||||
// also test with extensions
|
||||
{ "ru-172-1234-u-nu-latn", "ru-RU-1234-u-nu-latn" },
|
||||
// also test with scripts
|
||||
{ "uz-172", "uz-UZ" },
|
||||
{ "uz-Cyrl-172", "uz-Cyrl-UZ" },
|
||||
{ "uz-Bopo-172", "uz-Bopo-UZ" },
|
||||
// also test with variants and scripts
|
||||
{ "uz-Cyrl-172-5678-u-nu-latn", "uz-Cyrl-UZ-5678-u-nu-latn" },
|
||||
// a language not used in this region
|
||||
{ "fr-172", "fr-RU" },
|
||||
};
|
||||
int32_t i;
|
||||
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
std::string otag = testCases[i].localeID;
|
||||
Locale loc = Locale::forLanguageTag(otag.c_str(), status);
|
||||
loc.canonicalize(status);
|
||||
std::string tag = loc.toLanguageTag<std::string>(status);
|
||||
if (tag != testCases[i].canonicalID) {
|
||||
errcheckln(status, "FAIL: %s should be canonicalized to %s but got %s - %s",
|
||||
otag.c_str(),
|
||||
testCases[i].canonicalID,
|
||||
tag.c_str(),
|
||||
u_errorName(status));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleTest::TestCurrencyByDate(void)
|
||||
{
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
@ -107,6 +107,8 @@ public:
|
||||
|
||||
void TestCanonicalization(void);
|
||||
|
||||
void TestCanonicalize(void);
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
static UDate date(int32_t y, int32_t m, int32_t d, int32_t hr = 0, int32_t min = 0, int32_t sec = 0);
|
||||
#endif
|
||||
|
@ -12,6 +12,7 @@ import java.text.ParseException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.Set;
|
||||
@ -47,12 +48,11 @@ public class PluralRulesLoader extends PluralRules.Factory {
|
||||
*/
|
||||
public ULocale[] getAvailableULocales() {
|
||||
Set<String> keys = getLocaleIdToRulesIdMap(PluralType.CARDINAL).keySet();
|
||||
ULocale[] locales = new ULocale[keys.size()];
|
||||
int n = 0;
|
||||
Set<ULocale> locales = new LinkedHashSet<ULocale>(keys.size());
|
||||
for (Iterator<String> iter = keys.iterator(); iter.hasNext();) {
|
||||
locales[n++] = ULocale.createCanonical(iter.next());
|
||||
locales.add(ULocale.createCanonical(iter.next()));
|
||||
}
|
||||
return locales;
|
||||
return locales.toArray(new ULocale[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -499,4 +499,4 @@ public class PluralRulesLoader extends PluralRules.Factory {
|
||||
// now make whole thing immutable
|
||||
localeIdToPluralRanges = Collections.unmodifiableMap(tempLocaleIdToPluralRanges);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -498,7 +498,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
||||
}
|
||||
|
||||
/**
|
||||
* {@icu} Creates a ULocale from the id by first canonicalizing the id.
|
||||
* {@icu} Creates a ULocale from the id by first canonicalizing the id according to CLDR.
|
||||
* @param nonCanonicalID the locale id to canonicalize
|
||||
* @return the locale created from the canonical version of the ID.
|
||||
* @stable ICU 3.0
|
||||
@ -507,6 +507,16 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
||||
return new ULocale(canonicalize(nonCanonicalID), (Locale)null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a ULocale from the locale by first canonicalizing the locale according to CLDR.
|
||||
* @param locale the ULocale to canonicalize
|
||||
* @return the ULocale created from the canonical version of the ULocale.
|
||||
* @draft ICU 67
|
||||
*/
|
||||
public static ULocale createCanonical(ULocale locale) {
|
||||
return createCanonical(locale.getName());
|
||||
}
|
||||
|
||||
private static String lscvToID(String lang, String script, String country, String variant) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
|
||||
@ -1204,8 +1214,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
||||
}
|
||||
|
||||
/**
|
||||
* {@icu} Returns the canonical name for the specified locale ID. This is used to
|
||||
* convert POSIX and other grandfathered IDs to standard ICU form.
|
||||
* {@icu} Returns the canonical name according to CLDR for the specified locale ID.
|
||||
* This is used to convert POSIX and other grandfathered IDs to standard ICU form.
|
||||
* @param localeID the locale id
|
||||
* @return the canonicalized id
|
||||
* @stable ICU 3.0
|
||||
@ -1239,6 +1249,144 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
||||
}
|
||||
}
|
||||
|
||||
// If the BCP 47 primary language subtag matches the type attribute of a languageAlias
|
||||
// element in Supplemental Data, replace the language subtag with the replacement value.
|
||||
// If there are additional subtags in the replacement value, add them to the result, but
|
||||
// only if there is no corresponding subtag already in the tag.
|
||||
// Five special deprecated grandfathered codes (such as i-default) are in type attributes, and are also replaced.
|
||||
try {
|
||||
UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
|
||||
"metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
|
||||
.get("alias")
|
||||
.get("language");
|
||||
// language _ variant
|
||||
if (!parser.getVariant().isEmpty()) {
|
||||
String [] variants = parser.getVariant().split("_");
|
||||
for (String variant : variants) {
|
||||
try {
|
||||
// Note the key in the metadata.txt is formatted as language_variant
|
||||
// instead of language__variant but lscvToID will generate
|
||||
// language__variant so we have to build the string ourselves.
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
(new StringBuilder(parser.getLanguage().length() + 1 + parser.getVariant().length()))
|
||||
.append(parser.getLanguage())
|
||||
.append("_")
|
||||
.append(variant)
|
||||
.toString())
|
||||
.get("replacement")
|
||||
.getString());
|
||||
StringBuilder replacedVariant = new StringBuilder(parser.getVariant().length());
|
||||
for (String current : variants) {
|
||||
if (current.equals(variant)) continue;
|
||||
if (replacedVariant.length() > 0) replacedVariant.append("_");
|
||||
replacedVariant.append(current);
|
||||
}
|
||||
parser = new LocaleIDParser(
|
||||
(new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
!parser.getScript().isEmpty() ? parser.getScript() : replaceLocale.getScript(),
|
||||
!parser.getCountry().isEmpty() ? parser.getCountry() : replaceLocale.getCountry(),
|
||||
replacedVariant.toString()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// language _ script _ country
|
||||
// ug_Arab_CN -> ug_CN
|
||||
if (!parser.getScript().isEmpty() && !parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
lscvToID(parser.getLanguage(), parser.getScript(), parser.getCountry(), null))
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
replaceLocale.getScript(),
|
||||
replaceLocale.getCountry(),
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
// language _ country
|
||||
// eg. az_AZ -> az_Latn_AZ
|
||||
if (!parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
lscvToID(parser.getLanguage(), null, parser.getCountry(), null))
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript(),
|
||||
replaceLocale.getCountry(),
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
// only language
|
||||
// e.g. twi -> ak
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(parser.getLanguage())
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript() ,
|
||||
parser.getCountry().isEmpty() ? replaceLocale.getCountry() : parser.getCountry() ,
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
|
||||
// If the BCP 47 region subtag matches the type attribute of a
|
||||
// territoryAlias element in Supplemental Data, replace the language
|
||||
// subtag with the replacement value, as follows:
|
||||
if (!parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
String replacements[] = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
|
||||
"metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
|
||||
.get("alias")
|
||||
.get("territory")
|
||||
.get(parser.getCountry())
|
||||
.get("replacement")
|
||||
.getString()
|
||||
.split(" ");
|
||||
String replacement = replacements[0];
|
||||
// If there is a single territory in the replacement, use it.
|
||||
// If there are multiple territories:
|
||||
// Look up the most likely territory for the base language code (and script, if there is one).
|
||||
// If that likely territory is in the list, use it.
|
||||
// Otherwise, use the first territory in the list.
|
||||
if (replacements.length > 1) {
|
||||
String likelyCountry = ULocale.addLikelySubtags(
|
||||
new ULocale(lscvToID(parser.getLanguage(), parser.getScript(), null, parser.getVariant())))
|
||||
.getCountry();
|
||||
for (String country : replacements) {
|
||||
if (country.equals(likelyCountry)) {
|
||||
replacement = likelyCountry;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
parser = new LocaleIDParser(
|
||||
(new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(parser.getLanguage(), parser.getScript(), replacement, parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
|
||||
return parser.getName();
|
||||
}
|
||||
|
||||
|
@ -239,7 +239,7 @@ public class GlobalizationPreferencesTest extends TestFmwk {
|
||||
{"fr_CA", "fr"},
|
||||
{"fr", "fr_CA"},
|
||||
{"es", "fr", "en_US"},
|
||||
{"zh_CN", "zh_Hans", "zh_Hans_CN"},
|
||||
{"zh_Hans", "zh_Hans_CN"},
|
||||
{"en_US_123"},
|
||||
{"es_US", "es"},
|
||||
{"de_DE", "es", "fr_FR"},
|
||||
@ -261,7 +261,7 @@ public class GlobalizationPreferencesTest extends TestFmwk {
|
||||
{"fr_CA", "fr"},
|
||||
{"fr_CA", "fr"},
|
||||
{"es", "fr", "en_US", "en"},
|
||||
{"zh_Hans_CN", "zh_CN", "zh_Hans", "zh"},
|
||||
{"zh_Hans_CN", "zh_Hans", "zh"},
|
||||
{"en_US_123", "en_US", "en"},
|
||||
{"es_US", "es"},
|
||||
{"de_DE", "de", "es", "fr_FR", "fr"},
|
||||
@ -347,10 +347,14 @@ public class GlobalizationPreferencesTest extends TestFmwk {
|
||||
gp.reset();
|
||||
gp.setLocales(acceptLanguage);
|
||||
|
||||
List resultLocales = gp.getLocales();
|
||||
List<ULocale> resultLocales = gp.getLocales();
|
||||
if (resultLocales.size() != RESULTS_LOCALEIDS[i].length) {
|
||||
StringBuilder res = new StringBuilder();
|
||||
for (ULocale l : resultLocales) {
|
||||
res.append(l.toString()).append(",");
|
||||
}
|
||||
errln("FAIL: Number of locales mismatch - GP:" + resultLocales.size()
|
||||
+ " Expected:" + RESULTS_LOCALEIDS[i].length);
|
||||
+ " Expected:" + RESULTS_LOCALEIDS[i].length + " index: " + i + " " + res.toString());
|
||||
} else {
|
||||
|
||||
for (int j = 0; j < RESULTS_LOCALEIDS[i].length; j++) {
|
||||
|
@ -673,10 +673,10 @@ public class ULocaleTest extends TestFmwk {
|
||||
{"x-piglatin", "", "ML", "", "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML"}, /* Multibyte English */
|
||||
{"i-cherokee", "","US", "", "i-Cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US"},
|
||||
{"x-filfli", "", "MT", "FILFLA", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA"},
|
||||
{"no", "", "NO", "NY_B", "no-no-ny.utf32@B", "no_NO_NY.utf32@B", "no_NO_NY_B"},
|
||||
{"no", "", "NO", "B", "no-no.utf32@B", "no_NO.utf32@B", "no_NO_B"},
|
||||
{"no", "", "", "NY", "no__ny", "no__NY", null},
|
||||
{"no", "", "", "NY", "no@ny", "no@ny", "no__NY"},
|
||||
{"no", "", "NO", "NY_B", "no-no-ny.utf32@B", "no_NO_NY.utf32@B", "nb_NO_NY_B"},
|
||||
{"no", "", "NO", "B", "no-no.utf32@B", "no_NO.utf32@B", "nb_NO_B"},
|
||||
{"no", "", "", "NY", "no__ny", "no__NY", "nb__NY"},
|
||||
{"no", "", "", "NY", "no@ny", "no@ny", "nb__NY"},
|
||||
{"el", "Latn", "", "", "el-latn", "el_Latn", null},
|
||||
{"en", "Cyrl", "RU", "", "en-cyrl-ru", "en_Cyrl_RU", null},
|
||||
{"qq", "Qqqq", "QQ", "QQ", "qq_Qqqq_QQ_QQ", "qq_Qqqq_QQ_QQ", null},
|
||||
@ -893,13 +893,13 @@ public class ULocaleTest extends TestFmwk {
|
||||
public void TestCanonicalization(){
|
||||
final String[][]testCases = new String[][]{
|
||||
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
|
||||
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
|
||||
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
|
||||
{ "no_NO_NY", "no_NO_NY", "no_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
{ "no@ny", null, "no__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no-no.utf32@B", null, "no_NO_B" /* not: "nb_NO_B" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
{ "no@ny", null, "nb__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no-no.utf32@B", null, "nb_NO_B" /* formerly "no_NO_B" [alan ICU3.0]; "no" is now canonicalized to "nb" */ }, /* POSIX ID */
|
||||
{ "en-BOONT", "en__BOONT", "en__BOONT" }, /* registered name */
|
||||
{ "de-1901", "de__1901", "de__1901" }, /* registered name */
|
||||
{ "de-1906", "de__1906", "de__1906" }, /* registered name */
|
||||
@ -910,7 +910,7 @@ public class ULocaleTest extends TestFmwk {
|
||||
{ "x-piglatin_ML.MBE", null, "x-piglatin_ML" },
|
||||
{ "i-cherokee_US.utf7", null, "i-cherokee_US" },
|
||||
{ "x-filfli_MT_FILFLA.gb-18030", null, "x-filfli_MT_FILFLA" },
|
||||
{ "no-no-ny.utf8@B", null, "no_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
{ "no-no-ny.utf8@B", null, "nb_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
|
||||
/* fleshing out canonicalization */
|
||||
/* sort keywords, ';' is separator so not present at end in canonical form */
|
||||
@ -919,7 +919,7 @@ public class ULocaleTest extends TestFmwk {
|
||||
{ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
/* norwegian is just too weird, if we handle things in their full generality */
|
||||
/* this is a negative test to show that we DO NOT handle 'lang=no,var=NY' specially. */
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "nb_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
|
||||
/* test cases reflecting internal resource bundle usage */
|
||||
/* root is just a language */
|
||||
@ -957,14 +957,14 @@ public class ULocaleTest extends TestFmwk {
|
||||
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
|
||||
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
|
||||
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
|
||||
{ "qz-qz@Euro", null, "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */
|
||||
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
|
||||
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_YU_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
|
||||
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
|
||||
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
|
||||
/* PRE_EURO and EURO conversions don't affect other keywords */
|
||||
@ -1590,7 +1590,7 @@ public class ULocaleTest extends TestFmwk {
|
||||
/*3*/ { null, "true" },
|
||||
/*4*/ { "es", "false" },
|
||||
/*5*/ { "de", "false" },
|
||||
/*6*/ { "zh_TW", "false" },
|
||||
/*6*/ { "zh_Hant_TW", "false" },
|
||||
/*7*/ { "zh", "true" },
|
||||
};
|
||||
|
||||
@ -5154,4 +5154,84 @@ public class ULocaleTest extends TestFmwk {
|
||||
Assert.assertEquals(testData[row][1], loc.toLanguageTag());
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function: parses languageTag with ULocale.forLanguageTag(), canonicalizes the
// resulting locale with ULocale.createCanonical(), and returns it as a language tag again,
// so the tests below can compare canonical forms as plain strings.
|
||||
private String canonicalTag(String languageTag) {
|
||||
return ULocale.createCanonical(ULocale.forLanguageTag(languageTag)).toLanguageTag();
|
||||
}
|
||||
|
||||
// Checks the language tags produced by canonicalTag() for inputs that use aliased
// languages (optionally combined with a variant, script or country) and aliased
// territory codes. Each group first checks the bare alias, then repeats it with extra
// subtags (script, country, variants, -u- extensions) that are expected to be carried
// over into the result.
@Test
|
||||
public void TestCanonical() {
|
||||
// Test replacement of languageAlias
|
||||

||||
// language _ variant -> language
// (the matched variant is expected to disappear from the result)
|
||||
Assert.assertEquals("nb", canonicalTag("no-BOKMAL"));
|
||||
// also test with script, country and extensions
|
||||
Assert.assertEquals("nb-Cyrl-ID-u-ca-japanese", canonicalTag("no-Cyrl-ID-BOKMAL-u-ca-japanese"));
|
||||
// also test with other variants, script, country and extensions
// (only BOKMAL is expected to be removed; the other variants stay)
|
||||
Assert.assertEquals("nb-Cyrl-ID-1901-xsistemo-u-ca-japanese",
|
||||
canonicalTag("no-Cyrl-ID-1901-BOKMAL-xsistemo-u-ca-japanese"));
|
||||
Assert.assertEquals("nb-Cyrl-ID-1901-u-ca-japanese",
|
||||
canonicalTag("no-Cyrl-ID-1901-BOKMAL-u-ca-japanese"));
|
||||
Assert.assertEquals("nb-Cyrl-ID-xsistemo-u-ca-japanese",
|
||||
canonicalTag("no-Cyrl-ID-BOKMAL-xsistemo-u-ca-japanese"));
|
||||

||||
Assert.assertEquals("nn", canonicalTag("no-NYNORSK"));
|
||||
// also test with script, country and extensions
|
||||
Assert.assertEquals("nn-Cyrl-ID-u-ca-japanese", canonicalTag("no-Cyrl-ID-NYNORSK-u-ca-japanese"));
|
||||

||||
Assert.assertEquals("ssy", canonicalTag("aa-SAAHO"));
|
||||
// also test with script, country and extensions
|
||||
Assert.assertEquals("ssy-Devn-IN-u-ca-japanese", canonicalTag("aa-Devn-IN-SAAHO-u-ca-japanese"));
|
||||

||||
// language -> language
|
||||
Assert.assertEquals("aas", canonicalTag("aam"));
|
||||
// also test with script, country, variants and extensions
|
||||
Assert.assertEquals("aas-Cyrl-ID-3456-u-ca-japanese", canonicalTag("aam-Cyrl-ID-3456-u-ca-japanese"));
|
||||

||||
// language -> language _ Script
|
||||
Assert.assertEquals("sr-Latn", canonicalTag("sh"));
|
||||
// also test with script
// (a script given in the input is expected to win over the replacement's script)
|
||||
Assert.assertEquals("sr-Cyrl", canonicalTag("sh-Cyrl"));
|
||||
// also test with country, variants and extensions
|
||||
Assert.assertEquals("sr-Latn-ID-3456-u-ca-roc", canonicalTag("sh-ID-3456-u-ca-roc"));
|
||||

||||
// language -> language _ country
|
||||
Assert.assertEquals("fa-AF", canonicalTag("prs"));
|
||||
// also test with country
// (a country given in the input is expected to win over the replacement's country)
|
||||
Assert.assertEquals("fa-RU", canonicalTag("prs-RU"));
|
||||
// also test with script, variants and extensions
|
||||
Assert.assertEquals("fa-Cyrl-AF-1009-u-ca-roc", canonicalTag("prs-Cyrl-1009-u-ca-roc"));
|
||||

||||
// language _ country -> language _ script _ country
|
||||
Assert.assertEquals("pa-Guru-IN", canonicalTag("pa-IN"));
|
||||
// also test with script
// (with an explicit script the input is expected to come back unchanged)
|
||||
Assert.assertEquals("pa-Latn-IN", canonicalTag("pa-Latn-IN"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("pa-Guru-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi"));
|
||||

||||
// language _ script _ country -> language _ country
|
||||
Assert.assertEquals("ky-KG", canonicalTag("ky-Cyrl-KG"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("ky-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc"));
|
||||

||||
// Test replacement of territoryAlias
|
||||
// 554 has one replacement
|
||||
Assert.assertEquals("en-NZ", canonicalTag("en-554"));
|
||||
Assert.assertEquals("en-NZ-u-nu-arab", canonicalTag("en-554-u-nu-arab"));
|
||||

||||
// 172 has multiple replacements; the expected values below show the chosen
// replacement varying with the language (RU for ru, UZ for uz)
|
||||
// also test with variants
|
||||
Assert.assertEquals("ru-RU-1234", canonicalTag("ru-172-1234"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("ru-RU-1234-u-nu-latn", canonicalTag("ru-172-1234-u-nu-latn"));
|
||||
Assert.assertEquals("uz-UZ", canonicalTag("uz-172"));
|
||||
// also test with scripts
|
||||
Assert.assertEquals("uz-Cyrl-UZ", canonicalTag("uz-Cyrl-172"));
|
||||
Assert.assertEquals("uz-Bopo-UZ", canonicalTag("uz-Bopo-172"));
|
||||
// also test with variants, scripts and extensions
|
||||
Assert.assertEquals("uz-Cyrl-UZ-5678-u-nu-latn", canonicalTag("uz-Cyrl-172-5678-u-nu-latn"));
|
||||
// a language not used in this region; the expected result uses RU
// (presumably the default replacement for 172 -- confirm against the alias data)
|
||||
Assert.assertEquals("fr-RU", canonicalTag("fr-172"));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user