ICU-20478 Sort variant in (for|to)LanguageTag of icu::Locale and ULocale
See #836
This commit is contained in:
parent
176674f9f1
commit
84f6735fde
@ -1110,6 +1110,19 @@ _appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool stri
|
||||
}
|
||||
}
|
||||
|
||||
static void _sortVariants(VariantListEntry* first) {
|
||||
for (VariantListEntry* var1 = first; var1 != NULL; var1 = var1->next) {
|
||||
for (VariantListEntry* var2 = var1->next; var2 != NULL; var2 = var2->next) {
|
||||
// Swap var1->variant and var2->variant.
|
||||
if (uprv_compareInvCharsAsAscii(var1->variant, var2->variant) > 0) {
|
||||
const char* temp = var1->variant;
|
||||
var1->variant = var2->variant;
|
||||
var2->variant = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
|
||||
char buf[ULOC_FULLNAME_CAPACITY];
|
||||
@ -1199,6 +1212,9 @@ _appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st
|
||||
if (varFirst != NULL) {
|
||||
int32_t varLen;
|
||||
|
||||
/* per UTS35, we should sort the variants */
|
||||
_sortVariants(varFirst);
|
||||
|
||||
/* write out validated/normalized variants to the target */
|
||||
var = varFirst;
|
||||
while (var != NULL) {
|
||||
@ -2822,6 +2838,7 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
}
|
||||
|
||||
/* variants */
|
||||
_sortVariants(lt.getAlias()->variants);
|
||||
n = ultag_getVariantsSize(lt.getAlias());
|
||||
if (n > 0) {
|
||||
if (noRegion) {
|
||||
|
@ -6029,7 +6029,10 @@ const char* const locale_to_langtag[][3] = {
|
||||
{"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl", NULL},
|
||||
{"en_US_1234", "en-US-1234", "en-US-1234"},
|
||||
{"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb", "en-US-varianta-variantb"},
|
||||
{"ja__9876_5432", "ja-9876-5432", "ja-9876-5432"},
|
||||
{"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb", "en-US-varianta-variantb"}, /* ICU-20478 */
|
||||
{"ja__9876_5432", "ja-5432-9876", "ja-5432-9876"}, /* ICU-20478 */
|
||||
{"sl__ROZAJ_BISKE_1994", "sl-1994-biske-rozaj", "sl-1994-biske-rozaj"}, /* ICU-20478 */
|
||||
{"en__SCOUSE_FONIPA", "en-fonipa-scouse", "en-fonipa-scouse"}, /* ICU-20478 */
|
||||
{"zh_Hant__VAR", "zh-Hant-x-lvariant-var", NULL},
|
||||
{"es__BADVARIANT_GOODVAR", "es-goodvar", NULL},
|
||||
{"en@calendar=gregorian", "en-u-ca-gregory", "en-u-ca-gregory"},
|
||||
@ -6187,7 +6190,16 @@ static const struct {
|
||||
{"bogus", "bogus", FULL_LENGTH},
|
||||
{"boguslang", "", 0},
|
||||
{"EN-lATN-us", "en_Latn_US", FULL_LENGTH},
|
||||
{"und-variant-1234", "__VARIANT_1234", FULL_LENGTH},
|
||||
{"und-variant-1234", "__1234_VARIANT", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"ja-9876-5432", "ja__5432_9876", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"en-US-varianta-variantb", "en_US_VARIANTA_VARIANTB", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"en-US-variantb-varianta", "en_US_VARIANTA_VARIANTB", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"sl-rozaj-1994-biske", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"sl-biske-1994-rozaj", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"sl-1994-rozaj-biske", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"sl-rozaj-biske-1994", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"en-fonipa-scouse", "en__FONIPA_SCOUSE", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"en-scouse-fonipa", "en__FONIPA_SCOUSE", FULL_LENGTH}, /* ICU-20478 */
|
||||
{"und-varzero-var1-vartwo", "__VARZERO", 11},
|
||||
{"en-u-ca-gregory", "en@calendar=gregorian", FULL_LENGTH},
|
||||
{"en-U-cu-USD", "en@currency=usd", FULL_LENGTH},
|
||||
|
@ -3156,6 +3156,7 @@ void LocaleTest::TestForLanguageTag() {
|
||||
static const char tag_ill[] = "!";
|
||||
static const char tag_no_nul[] = { 'e', 'n', '-', 'G', 'B' };
|
||||
static const char tag_ext[] = "en-GB-1-abc-efg-a-xyz";
|
||||
static const char tag_var[] = "sl-rozaj-biske-1994";
|
||||
|
||||
static const Locale loc_en("en_US");
|
||||
static const Locale loc_oed("en_GB_OXENDICT");
|
||||
@ -3163,6 +3164,7 @@ void LocaleTest::TestForLanguageTag() {
|
||||
static const Locale loc_null("");
|
||||
static const Locale loc_gb("en_GB");
|
||||
static const Locale loc_ext("en_GB@1=abc-efg;a=xyz");
|
||||
static const Locale loc_var("sl__1994_BISKE_ROZAJ");
|
||||
|
||||
Locale result_en = Locale::forLanguageTag(tag_en, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag_en);
|
||||
@ -3176,6 +3178,10 @@ void LocaleTest::TestForLanguageTag() {
|
||||
status.errIfFailureAndReset("\"%s\"", tag_af);
|
||||
assertEquals(tag_af, loc_af.getName(), result_af.getName());
|
||||
|
||||
Locale result_var = Locale::forLanguageTag(tag_var, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag_var);
|
||||
assertEquals(tag_var, loc_var.getName(), result_var.getName());
|
||||
|
||||
Locale result_ill = Locale::forLanguageTag(tag_ill, status);
|
||||
assertEquals(tag_ill, U_ILLEGAL_ARGUMENT_ERROR, status.reset());
|
||||
assertTrue(result_ill.getName(), result_ill.isBogus());
|
||||
@ -3210,12 +3216,14 @@ void LocaleTest::TestToLanguageTag() {
|
||||
static const Locale loc_ext("en@0=abc;a=xyz");
|
||||
static const Locale loc_empty("");
|
||||
static const Locale loc_ill("!");
|
||||
static const Locale loc_variant("sl__ROZAJ_BISKE_1994");
|
||||
|
||||
static const char tag_c[] = "en-US-u-va-posix";
|
||||
static const char tag_en[] = "en-US";
|
||||
static const char tag_af[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
|
||||
static const char tag_ext[] = "en-0-abc-a-xyz";
|
||||
static const char tag_und[] = "und";
|
||||
static const char tag_variant[] = "sl-1994-biske-rozaj";
|
||||
|
||||
std::string result;
|
||||
StringByteSink<std::string> sink(&result);
|
||||
@ -3247,6 +3255,10 @@ void LocaleTest::TestToLanguageTag() {
|
||||
status.errIfFailureAndReset("\"%s\"", loc_ill.getName());
|
||||
assertEquals(loc_ill.getName(), tag_und, result_ill.c_str());
|
||||
|
||||
std::string result_variant = loc_variant.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_variant.getName());
|
||||
assertEquals(loc_variant.getName(), tag_variant, result_variant.c_str());
|
||||
|
||||
Locale loc_bogus;
|
||||
loc_bogus.setToBogus();
|
||||
std::string result_bogus = loc_bogus.toLanguageTag<std::string>(status);
|
||||
|
@ -9,6 +9,7 @@
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
@ -326,7 +327,8 @@ public final class InternalLocaleBuilder {
|
||||
_script = langtag.getScript();
|
||||
_region = langtag.getRegion();
|
||||
|
||||
List<String> bcpVariants = langtag.getVariants();
|
||||
ArrayList<String> bcpVariants = new ArrayList<String>(langtag.getVariants());
|
||||
Collections.sort(bcpVariants);
|
||||
if (bcpVariants.size() > 0) {
|
||||
StringBuilder var = new StringBuilder(bcpVariants.get(0));
|
||||
for (int i = 1; i < bcpVariants.size(); i++) {
|
||||
|
@ -3246,7 +3246,10 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
||||
}
|
||||
|
||||
List<String>subtags = tag.getVariants();
|
||||
for (String s : subtags) {
|
||||
// ICU-20478: Sort variants per UTS35.
|
||||
ArrayList<String> variants = new ArrayList<String>(subtags);
|
||||
Collections.sort(variants);
|
||||
for (String s : variants) {
|
||||
buf.append(LanguageTag.SEP);
|
||||
buf.append(LanguageTag.canonicalizeVariant(s));
|
||||
}
|
||||
|
@ -4106,8 +4106,8 @@ public class ULocaleTest extends TestFmwk {
|
||||
{"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl"},
|
||||
{"en_US_1234", "en-US-1234"},
|
||||
{"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb"},
|
||||
{"en_US_VARIANTB_VARIANTA", "en-US-variantb-varianta"},
|
||||
{"ja__9876_5432", "ja-9876-5432"},
|
||||
{"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb"}, /* ICU-20478 */
|
||||
{"ja__9876_5432", "ja-5432-9876"}, /* ICU-20478 */
|
||||
{"zh_Hant__VAR", "zh-Hant-x-lvariant-var"},
|
||||
{"es__BADVARIANT_GOODVAR", "es"},
|
||||
{"es__GOODVAR_BAD_BADVARIANT", "es-goodvar-x-lvariant-bad"},
|
||||
@ -4131,6 +4131,9 @@ public class ULocaleTest extends TestFmwk {
|
||||
{"en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo"},
|
||||
/* ICU-20320*/
|
||||
{"en@9=efg;a=baz", "en-9-efg-a-baz"},
|
||||
/* ICU-20478 */
|
||||
{"sl__ROZAJ_BISKE_1994", "sl-1994-biske-rozaj"},
|
||||
{"en__SCOUSE_FONIPA", "en-fonipa-scouse"},
|
||||
};
|
||||
|
||||
for (int i = 0; i < locale_to_langtag.length; i++) {
|
||||
@ -4228,7 +4231,7 @@ public class ULocaleTest extends TestFmwk {
|
||||
{"bogus", "bogus", NOERROR},
|
||||
{"boguslang", "", Integer.valueOf(0)},
|
||||
{"EN-lATN-us", "en_Latn_US", NOERROR},
|
||||
{"und-variant-1234", "__VARIANT_1234", NOERROR},
|
||||
{"und-variant-1234", "__1234_VARIANT", NOERROR}, /* ICU-20478 */
|
||||
{"und-varzero-var1-vartwo", "__VARZERO", Integer.valueOf(12)},
|
||||
{"en-u-ca-gregory", "en@calendar=gregorian", NOERROR},
|
||||
{"en-U-cu-USD", "en@currency=usd", NOERROR},
|
||||
@ -4274,6 +4277,16 @@ public class ULocaleTest extends TestFmwk {
|
||||
/* #20410 */
|
||||
{"art-lojban-x-0", "jbo@x=0", NOERROR},
|
||||
{"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
|
||||
/* ICU-20478 */
|
||||
{"ja-9876-5432", "ja__5432_9876", NOERROR},
|
||||
{"en-US-variantb-varianta", "en_US_VARIANTA_VARIANTB", NOERROR},
|
||||
{"en-US-varianta-variantb", "en_US_VARIANTA_VARIANTB", NOERROR},
|
||||
{"sl-rozaj-biske-1994", "sl__1994_BISKE_ROZAJ", NOERROR},
|
||||
{"sl-biske-rozaj-1994", "sl__1994_BISKE_ROZAJ", NOERROR},
|
||||
{"sl-biske-1994-rozaj", "sl__1994_BISKE_ROZAJ", NOERROR},
|
||||
{"sl-1994-biske-rozaj", "sl__1994_BISKE_ROZAJ", NOERROR},
|
||||
{"en-fonipa-scouse", "en__FONIPA_SCOUSE", NOERROR},
|
||||
{"en-scouse-fonipa", "en__FONIPA_SCOUSE", NOERROR},
|
||||
};
|
||||
|
||||
for (int i = 0; i < langtag_to_locale.length; i++) {
|
||||
|
Loading…
Reference in New Issue
Block a user