ICU-20478 Sort variant in (for|to)LanguageTag of icu::Locale and ULocale

See #836
This commit is contained in:
Frank Tang 2019-10-23 17:12:56 +00:00 committed by Frank Yung-Fong Tang
parent 176674f9f1
commit 84f6735fde
6 changed files with 66 additions and 7 deletions

View File

@ -1110,6 +1110,19 @@ _appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool stri
}
}
/**
 * Reorders, in place, the variant strings held by the singly linked list
 * that starts at `first` so that they end up in ascending order as judged
 * by uprv_compareInvCharsAsAscii().
 *
 * Only the `variant` pointers are exchanged between entries; the entries
 * themselves and their `next` links are left untouched. A NULL `first`
 * (empty list) is a no-op.
 *
 * @param first head of the variant list, may be NULL.
 */
static void _sortVariants(VariantListEntry* first) {
    VariantListEntry* anchor = first;
    while (anchor != NULL) {
        /* Compare the anchor against every entry that follows it. Each time a
         * later entry orders before the anchor, the two strings trade places,
         * so when this scan ends the anchor holds the smallest remaining one. */
        for (VariantListEntry* probe = anchor->next; probe != NULL; probe = probe->next) {
            if (uprv_compareInvCharsAsAscii(anchor->variant, probe->variant) <= 0) {
                continue;  /* already in order relative to the anchor */
            }
            const char* held = probe->variant;
            probe->variant = anchor->variant;
            anchor->variant = held;
        }
        anchor = anchor->next;
    }
}
static void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
char buf[ULOC_FULLNAME_CAPACITY];
@ -1199,6 +1212,9 @@ _appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st
if (varFirst != NULL) {
int32_t varLen;
/* per UTS35, we should sort the variants */
_sortVariants(varFirst);
/* write out validated/normalized variants to the target */
var = varFirst;
while (var != NULL) {
@ -2822,6 +2838,7 @@ ulocimp_forLanguageTag(const char* langtag,
}
/* variants */
_sortVariants(lt.getAlias()->variants);
n = ultag_getVariantsSize(lt.getAlias());
if (n > 0) {
if (noRegion) {

View File

@ -6029,7 +6029,10 @@ const char* const locale_to_langtag[][3] = {
{"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl", NULL},
{"en_US_1234", "en-US-1234", "en-US-1234"},
{"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb", "en-US-varianta-variantb"},
{"ja__9876_5432", "ja-9876-5432", "ja-9876-5432"},
{"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb", "en-US-varianta-variantb"}, /* ICU-20478 */
{"ja__9876_5432", "ja-5432-9876", "ja-5432-9876"}, /* ICU-20478 */
{"sl__ROZAJ_BISKE_1994", "sl-1994-biske-rozaj", "sl-1994-biske-rozaj"}, /* ICU-20478 */
{"en__SCOUSE_FONIPA", "en-fonipa-scouse", "en-fonipa-scouse"}, /* ICU-20478 */
{"zh_Hant__VAR", "zh-Hant-x-lvariant-var", NULL},
{"es__BADVARIANT_GOODVAR", "es-goodvar", NULL},
{"en@calendar=gregorian", "en-u-ca-gregory", "en-u-ca-gregory"},
@ -6187,7 +6190,16 @@ static const struct {
{"bogus", "bogus", FULL_LENGTH},
{"boguslang", "", 0},
{"EN-lATN-us", "en_Latn_US", FULL_LENGTH},
{"und-variant-1234", "__VARIANT_1234", FULL_LENGTH},
{"und-variant-1234", "__1234_VARIANT", FULL_LENGTH}, /* ICU-20478 */
{"ja-9876-5432", "ja__5432_9876", FULL_LENGTH}, /* ICU-20478 */
{"en-US-varianta-variantb", "en_US_VARIANTA_VARIANTB", FULL_LENGTH}, /* ICU-20478 */
{"en-US-variantb-varianta", "en_US_VARIANTA_VARIANTB", FULL_LENGTH}, /* ICU-20478 */
{"sl-rozaj-1994-biske", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
{"sl-biske-1994-rozaj", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
{"sl-1994-rozaj-biske", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
{"sl-rozaj-biske-1994", "sl__1994_BISKE_ROZAJ", FULL_LENGTH}, /* ICU-20478 */
{"en-fonipa-scouse", "en__FONIPA_SCOUSE", FULL_LENGTH}, /* ICU-20478 */
{"en-scouse-fonipa", "en__FONIPA_SCOUSE", FULL_LENGTH}, /* ICU-20478 */
{"und-varzero-var1-vartwo", "__VARZERO", 11},
{"en-u-ca-gregory", "en@calendar=gregorian", FULL_LENGTH},
{"en-U-cu-USD", "en@currency=usd", FULL_LENGTH},

View File

@ -3156,6 +3156,7 @@ void LocaleTest::TestForLanguageTag() {
static const char tag_ill[] = "!";
static const char tag_no_nul[] = { 'e', 'n', '-', 'G', 'B' };
static const char tag_ext[] = "en-GB-1-abc-efg-a-xyz";
static const char tag_var[] = "sl-rozaj-biske-1994";
static const Locale loc_en("en_US");
static const Locale loc_oed("en_GB_OXENDICT");
@ -3163,6 +3164,7 @@ void LocaleTest::TestForLanguageTag() {
static const Locale loc_null("");
static const Locale loc_gb("en_GB");
static const Locale loc_ext("en_GB@1=abc-efg;a=xyz");
static const Locale loc_var("sl__1994_BISKE_ROZAJ");
Locale result_en = Locale::forLanguageTag(tag_en, status);
status.errIfFailureAndReset("\"%s\"", tag_en);
@ -3176,6 +3178,10 @@ void LocaleTest::TestForLanguageTag() {
status.errIfFailureAndReset("\"%s\"", tag_af);
assertEquals(tag_af, loc_af.getName(), result_af.getName());
Locale result_var = Locale::forLanguageTag(tag_var, status);
status.errIfFailureAndReset("\"%s\"", tag_var);
assertEquals(tag_var, loc_var.getName(), result_var.getName());
Locale result_ill = Locale::forLanguageTag(tag_ill, status);
assertEquals(tag_ill, U_ILLEGAL_ARGUMENT_ERROR, status.reset());
assertTrue(result_ill.getName(), result_ill.isBogus());
@ -3210,12 +3216,14 @@ void LocaleTest::TestToLanguageTag() {
static const Locale loc_ext("en@0=abc;a=xyz");
static const Locale loc_empty("");
static const Locale loc_ill("!");
static const Locale loc_variant("sl__ROZAJ_BISKE_1994");
static const char tag_c[] = "en-US-u-va-posix";
static const char tag_en[] = "en-US";
static const char tag_af[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
static const char tag_ext[] = "en-0-abc-a-xyz";
static const char tag_und[] = "und";
static const char tag_variant[] = "sl-1994-biske-rozaj";
std::string result;
StringByteSink<std::string> sink(&result);
@ -3247,6 +3255,10 @@ void LocaleTest::TestToLanguageTag() {
status.errIfFailureAndReset("\"%s\"", loc_ill.getName());
assertEquals(loc_ill.getName(), tag_und, result_ill.c_str());
std::string result_variant = loc_variant.toLanguageTag<std::string>(status);
status.errIfFailureAndReset("\"%s\"", loc_variant.getName());
assertEquals(loc_variant.getName(), tag_variant, result_variant.c_str());
Locale loc_bogus;
loc_bogus.setToBogus();
std::string result_bogus = loc_bogus.toLanguageTag<std::string>(status);

View File

@ -9,6 +9,7 @@
package com.ibm.icu.impl.locale;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@ -326,7 +327,8 @@ public final class InternalLocaleBuilder {
_script = langtag.getScript();
_region = langtag.getRegion();
List<String> bcpVariants = langtag.getVariants();
ArrayList<String> bcpVariants = new ArrayList<String>(langtag.getVariants());
Collections.sort(bcpVariants);
if (bcpVariants.size() > 0) {
StringBuilder var = new StringBuilder(bcpVariants.get(0));
for (int i = 1; i < bcpVariants.size(); i++) {

View File

@ -3246,7 +3246,10 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
}
List<String>subtags = tag.getVariants();
for (String s : subtags) {
// ICU-20478: Sort variants per UTS35.
ArrayList<String> variants = new ArrayList<String>(subtags);
Collections.sort(variants);
for (String s : variants) {
buf.append(LanguageTag.SEP);
buf.append(LanguageTag.canonicalizeVariant(s));
}

View File

@ -4106,8 +4106,8 @@ public class ULocaleTest extends TestFmwk {
{"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl"},
{"en_US_1234", "en-US-1234"},
{"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb"},
{"en_US_VARIANTB_VARIANTA", "en-US-variantb-varianta"},
{"ja__9876_5432", "ja-9876-5432"},
{"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb"}, /* ICU-20478 */
{"ja__9876_5432", "ja-5432-9876"}, /* ICU-20478 */
{"zh_Hant__VAR", "zh-Hant-x-lvariant-var"},
{"es__BADVARIANT_GOODVAR", "es"},
{"es__GOODVAR_BAD_BADVARIANT", "es-goodvar-x-lvariant-bad"},
@ -4131,6 +4131,9 @@ public class ULocaleTest extends TestFmwk {
{"en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo"},
/* ICU-20320*/
{"en@9=efg;a=baz", "en-9-efg-a-baz"},
/* ICU-20478 */
{"sl__ROZAJ_BISKE_1994", "sl-1994-biske-rozaj"},
{"en__SCOUSE_FONIPA", "en-fonipa-scouse"},
};
for (int i = 0; i < locale_to_langtag.length; i++) {
@ -4228,7 +4231,7 @@ public class ULocaleTest extends TestFmwk {
{"bogus", "bogus", NOERROR},
{"boguslang", "", Integer.valueOf(0)},
{"EN-lATN-us", "en_Latn_US", NOERROR},
{"und-variant-1234", "__VARIANT_1234", NOERROR},
{"und-variant-1234", "__1234_VARIANT", NOERROR}, /* ICU-20478 */
{"und-varzero-var1-vartwo", "__VARZERO", Integer.valueOf(12)},
{"en-u-ca-gregory", "en@calendar=gregorian", NOERROR},
{"en-U-cu-USD", "en@currency=usd", NOERROR},
@ -4274,6 +4277,16 @@ public class ULocaleTest extends TestFmwk {
/* #20410 */
{"art-lojban-x-0", "jbo@x=0", NOERROR},
{"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
/* ICU-20478 */
{"ja-9876-5432", "ja__5432_9876", NOERROR},
{"en-US-variantb-varianta", "en_US_VARIANTA_VARIANTB", NOERROR},
{"en-US-varianta-variantb", "en_US_VARIANTA_VARIANTB", NOERROR},
{"sl-rozaj-biske-1994", "sl__1994_BISKE_ROZAJ", NOERROR},
{"sl-biske-rozaj-1994", "sl__1994_BISKE_ROZAJ", NOERROR},
{"sl-biske-1994-rozaj", "sl__1994_BISKE_ROZAJ", NOERROR},
{"sl-1994-biske-rozaj", "sl__1994_BISKE_ROZAJ", NOERROR},
{"en-fonipa-scouse", "en__FONIPA_SCOUSE", NOERROR},
{"en-scouse-fonipa", "en__FONIPA_SCOUSE", NOERROR},
};
for (int i = 0; i < langtag_to_locale.length; i++) {