ICU-13786 Fix addLikelySubtags/minimizeSubtags

See #1140
This commit is contained in:
Frank Tang 2020-05-01 22:13:37 +00:00 committed by Frank Yung-Fong Tang
parent c5ebb80a73
commit ec7e29f2b6
4 changed files with 2239 additions and 28 deletions

View File

@ -826,7 +826,7 @@ error:
} \
} UPRV_BLOCK_MACRO_END
static void
static UBool
_uloc_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err) {
@ -897,15 +897,22 @@ _uloc_addLikelySubtags(const char* localeID,
sink.Append(localeID, localIDLength);
}
return;
return success;
error:
if (!U_FAILURE(*err)) {
*err = U_ILLEGAL_ARGUMENT_ERROR;
}
return FALSE;
}
// Add likely subtags to the sink
// return true if the value in the sink is produced by a match during the lookup
// return false if the value in the sink is the same as input because there is
// no match after the lookup.
static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
static void
_uloc_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
@ -921,6 +928,7 @@ _uloc_minimizeSubtags(const char* localeID,
const char* trailing = "";
int32_t trailingLength = 0;
int32_t trailingIndex = 0;
UBool successGetMax = FALSE;
if(U_FAILURE(*err)) {
goto error;
@ -961,7 +969,7 @@ _uloc_minimizeSubtags(const char* localeID,
{
icu::CharString base;
{
icu::CharStringByteSink sink(&base);
icu::CharStringByteSink baseSink(&base);
createTagString(
lang,
langLength,
@ -971,7 +979,7 @@ _uloc_minimizeSubtags(const char* localeID,
regionLength,
NULL,
0,
sink,
baseSink,
err);
}
@ -980,8 +988,8 @@ _uloc_minimizeSubtags(const char* localeID,
* from AddLikelySubtags.
**/
{
icu::CharStringByteSink sink(&maximizedTagBuffer);
ulocimp_addLikelySubtags(base.data(), sink, err);
icu::CharStringByteSink maxSink(&maximizedTagBuffer);
successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
}
}
@ -989,13 +997,40 @@ _uloc_minimizeSubtags(const char* localeID,
goto error;
}
if (!successGetMax) {
/**
* If we got here, return the locale ID parameter unchanged.
**/
const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
sink.Append(localeID, localeIDLength);
return;
}
// In the following, the lang, script, region are referring to those in
// the maximizedTagBuffer, not the one in the localeID.
langLength = sizeof(lang);
scriptLength = sizeof(script);
regionLength = sizeof(region);
parseTagString(
maximizedTagBuffer.data(),
lang,
&langLength,
script,
&scriptLength,
region,
&regionLength,
err);
if(U_FAILURE(*err)) {
goto error;
}
/**
* Start first with just the language.
**/
{
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1005,14 +1040,15 @@ _uloc_minimizeSubtags(const char* localeID,
0,
NULL,
0,
sink,
tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (!tagBuffer.isEmpty() && uprv_strnicmp(
else if (!tagBuffer.isEmpty() &&
uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@ -1039,7 +1075,7 @@ _uloc_minimizeSubtags(const char* localeID,
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1049,14 +1085,15 @@ _uloc_minimizeSubtags(const char* localeID,
regionLength,
NULL,
0,
sink,
tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
else if (!tagBuffer.isEmpty() &&
uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@ -1081,10 +1118,10 @@ _uloc_minimizeSubtags(const char* localeID,
* since trying with all three subtags would only yield the
* maximal version that we already have.
**/
if (scriptLength > 0 && regionLength > 0) {
if (scriptLength > 0) {
icu::CharString tagBuffer;
{
icu::CharStringByteSink sink(&tagBuffer);
icu::CharStringByteSink tagSink(&tagBuffer);
createLikelySubtagsString(
lang,
langLength,
@ -1094,14 +1131,15 @@ _uloc_minimizeSubtags(const char* localeID,
0,
NULL,
0,
sink,
tagSink,
err);
}
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
else if (!tagBuffer.isEmpty() &&
uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {
@ -1123,10 +1161,19 @@ _uloc_minimizeSubtags(const char* localeID,
{
/**
* If we got here, return the locale ID parameter.
* If we got here, return the max + trail.
**/
const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
sink.Append(localeID, localeIDLength);
createTagString(
lang,
langLength,
script,
scriptLength,
region,
regionLength,
trailing,
trailingLength,
sink,
err);
return;
}
@ -1193,15 +1240,23 @@ uloc_addLikelySubtags(const char* localeID,
return reslen;
}
/**
 * Canonicalize localeID into a stack buffer and, if that succeeds, hand the
 * canonical form to _uloc_addLikelySubtags so the result is written to sink.
 *
 * @param localeID  The locale ID to expand.
 * @param sink      Receives the output produced by _uloc_addLikelySubtags.
 * @param status    ICU error code, passed through to the callees.
 * @return The value returned by _uloc_addLikelySubtags, or FALSE when
 *         do_canonicalize does not succeed (in which case this function
 *         itself writes nothing to sink).
 */
static UBool
_ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode* status) {
    char canonicalID[ULOC_FULLNAME_CAPACITY];

    // Guard clause: without a canonical ID there is nothing to look up.
    if (!do_canonicalize(localeID, canonicalID, sizeof canonicalID, status)) {
        return FALSE;
    }

    return _uloc_addLikelySubtags(canonicalID, sink, status);
}
/**
 * Exported entry point that appends the likely-subtags expansion of localeID
 * to sink.
 *
 * All of the work (canonicalization followed by the likely-subtags lookup) is
 * done by the file-local helper _ulocimp_addLikelySubtags. This wrapper must
 * call it exactly once: canonicalizing and expanding here as well would append
 * the result to sink a second time.
 *
 * @param localeID  The locale ID to expand.
 * @param sink      Receives the expanded locale ID.
 * @param status    ICU error code, passed through to the helper.
 */
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
    // The helper's UBool result (whether a match was found) is not part of
    // this void API, so it is deliberately discarded.
    _ulocimp_addLikelySubtags(localeID, sink, status);
}
U_CAPI int32_t U_EXPORT2

View File

@ -3653,7 +3653,7 @@ const char* const basic_minimize_data[][2] = {
"de_Latn_DE_POSIX_1901",
"de__POSIX_1901"
}, {
"",
"zzz",
""
}, {
"en_Latn_US@calendar=gregorian",

File diff suppressed because it is too large Load Diff

View File

@ -3932,6 +3932,50 @@ public class ULocaleTest extends TestFmwk {
"art_Moon_AQ",
"",
""
}, {
"aae_Latn_IT",
"aae_Latn_IT",
"aae_Latn_IT"
}, {
"aae_Thai_CO",
"aae_Thai_CO",
"aae_Thai_CO"
}, {
"und_CW",
"pap_Latn_CW",
"pap_CW"
}, {
"zh_Hant",
"zh_Hant_TW",
"zh_TW"
}, {
"zh_Hani",
"zh_Hani_CN",
"zh_Hani"
}, {
"und",
"en_Latn_US",
"en"
}, {
"und_Thai",
"th_Thai_TH",
"th"
}, {
"und_419",
"es_Latn_419",
"es_419"
}, {
"und_150",
"ru_Cyrl_RU",
"ru"
}, {
"und_AT",
"de_Latn_AT",
"de_AT"
}, {
"und_US",
"en_Latn_US",
"en"
}
};