ICU-20915 LocaleMatcher no match: always getSupportedIndex()=-1; remove defaultLocaleIndex field; constructor check if locales are equivalent to default, not just equal; simplify locale sorting; minor builder & test deflaking
This commit is contained in:
parent
94c9ff2089
commit
3edff03393
@ -308,20 +308,22 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
|
||||
return *lsr1 == *lsr2;
|
||||
}
|
||||
|
||||
bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
U_ASSERT(i > 0);
|
||||
int32_t index = uhash_geti(lsrToIndex, &lsr);
|
||||
if (index != 0) {
|
||||
return false;
|
||||
} else {
|
||||
uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
|
||||
return U_SUCCESS(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return suppLength; }
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
|
||||
if (index == 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
supportedLSRs[suppLength] = &lsr;
|
||||
supportedIndexes[suppLength++] = i;
|
||||
}
|
||||
}
|
||||
return suppLength;
|
||||
}
|
||||
|
||||
LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
|
||||
localeDistance(*LocaleDistance::getSingleton(errorCode)),
|
||||
@ -331,15 +333,27 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
|
||||
supportedLsrToIndex(nullptr),
|
||||
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
|
||||
ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
|
||||
ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (thresholdDistance < 0) {
|
||||
thresholdDistance = localeDistance.getDefaultScriptDistance();
|
||||
}
|
||||
const Locale *def = builder.defaultLocale_;
|
||||
LSR builderDefaultLSR;
|
||||
const LSR *defLSR = nullptr;
|
||||
if (def != nullptr) {
|
||||
ownedDefaultLocale = def->clone();
|
||||
if (ownedDefaultLocale == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
def = ownedDefaultLocale;
|
||||
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
defLSR = &builderDefaultLSR;
|
||||
}
|
||||
supportedLocalesLength = builder.supportedLocales_ != nullptr ?
|
||||
builder.supportedLocales_->size() : 0;
|
||||
const Locale *def = builder.defaultLocale_;
|
||||
int32_t idef = -1;
|
||||
if (supportedLocalesLength > 0) {
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., language tag strings)
|
||||
@ -356,15 +370,6 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
}
|
||||
// If the constructor fails partway, we need null pointers for destructibility.
|
||||
uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
|
||||
// Also find the first supported locale whose LSR is
|
||||
// the same as that for the default locale.
|
||||
LSR builderDefaultLSR;
|
||||
const LSR *defLSR = nullptr;
|
||||
if (def != nullptr) {
|
||||
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
defLSR = &builderDefaultLSR;
|
||||
}
|
||||
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
|
||||
const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
|
||||
supportedLocales[i] = locale.clone();
|
||||
@ -376,31 +381,14 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
|
||||
lsr.setHashCode();
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
|
||||
idef = i;
|
||||
defLSR = &lsr; // owned pointer to put into supportedLsrToIndex
|
||||
if (*def == supportedLocale) {
|
||||
def = &supportedLocale; // owned pointer to keep
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, supported index).
|
||||
// We insert the supported locales in the following order:
|
||||
// and an ordered list of (LSR, supported index) for
|
||||
// the supported locales in the following order:
|
||||
// 1. Default locale, if it is supported.
|
||||
// 2. Priority locales (aka "paradigm locales") in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
// In Java, we use a LinkedHashMap for both map & ordered lists.
|
||||
// In C++, we use separate structures.
|
||||
//
|
||||
// We allocate arrays of LSRs and indexes,
|
||||
// with as many slots as supported locales, for simplicity.
|
||||
// We write the default and paradigm LSRs starting from the front of the arrays,
|
||||
// and others starting from the back.
|
||||
// At the end we reverse the non-paradigm LSRs.
|
||||
// We end up wasting as many array slots as there are duplicate supported LSRs,
|
||||
// but the amount of wasted space is small as long as there are few duplicates.
|
||||
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
|
||||
supportedLocalesLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
@ -412,79 +400,53 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t paradigmIndex = 0;
|
||||
int32_t otherIndex = supportedLocalesLength;
|
||||
if (idef >= 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
|
||||
supportedLSRs[0] = defLSR;
|
||||
supportedIndexes[0] = idef;
|
||||
paradigmIndex = 1;
|
||||
int32_t suppLength = 0;
|
||||
// Determine insertion order.
|
||||
// Add locales immediately that are equivalent to the default.
|
||||
MaybeStackArray<int8_t, 100> order(supportedLocalesLength);
|
||||
if (order.getAlias() == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t numParadigms = 0;
|
||||
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
|
||||
if (i == idef) { continue; }
|
||||
const Locale &locale = *supportedLocales[i];
|
||||
const LSR &lsr = lsrs[i];
|
||||
if (defLSR == nullptr) {
|
||||
U_ASSERT(i == 0);
|
||||
def = &locale;
|
||||
defLSR = &lsr;
|
||||
idef = 0;
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
|
||||
supportedLSRs[0] = &lsr;
|
||||
supportedIndexes[0] = 0;
|
||||
paradigmIndex = 1;
|
||||
} else if (idef >= 0 && lsr == *defLSR) {
|
||||
// lsr == *defLSR means that this supported locale is
|
||||
// a duplicate of the default locale.
|
||||
// Either an explicit default locale is supported, and we added it before the loop,
|
||||
// or there is no explicit default locale, and this is
|
||||
// a duplicate of the first supported locale.
|
||||
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
|
||||
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
|
||||
suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
|
||||
} else if (lsr.isEquivalentTo(*defLSR)) {
|
||||
suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
|
||||
} else if (localeDistance.isParadigmLSR(lsr)) {
|
||||
order[i] = 2;
|
||||
++numParadigms;
|
||||
} else {
|
||||
if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
|
||||
if (localeDistance.isParadigmLSR(lsr)) {
|
||||
supportedLSRs[paradigmIndex] = &lsr;
|
||||
supportedIndexes[paradigmIndex++] = i;
|
||||
} else {
|
||||
supportedLSRs[--otherIndex] = &lsr;
|
||||
supportedIndexes[otherIndex] = i;
|
||||
}
|
||||
}
|
||||
order[i] = 3;
|
||||
}
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
}
|
||||
// Reverse the non-paradigm LSRs to be in order, right after the paradigm LSRs.
|
||||
// First fill the unused slots between paradigm LSRs and other LSRs.
|
||||
// This gap is as large as the number of locales with duplicate LSRs.
|
||||
int32_t i = paradigmIndex;
|
||||
int32_t j = supportedLocalesLength - 1;
|
||||
while (i < otherIndex && otherIndex <= j) {
|
||||
supportedLSRs[i] = supportedLSRs[j];
|
||||
supportedIndexes[i++] = supportedIndexes[j--];
|
||||
// Add supported paradigm locales.
|
||||
int32_t paradigmLimit = suppLength + numParadigms;
|
||||
for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
|
||||
if (order[i] == 2) {
|
||||
suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
|
||||
}
|
||||
}
|
||||
// Swap remaining non-paradigm LSRs in place.
|
||||
while (i < j) {
|
||||
const LSR *tempLSR = supportedLSRs[i];
|
||||
supportedLSRs[i] = supportedLSRs[j];
|
||||
supportedLSRs[j] = tempLSR;
|
||||
int32_t tempIndex = supportedIndexes[i];
|
||||
supportedIndexes[i++] = supportedIndexes[j];
|
||||
supportedIndexes[j--] = tempIndex;
|
||||
// Add remaining supported locales.
|
||||
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
|
||||
if (order[i] == 3) {
|
||||
suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
|
||||
}
|
||||
}
|
||||
supportedLSRsLength = supportedLocalesLength - (otherIndex - paradigmIndex);
|
||||
supportedLSRsLength = suppLength;
|
||||
// If supportedLSRsLength < supportedLocalesLength then
|
||||
// we waste as many array slots as there are duplicate supported LSRs,
|
||||
// but the amount of wasted space is small as long as there are few duplicates.
|
||||
}
|
||||
|
||||
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
|
||||
ownedDefaultLocale = def->clone();
|
||||
if (ownedDefaultLocale == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
def = ownedDefaultLocale;
|
||||
}
|
||||
defaultLocale = def;
|
||||
defaultLocaleIndex = idef;
|
||||
|
||||
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
|
||||
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
|
||||
@ -503,8 +465,7 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
|
||||
supportedLSRs(src.supportedLSRs),
|
||||
supportedIndexes(src.supportedIndexes),
|
||||
supportedLSRsLength(src.supportedLSRsLength),
|
||||
ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale),
|
||||
defaultLocaleIndex(src.defaultLocaleIndex) {
|
||||
ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
|
||||
src.supportedLocales = nullptr;
|
||||
src.lsrs = nullptr;
|
||||
src.supportedLocalesLength = 0;
|
||||
@ -514,7 +475,6 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
|
||||
src.supportedLSRsLength = 0;
|
||||
src.ownedDefaultLocale = nullptr;
|
||||
src.defaultLocale = nullptr;
|
||||
src.defaultLocaleIndex = -1;
|
||||
}
|
||||
|
||||
LocaleMatcher::~LocaleMatcher() {
|
||||
@ -544,7 +504,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
|
||||
supportedLSRsLength = src.supportedLSRsLength;
|
||||
ownedDefaultLocale = src.ownedDefaultLocale;
|
||||
defaultLocale = src.defaultLocale;
|
||||
defaultLocaleIndex = src.defaultLocaleIndex;
|
||||
|
||||
src.supportedLocales = nullptr;
|
||||
src.lsrs = nullptr;
|
||||
@ -555,7 +514,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
|
||||
src.supportedLSRsLength = 0;
|
||||
src.ownedDefaultLocale = nullptr;
|
||||
src.defaultLocale = nullptr;
|
||||
src.defaultLocaleIndex = -1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -642,13 +600,13 @@ const Locale *LocaleMatcher::getBestMatchForListString(
|
||||
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
|
||||
const Locale &desiredLocale, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
}
|
||||
int32_t suppIndex = getBestSuppIndex(
|
||||
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
|
||||
nullptr, errorCode);
|
||||
if (U_FAILURE(errorCode) || suppIndex < 0) {
|
||||
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
} else {
|
||||
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
|
||||
}
|
||||
@ -657,12 +615,12 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
|
||||
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
|
||||
Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
|
||||
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
}
|
||||
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
|
||||
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
|
||||
if (U_FAILURE(errorCode) || suppIndex < 0) {
|
||||
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
} else {
|
||||
return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
|
||||
lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
|
||||
@ -696,8 +654,7 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
||||
remainingIter->rememberCurrent(desiredIndex, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return -1; }
|
||||
}
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >= 0 ?
|
||||
LocaleDistance::getIndex(bestIndexAndDistance) : -1;
|
||||
bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
|
||||
}
|
||||
if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
|
||||
break;
|
||||
|
@ -574,6 +574,8 @@ private:
|
||||
LocaleMatcher(const LocaleMatcher &other) = delete;
|
||||
LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
|
||||
|
||||
int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
|
||||
|
||||
int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
|
||||
|
||||
const XLikelySubtags &likelySubtags;
|
||||
@ -595,7 +597,6 @@ private:
|
||||
int32_t supportedLSRsLength;
|
||||
Locale *ownedDefaultLocale;
|
||||
const Locale *defaultLocale;
|
||||
int32_t defaultLocaleIndex;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -272,7 +272,7 @@ void LocaleMatcherTest::testSupportedDefault() {
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"en_GB", locString(result.getSupportedLocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
1, result.getSupportedIndex());
|
||||
-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testUnsupportedDefault() {
|
||||
|
@ -5,7 +5,7 @@ package com.ibm.icu.impl.locale;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.Set;
|
||||
@ -152,7 +152,8 @@ public class LocaleDistance {
|
||||
Set<LSR> paradigmLSRs;
|
||||
if (matchTable.findValue("paradigms", value)) {
|
||||
String[] paradigms = value.getStringArray();
|
||||
paradigmLSRs = new HashSet<>(paradigms.length / 3);
|
||||
// LinkedHashSet for stable order; otherwise a unit test is flaky.
|
||||
paradigmLSRs = new LinkedHashSet<>(paradigms.length / 3);
|
||||
for (int i = 0; i < paradigms.length; i += 3) {
|
||||
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2],
|
||||
LSR.DONT_CARE_FLAGS));
|
||||
@ -209,7 +210,7 @@ public class LocaleDistance {
|
||||
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
|
||||
LSR en = new LSR("en", "Latn", "US", LSR.EXPLICIT_LSR);
|
||||
LSR enGB = new LSR("en", "Latn", "GB", LSR.EXPLICIT_LSR);
|
||||
int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB },
|
||||
int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB }, 1,
|
||||
shiftDistance(50), FavorSubtag.LANGUAGE);
|
||||
defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
|
||||
|
||||
@ -227,7 +228,7 @@ public class LocaleDistance {
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
|
||||
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
|
||||
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, 1,
|
||||
shiftDistance(threshold), favorSubtag);
|
||||
return getDistanceFloor(indexAndDistance);
|
||||
}
|
||||
@ -240,7 +241,7 @@ public class LocaleDistance {
|
||||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in the low bits.
|
||||
*/
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs, int supportedLSRsLength,
|
||||
int shiftedThreshold, FavorSubtag favorSubtag) {
|
||||
// Round up the shifted threshold (if fraction bits are not 0)
|
||||
// for comparison with un-shifted distances until we need fraction bits.
|
||||
@ -252,12 +253,12 @@ public class LocaleDistance {
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
int desLangDistance = trieNext(iter, desired.language, false);
|
||||
long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0;
|
||||
long desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
|
||||
// Index of the supported LSR with the lowest distance.
|
||||
int bestIndex = -1;
|
||||
// Cached lookup info from XLikelySubtags.compareLikely().
|
||||
int bestLikelyInfo = -1;
|
||||
for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) {
|
||||
for (int slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
|
||||
LSR supported = supportedLSRs[slIndex];
|
||||
boolean star = false;
|
||||
int distance = desLangDistance;
|
||||
|
@ -10,8 +10,8 @@ package com.ibm.icu.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
@ -329,9 +329,9 @@ public final class LocaleMatcher {
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
private final LSR[] supportedLSRs;
|
||||
private final int[] supportedIndexes;
|
||||
private final int supportedLSRsLength;
|
||||
private final ULocale defaultULocale;
|
||||
private final Locale defaultLocale;
|
||||
private final int defaultLocaleIndex;
|
||||
|
||||
/**
|
||||
* LocaleMatcher Builder.
|
||||
@ -578,103 +578,85 @@ public final class LocaleMatcher {
|
||||
private LocaleMatcher(Builder builder) {
|
||||
thresholdDistance = builder.thresholdDistance < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
|
||||
int supportedLocalesLength = builder.supportedLocales != null ?
|
||||
builder.supportedLocales.size() : 0;
|
||||
ULocale udef = builder.defaultLocale;
|
||||
Locale def = null;
|
||||
int idef = -1;
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., java.util.Locale)
|
||||
// we can return those by parallel index.
|
||||
supportedULocales = new ULocale[supportedLocalesLength];
|
||||
supportedLocales = new Locale[supportedLocalesLength];
|
||||
// Supported LRSs in input order.
|
||||
LSR lsrs[] = new LSR[supportedLocalesLength];
|
||||
// Also find the first supported locale whose LSR is
|
||||
// the same as that for the default locale.
|
||||
LSR defLSR = null;
|
||||
if (udef != null) {
|
||||
def = udef.toLocale();
|
||||
defLSR = getMaximalLsrOrUnd(udef);
|
||||
}
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., java.util.Locale)
|
||||
// we can return those by parallel index.
|
||||
int supportedLocalesLength = builder.supportedLocales != null ?
|
||||
builder.supportedLocales.size() : 0;
|
||||
supportedULocales = new ULocale[supportedLocalesLength];
|
||||
supportedLocales = new Locale[supportedLocalesLength];
|
||||
// Supported LRSs in input order.
|
||||
LSR lsrs[] = new LSR[supportedLocalesLength];
|
||||
int i = 0;
|
||||
if (supportedLocalesLength > 0) {
|
||||
for (ULocale locale : builder.supportedLocales) {
|
||||
supportedULocales[i] = locale;
|
||||
supportedLocales[i] = locale.toLocale();
|
||||
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
|
||||
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
|
||||
idef = i;
|
||||
}
|
||||
lsrs[i] = getMaximalLsrOrUnd(locale);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, supported index).
|
||||
// We use a LinkedHashMap for both,
|
||||
// and insert the supported locales in the following order:
|
||||
// and an ordered list of (LSR, supported index) for
|
||||
// the supported locales in the following order:
|
||||
// 1. Default locale, if it is supported.
|
||||
// 2. Priority locales (aka "paradigm locales") in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
|
||||
// Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index)
|
||||
// for the default and paradigm locales, counting the number of those locales,
|
||||
// and keeping two indexes to fill the LSR and index arrays with
|
||||
// priority vs. normal locales. In that loop we would need to entry.setValue(~i)
|
||||
// to restore non-negative indexes in the map.
|
||||
// Probably saves little but less readable.
|
||||
Map<LSR, Integer> otherLsrToIndex = null;
|
||||
if (idef >= 0) {
|
||||
supportedLsrToIndex.put(defLSR, idef);
|
||||
}
|
||||
supportedLsrToIndex = new HashMap<>(supportedLocalesLength);
|
||||
supportedLSRs = new LSR[supportedLocalesLength];
|
||||
supportedIndexes = new int[supportedLocalesLength];
|
||||
int suppLength = 0;
|
||||
// Determine insertion order.
|
||||
// Add locales immediately that are equivalent to the default.
|
||||
byte[] order = new byte[supportedLocalesLength];
|
||||
int numParadigms = 0;
|
||||
i = 0;
|
||||
for (ULocale locale : supportedULocales) {
|
||||
if (i == idef) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
LSR lsr = lsrs[i];
|
||||
if (defLSR == null) {
|
||||
assert i == 0;
|
||||
udef = locale;
|
||||
def = supportedLocales[0];
|
||||
defLSR = lsr;
|
||||
idef = 0;
|
||||
supportedLsrToIndex.put(lsr, 0);
|
||||
} else if (idef >= 0 && lsr.equals(defLSR)) {
|
||||
// lsr.equals(defLSR) means that this supported locale is
|
||||
// a duplicate of the default locale.
|
||||
// Either an explicit default locale is supported, and we added it before the loop,
|
||||
// or there is no explicit default locale, and this is
|
||||
// a duplicate of the first supported locale.
|
||||
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
|
||||
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
|
||||
suppLength = putIfAbsent(lsr, 0, suppLength);
|
||||
} else if (lsr.isEquivalentTo(defLSR)) {
|
||||
suppLength = putIfAbsent(lsr, i, suppLength);
|
||||
} else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
|
||||
putIfAbsent(supportedLsrToIndex, lsr, i);
|
||||
order[i] = 2;
|
||||
++numParadigms;
|
||||
} else {
|
||||
if (otherLsrToIndex == null) {
|
||||
otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
|
||||
}
|
||||
putIfAbsent(otherLsrToIndex, lsr, i);
|
||||
order[i] = 3;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (otherLsrToIndex != null) {
|
||||
supportedLsrToIndex.putAll(otherLsrToIndex);
|
||||
// Add supported paradigm locales.
|
||||
int paradigmLimit = suppLength + numParadigms;
|
||||
for (i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
|
||||
if (order[i] == 2) {
|
||||
suppLength = putIfAbsent(lsrs[i], i, suppLength);
|
||||
}
|
||||
}
|
||||
int supportedLSRsLength = supportedLsrToIndex.size();
|
||||
supportedLSRs = new LSR[supportedLSRsLength];
|
||||
supportedIndexes = new int[supportedLSRsLength];
|
||||
i = 0;
|
||||
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
|
||||
supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()]
|
||||
supportedIndexes[i++] = entry.getValue();
|
||||
// Add remaining supported locales.
|
||||
for (i = 0; i < supportedLocalesLength; ++i) {
|
||||
if (order[i] == 3) {
|
||||
suppLength = putIfAbsent(lsrs[i], i, suppLength);
|
||||
}
|
||||
}
|
||||
supportedLSRsLength = suppLength;
|
||||
// If supportedLSRsLength < supportedLocalesLength then
|
||||
// we waste as many array slots as there are duplicate supported LSRs,
|
||||
// but the amount of wasted space is small as long as there are few duplicates.
|
||||
|
||||
defaultULocale = udef;
|
||||
defaultLocale = def;
|
||||
defaultLocaleIndex = idef;
|
||||
demotionPerDesiredLocale =
|
||||
builder.demotion == Demotion.NONE ? 0 :
|
||||
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
|
||||
@ -684,11 +666,13 @@ public final class LocaleMatcher {
|
||||
}
|
||||
}
|
||||
|
||||
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
|
||||
Integer index = lsrToIndex.get(lsr);
|
||||
if (index == null) {
|
||||
lsrToIndex.put(lsr, i);
|
||||
private final int putIfAbsent(LSR lsr, int i, int suppLength) {
|
||||
if (!supportedLsrToIndex.containsKey(lsr)) {
|
||||
supportedLsrToIndex.put(lsr, i);
|
||||
supportedLSRs[suppLength] = lsr;
|
||||
supportedIndexes[suppLength++] = i;
|
||||
}
|
||||
return suppLength;
|
||||
}
|
||||
|
||||
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
|
||||
@ -838,7 +822,7 @@ public final class LocaleMatcher {
|
||||
}
|
||||
|
||||
private Result defaultResult() {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, -1);
|
||||
}
|
||||
|
||||
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
|
||||
@ -960,7 +944,8 @@ public final class LocaleMatcher {
|
||||
return suppIndex;
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLSRs, bestShiftedDistance, favorSubtag);
|
||||
desiredLSR, supportedLSRs, supportedLSRsLength,
|
||||
bestShiftedDistance, favorSubtag);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestShiftedDistance = LocaleDistance.getShiftedDistance(bestIndexAndDistance);
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
@ -1012,7 +997,7 @@ public final class LocaleMatcher {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(desired),
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) },
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
|
||||
LocaleDistance.shiftDistance(thresholdDistance), favorSubtag);
|
||||
double distance = LocaleDistance.getDistanceDouble(indexAndDistance);
|
||||
if (TRACE_MATCHER) {
|
||||
@ -1048,9 +1033,9 @@ public final class LocaleMatcher {
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{LocaleMatcher");
|
||||
// Supported languages in the order that we try to match them.
|
||||
if (supportedLSRs.length > 0) {
|
||||
if (supportedLSRsLength > 0) {
|
||||
s.append(" supportedLSRs={").append(supportedLSRs[0]);
|
||||
for (int i = 1; i < supportedLSRs.length; ++i) {
|
||||
for (int i = 1; i < supportedLSRsLength; ++i) {
|
||||
s.append(", ").append(supportedLSRs[i]);
|
||||
}
|
||||
s.append('}');
|
||||
|
@ -194,7 +194,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"en_GB", locString(result.getSupportedULocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
1, result.getSupportedIndex());
|
||||
-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -15,6 +15,7 @@ import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -484,7 +485,8 @@ public final class LocaleDistanceBuilder {
|
||||
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
|
||||
String[] paradigms = supplementalData.getValueWithFallback(
|
||||
"languageMatchingInfo/written/paradigmLocales").getStringArray();
|
||||
Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
|
||||
// LinkedHashSet for stable order; otherwise a unit test is flaky.
|
||||
Set<LSR> paradigmLSRs = new LinkedHashSet<>(); // could be TreeSet if LSR were Comparable
|
||||
for (String paradigm : paradigms) {
|
||||
ULocale pl = new ULocale(paradigm);
|
||||
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
|
||||
|
Loading…
Reference in New Issue
Block a user