ICU-20915 LocaleMatcher no match: always getSupportedIndex()=-1; remove defaultLocaleIndex field; constructor check if locales are equivalent to default, not just equal; simplify locale sorting; minor builder & test deflaking

This commit is contained in:
Markus Scherer 2020-03-05 15:03:42 -08:00
parent 94c9ff2089
commit 3edff03393
7 changed files with 137 additions and 191 deletions

View File

@ -308,20 +308,22 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
return *lsr1 == *lsr2;
}
bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return false; }
U_ASSERT(i > 0);
int32_t index = uhash_geti(lsrToIndex, &lsr);
if (index != 0) {
return false;
} else {
uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
return U_SUCCESS(errorCode);
}
}
} // namespace
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return suppLength; }
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
if (index == 0) {
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
if (U_SUCCESS(errorCode)) {
supportedLSRs[suppLength] = &lsr;
supportedIndexes[suppLength++] = i;
}
}
return suppLength;
}
LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
localeDistance(*LocaleDistance::getSingleton(errorCode)),
@ -331,15 +333,27 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
supportedLsrToIndex(nullptr),
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
if (U_FAILURE(errorCode)) { return; }
if (thresholdDistance < 0) {
thresholdDistance = localeDistance.getDefaultScriptDistance();
}
const Locale *def = builder.defaultLocale_;
LSR builderDefaultLSR;
const LSR *defLSR = nullptr;
if (def != nullptr) {
ownedDefaultLocale = def->clone();
if (ownedDefaultLocale == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
def = ownedDefaultLocale;
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
if (U_FAILURE(errorCode)) { return; }
defLSR = &builderDefaultLSR;
}
supportedLocalesLength = builder.supportedLocales_ != nullptr ?
builder.supportedLocales_->size() : 0;
const Locale *def = builder.defaultLocale_;
int32_t idef = -1;
if (supportedLocalesLength > 0) {
// Store the supported locales in input order,
// so that when different types are used (e.g., language tag strings)
@ -356,15 +370,6 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
}
// If the constructor fails partway, we need null pointers for destructibility.
uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
// Also find the first supported locale whose LSR is
// the same as that for the default locale.
LSR builderDefaultLSR;
const LSR *defLSR = nullptr;
if (def != nullptr) {
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
if (U_FAILURE(errorCode)) { return; }
defLSR = &builderDefaultLSR;
}
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
supportedLocales[i] = locale.clone();
@ -376,31 +381,14 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
lsr.setHashCode();
if (U_FAILURE(errorCode)) { return; }
if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
idef = i;
defLSR = &lsr; // owned pointer to put into supportedLsrToIndex
if (*def == supportedLocale) {
def = &supportedLocale; // owned pointer to keep
}
}
}
// We need an unordered map from LSR to first supported locale with that LSR,
// and an ordered list of (LSR, supported index).
// We insert the supported locales in the following order:
// and an ordered list of (LSR, supported index) for
// the supported locales in the following order:
// 1. Default locale, if it is supported.
// 2. Priority locales (aka "paradigm locales") in builder order.
// 3. Remaining locales in builder order.
// In Java, we use a LinkedHashMap for both map & ordered lists.
// In C++, we use separate structures.
//
// We allocate arrays of LSRs and indexes,
// with as many slots as supported locales, for simplicity.
// We write the default and paradigm LSRs starting from the front of the arrays,
// and others starting from the back.
// At the end we reverse the non-paradigm LSRs.
// We end up wasting as many array slots as there are duplicate supported LSRs,
// but the amount of wasted space is small as long as there are few duplicates.
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
supportedLocalesLength, &errorCode);
if (U_FAILURE(errorCode)) { return; }
@ -412,79 +400,53 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t paradigmIndex = 0;
int32_t otherIndex = supportedLocalesLength;
if (idef >= 0) {
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
supportedLSRs[0] = defLSR;
supportedIndexes[0] = idef;
paradigmIndex = 1;
int32_t suppLength = 0;
// Determine insertion order.
// Add locales immediately that are equivalent to the default.
MaybeStackArray<int8_t, 100> order(supportedLocalesLength);
if (order.getAlias() == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t numParadigms = 0;
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
if (i == idef) { continue; }
const Locale &locale = *supportedLocales[i];
const LSR &lsr = lsrs[i];
if (defLSR == nullptr) {
U_ASSERT(i == 0);
def = &locale;
defLSR = &lsr;
idef = 0;
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
supportedLSRs[0] = &lsr;
supportedIndexes[0] = 0;
paradigmIndex = 1;
} else if (idef >= 0 && lsr == *defLSR) {
// lsr == *defLSR means that this supported locale is
// a duplicate of the default locale.
// Either an explicit default locale is supported, and we added it before the loop,
// or there is no explicit default locale, and this is
// a duplicate of the first supported locale.
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
} else if (lsr.isEquivalentTo(*defLSR)) {
suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
} else if (localeDistance.isParadigmLSR(lsr)) {
order[i] = 2;
++numParadigms;
} else {
if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
if (localeDistance.isParadigmLSR(lsr)) {
supportedLSRs[paradigmIndex] = &lsr;
supportedIndexes[paradigmIndex++] = i;
} else {
supportedLSRs[--otherIndex] = &lsr;
supportedIndexes[otherIndex] = i;
}
}
order[i] = 3;
}
if (U_FAILURE(errorCode)) { return; }
}
// Reverse the non-paradigm LSRs to be in order, right after the paradigm LSRs.
// First fill the unused slots between paradigm LSRs and other LSRs.
// This gap is as large as the number of locales with duplicate LSRs.
int32_t i = paradigmIndex;
int32_t j = supportedLocalesLength - 1;
while (i < otherIndex && otherIndex <= j) {
supportedLSRs[i] = supportedLSRs[j];
supportedIndexes[i++] = supportedIndexes[j--];
// Add supported paradigm locales.
int32_t paradigmLimit = suppLength + numParadigms;
for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
if (order[i] == 2) {
suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
}
}
// Swap remaining non-paradigm LSRs in place.
while (i < j) {
const LSR *tempLSR = supportedLSRs[i];
supportedLSRs[i] = supportedLSRs[j];
supportedLSRs[j] = tempLSR;
int32_t tempIndex = supportedIndexes[i];
supportedIndexes[i++] = supportedIndexes[j];
supportedIndexes[j--] = tempIndex;
// Add remaining supported locales.
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
if (order[i] == 3) {
suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
}
}
supportedLSRsLength = supportedLocalesLength - (otherIndex - paradigmIndex);
supportedLSRsLength = suppLength;
// If supportedLSRsLength < supportedLocalesLength then
// we waste as many array slots as there are duplicate supported LSRs,
// but the amount of wasted space is small as long as there are few duplicates.
}
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
ownedDefaultLocale = def->clone();
if (ownedDefaultLocale == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
def = ownedDefaultLocale;
}
defaultLocale = def;
defaultLocaleIndex = idef;
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
@ -503,8 +465,7 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
supportedLSRs(src.supportedLSRs),
supportedIndexes(src.supportedIndexes),
supportedLSRsLength(src.supportedLSRsLength),
ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale),
defaultLocaleIndex(src.defaultLocaleIndex) {
ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
src.supportedLocales = nullptr;
src.lsrs = nullptr;
src.supportedLocalesLength = 0;
@ -514,7 +475,6 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
src.supportedLSRsLength = 0;
src.ownedDefaultLocale = nullptr;
src.defaultLocale = nullptr;
src.defaultLocaleIndex = -1;
}
LocaleMatcher::~LocaleMatcher() {
@ -544,7 +504,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
supportedLSRsLength = src.supportedLSRsLength;
ownedDefaultLocale = src.ownedDefaultLocale;
defaultLocale = src.defaultLocale;
defaultLocaleIndex = src.defaultLocaleIndex;
src.supportedLocales = nullptr;
src.lsrs = nullptr;
@ -555,7 +514,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
src.supportedLSRsLength = 0;
src.ownedDefaultLocale = nullptr;
src.defaultLocale = nullptr;
src.defaultLocaleIndex = -1;
return *this;
}
@ -642,13 +600,13 @@ const Locale *LocaleMatcher::getBestMatchForListString(
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
const Locale &desiredLocale, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
return Result(nullptr, defaultLocale, -1, -1, FALSE);
}
int32_t suppIndex = getBestSuppIndex(
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
nullptr, errorCode);
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
return Result(nullptr, defaultLocale, -1, -1, FALSE);
} else {
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
}
@ -657,12 +615,12 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
return Result(nullptr, defaultLocale, -1, -1, FALSE);
}
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
return Result(nullptr, defaultLocale, -1, -1, FALSE);
} else {
return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
@ -696,8 +654,7 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
remainingIter->rememberCurrent(desiredIndex, errorCode);
if (U_FAILURE(errorCode)) { return -1; }
}
bestSupportedLsrIndex = bestIndexAndDistance >= 0 ?
LocaleDistance::getIndex(bestIndexAndDistance) : -1;
bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
}
if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
break;

View File

@ -574,6 +574,8 @@ private:
LocaleMatcher(const LocaleMatcher &other) = delete;
LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
const XLikelySubtags &likelySubtags;
@ -595,7 +597,6 @@ private:
int32_t supportedLSRsLength;
Locale *ownedDefaultLocale;
const Locale *defaultLocale;
int32_t defaultLocaleIndex;
};
U_NAMESPACE_END

View File

@ -272,7 +272,7 @@ void LocaleMatcherTest::testSupportedDefault() {
assertEquals("getBestMatchResult(ja_JP).supp",
"en_GB", locString(result.getSupportedLocale()));
assertEquals("getBestMatchResult(ja_JP).suppIndex",
1, result.getSupportedIndex());
-1, result.getSupportedIndex());
}
void LocaleMatcherTest::testUnsupportedDefault() {

View File

@ -5,7 +5,7 @@ package com.ibm.icu.impl.locale;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
@ -152,7 +152,8 @@ public class LocaleDistance {
Set<LSR> paradigmLSRs;
if (matchTable.findValue("paradigms", value)) {
String[] paradigms = value.getStringArray();
paradigmLSRs = new HashSet<>(paradigms.length / 3);
// LinkedHashSet for stable order; otherwise a unit test is flaky.
paradigmLSRs = new LinkedHashSet<>(paradigms.length / 3);
for (int i = 0; i < paradigms.length; i += 3) {
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2],
LSR.DONT_CARE_FLAGS));
@ -209,7 +210,7 @@ public class LocaleDistance {
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
LSR en = new LSR("en", "Latn", "US", LSR.EXPLICIT_LSR);
LSR enGB = new LSR("en", "Latn", "GB", LSR.EXPLICIT_LSR);
int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB },
int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB }, 1,
shiftDistance(50), FavorSubtag.LANGUAGE);
defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
@ -227,7 +228,7 @@ public class LocaleDistance {
int threshold, FavorSubtag favorSubtag) {
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, 1,
shiftDistance(threshold), favorSubtag);
return getDistanceFloor(indexAndDistance);
}
@ -240,7 +241,7 @@ public class LocaleDistance {
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in the low bits.
*/
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs, int supportedLSRsLength,
int shiftedThreshold, FavorSubtag favorSubtag) {
// Round up the shifted threshold (if fraction bits are not 0)
// for comparison with un-shifted distances until we need fraction bits.
@ -252,12 +253,12 @@ public class LocaleDistance {
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
int desLangDistance = trieNext(iter, desired.language, false);
long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0;
long desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
// Cached lookup info from XLikelySubtags.compareLikely().
int bestLikelyInfo = -1;
for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) {
for (int slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
LSR supported = supportedLSRs[slIndex];
boolean star = false;
int distance = desLangDistance;

View File

@ -10,8 +10,8 @@ package com.ibm.icu.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -329,9 +329,9 @@ public final class LocaleMatcher {
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
private final LSR[] supportedLSRs;
private final int[] supportedIndexes;
private final int supportedLSRsLength;
private final ULocale defaultULocale;
private final Locale defaultLocale;
private final int defaultLocaleIndex;
/**
* LocaleMatcher Builder.
@ -578,103 +578,85 @@ public final class LocaleMatcher {
private LocaleMatcher(Builder builder) {
thresholdDistance = builder.thresholdDistance < 0 ?
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
int supportedLocalesLength = builder.supportedLocales != null ?
builder.supportedLocales.size() : 0;
ULocale udef = builder.defaultLocale;
Locale def = null;
int idef = -1;
// Store the supported locales in input order,
// so that when different types are used (e.g., java.util.Locale)
// we can return those by parallel index.
supportedULocales = new ULocale[supportedLocalesLength];
supportedLocales = new Locale[supportedLocalesLength];
// Supported LRSs in input order.
LSR lsrs[] = new LSR[supportedLocalesLength];
// Also find the first supported locale whose LSR is
// the same as that for the default locale.
LSR defLSR = null;
if (udef != null) {
def = udef.toLocale();
defLSR = getMaximalLsrOrUnd(udef);
}
// Store the supported locales in input order,
// so that when different types are used (e.g., java.util.Locale)
// we can return those by parallel index.
int supportedLocalesLength = builder.supportedLocales != null ?
builder.supportedLocales.size() : 0;
supportedULocales = new ULocale[supportedLocalesLength];
supportedLocales = new Locale[supportedLocalesLength];
// Supported LRSs in input order.
LSR lsrs[] = new LSR[supportedLocalesLength];
int i = 0;
if (supportedLocalesLength > 0) {
for (ULocale locale : builder.supportedLocales) {
supportedULocales[i] = locale;
supportedLocales[i] = locale.toLocale();
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
idef = i;
}
lsrs[i] = getMaximalLsrOrUnd(locale);
++i;
}
}
// We need an unordered map from LSR to first supported locale with that LSR,
// and an ordered list of (LSR, supported index).
// We use a LinkedHashMap for both,
// and insert the supported locales in the following order:
// and an ordered list of (LSR, supported index) for
// the supported locales in the following order:
// 1. Default locale, if it is supported.
// 2. Priority locales (aka "paradigm locales") in builder order.
// 3. Remaining locales in builder order.
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
// Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index)
// for the default and paradigm locales, counting the number of those locales,
// and keeping two indexes to fill the LSR and index arrays with
// priority vs. normal locales. In that loop we would need to entry.setValue(~i)
// to restore non-negative indexes in the map.
// Probably saves little but less readable.
Map<LSR, Integer> otherLsrToIndex = null;
if (idef >= 0) {
supportedLsrToIndex.put(defLSR, idef);
}
supportedLsrToIndex = new HashMap<>(supportedLocalesLength);
supportedLSRs = new LSR[supportedLocalesLength];
supportedIndexes = new int[supportedLocalesLength];
int suppLength = 0;
// Determine insertion order.
// Add locales immediately that are equivalent to the default.
byte[] order = new byte[supportedLocalesLength];
int numParadigms = 0;
i = 0;
for (ULocale locale : supportedULocales) {
if (i == idef) {
++i;
continue;
}
LSR lsr = lsrs[i];
if (defLSR == null) {
assert i == 0;
udef = locale;
def = supportedLocales[0];
defLSR = lsr;
idef = 0;
supportedLsrToIndex.put(lsr, 0);
} else if (idef >= 0 && lsr.equals(defLSR)) {
// lsr.equals(defLSR) means that this supported locale is
// a duplicate of the default locale.
// Either an explicit default locale is supported, and we added it before the loop,
// or there is no explicit default locale, and this is
// a duplicate of the first supported locale.
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
suppLength = putIfAbsent(lsr, 0, suppLength);
} else if (lsr.isEquivalentTo(defLSR)) {
suppLength = putIfAbsent(lsr, i, suppLength);
} else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
putIfAbsent(supportedLsrToIndex, lsr, i);
order[i] = 2;
++numParadigms;
} else {
if (otherLsrToIndex == null) {
otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
}
putIfAbsent(otherLsrToIndex, lsr, i);
order[i] = 3;
}
++i;
}
if (otherLsrToIndex != null) {
supportedLsrToIndex.putAll(otherLsrToIndex);
// Add supported paradigm locales.
int paradigmLimit = suppLength + numParadigms;
for (i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
if (order[i] == 2) {
suppLength = putIfAbsent(lsrs[i], i, suppLength);
}
}
int supportedLSRsLength = supportedLsrToIndex.size();
supportedLSRs = new LSR[supportedLSRsLength];
supportedIndexes = new int[supportedLSRsLength];
i = 0;
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()]
supportedIndexes[i++] = entry.getValue();
// Add remaining supported locales.
for (i = 0; i < supportedLocalesLength; ++i) {
if (order[i] == 3) {
suppLength = putIfAbsent(lsrs[i], i, suppLength);
}
}
supportedLSRsLength = suppLength;
// If supportedLSRsLength < supportedLocalesLength then
// we waste as many array slots as there are duplicate supported LSRs,
// but the amount of wasted space is small as long as there are few duplicates.
defaultULocale = udef;
defaultLocale = def;
defaultLocaleIndex = idef;
demotionPerDesiredLocale =
builder.demotion == Demotion.NONE ? 0 :
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
@ -684,11 +666,13 @@ public final class LocaleMatcher {
}
}
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
Integer index = lsrToIndex.get(lsr);
if (index == null) {
lsrToIndex.put(lsr, i);
private final int putIfAbsent(LSR lsr, int i, int suppLength) {
if (!supportedLsrToIndex.containsKey(lsr)) {
supportedLsrToIndex.put(lsr, i);
supportedLSRs[suppLength] = lsr;
supportedIndexes[suppLength++] = i;
}
return suppLength;
}
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
@ -838,7 +822,7 @@ public final class LocaleMatcher {
}
private Result defaultResult() {
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
return new Result(null, defaultULocale, null, defaultLocale, -1, -1);
}
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
@ -960,7 +944,8 @@ public final class LocaleMatcher {
return suppIndex;
}
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
desiredLSR, supportedLSRs, bestShiftedDistance, favorSubtag);
desiredLSR, supportedLSRs, supportedLSRsLength,
bestShiftedDistance, favorSubtag);
if (bestIndexAndDistance >= 0) {
bestShiftedDistance = LocaleDistance.getShiftedDistance(bestIndexAndDistance);
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
@ -1012,7 +997,7 @@ public final class LocaleMatcher {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
getMaximalLsrOrUnd(desired),
new LSR[] { getMaximalLsrOrUnd(supported) },
new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
LocaleDistance.shiftDistance(thresholdDistance), favorSubtag);
double distance = LocaleDistance.getDistanceDouble(indexAndDistance);
if (TRACE_MATCHER) {
@ -1048,9 +1033,9 @@ public final class LocaleMatcher {
public String toString() {
StringBuilder s = new StringBuilder().append("{LocaleMatcher");
// Supported languages in the order that we try to match them.
if (supportedLSRs.length > 0) {
if (supportedLSRsLength > 0) {
s.append(" supportedLSRs={").append(supportedLSRs[0]);
for (int i = 1; i < supportedLSRs.length; ++i) {
for (int i = 1; i < supportedLSRsLength; ++i) {
s.append(", ").append(supportedLSRs[i]);
}
s.append('}');

View File

@ -194,7 +194,7 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals("getBestMatchResult(ja_JP).supp",
"en_GB", locString(result.getSupportedULocale()));
assertEquals("getBestMatchResult(ja_JP).suppIndex",
1, result.getSupportedIndex());
-1, result.getSupportedIndex());
}
@Test

View File

@ -15,6 +15,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -484,7 +485,8 @@ public final class LocaleDistanceBuilder {
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
String[] paradigms = supplementalData.getValueWithFallback(
"languageMatchingInfo/written/paradigmLocales").getStringArray();
Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
// LinkedHashSet for stable order; otherwise a unit test is flaky.
Set<LSR> paradigmLSRs = new LinkedHashSet<>(); // could be TreeSet if LSR were Comparable
for (String paradigm : paradigms) {
ULocale pl = new ULocale(paradigm);
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);