ICU-20330 simplify LocaleMatcher code:
- widen API from LocalePriorityList to Iterable
- merge getBestMatch(multiple locales) and getBestMatch(single locale) into one function
- process desired locales incrementally, create fewer objects
- reject poor matches early: use bestDistance-demotion for threshold
- add API for java.util.Locale, convert incrementally
- new feature: tracks indexes of supported and desired locales which eliminates conversion of result objects in wrappers around getBestMatch() as shown by the java.util.Locale API here
- simpler data structures, more serialization-friendly (easier to port to C++)
- e.g., use a BytesTrie each for likelySubtags & locale distance, instead of layers of TreeMap
- un-hardcode locale matcher data; use modern resource bundle functions
- split builder code & runtime code into separate classes
- move LSR to simple top-level value class, cache regionIndex in LSR
- simpler handling of private use languages and pseudolocales
- simplify RegionMapper
- LocaleDistance builder: move the node distance into the DistanceTable, remove DistanceNode
- support distance rules with region codes, not just with variables
- enforce & use distance rule constraints:
  - no rule with *,supported or desired,*
  - no rule with language * and script/region non-*
- distance trie collapse a (desired, supported)=(ANY, ANY) pair into a single *
- look up each desired language only once for all supported LSRs
- remove layers-of-Maps compaction (trie builder compacts)
- remove unused XML printing
- remove other unused code
- make XLocaleMatcherTest.testPerf() exercise locale distance lookup code
This commit is contained in:
parent
026095ddc3
commit
8335adc310
@ -248,7 +248,7 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
public static final String[] getKeywordValues(String baseName, String keyword) {
|
||||
Set<String> keywords = new HashSet<String>();
|
||||
Set<String> keywords = new HashSet<>();
|
||||
ULocale locales[] = getAvailEntry(baseName, ICU_DATA_CLASS_LOADER).getULocaleList();
|
||||
int i;
|
||||
|
||||
@ -364,6 +364,26 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
return result;
|
||||
}
|
||||
|
||||
public UResource.Value getValueWithFallback(String path) throws MissingResourceException {
|
||||
ICUResourceBundle rb;
|
||||
if (path.isEmpty()) {
|
||||
rb = this;
|
||||
} else {
|
||||
rb = findResourceWithFallback(path, this, null);
|
||||
if (rb == null) {
|
||||
throw new MissingResourceException(
|
||||
"Can't find resource for bundle "
|
||||
+ this.getClass().getName() + ", key " + getType(),
|
||||
path, getKey());
|
||||
}
|
||||
}
|
||||
ReaderValue readerValue = new ReaderValue();
|
||||
ICUResourceBundleImpl impl = (ICUResourceBundleImpl)rb;
|
||||
readerValue.reader = impl.wholeBundle.reader;
|
||||
readerValue.res = impl.getResource();
|
||||
return readerValue;
|
||||
}
|
||||
|
||||
public void getAllItemsWithFallbackNoFail(String path, UResource.Sink sink) {
|
||||
try {
|
||||
getAllItemsWithFallback(path, sink);
|
||||
@ -512,8 +532,8 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
* @return the list of converted ULocales
|
||||
*/
|
||||
public static final Locale[] getLocaleList(ULocale[] ulocales) {
|
||||
ArrayList<Locale> list = new ArrayList<Locale>(ulocales.length);
|
||||
HashSet<Locale> uniqueSet = new HashSet<Locale>();
|
||||
ArrayList<Locale> list = new ArrayList<>(ulocales.length);
|
||||
HashSet<Locale> uniqueSet = new HashSet<>();
|
||||
for (int i = 0; i < ulocales.length; i++) {
|
||||
Locale loc = ulocales[i].toLocale();
|
||||
if (!uniqueSet.contains(loc)) {
|
||||
@ -662,7 +682,7 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
|
||||
private static Set<String> createFullLocaleNameSet(String baseName, ClassLoader loader) {
|
||||
String bn = baseName.endsWith("/") ? baseName : baseName + "/";
|
||||
Set<String> set = new HashSet<String>();
|
||||
Set<String> set = new HashSet<>();
|
||||
String skipScan = ICUConfig.get("com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan", "false");
|
||||
if (!skipScan.equalsIgnoreCase("true")) {
|
||||
// scan available locale resources under the base url first
|
||||
@ -707,7 +727,7 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
}
|
||||
|
||||
private static Set<String> createLocaleNameSet(String baseName, ClassLoader loader) {
|
||||
HashSet<String> set = new HashSet<String>();
|
||||
HashSet<String> set = new HashSet<>();
|
||||
addLocaleIDsFromIndexBundle(baseName, loader, set);
|
||||
return Collections.unmodifiableSet(set);
|
||||
}
|
||||
@ -1408,7 +1428,7 @@ public class ICUResourceBundle extends UResourceBundle {
|
||||
String bundleName;
|
||||
String rpath = wholeBundle.reader.getAlias(_resource);
|
||||
if (aliasesVisited == null) {
|
||||
aliasesVisited = new HashMap<String, String>();
|
||||
aliasesVisited = new HashMap<>();
|
||||
}
|
||||
if (aliasesVisited.get(rpath) != null) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -1065,6 +1065,17 @@ public final class ICUResourceBundleReader {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public boolean findValue(CharSequence key, UResource.Value value) {
|
||||
ReaderValue readerValue = (ReaderValue)value;
|
||||
int i = findTableItem(readerValue.reader, key);
|
||||
if (i >= 0) {
|
||||
readerValue.res = getContainerResource(readerValue.reader, i);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final class Table1632 extends Table {
|
||||
@Override
|
||||
|
@ -290,16 +290,22 @@ public final class UResource {
|
||||
*/
|
||||
public interface Table {
|
||||
/**
|
||||
* @return The number of items in the array resource.
|
||||
* @return The number of items in the table resource.
|
||||
*/
|
||||
public int getSize();
|
||||
/**
|
||||
* @param i Array item index.
|
||||
* @param i Table item index.
|
||||
* @param key Output-only, receives the key of the i'th item.
|
||||
* @param value Output-only, receives the value of the i'th item.
|
||||
* @return true if i is non-negative and less than getSize().
|
||||
*/
|
||||
public boolean getKeyAndValue(int i, Key key, Value value);
|
||||
/**
|
||||
* @param key Key string to find in the table.
|
||||
* @param value Output-only, receives the value of the item with that key.
|
||||
* @return true if the table contains the key.
|
||||
*/
|
||||
public boolean findValue(CharSequence key, Value value);
|
||||
}
|
||||
|
||||
/**
|
||||
|
72
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
Normal file
72
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
Normal file
@ -0,0 +1,72 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
final class LSR {
|
||||
static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
|
||||
|
||||
final String language;
|
||||
final String script;
|
||||
final String region;
|
||||
/** Index for region, negative if ill-formed. @see indexForRegion */
|
||||
final int regionIndex;
|
||||
|
||||
LSR(String language, String script, String region) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
regionIndex = indexForRegion(region);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a non-negative index for a well-formed region code.
|
||||
* Do not rely on a particular region->index mapping; it may change.
|
||||
* Returns -1 for ill-formed strings.
|
||||
*/
|
||||
static final int indexForRegion(String region) {
|
||||
if (region.length() == 2) {
|
||||
int a = region.charAt(0) - 'A';
|
||||
if (a < 0 || 25 < a) { return -1; }
|
||||
int b = region.charAt(1) - 'A';
|
||||
if (b < 0 || 25 < b) { return -1; }
|
||||
return 26 * a + b + 1000;
|
||||
} else if (region.length() == 3) {
|
||||
int a = region.charAt(0) - '0';
|
||||
if (a < 0 || 9 < a) { return -1; }
|
||||
int b = region.charAt(1) - '0';
|
||||
if (b < 0 || 9 < b) { return -1; }
|
||||
int c = region.charAt(2) - '0';
|
||||
if (c < 0 || 9 < c) { return -1; }
|
||||
return (10 * a + b) * 10 + c;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder(language);
|
||||
if (!script.isEmpty()) {
|
||||
result.append('-').append(script);
|
||||
}
|
||||
if (!region.isEmpty()) {
|
||||
result.append('-').append(region);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LSR other;
|
||||
return this == obj ||
|
||||
(obj != null
|
||||
&& obj.getClass() == this.getClass()
|
||||
&& language.equals((other = (LSR) obj).language)
|
||||
&& script.equals(other.script)
|
||||
&& region.equals(other.region));
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(language, script, region);
|
||||
}
|
||||
}
|
@ -0,0 +1,265 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.BytesTrieBuilder;
|
||||
import com.ibm.icu.util.ICUException;
|
||||
|
||||
/**
|
||||
* Builds data for XLikelySubtags.
|
||||
* Reads source data from ICU resource bundles.
|
||||
*/
|
||||
class LikelySubtagsBuilder {
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
private static ICUResourceBundle getSupplementalDataBundle(String name) {
|
||||
return ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, name,
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
}
|
||||
|
||||
private static final class AliasesBuilder {
|
||||
final Map<String, String> toCanonical = new HashMap<>();
|
||||
final Multimap<String, String> toAliases;
|
||||
|
||||
public Set<String> getAliases(String canonical) {
|
||||
Set<String> aliases = toAliases.get(canonical);
|
||||
return aliases == null ? Collections.singleton(canonical) : aliases;
|
||||
}
|
||||
|
||||
public AliasesBuilder(String type) {
|
||||
ICUResourceBundle metadata = getSupplementalDataBundle("metadata");
|
||||
UResource.Value value = metadata.getValueWithFallback("alias/" + type);
|
||||
UResource.Table aliases = value.getTable();
|
||||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
|
||||
String aliasFrom = key.toString();
|
||||
if (aliasFrom.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
UResource.Table table = value.getTable();
|
||||
if (table.findValue("reason", value) && value.getString().equals("overlong")) {
|
||||
continue;
|
||||
}
|
||||
if (!table.findValue("replacement", value)) {
|
||||
continue;
|
||||
}
|
||||
String aliasTo = value.getString();
|
||||
int spacePos = aliasTo.indexOf(' ');
|
||||
String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
|
||||
if (aliasFirst.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
toCanonical.put(aliasFrom, aliasFirst);
|
||||
}
|
||||
if (type.equals("language")) {
|
||||
toCanonical.put("mo", "ro"); // special case
|
||||
}
|
||||
toAliases = Multimaps.invertFrom(toCanonical, HashMultimap.<String, String>create());
|
||||
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** " + type + " aliases");
|
||||
for (Map.Entry<String, String> mapping : new TreeMap<>(toCanonical).entrySet()) {
|
||||
System.out.println(mapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TrieBuilder {
|
||||
byte[] bytes = new byte[24];
|
||||
BytesTrieBuilder tb = new BytesTrieBuilder();
|
||||
|
||||
void addMapping(String s, int value) {
|
||||
// s contains only ASCII characters.
|
||||
s.getBytes(0, s.length(), bytes, 0);
|
||||
tb.add(bytes, s.length(), value);
|
||||
}
|
||||
|
||||
BytesTrie build() {
|
||||
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
|
||||
// Allocate an array with just the necessary capacity,
|
||||
// so that we do not hold on to a larger array for a long time.
|
||||
byte[] bytes = new byte[buffer.remaining()];
|
||||
buffer.get(bytes);
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("likely subtags trie size: " + bytes.length + " bytes");
|
||||
}
|
||||
return new BytesTrie(bytes, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static XLikelySubtags.Data build() {
|
||||
AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
|
||||
AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
|
||||
|
||||
Map<String, Map<String, Map<String, LSR>>> langTable =
|
||||
makeTable(languageAliasesBuilder, regionAliasesBuilder);
|
||||
|
||||
TrieBuilder trieBuilder = new TrieBuilder();
|
||||
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
|
||||
// Bogus LSR at index 0 for some code to easily distinguish between
|
||||
// intermediate match points and real result values.
|
||||
LSR bogus = new LSR("", "", "");
|
||||
lsrIndexes.put(bogus, 0);
|
||||
// We could prefill the lsrList with common locales to give them small indexes,
|
||||
// and see if that improves performance a little.
|
||||
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
|
||||
String lang = ls.getKey();
|
||||
if (lang.equals("und")) {
|
||||
lang = "*";
|
||||
}
|
||||
// Create a match point for the language.
|
||||
trieBuilder.addMapping(lang, 0);
|
||||
Map<String, Map<String, LSR>> scriptTable = ls.getValue();
|
||||
for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
|
||||
String script = sr.getKey();
|
||||
if (script.isEmpty()) {
|
||||
script = "*";
|
||||
}
|
||||
// Match point for lang+script.
|
||||
trieBuilder.addMapping(lang + script, 0);
|
||||
Map<String, LSR> regionTable = sr.getValue();
|
||||
for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
|
||||
String region = r2lsr.getKey();
|
||||
if (region.isEmpty()) {
|
||||
region = "*";
|
||||
}
|
||||
// Map the whole lang+script+region to a unique, dense index of the LSR.
|
||||
LSR lsr = r2lsr.getValue();
|
||||
Integer index = lsrIndexes.get(lsr);
|
||||
int i;
|
||||
if (index != null) {
|
||||
i = index.intValue();
|
||||
} else {
|
||||
i = lsrIndexes.size();
|
||||
lsrIndexes.put(lsr, i);
|
||||
}
|
||||
trieBuilder.addMapping(lang + script + region, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesTrie trie = trieBuilder.build();
|
||||
LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
|
||||
return new XLikelySubtags.Data(
|
||||
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
|
||||
}
|
||||
|
||||
private static Map<String, Map<String, Map<String, LSR>>> makeTable(
|
||||
AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
|
||||
Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
|
||||
// set the base data
|
||||
ICUResourceBundle likelySubtags = getSupplementalDataBundle("likelySubtags");
|
||||
UResource.Value value = likelySubtags.getValueWithFallback("");
|
||||
UResource.Table table = value.getTable();
|
||||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; table.getKeyAndValue(i, key, value); ++i) {
|
||||
LSR ltp = lsrFromLocaleID(key.toString()); // source
|
||||
final String language = ltp.language;
|
||||
final String script = ltp.script;
|
||||
final String region = ltp.region;
|
||||
|
||||
ltp = lsrFromLocaleID(value.getString()); // target
|
||||
String languageTarget = ltp.language;
|
||||
final String scriptTarget = ltp.script;
|
||||
final String regionTarget = ltp.region;
|
||||
|
||||
set(result, language, script, region, languageTarget, scriptTarget, regionTarget);
|
||||
// now add aliases
|
||||
Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
|
||||
Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
|
||||
for (String languageAlias : languageAliases) {
|
||||
for (String regionAlias : regionAliases) {
|
||||
if (languageAlias.equals(language) && regionAlias.equals(region)) {
|
||||
continue;
|
||||
}
|
||||
set(result, languageAlias, script, regionAlias,
|
||||
languageTarget, scriptTarget, regionTarget);
|
||||
}
|
||||
}
|
||||
}
|
||||
// hack
|
||||
set(result, "und", "Latn", "", "en", "Latn", "US");
|
||||
|
||||
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
|
||||
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
|
||||
|
||||
// so und-Latn-GH => ak-Latn-GH
|
||||
Map<String, Map<String, LSR>> undScriptMap = result.get("und");
|
||||
Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
|
||||
for (Map.Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
|
||||
final LSR lsr = regionEntry.getValue();
|
||||
set(result, "und", lsr.script, lsr.region, lsr);
|
||||
}
|
||||
//
|
||||
// check that every level has "" (or "und")
|
||||
if (!result.containsKey("und")) {
|
||||
throw new IllegalArgumentException("failure: base");
|
||||
}
|
||||
for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
|
||||
String lang = langEntry.getKey();
|
||||
final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
|
||||
if (!scriptMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang);
|
||||
}
|
||||
for (Map.Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
|
||||
String script = scriptEntry.getKey();
|
||||
final Map<String, LSR> regionMap = scriptEntry.getValue();
|
||||
if (!regionMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang + "-" + script);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Parses locale IDs in the likelySubtags data, not arbitrary language tags.
|
||||
private static LSR lsrFromLocaleID(String languageIdentifier) {
|
||||
String[] parts = languageIdentifier.split("[-_]");
|
||||
if (parts.length < 1 || parts.length > 3) {
|
||||
throw new ICUException("too many subtags");
|
||||
}
|
||||
String lang = parts[0];
|
||||
String p2 = parts.length < 2 ? "" : parts[1];
|
||||
String p3 = parts.length < 3 ? "" : parts[2];
|
||||
return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
|
||||
}
|
||||
|
||||
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
|
||||
final String language, final String script, final String region,
|
||||
final String languageTarget, final String scriptTarget, final String regionTarget) {
|
||||
LSR target = new LSR(languageTarget, scriptTarget, regionTarget);
|
||||
set(langTable, language, script, region, target);
|
||||
}
|
||||
|
||||
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
|
||||
final String language, final String script, final String region, LSR newValue) {
|
||||
Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
|
||||
Map<String, LSR> regionTable = getSubtable(scriptTable, script);
|
||||
regionTable.put(region, newValue);
|
||||
}
|
||||
|
||||
private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
|
||||
Map<V, T> subTable = table.get(language);
|
||||
if (subTable == null) {
|
||||
table.put(language, subTable = new TreeMap<>());
|
||||
}
|
||||
return subTable;
|
||||
}
|
||||
}
|
@ -0,0 +1,343 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Off-line-built data for LocaleMatcher.
|
||||
* Mostly but not only the data for mapping locales to their maximized forms.
|
||||
*/
|
||||
public class LocaleDistance {
|
||||
private static final int ABOVE_THRESHOLD = 100;
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
|
||||
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
|
||||
// There is also a trie value for each subsequence of whole subtags.
|
||||
// One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
|
||||
private final BytesTrie trie;
|
||||
|
||||
/**
|
||||
* Maps each region to zero or more single-character partitions.
|
||||
*/
|
||||
private final byte[] regionToPartitionsIndex;
|
||||
private final String[][] partitionArrays;
|
||||
|
||||
/**
|
||||
* Used to get the paradigm region for a cluster, if there is one.
|
||||
*/
|
||||
private final Set<LSR> paradigmLSRs;
|
||||
|
||||
private final int defaultLanguageDistance;
|
||||
private final int defaultScriptDistance;
|
||||
private final int defaultRegionDistance;
|
||||
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
// VisibleForTesting
|
||||
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
|
||||
|
||||
LocaleDistance(BytesTrie trie,
|
||||
byte[] regionToPartitionsIndex, String[][] partitionArrays,
|
||||
Set<LSR> paradigmLSRs) {
|
||||
this.trie = trie;
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** locale distance");
|
||||
testOnlyPrintDistanceTable();
|
||||
}
|
||||
this.regionToPartitionsIndex = regionToPartitionsIndex;
|
||||
this.partitionArrays = partitionArrays;
|
||||
this.paradigmLSRs = paradigmLSRs;
|
||||
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
BytesTrie.Result result = iter.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
defaultLanguageDistance = iter.getValue();
|
||||
result = iter.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
defaultScriptDistance = iter.getValue();
|
||||
result = iter.next('*');
|
||||
assert result.hasValue();
|
||||
defaultRegionDistance = iter.getValue();
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public int testOnlyDistance(ULocale desired, ULocale supported,
|
||||
int threshold, DistanceOption distanceOption) {
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
|
||||
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
|
||||
threshold, distanceOption) & 0xff;
|
||||
}
|
||||
|
||||
public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
|
||||
// NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
|
||||
// than regions, so they might be considered the "normal" case.
|
||||
|
||||
/**
|
||||
* Finds the supported LSR with the smallest distance from the desired one.
|
||||
* Equivalent LSR subtags must be normalized into a canonical form.
|
||||
*
|
||||
* <p>Returns the index of the lowest-distance supported LSR in bits 31..8
|
||||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
*/
|
||||
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
int threshold, DistanceOption distanceOption) {
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
// Set wantValue=true so that iter reads & skips the match point value.
|
||||
int desLangDistance = trieNext(iter, desired.language, true, true);
|
||||
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
|
||||
// Index of the supported LSR with the lowest distance.
|
||||
int bestIndex = -1;
|
||||
for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
|
||||
LSR supported = supportedLsrs[slIndex];
|
||||
boolean star = false;
|
||||
int distance = desLangDistance;
|
||||
if (distance >= 0) {
|
||||
if (slIndex != 0) {
|
||||
iter.resetToState64(desLangState);
|
||||
}
|
||||
distance = trieNext(iter, supported.language, true, true);
|
||||
}
|
||||
// Note: The data builder verifies that there are no rules with "any" (*) language and
|
||||
// real (non *) script or region subtags.
|
||||
// This means that if the lookup for either language fails we can use
|
||||
// the default distances without further lookups.
|
||||
if (distance < 0) { // <*, *>
|
||||
if (desired.language.equals(supported.language)) {
|
||||
distance = 0;
|
||||
} else {
|
||||
distance = defaultLanguageDistance;
|
||||
}
|
||||
star = true;
|
||||
}
|
||||
assert 0 <= distance && distance <= 100;
|
||||
boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
|
||||
if (scriptFirst) {
|
||||
distance >>= 2;
|
||||
}
|
||||
if (distance >= threshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int scriptDistance;
|
||||
if (star) {
|
||||
if (desired.script.equals(supported.script)) {
|
||||
scriptDistance = 0;
|
||||
} else {
|
||||
scriptDistance = defaultScriptDistance;
|
||||
}
|
||||
} else {
|
||||
scriptDistance = getDesSuppDistance(iter, iter.getState64(),
|
||||
desired.script, supported.script, false);
|
||||
}
|
||||
if (scriptFirst) {
|
||||
scriptDistance >>= 1;
|
||||
}
|
||||
distance += scriptDistance;
|
||||
if (distance >= threshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (desired.region.equals(supported.region)) {
|
||||
// regionDistance = 0
|
||||
} else if (star) {
|
||||
distance += defaultRegionDistance;
|
||||
} else {
|
||||
long startState = iter.getState64();
|
||||
|
||||
// From here on we know the regions are not equal.
|
||||
// Map each region to zero or more partitions. (zero = one empty string)
|
||||
// If either side has more than one, then we find the maximum distance.
|
||||
// This could be optimized by adding some more structure, but probably not worth it.
|
||||
final String[] desiredPartitions = partitionsForRegion(desired);
|
||||
final String[] supportedPartitions = partitionsForRegion(supported);
|
||||
int regionDistance;
|
||||
|
||||
if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
|
||||
regionDistance = getRegionPartitionsDistance(iter, startState,
|
||||
desiredPartitions, supportedPartitions, threshold - distance);
|
||||
} else {
|
||||
regionDistance = getDesSuppDistance(iter, startState,
|
||||
desiredPartitions[0], supportedPartitions[0], true);
|
||||
}
|
||||
distance += regionDistance;
|
||||
}
|
||||
if (distance < threshold) {
|
||||
if (distance == 0) {
|
||||
return slIndex << 8;
|
||||
}
|
||||
bestIndex = slIndex;
|
||||
threshold = distance;
|
||||
}
|
||||
}
|
||||
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
|
||||
}
|
||||
|
||||
private int getRegionPartitionsDistance(BytesTrie iter, long startState,
|
||||
String[] desiredPartitions, String[] supportedPartitions, int threshold) {
|
||||
int regionDistance = -1;
|
||||
for (String dp : desiredPartitions) {
|
||||
for (String sp : supportedPartitions) {
|
||||
if (regionDistance >= 0) { // no need to reset in first iteration
|
||||
iter.resetToState64(startState);
|
||||
}
|
||||
int d = getDesSuppDistance(iter, startState, dp, sp, true);
|
||||
if (regionDistance < d) {
|
||||
if (d >= threshold) {
|
||||
return d;
|
||||
}
|
||||
regionDistance = d;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert regionDistance >= 0;
|
||||
return regionDistance;
|
||||
}
|
||||
|
||||
// Modified from
|
||||
// DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
|
||||
private static final int getDesSuppDistance(BytesTrie iter, long startState,
|
||||
String desired, String supported, boolean finalSubtag) {
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
int distance = trieNext(iter, desired, false, true);
|
||||
if (distance >= 0) {
|
||||
distance = trieNext(iter, supported, true, !finalSubtag);
|
||||
}
|
||||
if (distance < 0) {
|
||||
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
|
||||
assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
if (!finalSubtag && desired.equals(supported)) {
|
||||
distance = 0; // same language or script
|
||||
} else {
|
||||
distance = iter.getValue();
|
||||
assert distance >= 0;
|
||||
}
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
|
||||
private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
|
||||
if (s.isEmpty()) {
|
||||
return -1; // no empty subtags in the distance data
|
||||
}
|
||||
BytesTrie.Result result;
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
int c = s.charAt(i);
|
||||
assert c <= 0x7f;
|
||||
if (i < end) {
|
||||
result = iter.next(c);
|
||||
if (!result.hasNext()) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
// last character of this subtag
|
||||
result = iter.next(c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (wantValue) {
|
||||
if (wantNext) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return iter.getValue();
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (wantNext) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return testOnlyGetDistanceTable(true).toString();
|
||||
}
|
||||
|
||||
private String[] partitionsForRegion(LSR lsr) {
|
||||
// ill-formed region -> one empty string
|
||||
int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0;
|
||||
return partitionArrays[pIndex];
|
||||
}
|
||||
|
||||
boolean isParadigmLSR(LSR lsr) {
|
||||
return paradigmLSRs.contains(lsr);
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public int getDefaultScriptDistance() {
|
||||
return defaultScriptDistance;
|
||||
}
|
||||
|
||||
int getDefaultRegionDistance() {
|
||||
return defaultRegionDistance;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
|
||||
Map<String, Integer> map = new LinkedHashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (BytesTrie.Entry entry : trie) {
|
||||
sb.setLength(0);
|
||||
int numSubtags = 0;
|
||||
int length = entry.bytesLength();
|
||||
for (int i = 0; i < length; ++i) {
|
||||
byte b = entry.byteAt(i);
|
||||
if (b == '*') {
|
||||
// One * represents a (desired, supported) = (ANY, ANY) pair.
|
||||
sb.append("*-*-");
|
||||
numSubtags += 2;
|
||||
} else {
|
||||
if (b >= 0) {
|
||||
sb.append((char) b);
|
||||
} else { // end of subtag
|
||||
sb.append((char) (b & 0x7f)).append('-');
|
||||
++numSubtags;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
|
||||
if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
|
||||
sb.setLength(sb.length() - 1);
|
||||
String s = sb.toString();
|
||||
if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
|
||||
// Re-insert single-ANY match points to show consistent structure
|
||||
// for the test code.
|
||||
map.put(s.substring(0, s.length() - 2), 0);
|
||||
}
|
||||
map.put(s, entry.value);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public void testOnlyPrintDistanceTable() {
|
||||
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
|
||||
System.out.println(mapping);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,781 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Predicate;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.BytesTrieBuilder;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public final class LocaleDistanceBuilder {
|
||||
// Wildcard key that stands for "*" in the builder's maps.
// U+FFFD is written as an escape so that the constant survives re-encoding of this file;
// as a single character above U+007F it sorts after every ASCII subtag.
private static final String ANY = "\uFFFD"; // matches any character. Uses value above any subtag.
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
private static String fixAny(String string) {
|
||||
return "*".equals(string) ? ANY : string;
|
||||
}
|
||||
|
||||
private static ICUResourceBundle getSupplementalDataBundle(String name) {
|
||||
return ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, name,
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
}
|
||||
|
||||
private static final class TerritoryContainment {
|
||||
/** Directed, acyclic containment graph. Maps each container to its direct contents. */
|
||||
final Multimap<String, String> graph = TreeMultimap.create();
|
||||
/** Maps each container to all of its contents, direct and indirect. */
|
||||
final Multimap<String, String> resolved = TreeMultimap.create();
|
||||
/** Maps each container only to its leaf contents. */
|
||||
final Multimap<String, String> toLeavesOnly = TreeMultimap.create();
|
||||
/** The leaves of the graph. */
|
||||
final Set<String> leaves;
|
||||
|
||||
TerritoryContainment(ICUResourceBundle supplementalData) {
|
||||
UResource.Value value = supplementalData.getValueWithFallback("territoryContainment");
|
||||
UResource.Key key = new UResource.Key();
|
||||
addContainments(key, value);
|
||||
resolve("001");
|
||||
|
||||
for (Map.Entry<String, Set<String>> entry : resolved.asMap().entrySet()) {
|
||||
String container = entry.getKey();
|
||||
for (String contained : entry.getValue()) {
|
||||
if (resolved.get(contained) == null) { // a leaf node (usually a country)
|
||||
toLeavesOnly.put(container, contained);
|
||||
}
|
||||
}
|
||||
}
|
||||
leaves = toLeavesOnly.get("001");
|
||||
}
|
||||
|
||||
private void addContainments(UResource.Key key, UResource.Value value) {
|
||||
UResource.Table containers = value.getTable();
|
||||
for (int i = 0; containers.getKeyAndValue(i, key, value); ++i) {
|
||||
if (key.length() <= 3) {
|
||||
String container = key.toString();
|
||||
String[] contents = value.getStringArrayOrStringAsArray();
|
||||
for (String s : contents) {
|
||||
graph.put(container, s);
|
||||
}
|
||||
} else {
|
||||
addContainments(key, value); // containedGroupings etc.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Set<String> resolve(String region) {
|
||||
Set<String> contained = graph.get(region);
|
||||
if (contained == null) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
resolved.putAll(region, contained); // do top level
|
||||
// then recursively
|
||||
for (String subregion : contained) {
|
||||
resolved.putAll(region, resolve(subregion));
|
||||
}
|
||||
return resolved.get(region);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Rule {
|
||||
final List<String> desired;
|
||||
final List<String> supported;
|
||||
final int distance;
|
||||
final boolean oneway;
|
||||
|
||||
Rule(List<String> desired, List<String> supported, int distance, boolean oneway) {
|
||||
this.desired = desired;
|
||||
this.supported = supported;
|
||||
this.distance = distance;
|
||||
this.oneway = oneway;
|
||||
}
|
||||
}
|
||||
|
||||
private static final <T> int makeUniqueIndex(Map<T, Integer> objectToInt, T source) {
|
||||
Integer result = objectToInt.get(source);
|
||||
if (result == null) {
|
||||
int newResult = objectToInt.size();
|
||||
objectToInt.put(source, newResult);
|
||||
return newResult;
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TrieBuilder {
|
||||
byte[] bytes = new byte[24];
|
||||
int length = 0;
|
||||
BytesTrieBuilder tb = new BytesTrieBuilder();
|
||||
|
||||
void addStar(int value) {
|
||||
assert value >= 0;
|
||||
bytes[length++] = '*';
|
||||
tb.add(bytes, length, value);
|
||||
}
|
||||
|
||||
void addSubtag(String s, int value) {
|
||||
assert !s.isEmpty();
|
||||
assert value >= 0;
|
||||
assert !s.equals(ANY);
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
char c = s.charAt(i);
|
||||
assert c <= 0x7f;
|
||||
if (i < end) {
|
||||
bytes[length++] = (byte) c;
|
||||
} else {
|
||||
// Mark the last character as a terminator to avoid overlap matches.
|
||||
bytes[length++] = (byte) (c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
tb.add(bytes, length, value);
|
||||
}
|
||||
|
||||
BytesTrie build() {
|
||||
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
|
||||
// Allocate an array with just the necessary capacity,
|
||||
// so that we do not hold on to a larger array for a long time.
|
||||
byte[] bytes = new byte[buffer.remaining()];
|
||||
buffer.get(bytes);
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("distance trie size: " + bytes.length + " bytes");
|
||||
}
|
||||
return new BytesTrie(bytes, 0);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class DistanceTable {
|
||||
final int nodeDistance; // distance for the lookup so far
|
||||
final Map<String, Map<String, DistanceTable>> subtables;
|
||||
|
||||
DistanceTable(int distance) {
|
||||
nodeDistance = distance;
|
||||
subtables = new TreeMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
DistanceTable other;
|
||||
return this == obj ||
|
||||
(obj != null
|
||||
&& obj.getClass() == this.getClass()
|
||||
&& nodeDistance == (other = (DistanceTable) obj).nodeDistance
|
||||
&& subtables.equals(other.subtables));
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return nodeDistance ^ subtables.hashCode();
|
||||
}
|
||||
|
||||
public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
|
||||
boolean star = false;
|
||||
Map<String, DistanceTable> sub2 = subtables.get(desired);
|
||||
if (sub2 == null) {
|
||||
sub2 = subtables.get(ANY); // <*, supported>
|
||||
star = true;
|
||||
}
|
||||
DistanceTable value = sub2.get(supported); // <*/desired, supported>
|
||||
if (value == null) {
|
||||
value = sub2.get(ANY); // <*/desired, *>
|
||||
if (value == null && !star) {
|
||||
sub2 = subtables.get(ANY); // <*, supported>
|
||||
value = sub2.get(supported);
|
||||
if (value == null) {
|
||||
value = sub2.get(ANY); // <*, *>
|
||||
}
|
||||
}
|
||||
star = true;
|
||||
}
|
||||
if (distanceTable != null) {
|
||||
distanceTable.value = value;
|
||||
}
|
||||
int result = starEquals && star && desired.equals(supported) ? 0 : value.nodeDistance;
|
||||
return result;
|
||||
}
|
||||
|
||||
void copy(DistanceTable other) {
|
||||
for (Map.Entry<String, Map<String, DistanceTable>> e1 : other.subtables.entrySet()) {
|
||||
for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
|
||||
DistanceTable value = e2.getValue();
|
||||
addSubtable(e1.getKey(), e2.getKey(), value.nodeDistance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DistanceTable addSubtable(String desired, String supported, int distance) {
|
||||
Map<String, DistanceTable> sub2 = subtables.get(desired);
|
||||
if (sub2 == null) {
|
||||
subtables.put(desired, sub2 = new TreeMap<>());
|
||||
}
|
||||
DistanceTable oldNode = sub2.get(supported);
|
||||
if (oldNode != null) {
|
||||
return oldNode;
|
||||
}
|
||||
|
||||
final DistanceTable newNode = new DistanceTable(distance);
|
||||
sub2.put(supported, newNode);
|
||||
return newNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return null if value doesn't exist
|
||||
*/
|
||||
private DistanceTable getNode(String desired, String supported) {
|
||||
Map<String, DistanceTable> sub2 = subtables.get(desired);
|
||||
if (sub2 == null) {
|
||||
return null;
|
||||
}
|
||||
return sub2.get(supported);
|
||||
}
|
||||
|
||||
|
||||
/** add table for each subitem that matches and doesn't have a table already
|
||||
*/
|
||||
void addSubtables(
|
||||
String desired, String supported,
|
||||
Predicate<DistanceTable> action) {
|
||||
DistanceTable node = getNode(desired, supported);
|
||||
if (node == null) {
|
||||
// get the distance it would have
|
||||
Output<DistanceTable> node2 = new Output<>();
|
||||
int distance = getDistance(desired, supported, node2, true);
|
||||
// now add it
|
||||
node = addSubtable(desired, supported, distance);
|
||||
if (node2.value != null) {
|
||||
DistanceTable nextTable = node2.value;
|
||||
node.copy(nextTable);
|
||||
}
|
||||
}
|
||||
action.test(node);
|
||||
}
|
||||
|
||||
void addSubtables(String desiredLang, String supportedLang,
|
||||
String desiredScript, String supportedScript,
|
||||
int percentage) {
|
||||
|
||||
// add to all the values that have the matching desiredLang and supportedLang
|
||||
@SuppressWarnings("unused")
|
||||
boolean haveKeys = false;
|
||||
for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
|
||||
String key1 = e1.getKey();
|
||||
final boolean desiredIsKey = desiredLang.equals(key1);
|
||||
if (desiredIsKey || desiredLang.equals(ANY)) {
|
||||
for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
|
||||
String key2 = e2.getKey();
|
||||
final boolean supportedIsKey = supportedLang.equals(key2);
|
||||
haveKeys |= (desiredIsKey && supportedIsKey);
|
||||
if (supportedIsKey || supportedLang.equals(ANY)) {
|
||||
DistanceTable value = e2.getValue();
|
||||
value.addSubtable(desiredScript, supportedScript, percentage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// now add the sequence explicitly
|
||||
DistanceTable dt = new DistanceTable(-1);
|
||||
dt.addSubtable(desiredScript, supportedScript, percentage);
|
||||
CopyIfEmpty r = new CopyIfEmpty(dt);
|
||||
addSubtables(desiredLang, supportedLang, r);
|
||||
}
|
||||
|
||||
void addSubtables(String desiredLang, String supportedLang,
|
||||
String desiredScript, String supportedScript,
|
||||
String desiredRegion, String supportedRegion,
|
||||
int percentage) {
|
||||
|
||||
// add to all the values that have the matching desiredLang and supportedLang
|
||||
@SuppressWarnings("unused")
|
||||
boolean haveKeys = false;
|
||||
for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
|
||||
String key1 = e1.getKey();
|
||||
final boolean desiredIsKey = desiredLang.equals(key1);
|
||||
if (desiredIsKey || desiredLang.equals(ANY)) {
|
||||
for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
|
||||
String key2 = e2.getKey();
|
||||
final boolean supportedIsKey = supportedLang.equals(key2);
|
||||
haveKeys |= (desiredIsKey && supportedIsKey);
|
||||
if (supportedIsKey || supportedLang.equals(ANY)) {
|
||||
DistanceTable value = e2.getValue();
|
||||
value.addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// now add the sequence explicitly
|
||||
|
||||
DistanceTable dt = new DistanceTable(-1);
|
||||
dt.addSubtable(desiredRegion, supportedRegion, percentage);
|
||||
AddSub r = new AddSub(desiredScript, supportedScript, dt);
|
||||
addSubtables(desiredLang, supportedLang, r);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n');
|
||||
return toString("", sb).toString();
|
||||
}
|
||||
|
||||
private StringBuilder toString(String indent, StringBuilder buffer) {
|
||||
String indent2 = indent.isEmpty() ? "" : "\t";
|
||||
for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
|
||||
final Map<String, DistanceTable> subsubtable = e1.getValue();
|
||||
buffer.append(indent2).append(e1.getKey());
|
||||
String indent3 = "\t";
|
||||
for (Map.Entry<String, DistanceTable> e2 : subsubtable.entrySet()) {
|
||||
DistanceTable value = e2.getValue();
|
||||
buffer.append(indent3).append(e2.getKey());
|
||||
buffer.append('\t').append(value.nodeDistance);
|
||||
value.toString(indent+"\t\t\t", buffer);
|
||||
buffer.append('\n');
|
||||
indent3 = indent+'\t';
|
||||
}
|
||||
indent2 = indent;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void toTrie(TrieBuilder builder) {
|
||||
int startLength = builder.length;
|
||||
for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
|
||||
String desired = desSuppNode.getKey();
|
||||
Map<String, DistanceTable> suppNodeMap = desSuppNode.getValue();
|
||||
// Collapse ANY-ANY into one single *.
|
||||
if (desired.equals(ANY)) {
|
||||
assert suppNodeMap.size() == 1;
|
||||
DistanceTable node = suppNodeMap.get(ANY);
|
||||
builder.addStar(node.nodeDistance);
|
||||
node.toTrie(builder);
|
||||
} else {
|
||||
builder.addSubtag(desired, 0);
|
||||
int desiredLength = builder.length;
|
||||
for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
|
||||
String supported = suppNode.getKey();
|
||||
assert !supported.equals(ANY);
|
||||
DistanceTable node = suppNode.getValue();
|
||||
builder.addSubtag(supported, node.nodeDistance);
|
||||
node.toTrie(builder);
|
||||
builder.length = desiredLength;
|
||||
}
|
||||
}
|
||||
builder.length = startLength;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final class CopyIfEmpty implements Predicate<DistanceTable> {
|
||||
private final DistanceTable toCopy;
|
||||
CopyIfEmpty(DistanceTable resetIfNotNull) {
|
||||
this.toCopy = resetIfNotNull;
|
||||
}
|
||||
@Override
|
||||
public boolean test(DistanceTable node) {
|
||||
if (node.subtables.isEmpty()) {
|
||||
node.copy(toCopy);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class AddSub implements Predicate<DistanceTable> {
|
||||
private final String desiredSub;
|
||||
private final String supportedSub;
|
||||
private final CopyIfEmpty r;
|
||||
|
||||
AddSub(String desiredSub, String supportedSub, DistanceTable distanceTableToCopy) {
|
||||
this.r = new CopyIfEmpty(distanceTableToCopy);
|
||||
this.desiredSub = desiredSub;
|
||||
this.supportedSub = supportedSub;
|
||||
}
|
||||
@Override
|
||||
public boolean test(DistanceTable node) {
|
||||
if (node == null) {
|
||||
throw new IllegalArgumentException("bad structure");
|
||||
} else {
|
||||
node.addSubtables(desiredSub, supportedSub, r);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private static Collection<String> getIdsFromVariable(
|
||||
Multimap<String, String> variableToPartition, String variable) {
|
||||
if (variable.equals("*")) {
|
||||
return Collections.singleton("*");
|
||||
}
|
||||
Collection<String> result = variableToPartition.get(variable);
|
||||
if (result == null || result.isEmpty()) {
|
||||
throw new IllegalArgumentException("Variable not defined: " + variable);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static LocaleDistance build() {
|
||||
// From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
|
||||
// and then paradigmLocales, matchVariable, and the last languageMatch items.
|
||||
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
|
||||
String[] paradigms = supplementalData.getValueWithFallback(
|
||||
"languageMatchingInfo/written/paradigmLocales").getStringArray();
|
||||
Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
|
||||
for (String paradigm : paradigms) {
|
||||
ULocale pl = new ULocale(paradigm);
|
||||
paradigmLSRs.add(XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl));
|
||||
}
|
||||
|
||||
TerritoryContainment tc = new TerritoryContainment(supplementalData);
|
||||
|
||||
RegionMapperBuilder rmb = new RegionMapperBuilder(tc);
|
||||
UResource.Value value = supplementalData.getValueWithFallback(
|
||||
"languageMatchingInfo/written/matchVariable");
|
||||
UResource.Table variables = value.getTable();
|
||||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; variables.getKeyAndValue(i, key, value); ++i) {
|
||||
String variable = "$" + key.toString();
|
||||
String regions = value.getString();
|
||||
rmb.add(variable, regions);
|
||||
}
|
||||
|
||||
// Parse the rules.
|
||||
// We could almost process them while reading them from the source data,
|
||||
// but a rule may contain a region code rather than a variable.
|
||||
// We need to create a variable for each such region code
|
||||
// before rmb.build() and before processing the rules.
|
||||
Splitter bar = Splitter.on('_');
|
||||
|
||||
int prevSize = 0;
|
||||
value = supplementalData.getValueWithFallback("languageMatchingNew/written");
|
||||
UResource.Array matches = value.getArray();
|
||||
List<Rule> rules = new ArrayList<>(matches.getSize());
|
||||
for (int i = 0; matches.getValue(i, value); ++i) {
|
||||
String[] tuple = value.getStringArray();
|
||||
int distance = Integer.parseInt(tuple[2]);
|
||||
boolean oneway = tuple.length >= 4 && tuple[3].equals("1");
|
||||
List<String> desired = new ArrayList<>(bar.splitToList(tuple[0]));
|
||||
List<String> supported = new ArrayList<>(bar.splitToList(tuple[1]));
|
||||
int size = desired.size();
|
||||
if (size != supported.size()) {
|
||||
throw new IllegalArgumentException("uneven languageMatches pair");
|
||||
}
|
||||
if (size < prevSize) {
|
||||
throw new IllegalArgumentException("languageMatches out of order");
|
||||
}
|
||||
prevSize = size;
|
||||
// Implementation shortcuts assume:
|
||||
// - At any level, either both or neither rule subtags are *.
|
||||
// - If the rule language subtags are *, the other-level subtags must also be *.
|
||||
// If there are rules that do not fit these constraints,
|
||||
// then we need to revise the implementation.
|
||||
int langStars = checkStars(desired.get(0), supported.get(0), false);
|
||||
if (size >= 2) {
|
||||
checkStars(desired.get(1), supported.get(1), langStars == 2);
|
||||
}
|
||||
if (size == 3) {
|
||||
checkStars(desired.get(2), supported.get(2), langStars == 2);
|
||||
rmb.ensureRegionIsVariable(desired);
|
||||
rmb.ensureRegionIsVariable(supported);
|
||||
}
|
||||
rules.add(new Rule(desired, supported, distance, oneway));
|
||||
}
|
||||
|
||||
rmb.build();
|
||||
|
||||
/**
|
||||
* Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX.
|
||||
* We generate a mapping from $A1 to a set of partitions {P1, P2}
|
||||
* When we hit a rule that contains a variable,
|
||||
* we replace that rule by multiple rules for the partitions.
|
||||
*/
|
||||
final Multimap<String, String> variableToPartition = rmb.variableToPartitions;
|
||||
|
||||
final DistanceTable defaultDistanceTable = new DistanceTable(-1);
|
||||
for (Rule rule : rules) {
|
||||
List<String> desired = rule.desired;
|
||||
List<String> supported = rule.supported;
|
||||
if (rule.desired.size() <= 2) {
|
||||
// language-only or language-script
|
||||
add(defaultDistanceTable, desired, supported, rule.distance);
|
||||
if (!rule.oneway && !desired.equals(supported)) {
|
||||
add(defaultDistanceTable, supported, desired, rule.distance);
|
||||
}
|
||||
} else {
|
||||
// language-script-region
|
||||
Collection<String> desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2));
|
||||
Collection<String> supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2));
|
||||
for (String desiredRegion2 : desiredRegions) {
|
||||
desired.set(2, desiredRegion2.toString()); // fix later
|
||||
for (String supportedRegion2 : supportedRegions) {
|
||||
supported.set(2, supportedRegion2.toString()); // fix later
|
||||
add(defaultDistanceTable, desired, supported, rule.distance);
|
||||
if (!rule.oneway) {
|
||||
add(defaultDistanceTable, supported, desired, rule.distance);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TrieBuilder trieBuilder = new TrieBuilder();
|
||||
defaultDistanceTable.toTrie(trieBuilder);
|
||||
BytesTrie trie = trieBuilder.build();
|
||||
return new LocaleDistance(
|
||||
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs);
|
||||
}
|
||||
|
||||
private static int checkStars(String desired, String supported, boolean allStars) {
|
||||
int stars = (desired.equals("*") ? 1 : 0) + (supported.equals("*") ? 1 : 0);
|
||||
if (stars == 1) {
|
||||
throw new IllegalArgumentException("either both or neither rule subtags must be *: " +
|
||||
desired + ", " + supported);
|
||||
}
|
||||
if (allStars && stars != 2) {
|
||||
throw new IllegalArgumentException("both language subtags are * --> " +
|
||||
"both rule subtags on all levels must be *: " +
|
||||
desired + ", " + supported);
|
||||
}
|
||||
return stars;
|
||||
}
|
||||
|
||||
private static void add(DistanceTable languageDesired2Supported,
|
||||
List<String> desired, List<String> supported, int percentage) {
|
||||
int size = desired.size();
|
||||
if (size != supported.size() || size < 1 || size > 3) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
final String desiredLang = fixAny(desired.get(0));
|
||||
final String supportedLang = fixAny(supported.get(0));
|
||||
if (size == 1) {
|
||||
languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
|
||||
} else {
|
||||
final String desiredScript = fixAny(desired.get(1));
|
||||
final String supportedScript = fixAny(supported.get(1));
|
||||
if (size == 2) {
|
||||
languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
|
||||
} else {
|
||||
final String desiredRegion = fixAny(desired.get(2));
|
||||
final String supportedRegion = fixAny(supported.get(2));
|
||||
languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final class RegionMapperBuilder {
|
||||
private final Set<String> variables = new HashSet<>();
|
||||
final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
|
||||
final private RegionSet regionSet;
|
||||
private final TerritoryContainment tc;
|
||||
|
||||
// build() output
|
||||
Multimap<String, String> variableToPartitions;
|
||||
private byte[] regionToPartitionsIndex;
|
||||
private String[][] partitionArrays;
|
||||
|
||||
RegionMapperBuilder(TerritoryContainment tc) {
|
||||
regionSet = new RegionSet(tc);
|
||||
this.tc = tc;
|
||||
}
|
||||
|
||||
private boolean isKnownVariable(String variable) {
|
||||
return variables.contains(variable) || variable.equals("*");
|
||||
}
|
||||
|
||||
void add(String variable, String barString) {
|
||||
assert !isKnownVariable(variable);
|
||||
assert variable.startsWith("$");
|
||||
assert !variable.startsWith("$!");
|
||||
variables.add(variable);
|
||||
Set<String> tempRegions = regionSet.parseSet(barString);
|
||||
|
||||
for (String region : tempRegions) {
|
||||
regionToRawPartition.put(region, variable);
|
||||
}
|
||||
|
||||
// now add the inverse variable
|
||||
|
||||
Set<String> inverse = regionSet.inverse();
|
||||
String inverseVariable = "$!" + variable.substring(1);
|
||||
assert !isKnownVariable(inverseVariable);
|
||||
variables.add(inverseVariable);
|
||||
for (String region : inverse) {
|
||||
regionToRawPartition.put(region, inverseVariable);
|
||||
}
|
||||
}
|
||||
|
||||
void ensureRegionIsVariable(List<String> lsrList) {
|
||||
String region = lsrList.get(2);
|
||||
if (!isKnownVariable(region)) {
|
||||
assert LSR.indexForRegion(region) >= 0; // well-formed region subtag
|
||||
String variable = "$" + region;
|
||||
add(variable, region);
|
||||
lsrList.set(2, variable);
|
||||
}
|
||||
}
|
||||
|
||||
void build() {
|
||||
// Partitions as sets of variables.
|
||||
// LinkedHashMap to store & number unique sets.
|
||||
// Example: {"$!cnsar", "$!enUS", "$!maghreb", "$americas"}
|
||||
Map<Collection<String>, Integer> partitionVariables = new LinkedHashMap<>();
|
||||
// Partitions as sets of lookup ID strings.
|
||||
// Example: {"1", "5"}
|
||||
Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
|
||||
// pIndex 0: default value in regionToPartitionsIndex
|
||||
Collection<String> noPartitions = Collections.singleton("");
|
||||
makeUniqueIndex(partitionStrings, noPartitions);
|
||||
|
||||
// Example: "$americas" -> {"1", "5"}
|
||||
variableToPartitions = TreeMultimap.create();
|
||||
// Maps the index of each region code to a pIndex into partitionStrings.
|
||||
regionToPartitionsIndex = new byte[LSR.REGION_INDEX_LIMIT];
|
||||
// Maps a partition string to the set of region codes in that partition.
|
||||
// Example: "5" -> {"PR", "US", "VI"}
|
||||
Multimap<String, String> partitionToRegions = TreeMultimap.create();
|
||||
|
||||
for (Map.Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
|
||||
final String region = e.getKey();
|
||||
final Collection<String> rawPartition = e.getValue();
|
||||
// Single-character string.
|
||||
// Must be an ASCII character and must not be '*'.
|
||||
// Used to start with α.
|
||||
char partitionChar = (char) ('0' + makeUniqueIndex(partitionVariables, rawPartition));
|
||||
assert partitionChar <= 0x7f;
|
||||
String partition = String.valueOf(partitionChar);
|
||||
int pIndex = makeUniqueIndex(partitionStrings, Collections.singleton(partition));
|
||||
// The pIndex must fit into a byte.
|
||||
// For Java code simplicity, we want it to also be non-negative.
|
||||
assert pIndex <= 0x7f;
|
||||
|
||||
regionToPartitionsIndex[LSR.indexForRegion(region)] = (byte) pIndex;
|
||||
partitionToRegions.put(partition, region);
|
||||
|
||||
for (String variable : rawPartition) {
|
||||
variableToPartitions.put(variable, partition);
|
||||
}
|
||||
}
|
||||
|
||||
// We get a mapping of each macro to the partitions it intersects with.
|
||||
// Example: "419" -> {"1", "5"}
|
||||
Multimap<String,String> macroToPartitions = TreeMultimap.create();
|
||||
for (Map.Entry<String, Set<String>> e : tc.resolved.asMap().entrySet()) {
|
||||
String macro = e.getKey();
|
||||
for (Map.Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
|
||||
String partition = e2.getKey();
|
||||
if (!Collections.disjoint(e.getValue(), e2.getValue())) {
|
||||
macroToPartitions.put(macro, partition);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a combined mapping from a region code, which can be a macro region,
|
||||
// via the getRegionIndex() of that region code,
|
||||
// to a set of single-character partition strings.
|
||||
for (Map.Entry<String, Set<String>> m2p : macroToPartitions.asMap().entrySet()) {
|
||||
String macro = m2p.getKey();
|
||||
int regionIndex = LSR.indexForRegion(macro);
|
||||
if (regionToPartitionsIndex[regionIndex] == 0) {
|
||||
Set<String> partitions = m2p.getValue();
|
||||
int pIndex = makeUniqueIndex(partitionStrings, partitions);
|
||||
regionToPartitionsIndex[regionIndex] = (byte) pIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Turn the Collection of Collections into an array of arrays.
|
||||
Collection<Collection<String>> list = partitionStrings.keySet();
|
||||
partitionArrays = new String[list.size()][];
|
||||
int i = 0;
|
||||
for (Collection<String> partitions : list) {
|
||||
partitionArrays[i++] = partitions.toArray(new String[partitions.size()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
|
||||
* All macroregions are fully resolved to sets of non-macro regions.
|
||||
* <br>Syntax is simple for now:
|
||||
* <pre>regionSet := region ([-+] region)*</pre>
|
||||
* No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
|
||||
*/
|
||||
private static final class RegionSet {
|
||||
private enum Operation {add, remove}
|
||||
private final TerritoryContainment tc;
|
||||
// temporaries used in processing
|
||||
final private Set<String> tempRegions = new TreeSet<>();
|
||||
private Operation operation = null;
|
||||
|
||||
RegionSet(TerritoryContainment tc) {
|
||||
this.tc = tc;
|
||||
}
|
||||
|
||||
private Set<String> parseSet(String barString) {
|
||||
operation = Operation.add;
|
||||
int last = 0;
|
||||
tempRegions.clear();
|
||||
int i = 0;
|
||||
for (; i < barString.length(); ++i) {
|
||||
char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii
|
||||
switch(c) {
|
||||
case '+':
|
||||
add(barString, last, i);
|
||||
last = i+1;
|
||||
operation = Operation.add;
|
||||
break;
|
||||
case '-':
|
||||
add(barString, last, i);
|
||||
last = i+1;
|
||||
operation = Operation.remove;
|
||||
break;
|
||||
}
|
||||
}
|
||||
add(barString, last, i);
|
||||
return tempRegions;
|
||||
}
|
||||
|
||||
private Set<String> inverse() {
|
||||
TreeSet<String> result = new TreeSet<>(tc.leaves);
|
||||
result.removeAll(tempRegions);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void add(String barString, int last, int i) {
|
||||
if (i > last) {
|
||||
String region = barString.substring(last,i);
|
||||
changeSet(operation, region);
|
||||
}
|
||||
}
|
||||
|
||||
private void changeSet(Operation operation, String region) {
|
||||
Collection<String> contained = tc.toLeavesOnly.get(region);
|
||||
if (contained != null && !contained.isEmpty()) {
|
||||
if (Operation.add == operation) {
|
||||
tempRegions.addAll(contained);
|
||||
} else {
|
||||
tempRegions.removeAll(contained);
|
||||
}
|
||||
} else if (Operation.add == operation) {
|
||||
tempRegions.add(region);
|
||||
} else {
|
||||
tempRegions.remove(region);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -2,470 +2,256 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
|
||||
import com.ibm.icu.util.ICUException;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.ULocale.Minimize;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
public class XLikelySubtags {
|
||||
public final class XLikelySubtags {
|
||||
private static final String PSEUDO_ACCENTS_PREFIX = "'"; // -XA, -PSACCENT
|
||||
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
|
||||
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
|
||||
|
||||
private static final XLikelySubtags DEFAULT = new XLikelySubtags();
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
/**
 * Returns the shared {@code DEFAULT} instance.
 *
 * @return the singleton held in the {@code DEFAULT} field
 */
public static final XLikelySubtags getDefault() {
|
||||
return DEFAULT;
|
||||
}
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
|
||||
|
||||
private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
|
||||
Map<V, T> subTable = table.get(language);
|
||||
if (subTable == null) {
|
||||
table.put(language, subTable = new TreeMap<>());
|
||||
}
|
||||
return subTable;
|
||||
}
|
||||
static final class Data {
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
private final BytesTrie trie;
|
||||
private final LSR[] lsrs;
|
||||
|
||||
public static class Aliases {
|
||||
final Map<String, String> toCanonical;
|
||||
final Multimap<String, String> toAliases;
|
||||
public String getCanonical(String alias) {
|
||||
String canonical = toCanonical.get(alias);
|
||||
return canonical == null ? alias : canonical;
|
||||
}
|
||||
public Set<String> getAliases(String canonical) {
|
||||
Set<String> aliases = toAliases.get(canonical);
|
||||
return aliases == null ? Collections.singleton(canonical) : aliases;
|
||||
}
|
||||
public Aliases(String key) {
|
||||
UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
|
||||
UResourceBundle metadataAlias = metadata.get("alias");
|
||||
UResourceBundle territoryAlias = metadataAlias.get(key);
|
||||
Map<String, String> toCanonical1 = new HashMap<>();
|
||||
for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
|
||||
UResourceBundle res = territoryAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
if (aliasFrom.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
String aliasReason = res.get("reason").getString();
|
||||
if (aliasReason.equals("overlong")) {
|
||||
continue;
|
||||
}
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
int spacePos = aliasTo.indexOf(' ');
|
||||
String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
|
||||
if (aliasFirst.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
toCanonical1.put(aliasFrom, aliasFirst);
|
||||
}
|
||||
if (key.equals("language")) {
|
||||
toCanonical1.put("mo", "ro"); // special case
|
||||
}
|
||||
toCanonical = Collections.unmodifiableMap(toCanonical1);
|
||||
toAliases = Multimaps.invertFrom(toCanonical1, HashMultimap.<String,String>create());
|
||||
/**
 * Bundles the pieces of likely-subtags data. All arguments are stored by
 * reference, with no copying.
 *
 * @param languageAliases alias-to-canonical map for language codes
 * @param regionAliases   alias-to-canonical map for region codes
 * @param trie            lookup trie whose values index into {@code lsrs}
 * @param lsrs            the result values referenced by the trie
 */
Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
|
||||
BytesTrie trie, LSR[] lsrs) {
|
||||
this.languageAliases = languageAliases;
|
||||
this.regionAliases = regionAliases;
|
||||
this.trie = trie;
|
||||
this.lsrs = lsrs;
|
||||
}
|
||||
}
|
||||
|
||||
public static class LSR {
|
||||
public final String language;
|
||||
public final String script;
|
||||
public final String region;
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
|
||||
public static Aliases LANGUAGE_ALIASES = new Aliases("language");
|
||||
public static Aliases REGION_ALIASES = new Aliases("territory");
|
||||
// The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
|
||||
// There is also a trie value for each intermediate lang and lang+script.
|
||||
// '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
|
||||
private final BytesTrie trie;
|
||||
private final long trieUndState;
|
||||
private final long trieUndZzzzState;
|
||||
private final int defaultLsrIndex;
|
||||
private final LSR[] lsrs;
|
||||
|
||||
/**
 * Static factory equivalent to {@code new LSR(language, script, region)}.
 *
 * @param language the language subtag
 * @param script   the script subtag
 * @param region   the region subtag
 * @return a new LSR holding the three values as given
 */
public static LSR from(String language, String script, String region) {
|
||||
return new LSR(language, script, region);
|
||||
}
|
||||
private XLikelySubtags(XLikelySubtags.Data data) {
|
||||
languageAliases = data.languageAliases;
|
||||
regionAliases = data.regionAliases;
|
||||
trie = data.trie;
|
||||
lsrs = data.lsrs;
|
||||
|
||||
// from http://unicode.org/reports/tr35/#Unicode_language_identifier
|
||||
// but simplified to requiring language subtag, and nothing beyond region
|
||||
// #1 is language
|
||||
// #2 is script
|
||||
// #3 is region
|
||||
// static final String pat =
|
||||
// "language_id = (unicode_language_subtag)"
|
||||
// + "(?:sep(unicode_script_subtag))?"
|
||||
// + "(?:sep(unicode_region_subtag))?;\n"
|
||||
// + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
|
||||
// + "unicode_script_subtag = alpha{4};\n"
|
||||
// + "unicode_region_subtag = alpha{2}|digit{3};\n"
|
||||
// + "sep = [-_];\n"
|
||||
// + "digit = [0-9];\n"
|
||||
// + "alpha = [A-Za-z];\n"
|
||||
// ;
|
||||
// static {
|
||||
// System.out.println(pat);
|
||||
// System.out.println(new UnicodeRegex().compileBnf(pat));
|
||||
// }
|
||||
// static final Pattern LANGUAGE_PATTERN = Pattern.compile(
|
||||
// "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
|
||||
// //new UnicodeRegex().compileBnf(pat)
|
||||
// );
|
||||
//
|
||||
// NOTE: Should we fix this to check for format?
|
||||
// ANSWER: Not required, since this is only called internally. Moreover, we deliberately
|
||||
// use invalid language tags ("x1", "x2", etc.) to represent pseudo-locales. See below.
|
||||
static LSR from(String languageIdentifier) {
|
||||
String[] parts = languageIdentifier.split("[-_]");
|
||||
if (parts.length < 1 || parts.length > 3) {
|
||||
throw new ICUException("too many subtags");
|
||||
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
|
||||
BytesTrie.Result result = trie.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
int value = trie.getValue();
|
||||
assert value == 0;
|
||||
trieUndState = trie.getState64();
|
||||
result = trie.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
value = trie.getValue();
|
||||
assert value == 0;
|
||||
trieUndZzzzState = trie.getState64();
|
||||
result = trie.next('*');
|
||||
assert result.hasValue();
|
||||
defaultLsrIndex = trie.getValue();
|
||||
trie.reset();
|
||||
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** likely subtags");
|
||||
for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
|
||||
System.out.println(mapping);
|
||||
}
|
||||
String lang = parts[0].toLowerCase();
|
||||
String p2 = parts.length < 2 ? "" : parts[1];
|
||||
String p3 = parts.length < 3 ? "" : parts[2];
|
||||
return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
|
||||
|
||||
// Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
|
||||
// if (!matcher.matches()) {
|
||||
// return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
|
||||
// }
|
||||
// System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
|
||||
// throw new ICUException("invalid language id");
|
||||
}
|
||||
|
||||
private static final HashMap<ULocale, LSR> pseudoReplacements = new HashMap<>(11);
|
||||
|
||||
// Note: code in XLocaledistance.java handles pseudo-regions XA, XB, and XC, making them
|
||||
// very distant from any other locale. Similarly, it establishes that any of the
|
||||
// invalid locales below ("x1", "x2", ..., "x7", and "x8-en") are very distant
|
||||
// from any other locale.
|
||||
static {
|
||||
String[][] source = {
|
||||
{"x-bork", "x1", "", ""},
|
||||
{"x-elmer", "x2", "", ""},
|
||||
{"x-hacker", "x3", "", ""},
|
||||
{"x-piglatin", "x4", "", ""},
|
||||
{"x-pirate", "x5", "", ""},
|
||||
{"en-XA", "x6", "", ""},
|
||||
{"en-PSACCENT", "x6", "", ""}, // Note: same as for ex-XA
|
||||
{"ar-XB", "x7", "", ""},
|
||||
{"ar-PSBIDI", "x7", "", ""}, // Note: same as for ar-XB
|
||||
{"en-XC", "x8", "en", ""}, // Note: language is stored in LSR.script field
|
||||
{"en-PSCRACK", "x8", "en", ""}, // Note: same as for en-XC
|
||||
};
|
||||
for (int i = 0; i < source.length; ++i) {
|
||||
pseudoReplacements.put(new ULocale(source[i][0]),
|
||||
new LSR(source[i][1], source[i][2], source[i][3]));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static LSR from(ULocale locale) {
|
||||
LSR replacement = pseudoReplacements.get(locale);
|
||||
if (replacement != null) {
|
||||
return replacement;
|
||||
}
|
||||
// Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
|
||||
if ("PSCRACK".equals(locale.getVariant())) {
|
||||
return new LSR(
|
||||
"x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
|
||||
}
|
||||
return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
|
||||
}
|
||||
|
||||
public static LSR fromMaximalized(ULocale locale) {
|
||||
LSR replacement = pseudoReplacements.get(locale);
|
||||
if (replacement != null) {
|
||||
return replacement;
|
||||
}
|
||||
// Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
|
||||
if ("PSCRACK".equals(locale.getVariant())) {
|
||||
return new LSR(
|
||||
"x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
|
||||
}
|
||||
return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
|
||||
}
|
||||
|
||||
public static LSR fromMaximalized(String language, String script, String region) {
|
||||
String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
|
||||
// script is ok
|
||||
String canonicalRegion = REGION_ALIASES.getCanonical(region);
|
||||
|
||||
return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
|
||||
}
|
||||
|
||||
/**
 * Creates an LSR from the three subtags, stored exactly as given.
 * No validation or normalization happens here.
 *
 * @param language the language subtag
 * @param script   the script subtag
 * @param region   the region subtag
 */
public LSR(String language, String script, String region) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder(language);
|
||||
if (!script.isEmpty()) {
|
||||
result.append('-').append(script);
|
||||
}
|
||||
if (!region.isEmpty()) {
|
||||
result.append('-').append(region);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
public LSR replace(String language2, String script2, String region2) {
|
||||
if (language2 == null && script2 == null && region2 == null) return this;
|
||||
return new LSR(
|
||||
language2 == null ? language: language2,
|
||||
script2 == null ? script : script2,
|
||||
region2 == null ? region : region2);
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LSR other;
|
||||
return this == obj ||
|
||||
(obj != null
|
||||
&& obj.getClass() == this.getClass()
|
||||
&& language.equals((other = (LSR) obj).language)
|
||||
&& script.equals(other.script)
|
||||
&& region.equals(other.region));
|
||||
}
|
||||
/**
 * Hashes the same three fields that {@code equals} compares
 * (language, script, region).
 */
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(language, script, region);
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String, Map<String, Map<String, LSR>>> langTable;
|
||||
|
||||
public XLikelySubtags() {
|
||||
this(getDefaultRawData());
|
||||
private static String getCanonical(Map<String, String> aliases, String alias) {
|
||||
String canonical = aliases.get(alias);
|
||||
return canonical == null ? alias : canonical;
|
||||
}
|
||||
|
||||
private static Map<String, String> getDefaultRawData() {
|
||||
Map<String, String> rawData = new TreeMap<>();
|
||||
UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
|
||||
for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
|
||||
String key = enumer.nextElement();
|
||||
rawData.put(key, bundle.getString(key));
|
||||
LSR makeMaximizedLsrFrom(ULocale locale) {
|
||||
String name = locale.getName();
|
||||
if (name.startsWith("@x=")) {
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(name, "", "");
|
||||
}
|
||||
return rawData;
|
||||
}
|
||||
|
||||
/**
 * Builds the lookup table from raw likely-subtags mappings by passing
 * them to {@code init}.
 *
 * @param rawData source-to-target locale ID mappings
 *                (presumably the contents of the likelySubtags bundle; confirm against callers)
 */
public XLikelySubtags(Map<String, String> rawData) {
|
||||
this.langTable = init(rawData);
|
||||
}
|
||||
|
||||
private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData) {
|
||||
// prepare alias info. We want a mapping from the canonical form to all aliases
|
||||
|
||||
//Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
|
||||
// getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
|
||||
|
||||
// Don't bother with script; there are none
|
||||
|
||||
//Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
|
||||
// getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
|
||||
|
||||
Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
|
||||
// Splitter bar = Splitter.on('_');
|
||||
// int last = -1;
|
||||
// set the base data
|
||||
Map<LSR,LSR> internCache = new HashMap<>();
|
||||
for (Entry<String, String> sourceTarget : rawData.entrySet()) {
|
||||
LSR ltp = LSR.from(sourceTarget.getKey());
|
||||
final String language = ltp.language;
|
||||
final String script = ltp.script;
|
||||
final String region = ltp.region;
|
||||
|
||||
ltp = LSR.from(sourceTarget.getValue());
|
||||
String languageTarget = ltp.language;
|
||||
final String scriptTarget = ltp.script;
|
||||
final String regionTarget = ltp.region;
|
||||
|
||||
set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
|
||||
// now add aliases
|
||||
Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
|
||||
// if (languageAliases.isEmpty()) {
|
||||
// languageAliases = Collections.singleton(language);
|
||||
// }
|
||||
Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
|
||||
// if (regionAliases.isEmpty()) {
|
||||
// regionAliases = Collections.singleton(region);
|
||||
// }
|
||||
for (String languageAlias : languageAliases) {
|
||||
for (String regionAlias : regionAliases) {
|
||||
if (languageAlias.equals(language) && regionAlias.equals(region)) {
|
||||
continue;
|
||||
}
|
||||
set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
|
||||
}
|
||||
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
|
||||
// They should match only themselves,
|
||||
// not other locales with what looks like the same language and script subtags.
|
||||
String language = locale.getLanguage();
|
||||
String script = locale.getScript();
|
||||
String region = locale.getCountry();
|
||||
if (region.length() == 2 && region.charAt(0) == 'X') {
|
||||
switch (region.charAt(1)) {
|
||||
case 'A':
|
||||
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
|
||||
PSEUDO_ACCENTS_PREFIX + script, region);
|
||||
case 'B':
|
||||
return new LSR(PSEUDO_BIDI_PREFIX + language,
|
||||
PSEUDO_BIDI_PREFIX + script, region);
|
||||
case 'C':
|
||||
return new LSR(PSEUDO_CRACKED_PREFIX + language,
|
||||
PSEUDO_CRACKED_PREFIX + script, region);
|
||||
default: // normal locale
|
||||
break;
|
||||
}
|
||||
}
|
||||
// hack
|
||||
set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
|
||||
|
||||
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
|
||||
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
|
||||
|
||||
// so und-Latn-GH => ak-Latn-GH
|
||||
Map<String, Map<String, LSR>> undScriptMap = result.get("und");
|
||||
Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
|
||||
for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
|
||||
final LSR value = regionEntry.getValue();
|
||||
set(result, "und", value.script, value.region, value);
|
||||
}
|
||||
//
|
||||
// check that every level has "" (or "und")
|
||||
if (!result.containsKey("und")) {
|
||||
throw new IllegalArgumentException("failure: base");
|
||||
}
|
||||
for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
|
||||
String lang = langEntry.getKey();
|
||||
final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
|
||||
if (!scriptMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang);
|
||||
}
|
||||
for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
|
||||
String script = scriptEntry.getKey();
|
||||
final Map<String, LSR> regionMap = scriptEntry.getValue();
|
||||
if (!regionMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang + "-" + script);
|
||||
}
|
||||
// for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
|
||||
// String region = regionEntry.getKey();
|
||||
// LSR value = regionEntry.getValue();
|
||||
// }
|
||||
String variant = locale.getVariant();
|
||||
if (variant.startsWith("PS")) {
|
||||
switch (variant) {
|
||||
case "PSACCENT":
|
||||
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
|
||||
PSEUDO_ACCENTS_PREFIX + script, region.isEmpty() ? "XA" : region);
|
||||
case "PSBIDI":
|
||||
return new LSR(PSEUDO_BIDI_PREFIX + language,
|
||||
PSEUDO_BIDI_PREFIX + script, region.isEmpty() ? "XB" : region);
|
||||
case "PSCRACK":
|
||||
return new LSR(PSEUDO_CRACKED_PREFIX + language,
|
||||
PSEUDO_CRACKED_PREFIX + script, region.isEmpty() ? "XC" : region);
|
||||
default: // normal locale
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
language = getCanonical(languageAliases, language);
|
||||
// script is ok
|
||||
region = getCanonical(regionAliases, region);
|
||||
return INSTANCE.maximize(language, script, region);
|
||||
}
|
||||
|
||||
// private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
|
||||
// for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
|
||||
// final String alias = e.getKey();
|
||||
// if (alias.contains("_")) {
|
||||
// continue; // only do simple aliasing
|
||||
// }
|
||||
// String canonical = getCanonical(e.getValue());
|
||||
// canonicalToAlias.put(canonical, alias);
|
||||
// }
|
||||
// }
|
||||
|
||||
// private static String getCanonical(R2<List<String>, String> aliasAndReason) {
|
||||
// if (aliasAndReason == null) {
|
||||
// return null;
|
||||
// }
|
||||
// if (aliasAndReason.get1().equals("overlong")) {
|
||||
// return null;
|
||||
// }
|
||||
// List<String> value = aliasAndReason.get0();
|
||||
// if (value.size() != 1) {
|
||||
// return null;
|
||||
// }
|
||||
// final String canonical = value.iterator().next();
|
||||
// if (canonical.contains("_")) {
|
||||
// return null; // only do simple aliasing
|
||||
// }
|
||||
// return canonical;
|
||||
// }
|
||||
|
||||
private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
|
||||
final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
|
||||
LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
|
||||
LSR oldValue = internCache.get(newValue);
|
||||
if (oldValue == null) {
|
||||
internCache.put(newValue, newValue);
|
||||
oldValue = newValue;
|
||||
}
|
||||
set(langTable, language, script, region, oldValue);
|
||||
}
|
||||
|
||||
private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
|
||||
Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
|
||||
Map<String, LSR> regionTable = getSubtable(scriptTable, script);
|
||||
// LSR oldValue = regionTable.get(region);
|
||||
// if (oldValue != null) {
|
||||
// int debug = 0;
|
||||
// }
|
||||
regionTable.put(region, newValue);
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: parses {@code source} with
 * {@code ULocale.forLanguageTag} and maximizes the resulting locale.
 *
 * @param source the language tag to maximize
 * @return the result of {@code maximize(ULocale)} for the parsed tag
 */
|
||||
public LSR maximize(String source) {
|
||||
return maximize(ULocale.forLanguageTag(source));
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: maximizes the locale's language, script and
 * country. Only those three fields are read from {@code source}.
 *
 * @param source the locale to maximize
 * @return the result of {@code maximize(language, script, region)}
 */
public LSR maximize(ULocale source) {
|
||||
return maximize(source.getLanguage(), source.getScript(), source.getCountry());
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: maximizes the three fields of an existing LSR.
 *
 * @param source the LSR to maximize
 * @return the result of {@code maximize(language, script, region)}
 */
public LSR maximize(LSR source) {
|
||||
return maximize(source.language, source.script, source.region);
|
||||
}
|
||||
|
||||
// public static ULocale addLikelySubtags(ULocale loc) {
|
||||
//
|
||||
// }
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
public LSR maximize(String language, String script, String region) {
|
||||
private LSR maximize(String language, String script, String region) {
|
||||
int retainOldMask = 0;
|
||||
Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
|
||||
if (scriptTable == null) { // cannot happen if language == "und"
|
||||
retainOldMask |= 4;
|
||||
scriptTable = langTable.get("und");
|
||||
} else if (!language.equals("und")) {
|
||||
retainOldMask |= 4;
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// language lookup
|
||||
if (language.equals("und")) {
|
||||
language = "";
|
||||
}
|
||||
|
||||
long state;
|
||||
int value = trieNext(iter, language, false);
|
||||
if (value >= 0) {
|
||||
if (!language.isEmpty()) {
|
||||
retainOldMask |= 4;
|
||||
}
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 4;
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
state = 0;
|
||||
}
|
||||
// script lookup
|
||||
if (script.equals("Zzzz")) {
|
||||
script = "";
|
||||
}
|
||||
Map<String, LSR> regionTable = scriptTable.get(script);
|
||||
if (regionTable == null) { // cannot happen if script == ""
|
||||
retainOldMask |= 2;
|
||||
regionTable = scriptTable.get("");
|
||||
} else if (!script.isEmpty()) {
|
||||
value = trieNext(iter, script, false);
|
||||
if (value >= 0) {
|
||||
if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", false);
|
||||
assert value == 0;
|
||||
state = iter.getState64();
|
||||
}
|
||||
}
|
||||
|
||||
// region lookup
|
||||
if (region.equals("ZZ")) {
|
||||
region = "";
|
||||
}
|
||||
LSR result = regionTable.get(region);
|
||||
if (result == null) { // cannot happen if region == ""
|
||||
retainOldMask |= 1;
|
||||
result = regionTable.get("");
|
||||
if (result == null) {
|
||||
return null;
|
||||
value = trieNext(iter, region, true);
|
||||
if (value >= 0) {
|
||||
if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
} else if (!region.isEmpty()) {
|
||||
} else {
|
||||
retainOldMask |= 1;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", true);
|
||||
if (value < 0) { // TODO: should never happen?! just assert value >= 0?
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
LSR result = lsrs[value];
|
||||
|
||||
if (language.isEmpty()) {
|
||||
language = "und";
|
||||
}
|
||||
|
||||
switch (retainOldMask) {
|
||||
default:
|
||||
case 0: return result;
|
||||
case 1: return result.replace(null, null, region);
|
||||
case 2: return result.replace(null, script, null);
|
||||
case 3: return result.replace(null, script, region);
|
||||
case 4: return result.replace(language, null, null);
|
||||
case 5: return result.replace(language, null, region);
|
||||
case 6: return result.replace(language, script, null);
|
||||
case 7: return result.replace(language, script, region);
|
||||
if (retainOldMask == 0) {
|
||||
return result;
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
language = result.language;
|
||||
}
|
||||
if ((retainOldMask & 2) == 0) {
|
||||
script = result.script;
|
||||
}
|
||||
if ((retainOldMask & 1) == 0) {
|
||||
region = result.region;
|
||||
}
|
||||
return new LSR(language, script, region);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
|
||||
private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
|
||||
BytesTrie.Result result;
|
||||
if (s.isEmpty()) {
|
||||
result = iter.next('*');
|
||||
} else {
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
result = iter.next(s.charAt(i));
|
||||
if (i < end) {
|
||||
if (!result.hasNext()) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
// last character of this subtag
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!finalSubtag) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return 0; // value should be 0, don't care
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
|
||||
ULocale.Minimize fieldToFavor) {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
@ -475,16 +261,20 @@ public class XLikelySubtags {
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
|
||||
|
||||
Map<String, LSR> regionTable0 = scriptTable.get("");
|
||||
LSR value00 = regionTable0.get("");
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, false);
|
||||
assert value >= 0;
|
||||
value = trieNext(iter, "", false);
|
||||
assert value >= 0;
|
||||
value = trieNext(iter, "", true);
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return result.replace(null, "", "");
|
||||
} else if (fieldToFavor == Minimize.FAVOR_REGION) {
|
||||
return result.replace(null, "", null);
|
||||
return new LSR(result.language, "", "");
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
@ -494,201 +284,40 @@ public class XLikelySubtags {
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return result.replace(null, null, "");
|
||||
return new LSR(result.language, result.script, "");
|
||||
} else if (favorRegionOk) {
|
||||
return result.replace(null, "", null);
|
||||
return new LSR(result.language, "", result.region);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static StringBuilder show(Map<?,?> map, String indent, StringBuilder output) {
|
||||
String first = indent.isEmpty() ? "" : "\t";
|
||||
for (Entry<?,?> e : map.entrySet()) {
|
||||
String key = e.getKey().toString();
|
||||
Object value = e.getValue();
|
||||
output.append(first + (key.isEmpty() ? "∅" : key));
|
||||
if (value instanceof Map) {
|
||||
show((Map<?,?>)value, indent+"\t", output);
|
||||
} else {
|
||||
output.append("\t" + Objects.toString(value)).append("\n");
|
||||
private Map<String, LSR> getTable() {
|
||||
Map<String, LSR> map = new LinkedHashMap<>();
|
||||
Set<String> prefixes = new HashSet<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (BytesTrie.Entry entry : trie) {
|
||||
sb.setLength(0);
|
||||
int length = entry.bytesLength();
|
||||
for (int i = 0; i < length;) {
|
||||
byte b = entry.byteAt(i++);
|
||||
sb.append((char) b);
|
||||
if (i < length && prefixes.contains(sb.toString())) {
|
||||
sb.append('-');
|
||||
}
|
||||
}
|
||||
String s = sb.toString();
|
||||
if (entry.value == 0) {
|
||||
// intermediate match point
|
||||
prefixes.add(s);
|
||||
} else {
|
||||
map.put(s, lsrs[entry.value]);
|
||||
}
|
||||
first = indent;
|
||||
}
|
||||
return output;
|
||||
return map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return show(langTable, "", new StringBuilder()).toString();
|
||||
return getTable().toString();
|
||||
}
|
||||
|
||||
// public static void main(String[] args) {
|
||||
// System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
|
||||
//
|
||||
// final Map<String, String> rawData = sdi.getLikelySubtags();
|
||||
// XLikelySubtags ls = XLikelySubtags.getDefault();
|
||||
// System.out.println(ls);
|
||||
// ls.maximize(new ULocale("iw"));
|
||||
// if (true) return;
|
||||
//
|
||||
// LanguageTagParser ltp = new LanguageTagParser();
|
||||
//
|
||||
// // get all the languages, scripts, and regions
|
||||
// Set<String> languages = new TreeSet<String>();
|
||||
// Set<String> scripts = new TreeSet<String>();
|
||||
// Set<String> regions = new TreeSet<String>();
|
||||
// Counter<String> languageCounter = new Counter<String>();
|
||||
// Counter<String> scriptCounter = new Counter<String>();
|
||||
// Counter<String> regionCounter = new Counter<String>();
|
||||
//
|
||||
// for (Entry<String, String> sourceTarget : rawData.entrySet()) {
|
||||
// final String source = sourceTarget.getKey();
|
||||
// ltp.set(source);
|
||||
// languages.add(ltp.getLanguage());
|
||||
// scripts.add(ltp.getScript());
|
||||
// regions.add(ltp.getRegion());
|
||||
// final String target = sourceTarget.getValue();
|
||||
// ltp.set(target);
|
||||
// add(target, languageCounter, ltp.getLanguage(), 1);
|
||||
// add(target, scriptCounter, ltp.getScript(), 1);
|
||||
// add(target, regionCounter, ltp.getRegion(), 1);
|
||||
// }
|
||||
// ltp.set("und-Zzzz-ZZ");
|
||||
// languageCounter.add(ltp.getLanguage(), 1);
|
||||
// scriptCounter.add(ltp.getScript(), 1);
|
||||
// regionCounter.add(ltp.getRegion(), 1);
|
||||
//
|
||||
// if (SHORT) {
|
||||
// removeSingletons(languages, languageCounter);
|
||||
// removeSingletons(scripts, scriptCounter);
|
||||
// removeSingletons(regions, regionCounter);
|
||||
// }
|
||||
//
|
||||
// System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
|
||||
// System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
|
||||
// System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
|
||||
//
|
||||
// int maxCount = Integer.MAX_VALUE;
|
||||
//
|
||||
// int counter = maxCount;
|
||||
// long tempTime = System.nanoTime();
|
||||
// newMax:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break newMax;
|
||||
// LSR result = ls.maximize(language, script, region);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long newMaxTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("newMaxTime: " + newMaxTime);
|
||||
//
|
||||
// counter = maxCount;
|
||||
// tempTime = System.nanoTime();
|
||||
// newMin:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break newMin;
|
||||
// LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long newMinTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("newMinTime: " + newMinTime);
|
||||
//
|
||||
// // *****
|
||||
//
|
||||
// tempTime = System.nanoTime();
|
||||
// counter = maxCount;
|
||||
// oldMax:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break oldMax;
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale max = ULocale.addLikelySubtags(tempLocale);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long oldMaxTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
|
||||
//
|
||||
// counter = maxCount;
|
||||
// tempTime = System.nanoTime();
|
||||
// oldMin:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break oldMin;
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long oldMinTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
|
||||
//
|
||||
// counter = maxCount;
|
||||
// testMain:
|
||||
// for (String language : languages) {
|
||||
// System.out.println(language);
|
||||
// int tests = 0;
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// ++tests;
|
||||
// if (--counter < 0) break testMain;
|
||||
// LSR maxNew = ls.maximize(language, script, region);
|
||||
// LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
|
||||
// LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
|
||||
//
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
|
||||
// ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
|
||||
// ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
|
||||
//
|
||||
// // check values
|
||||
// final String maxNewS = String.valueOf(maxNew);
|
||||
// final String maxOldS = maxOld.toLanguageTag();
|
||||
// boolean sameMax = maxOldS.equals(maxNewS);
|
||||
//
|
||||
// final String minNewSS = String.valueOf(minNewS);
|
||||
// final String minOldSS = minOldS.toLanguageTag();
|
||||
// boolean sameMinS = minNewSS.equals(minOldSS);
|
||||
//
|
||||
// final String minNewRS = String.valueOf(minNewR);
|
||||
// final String minOldRS = minOldS.toLanguageTag();
|
||||
// boolean sameMinR = minNewRS.equals(minOldRS);
|
||||
//
|
||||
// if (sameMax && sameMinS && sameMinR) continue;
|
||||
// System.out.println(new LSR(language, script, region)
|
||||
// + "\tmax: " + maxNew
|
||||
// + (sameMax ? "" : "≠" + maxOldS)
|
||||
// + "\tminS: " + minNewS
|
||||
// + (sameMinS ? "" : "≠" + minOldS)
|
||||
// + "\tminR: " + minNewR
|
||||
// + (sameMinR ? "" : "≠" + minOldR)
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// System.out.println(language + ": " + tests);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void add(String target, Counter<String> languageCounter, String language, int count) {
|
||||
// if (language.equals("aa")) {
|
||||
// int debug = 0;
|
||||
// }
|
||||
// languageCounter.add(language, count);
|
||||
// }
|
||||
//
|
||||
// private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
|
||||
// for (String s : languageCounter) {
|
||||
// final long count = languageCounter.get(s);
|
||||
// if (count <= 1) {
|
||||
// languages.remove(s);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,19 +2,20 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
@ -23,57 +24,144 @@ import com.ibm.icu.util.ULocale;
|
||||
* Immutable class that picks best match between user's desired locales and application's supported locales.
|
||||
* @author markdavis
|
||||
*/
|
||||
public class XLocaleMatcher {
|
||||
private static final LSR UND = new LSR("und","","");
|
||||
public final class XLocaleMatcher {
|
||||
private static final LSR UND_LSR = new LSR("und","","");
|
||||
private static final ULocale UND_LOCALE = new ULocale("und");
|
||||
private static final Iterator<ULocale> NULL_ITERATOR = null;
|
||||
|
||||
// Activates debugging output to stderr with details of GetBestMatch.
|
||||
private static final boolean TRACE_MATCHER = false;
|
||||
|
||||
// List of indexes, optimized for one or two.
|
||||
private static final class Indexes {
|
||||
// Some indexes without further object creation and auto-boxing.
|
||||
int first, second = -1;
|
||||
// We could turn the List into an int array + length and manage its growth.
|
||||
List<Integer> remaining;
|
||||
|
||||
Indexes(int firstIndex) {
|
||||
first = firstIndex;
|
||||
}
|
||||
void add(int i) {
|
||||
if (second < 0) {
|
||||
second = i;
|
||||
} else {
|
||||
if (remaining == null) {
|
||||
remaining = new ArrayList<>();
|
||||
}
|
||||
remaining.add(i);
|
||||
}
|
||||
}
|
||||
int getFirst() { return first; }
|
||||
int get(int i) { // returns -1 when i >= length
|
||||
if (i == 0) {
|
||||
return first;
|
||||
} else if (i == 1) {
|
||||
return second;
|
||||
} else if (remaining != null && (i -= 2) < remaining.size()) {
|
||||
return remaining.get(i);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Make public, and add public methods that return it.
|
||||
private static final class Result {
|
||||
private Result(ULocale desired, ULocale supported,
|
||||
/* Locale jdesired, */ Locale jsupported,
|
||||
int desIndex, int suppIndex) {
|
||||
desiredLocale = desired;
|
||||
supportedLocale = supported;
|
||||
// desiredJavaLocale = jdesired;
|
||||
supportedJavaLocale = jsupported;
|
||||
desiredIndex = desIndex;
|
||||
supportedIndex = suppIndex;
|
||||
}
|
||||
|
||||
ULocale desiredLocale;
|
||||
ULocale supportedLocale;
|
||||
// Locale desiredJavaLocale;
|
||||
Locale supportedJavaLocale;
|
||||
int desiredIndex;
|
||||
@SuppressWarnings("unused") // unused until public, for other wrappers
|
||||
int supportedIndex;
|
||||
}
|
||||
|
||||
// normally the default values, but can be set via constructor
|
||||
|
||||
private final XLocaleDistance localeDistance;
|
||||
private final int thresholdDistance;
|
||||
private final int demotionPerAdditionalDesiredLocale;
|
||||
private final DistanceOption distanceOption;
|
||||
|
||||
// built based on application's supported languages in constructor
|
||||
|
||||
private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
|
||||
private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
|
||||
private final ULocale defaultLanguage;
|
||||
private final ULocale[] supportedLocales;
|
||||
private final Locale[] supportedJavaLocales;
|
||||
private final Map<ULocale, Integer> supportedToIndex;
|
||||
private final Map<LSR, Indexes> supportedLsrToIndexes;
|
||||
// Array versions of the supportedLsrToIndexes keys and values.
|
||||
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
|
||||
private final LSR[] supportedLsrs;
|
||||
private final Indexes[] supportedIndexes;
|
||||
private final ULocale defaultLocale;
|
||||
private final Locale defaultJavaLocale;
|
||||
private final int defaultLocaleIndex;
|
||||
|
||||
public static class Builder {
|
||||
private Set<ULocale> supportedLanguagesList;
|
||||
/**
|
||||
* Supported locales. A Set, to avoid duplicates.
|
||||
* Maintains iteration order for consistent matching behavior (first best match wins).
|
||||
*/
|
||||
private Set<ULocale> supportedLocales;
|
||||
private int thresholdDistance = -1;
|
||||
private int demotionPerAdditionalDesiredLocale = -1;;
|
||||
private ULocale defaultLanguage;
|
||||
private XLocaleDistance localeDistance;
|
||||
private ULocale defaultLocale;
|
||||
private DistanceOption distanceOption;
|
||||
/**
|
||||
* @param languagePriorityList the languagePriorityList to set
|
||||
* @param locales the languagePriorityList to set
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setSupportedLocales(String languagePriorityList) {
|
||||
this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
|
||||
public Builder setSupportedLocales(String locales) {
|
||||
return setSupportedLocales(LocalePriorityList.add(locales).build());
|
||||
}
|
||||
public Builder setSupportedLocales(Iterable<ULocale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(); // maintain order
|
||||
for (ULocale locale : locales) {
|
||||
supportedLocales.add(locale);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
|
||||
this.supportedLanguagesList = asSet(languagePriorityList);
|
||||
public Builder setSupportedLocales(Collection<ULocale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(locales); // maintain order
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
|
||||
Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
|
||||
temp.addAll(languagePriorityList);
|
||||
this.supportedLanguagesList = temp;
|
||||
public Builder setSupportedJavaLocales(Collection<Locale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
|
||||
for (Locale locale : locales) {
|
||||
supportedLocales.add(ULocale.forLocale(locale));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Builder addSupportedLocale(ULocale locale) {
|
||||
if (supportedLocales == null) {
|
||||
supportedLocales = new LinkedHashSet<>();
|
||||
}
|
||||
supportedLocales.add(locale);
|
||||
return this;
|
||||
}
|
||||
public Builder addSupportedLocale(Locale locale) {
|
||||
return addSupportedLocale(ULocale.forLocale(locale));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param thresholdDistance the thresholdDistance to set, with -1 = default
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setThresholdDistance(int thresholdDistance) {
|
||||
if (thresholdDistance > 100) {
|
||||
thresholdDistance = 100;
|
||||
}
|
||||
this.thresholdDistance = thresholdDistance;
|
||||
return this;
|
||||
}
|
||||
@ -86,22 +174,13 @@ public class XLocaleMatcher {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setLocaleDistance(XLocaleDistance localeDistance) {
|
||||
this.localeDistance = localeDistance;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default language, with null = default = first supported language
|
||||
* @param defaultLanguage the default language
|
||||
* @param defaultLocale the default language
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setDefaultLanguage(ULocale defaultLanguage) {
|
||||
this.defaultLanguage = defaultLanguage;
|
||||
public Builder setDefaultLanguage(ULocale defaultLocale) {
|
||||
this.defaultLocale = defaultLocale;
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -122,18 +201,23 @@ public class XLocaleMatcher {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
|
||||
if (!supportedLanguagesList.isEmpty()) {
|
||||
s.append(" supported={").append(supportedLanguagesList.toString()).append("}");
|
||||
}
|
||||
if (defaultLanguage != null) {
|
||||
s.append(" default=").append(defaultLanguage.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" thresholdDistance=%d", thresholdDistance));
|
||||
}
|
||||
s.append(" preference=").append(distanceOption.name());
|
||||
return s.append("}").toString();
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
|
||||
if (!supportedLocales.isEmpty()) {
|
||||
s.append(" supported={").append(supportedLocales.toString()).append('}');
|
||||
}
|
||||
if (defaultLocale != null) {
|
||||
s.append(" default=").append(defaultLocale.toString());
|
||||
}
|
||||
if (distanceOption != null) {
|
||||
s.append(" distance=").append(distanceOption.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
if (demotionPerAdditionalDesiredLocale >= 0) {
|
||||
s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
|
||||
}
|
||||
return s.append('}').toString();
|
||||
}
|
||||
}
|
||||
|
||||
@ -159,75 +243,101 @@ public class XLocaleMatcher {
|
||||
}
|
||||
|
||||
/**
 * Creates a locale matcher with the given Builder parameters.
 *
 * <p>A Builder on which no supported locales were set is treated like one
 * with an empty list of supported locales.
 */
private XLocaleMatcher(Builder builder) {
    // A negative builder value means "use the default".
    thresholdDistance = builder.thresholdDistance < 0 ?
            LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
    // The builder's set is null until a setter/adder was called.
    Set<ULocale> builderLocales = builder.supportedLocales;
    // Store the supported locales in input order,
    // so that when different types are used (e.g., java.util.Locale)
    // we can return those by parallel index.
    int supportedLocalesLength = builderLocales == null ? 0 : builderLocales.size();
    supportedLocales = new ULocale[supportedLocalesLength];
    supportedJavaLocales = new Locale[supportedLocalesLength];
    supportedToIndex = new HashMap<>(supportedLocalesLength);
    // We need an unordered map from LSR to first supported locale with that LSR,
    // and an ordered list of (LSR, Indexes).
    // We use a LinkedHashMap for both,
    // and insert the supported locales in the following order:
    // 1. First supported locale.
    // 2. Priority locales in builder order.
    // 3. Remaining locales in builder order.
    supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
    Map<LSR, Indexes> otherLsrToIndexes = null;
    LSR firstLSR = null;
    int i = 0;
    if (builderLocales != null) {
        for (ULocale locale : builderLocales) {
            supportedLocales[i] = locale;
            supportedJavaLocales[i] = locale.toLocale();
            // supportedToIndex.putIfAbsent(locale, i)
            Integer oldIndex = supportedToIndex.get(locale);
            if (oldIndex == null) {
                supportedToIndex.put(locale, i);
            }
            LSR lsr = getMaximalLsrOrUnd(locale);
            if (i == 0) {
                firstLSR = lsr;
                supportedLsrToIndexes.put(lsr, new Indexes(0));
            } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
                addIndex(supportedLsrToIndexes, lsr, i);
            } else {
                // Non-priority LSRs are collected separately and appended after the loop.
                if (otherLsrToIndexes == null) {
                    otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
                }
                addIndex(otherLsrToIndexes, lsr, i);
            }
            ++i;
        }
    }
    if (otherLsrToIndexes != null) {
        supportedLsrToIndexes.putAll(otherLsrToIndexes);
    }
    // Parallel arrays of the map's keys and values, in the map's iteration order.
    int numSuppLsrs = supportedLsrToIndexes.size();
    supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
    supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
    // Default locale: the explicitly set one, else the first supported locale, else null.
    // Only the "first supported locale" case has a supported index; otherwise it stays -1.
    ULocale def;
    Locale jdef = null;
    int idef = -1;
    if (builder.defaultLocale != null) {
        def = builder.defaultLocale;
    } else if (supportedLocalesLength > 0) {
        def = supportedLocales[0];  // first language
        jdef = supportedJavaLocales[0];
        idef = 0;
    } else {
        def = null;
    }
    if (jdef == null && def != null) {
        jdef = def.toLocale();
    }
    defaultLocale = def;
    defaultJavaLocale = jdef;
    defaultLocaleIndex = idef;
    // A negative builder value means "use the default".
    demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
            LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
            builder.demotionPerAdditionalDesiredLocale;
    distanceOption = builder.distanceOption;
}
|
||||
|
||||
// Result is not immutable!
|
||||
private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
|
||||
Set<LSR> result = new LinkedHashSet<LSR>();
|
||||
for (ULocale item : languagePriorityList) {
|
||||
final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
|
||||
result.add(max);
|
||||
private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
|
||||
Indexes indexes = lsrToIndexes.get(lsr);
|
||||
if (indexes == null) {
|
||||
lsrToIndexes.put(lsr, new Indexes(i));
|
||||
} else {
|
||||
indexes.add(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
|
||||
Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
|
||||
for (ULocale item : languagePriorityList) {
|
||||
final LSR max = item.equals(UND_LOCALE) ? UND :
|
||||
LSR.fromMaximalized(item);
|
||||
builder.put(max, item);
|
||||
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
|
||||
if (locale.equals(UND_LOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
if (builder.size() > 1 && priorities != null) {
|
||||
// for the supported list, we put any priorities before all others, except for the first.
|
||||
Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
|
||||
|
||||
// copy the long way so the priorities are in the same order as in the original
|
||||
boolean first = true;
|
||||
for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
|
||||
final LSR key = entry.getKey();
|
||||
if (first || priorities.contains(key)) {
|
||||
builder2.putAll(key, entry.getValue());
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
// now copy the rest
|
||||
builder2.putAll(builder);
|
||||
if (!builder2.equals(builder)) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
builder = builder2;
|
||||
}
|
||||
return ImmutableMultimap.copyOf(builder);
|
||||
}
|
||||
|
||||
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale ulocale) {
|
||||
return getBestMatch(ulocale, null);
|
||||
return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(String languageList) {
|
||||
@ -235,126 +345,128 @@ public class XLocaleMatcher {
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale... locales) {
|
||||
return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
|
||||
return getBestMatch(Arrays.asList(locales), null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
|
||||
return getBestMatch(desiredLanguages, null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
|
||||
return getBestMatch(desiredLanguages, null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
|
||||
return getBestMatch(asSet(desiredLanguages), outputBestDesired);
|
||||
}
|
||||
|
||||
// TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
|
||||
private static Set<ULocale> asSet(LocalePriorityList languageList) {
|
||||
Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
|
||||
for (ULocale locale : languageList) {
|
||||
temp.add(locale);
|
||||
};
|
||||
return temp;
|
||||
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
|
||||
return getBestMatch(desiredLocales, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best.
|
||||
* @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best (can be null).
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return the best match.
|
||||
*/
|
||||
public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
|
||||
// fast path for singleton
|
||||
if (desiredLanguages.size() == 1) {
|
||||
return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
|
||||
}
|
||||
// TODO produce optimized version for single desired ULocale
|
||||
Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
|
||||
int bestDistance = Integer.MAX_VALUE;
|
||||
ULocale bestDesiredLocale = null;
|
||||
Collection<ULocale> bestSupportedLocales = null;
|
||||
int delta = 0;
|
||||
mainLoop:
|
||||
for (final Entry<LSR, Set<ULocale>> desiredLsrAndLocales : desiredLSRs.asMap().entrySet()) {
|
||||
LSR desiredLSR = desiredLsrAndLocales.getKey();
|
||||
for (ULocale desiredLocale : desiredLsrAndLocales.getValue()) {
|
||||
// quick check for exact match
|
||||
if (delta < bestDistance) {
|
||||
if (exactSupportedLocales.contains(desiredLocale)) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf(
|
||||
"Returning %s, which is an exact match for a supported language\n",
|
||||
desiredLocale);
|
||||
}
|
||||
return desiredLocale;
|
||||
}
|
||||
// quick check for maximized locale
|
||||
Collection<ULocale> found = supportedLanguages.get(desiredLSR);
|
||||
if (found != null) {
|
||||
// if we find one in the set, return first (lowest). We already know the exact one isn't
|
||||
// there.
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
ULocale result = found.iterator().next();
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s\n", result.toString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
|
||||
int distance =
|
||||
delta
|
||||
+ localeDistance.distanceRaw(
|
||||
desiredLSR,
|
||||
supportedLsrAndLocale.getKey(),
|
||||
thresholdDistance,
|
||||
distanceOption);
|
||||
if (distance < bestDistance) {
|
||||
bestDistance = distance;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLocales = supportedLsrAndLocale.getValue();
|
||||
if (distance == 0) {
|
||||
break mainLoop;
|
||||
}
|
||||
}
|
||||
}
|
||||
delta += demotionPerAdditionalDesiredLocale;
|
||||
}
|
||||
}
|
||||
if (bestDistance >= thresholdDistance) {
|
||||
public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
|
||||
Iterator<ULocale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s\n", defaultLanguage.toString());
|
||||
System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
|
||||
}
|
||||
return defaultLanguage;
|
||||
return defaultLocale;
|
||||
}
|
||||
ULocale desiredLocale = desiredIter.next();
|
||||
return getBestMatch(desiredLocale, desiredIter, outputBestDesired);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param desiredLocale First desired locale.
|
||||
* @param remainingIter Remaining desired locales, null or empty if none.
|
||||
* @param outputBestDesired If not null,
|
||||
* will be set to the desired locale that matches the best supported one.
|
||||
* @return the best supported locale.
|
||||
*/
|
||||
private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
|
||||
Output<ULocale> outputBestDesired) {
|
||||
Result result = getBestMatch(desiredLocale, remainingIter);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = bestDesiredLocale;
|
||||
outputBestDesired.value = result.desiredLocale;
|
||||
}
|
||||
// pick exact match if there is one
|
||||
if (bestSupportedLocales.contains(bestDesiredLocale)) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf(
|
||||
"Returning %s which matches a supported language\n", bestDesiredLocale.toString());
|
||||
return result.supportedLocale;
|
||||
}
|
||||
|
||||
private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
|
||||
int desiredIndex = 0;
|
||||
int bestDesiredIndex = -1;
|
||||
ULocale bestDesiredLocale = null;
|
||||
int bestSupportedLsrIndex = 0;
|
||||
for (int bestDistance = thresholdDistance; bestDistance > 0;
|
||||
bestDistance -= demotionPerAdditionalDesiredLocale) {
|
||||
// Quick check for exact locale match.
|
||||
Integer supportedIndex = supportedToIndex.get(desiredLocale);
|
||||
if (supportedIndex != null) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desired=supported\n", desiredLocale);
|
||||
}
|
||||
int suppIndex = supportedIndex;
|
||||
return new Result(desiredLocale, supportedLocales[suppIndex],
|
||||
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
|
||||
}
|
||||
return bestDesiredLocale;
|
||||
// Quick check for exact maximized LSR.
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
|
||||
if (indexes != null) {
|
||||
// If this is a supported LSR, return the first locale.
|
||||
// We already know the exact locale isn't there.
|
||||
int suppIndex = indexes.getFirst();
|
||||
ULocale result = supportedLocales[suppIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
|
||||
}
|
||||
return new Result(desiredLocale, result,
|
||||
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLsrs, bestDistance, distanceOption);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
bestDesiredIndex = desiredIndex;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >> 8;
|
||||
if (bestDistance == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (remainingIter == null || !remainingIter.hasNext()) {
|
||||
break;
|
||||
}
|
||||
desiredLocale = remainingIter.next();
|
||||
++desiredIndex;
|
||||
}
|
||||
// otherwise return first supported, combining variants and extensions from bestDesired
|
||||
ULocale result = bestSupportedLocales.iterator().next();
|
||||
if (bestDesiredIndex < 0) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no good match\n", defaultLocale);
|
||||
}
|
||||
return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
|
||||
}
|
||||
// Pick exact match if there is one.
|
||||
// The length of the list is normally 1.
|
||||
Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
|
||||
int suppIndex;
|
||||
for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
|
||||
ULocale locale = supportedLocales[suppIndex];
|
||||
if (bestDesiredLocale.equals(locale)) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desired=best matching supported language\n",
|
||||
bestDesiredLocale);
|
||||
}
|
||||
return new Result(bestDesiredLocale, locale,
|
||||
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
|
||||
}
|
||||
}
|
||||
// Otherwise return the first of the supported languages that share the best-matching LSR.
|
||||
suppIndex = bestSupportedIndexes.getFirst();
|
||||
ULocale result = supportedLocales[suppIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning first supported language %s\n", result.toString());
|
||||
System.err.printf("Returning %s: first best matching supported language\n", result);
|
||||
}
|
||||
return result;
|
||||
return new Result(bestDesiredLocale, result,
|
||||
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -365,73 +477,88 @@ public class XLocaleMatcher {
|
||||
* @return the best match.
|
||||
*/
|
||||
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
|
||||
int bestDistance = Integer.MAX_VALUE;
|
||||
ULocale bestDesiredLocale = null;
|
||||
Collection<ULocale> bestSupportedLocales = null;
|
||||
return getBestMatch(desiredLocale, null, outputBestDesired);
|
||||
}
|
||||
|
||||
// quick check for exact match, with hack for und
|
||||
final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
|
||||
/**
|
||||
* Converts Locales to ULocales on the fly.
|
||||
*/
|
||||
private static final class LocalesWrapper implements Iterator<ULocale> {
|
||||
private Iterator<Locale> locales;
|
||||
// Cache locales to avoid conversion of the result.
|
||||
private Locale first, second;
|
||||
private List<Locale> remaining;
|
||||
|
||||
if (exactSupportedLocales.contains(desiredLocale)) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Exact match with a supported locale.\n");
|
||||
}
|
||||
return desiredLocale;
|
||||
LocalesWrapper(Iterator<Locale> locales) {
|
||||
this.locales = locales;
|
||||
}
|
||||
// quick check for maximized locale
|
||||
if (distanceOption == DistanceOption.REGION_FIRST) {
|
||||
Collection<ULocale> found = supportedLanguages.get(desiredLSR);
|
||||
if (found != null) {
|
||||
// if we find one in the set, return first (lowest). We already know the exact one isn't there.
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return locales.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ULocale next() {
|
||||
Locale locale = locales.next();
|
||||
if (first == null) {
|
||||
first = locale;
|
||||
} else if (second == null) {
|
||||
second = locale;
|
||||
} else {
|
||||
if (remaining == null) {
|
||||
remaining = new ArrayList<>();
|
||||
}
|
||||
ULocale result = found.iterator().next();
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Matches a maximized supported locale: %s\n", result);
|
||||
}
|
||||
return result;
|
||||
remaining.add(locale);
|
||||
}
|
||||
return ULocale.forLocale(locale);
|
||||
}
|
||||
|
||||
Locale getJavaLocale(int i) {
|
||||
if (i == 0) {
|
||||
return first;
|
||||
} else if (i == 1) {
|
||||
return second;
|
||||
} else {
|
||||
// TODO: test code coverage
|
||||
return remaining.get(i - 2);
|
||||
}
|
||||
}
|
||||
for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
|
||||
int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
|
||||
thresholdDistance, distanceOption);
|
||||
if (distance < bestDistance) {
|
||||
bestDistance = distance;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLocales = supportedLsrAndLocale.getValue();
|
||||
if (distance == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (bestDistance >= thresholdDistance) {
|
||||
}
|
||||
|
||||
public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
|
||||
Iterator<Locale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf(
|
||||
"Returning default %s because everything exceeded the threshold of %d.\n",
|
||||
defaultLanguage, thresholdDistance);
|
||||
System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
|
||||
}
|
||||
return defaultLanguage;
|
||||
return defaultJavaLocale;
|
||||
}
|
||||
LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
|
||||
ULocale desiredLocale = wrapper.next();
|
||||
Result result = getBestMatch(desiredLocale, NULL_ITERATOR);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = bestDesiredLocale;
|
||||
outputBestDesired.value = result.desiredIndex >= 0 ?
|
||||
wrapper.getJavaLocale(result.desiredIndex) : null;
|
||||
}
|
||||
// pick exact match if there is one
|
||||
if (bestSupportedLocales.contains(bestDesiredLocale)) {
|
||||
return bestDesiredLocale;
|
||||
return result.supportedJavaLocale;
|
||||
}
|
||||
|
||||
public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
|
||||
ULocale desiredULocale = ULocale.forLocale(desiredLocale);
|
||||
Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
|
||||
}
|
||||
// otherwise return first supported, combining variants and extensions from bestDesired
|
||||
ULocale result = bestSupportedLocales.iterator().next();
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("First in the list of supported locales: %s\n", result);
|
||||
}
|
||||
return result;
|
||||
return result.supportedJavaLocale;
|
||||
}
|
||||
|
||||
/** Combine features of the desired locale into those of the supported, and return result. */
|
||||
@ -474,22 +601,39 @@ public class XLocaleMatcher {
|
||||
* A language is first maximized with add likely subtags, then compared.
|
||||
*/
|
||||
public int distance(ULocale desired, ULocale supported) {
|
||||
return localeDistance.distanceRaw(
|
||||
LSR.fromMaximalized(desired),
|
||||
LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
|
||||
return LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
|
||||
thresholdDistance, distanceOption) & 0xff;
|
||||
}
|
||||
|
||||
/** Convenience method */
|
||||
public int distance(String desiredLanguage, String supportedLanguage) {
|
||||
return localeDistance.distanceRaw(
|
||||
LSR.fromMaximalized(new ULocale(desiredLanguage)),
|
||||
LSR.fromMaximalized(new ULocale(supportedLanguage)),
|
||||
thresholdDistance, distanceOption);
|
||||
return LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) },
|
||||
thresholdDistance, distanceOption) & 0xff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return exactSupportedLocales.toString();
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
|
||||
if (supportedLocales.length > 0) {
|
||||
s.append(" supported={").append(supportedLocales[0].toString());
|
||||
for (int i = 1; i < supportedLocales.length; ++i) {
|
||||
s.append(", ").append(supportedLocales[1].toString());
|
||||
}
|
||||
s.append('}');
|
||||
}
|
||||
s.append(" default=").append(Objects.toString(defaultLocale));
|
||||
if (distanceOption != null) {
|
||||
s.append(" distance=").append(distanceOption.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
|
||||
return s.append('}').toString();
|
||||
}
|
||||
|
||||
/** Return the inverse of the distance: that is, 1-distance(desired, supported) */
|
||||
|
@ -25,7 +25,7 @@ import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.Relation;
|
||||
import com.ibm.icu.impl.Row;
|
||||
import com.ibm.icu.impl.Row.R3;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
|
||||
|
||||
|
@ -4,9 +4,8 @@ package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Ignore;
|
||||
@ -15,17 +14,15 @@ import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Test the XLocaleDistance.
|
||||
* Test the LocaleDistance.
|
||||
* TODO: Rename to LocaleDistanceTest.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
@ -33,9 +30,7 @@ import com.ibm.icu.util.ULocale;
|
||||
public class XLocaleDistanceTest extends TestFmwk {
|
||||
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
|
||||
|
||||
public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
|
||||
|
||||
private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
|
||||
private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
|
||||
DataDrivenTestHelper tfh = new MyTestFileHandler()
|
||||
.setFramework(this)
|
||||
.load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
|
||||
@ -58,7 +53,7 @@ public class XLocaleDistanceTest extends TestFmwk {
|
||||
@Ignore("Disabled because of Linux; need to investigate.")
|
||||
@Test
|
||||
public void testTiming() {
|
||||
List<Arguments> testArgs = new ArrayList<Arguments>();
|
||||
List<Arguments> testArgs = new ArrayList<>();
|
||||
for (List<String> line : tfh.getLines()) {
|
||||
if (tfh.isTestLine(line)) {
|
||||
testArgs.add(new Arguments(line));
|
||||
@ -94,13 +89,13 @@ public class XLocaleDistanceTest extends TestFmwk {
|
||||
oldTimeMinusLikely += System.nanoTime()-temp;
|
||||
|
||||
temp = System.nanoTime();
|
||||
final LSR desiredLSR = LSR.fromMaximalized(desired);
|
||||
final LSR supportedLSR = LSR.fromMaximalized(supported);
|
||||
// final LSR desiredLSR = LSR.maximizedFrom(desired);
|
||||
// final LSR supportedLSR = LSR.maximizedFrom(supported);
|
||||
newLikelyTime += System.nanoTime()-temp;
|
||||
|
||||
temp = System.nanoTime();
|
||||
int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.REGION_FIRST);
|
||||
int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.REGION_FIRST);
|
||||
int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
|
||||
int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
|
||||
newTimeMinusLikely += System.nanoTime()-temp;
|
||||
}
|
||||
}
|
||||
@ -118,52 +113,53 @@ public class XLocaleDistanceTest extends TestFmwk {
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("deprecation")
|
||||
public void testInternalTable() {
|
||||
checkTables(localeMatcher.internalGetDistanceTable(), "", 1);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
|
||||
// Check that ANY, ANY is always present, and that the table has a depth of exactly 3 everyplace.
|
||||
Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
|
||||
|
||||
// must have ANY,ANY
|
||||
boolean haveANYANY = false;
|
||||
for (Entry<String, Set<String>> entry : matches.entrySet()) {
|
||||
String first = entry.getKey();
|
||||
boolean haveANYfirst = first.equals(XLocaleDistance.ANY);
|
||||
for (String second : entry.getValue()) {
|
||||
haveANYANY |= haveANYfirst && second.equals(XLocaleDistance.ANY);
|
||||
DistanceNode distanceNode = internalGetDistanceTable.getInternalNode(first, second);
|
||||
DistanceTable subDistanceTable = distanceNode.getDistanceTable();
|
||||
if (subDistanceTable == null || subDistanceTable.isEmpty()) {
|
||||
if (depth != 3) {
|
||||
logln("depth should be 3");
|
||||
}
|
||||
if (distanceNode.getClass() != DistanceNode.class) {
|
||||
logln("should be plain DistanceNode");
|
||||
}
|
||||
} else {
|
||||
if (depth >= 3) {
|
||||
logln("depth should be ≤ 3");
|
||||
}
|
||||
if (distanceNode.getClass() == DistanceNode.class) {
|
||||
logln("should NOT be plain DistanceNode");
|
||||
}
|
||||
checkTables(subDistanceTable, first + "," + second + ",", depth+1);
|
||||
Set<String> strings = localeDistance.testOnlyGetDistanceTable(false).keySet();
|
||||
// Check that the table has a depth of exactly 3 (desired, supported) pairs everyplace
|
||||
// by removing every prefix of a 6-subtag string from a copy of the set of strings.
|
||||
// Any remaining string is not a prefix of a full-depth string.
|
||||
Set<String> remaining = new HashSet<>(strings);
|
||||
// Check that ANY, ANY is always present.
|
||||
assertTrue("*-*", strings.contains("*-*"));
|
||||
for (String s : strings) {
|
||||
int num = countSubtags(s);
|
||||
assertTrue(s, 1 <= num && num <= 6);
|
||||
if (num > 1) {
|
||||
String oneShorter = removeLastSubtag(s);
|
||||
assertTrue(oneShorter, strings.contains(oneShorter));
|
||||
}
|
||||
if (num == 2 || num == 4) {
|
||||
String sPlusAnyAny = s + "-*-*";
|
||||
assertTrue(sPlusAnyAny, strings.contains(sPlusAnyAny));
|
||||
} else if (num == 6) {
|
||||
for (;; --num) {
|
||||
remaining.remove(s);
|
||||
if (num == 1) { break; }
|
||||
s = removeLastSubtag(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!haveANYANY) {
|
||||
logln("ANY-ANY not in" + matches);
|
||||
assertTrue("strings that do not lead to 6-subtag matches", remaining.isEmpty());
|
||||
}
|
||||
|
||||
private static final int countSubtags(String s) {
|
||||
if (s.isEmpty()) { return 0; }
|
||||
int num = 1;
|
||||
for (int pos = 0; (pos = s.indexOf('-', pos)) >= 0; ++pos) {
|
||||
++num;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
private static final String removeLastSubtag(String s) {
|
||||
int last = s.lastIndexOf('-');
|
||||
return s.substring(0, last);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShowDistanceTable() {
|
||||
if (isVerbose()) {
|
||||
System.out.println(XLocaleDistance.getDefault().toString(false));
|
||||
localeDistance.testOnlyPrintDistanceTable();
|
||||
}
|
||||
}
|
||||
|
||||
@ -176,10 +172,9 @@ public class XLocaleDistanceTest extends TestFmwk {
|
||||
}
|
||||
|
||||
class MyTestFileHandler extends DataDrivenTestHelper {
|
||||
final XLocaleDistance distance = XLocaleDistance.getDefault();
|
||||
Output<ULocale> bestDesired = new Output<ULocale>();
|
||||
Output<ULocale> bestDesired = new Output<>();
|
||||
private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
|
||||
private Integer threshold = distance.getDefaultScriptDistance();
|
||||
private Integer threshold = localeDistance.getDefaultScriptDistance();
|
||||
|
||||
@Override
|
||||
public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
|
||||
@ -187,8 +182,8 @@ public class XLocaleDistanceTest extends TestFmwk {
|
||||
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
|
||||
}
|
||||
Arguments args = new Arguments(arguments);
|
||||
int supportedToDesiredActual = distance.distance(args.supported, args.desired, threshold, distanceOption);
|
||||
int desiredToSupportedActual = distance.distance(args.desired, args.supported, threshold, distanceOption);
|
||||
int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
|
||||
int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
|
||||
String desiredTag = args.desired.toLanguageTag();
|
||||
String supportedTag = args.supported.toLanguageTag();
|
||||
final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
|
||||
|
@ -2,7 +2,6 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@ -16,9 +15,9 @@ import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
@ -37,7 +36,7 @@ import junitparams.Parameters;
|
||||
public class XLocaleMatcherTest extends TestFmwk {
|
||||
private static final int REGION_DISTANCE = 4;
|
||||
|
||||
private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
|
||||
private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher() {
|
||||
return new XLocaleMatcher("");
|
||||
@ -176,17 +175,102 @@ public class XLocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
private static final class PerfCase {
|
||||
ULocale desired;
|
||||
ULocale expectedShort;
|
||||
ULocale expectedLong;
|
||||
ULocale expectedVeryLong;
|
||||
|
||||
PerfCase(String des, String expShort, String expLong, String expVeryLong) {
|
||||
desired = new ULocale(des);
|
||||
expectedShort = new ULocale(expShort);
|
||||
expectedLong = new ULocale(expLong);
|
||||
expectedVeryLong = new ULocale(expVeryLong);
|
||||
}
|
||||
}
|
||||
|
||||
private static final int WARM_UP_ITERATIONS = 1000;
|
||||
private static final int BENCHMARK_ITERATIONS = 20000;
|
||||
private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
|
||||
private static final int AVG_PCT_LONG_NEW_OLD = 80;
|
||||
|
||||
@Test
|
||||
public void testPerf() {
|
||||
if (LANGUAGE_MATCHER_DATA == null) {
|
||||
return; // skip except when testing data
|
||||
}
|
||||
final ULocale desired = new ULocale("sv");
|
||||
|
||||
final String shortList = "en, sv";
|
||||
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
|
||||
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, 
fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, 
so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
|
||||
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
|
||||
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
|
||||
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
|
||||
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
|
||||
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
|
||||
"zh-CN, zh-TW, zu";
|
||||
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
|
||||
"ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
|
||||
"ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
|
||||
"ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
|
||||
"ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
|
||||
"bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
|
||||
"bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
|
||||
"bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
|
||||
"ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
|
||||
"cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
|
||||
"de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
|
||||
// removed en_001 to avoid exact match
|
||||
"ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
|
||||
"en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
|
||||
"en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
|
||||
"en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
|
||||
"en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
|
||||
"en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
|
||||
"en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
|
||||
"en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
|
||||
"en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
|
||||
"en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
|
||||
"en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
|
||||
"es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
|
||||
"es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
|
||||
"et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
|
||||
"ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
|
||||
"fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
|
||||
"fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
|
||||
"fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
|
||||
"fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
|
||||
"fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
|
||||
"gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
|
||||
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
|
||||
"id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
|
||||
"jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
|
||||
"kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
|
||||
"kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
|
||||
"ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
|
||||
"lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
|
||||
"lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
|
||||
"lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
|
||||
"mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
|
||||
"ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
|
||||
"nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
|
||||
"nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
|
||||
"nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
|
||||
"pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
|
||||
"pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
|
||||
"rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
|
||||
"ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
|
||||
"sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
|
||||
"shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
|
||||
"smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
|
||||
"sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
|
||||
"sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
|
||||
"sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
|
||||
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
|
||||
"tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
|
||||
"uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
|
||||
"vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
|
||||
"wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
|
||||
"zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
|
||||
"zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
|
||||
|
||||
final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
|
||||
final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
|
||||
@ -196,62 +280,93 @@ public class XLocaleMatcherTest extends TestFmwk {
|
||||
final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
|
||||
final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
|
||||
|
||||
//XLocaleMatcher.DEBUG = true;
|
||||
ULocale expected = new ULocale("sv");
|
||||
assertEquals(expected, matcherShort.getBestMatch(desired));
|
||||
assertEquals(expected, matcherLong.getBestMatch(desired));
|
||||
assertEquals(expected, matcherVeryLong.getBestMatch(desired));
|
||||
//XLocaleMatcher.DEBUG = false;
|
||||
|
||||
long timeShortNew=0;
|
||||
long timeMediumNew=0;
|
||||
long timeLongNew=0;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 1000000;
|
||||
boolean showMessage = i != 0;
|
||||
timeShortNew = timeXLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations);
|
||||
timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
|
||||
timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
|
||||
}
|
||||
|
||||
long timeShortOld=0;
|
||||
long timeMediumOld=0;
|
||||
long timeLongOld=0;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 100000;
|
||||
boolean showMessage = i != 0;
|
||||
timeShortOld = timeLocaleMatcher("Old Duration (few supported):\t", desired, matcherShortOld, showMessage, iterations);
|
||||
timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
|
||||
timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
|
||||
PerfCase[] pcs = new PerfCase[] {
|
||||
// Exact match in all matchers.
|
||||
new PerfCase("sv", "sv", "sv", "sv"),
|
||||
// Common locale, exact match only in very long list.
|
||||
new PerfCase("fr_CA", "en", "fr", "fr_CA"),
|
||||
// Unusual locale, no exact match.
|
||||
new PerfCase("de_CA", "en", "de", "de"),
|
||||
// World English maps to several region partitions.
|
||||
new PerfCase("en_001", "en", "en", "en"),
|
||||
// Ancient language with interesting subtags.
|
||||
new PerfCase("egy_Copt_CY", "en", "af", "af")
|
||||
};
|
||||
|
||||
for (PerfCase pc : pcs) {
|
||||
final ULocale desired = pc.desired;
|
||||
|
||||
assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
|
||||
assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
|
||||
assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
|
||||
|
||||
timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
|
||||
timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
|
||||
timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
|
||||
long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
|
||||
timeShortNew += tns;
|
||||
long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
|
||||
timeMediumNew += tnl;
|
||||
long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
|
||||
timeLongNew += tnv;
|
||||
|
||||
timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
|
||||
long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tos, (100 * tns) / tos);
|
||||
timeShortOld += tos;
|
||||
long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tol, (100 * tnl) / tol);
|
||||
timeMediumOld += tol;
|
||||
long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tov, (100 * tnv) / tov);
|
||||
timeLongOld += tov;
|
||||
}
|
||||
|
||||
assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
|
||||
assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
|
||||
assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
|
||||
|
||||
assertTrue(
|
||||
String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
|
||||
timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
|
||||
timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
|
||||
assertTrue(
|
||||
String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
|
||||
timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
|
||||
timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
|
||||
assertTrue(
|
||||
String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
|
||||
timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
|
||||
timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
|
||||
}
|
||||
|
||||
private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
|
||||
boolean showmessage, int iterations) {
|
||||
private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) logln(title + (delta / iterations) + " nanos");
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
|
||||
boolean showmessage, int iterations) {
|
||||
private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) logln(title + (delta / iterations) + " nanos");
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user