ICU-11544 Made the main changes to bring it up to snuff with the CLDR data.
X-SVN-Rev: 37915
This commit is contained in:
parent
cd4634345e
commit
d81ee368f0
icu4j/main
classes/core/src/com/ibm/icu
tests
core/src/com/ibm/icu/dev/test
framework/src/com/ibm/icu/dev/util
@ -1,12 +1,12 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2014, International Business Machines
|
||||
* Copyright (c) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Mark Davis
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Arrays;
|
||||
@ -28,7 +28,7 @@ import com.ibm.icu.util.Freezable;
|
||||
* @author medavis
|
||||
|
||||
*/
|
||||
public class Relation<K, V> implements Freezable { // TODO: add , Map<K, Collection<V>>, but requires API changes
|
||||
public class Relation<K, V> implements Freezable<Relation<K,V>> { // TODO: add , Map<K, Collection<V>>, but requires API changes
|
||||
private Map<K, Set<V>> data;
|
||||
|
||||
Constructor<Set<V>> setCreator;
|
||||
@ -275,7 +275,7 @@ public class Relation<K, V> implements Freezable { // TODO: add , Map<K, Collect
|
||||
return frozen;
|
||||
}
|
||||
|
||||
public Object freeze() {
|
||||
public Relation<K, V> freeze() {
|
||||
if (!frozen) {
|
||||
// does not handle one level down, so we do that on a case-by-case basis
|
||||
for (K key : data.keySet()) {
|
||||
@ -288,7 +288,7 @@ public class Relation<K, V> implements Freezable { // TODO: add , Map<K, Collect
|
||||
return this;
|
||||
}
|
||||
|
||||
public Object cloneAsThawed() {
|
||||
public Relation<K, V> cloneAsThawed() {
|
||||
// TODO do later
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
@ -7,16 +7,19 @@
|
||||
package com.ibm.icu.util;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.Relation;
|
||||
import com.ibm.icu.impl.Row;
|
||||
import com.ibm.icu.impl.Row.R2;
|
||||
import com.ibm.icu.impl.Row.R3;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
@ -45,8 +48,8 @@ import com.ibm.icu.impl.Utility;
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public class LocaleMatcher {
|
||||
|
||||
private static boolean DEBUG = false;
|
||||
|
||||
public static final boolean DEBUG = false;
|
||||
|
||||
private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
|
||||
|
||||
@ -112,10 +115,11 @@ public class LocaleMatcher {
|
||||
*/
|
||||
@Deprecated
|
||||
public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
|
||||
this.matcherData = matcherData == null ? defaultWritten : matcherData;
|
||||
this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
|
||||
for (final ULocale language : languagePriorityList) {
|
||||
add(language, languagePriorityList.getWeight(language));
|
||||
}
|
||||
processMapping();
|
||||
Iterator<ULocale> it = languagePriorityList.iterator();
|
||||
defaultLanguage = it.hasNext() ? it.next() : null;
|
||||
this.threshold = threshold;
|
||||
@ -157,10 +161,10 @@ public class LocaleMatcher {
|
||||
String region2 = canonicalMap.get(region);
|
||||
if (lang2 != null || script2 != null || region2 != null) {
|
||||
return new ULocale(
|
||||
lang2 == null ? lang : lang2,
|
||||
script2 == null ? script : script2,
|
||||
region2 == null ? region : region2
|
||||
);
|
||||
lang2 == null ? lang : lang2,
|
||||
script2 == null ? script : script2,
|
||||
region2 == null ? region : region2
|
||||
);
|
||||
}
|
||||
return ulocale;
|
||||
}
|
||||
@ -175,13 +179,16 @@ public class LocaleMatcher {
|
||||
public ULocale getBestMatch(LocalePriorityList languageList) {
|
||||
double bestWeight = 0;
|
||||
ULocale bestTableMatch = null;
|
||||
double penalty = 0;
|
||||
OutputDouble matchWeight = new OutputDouble();
|
||||
for (final ULocale language : languageList) {
|
||||
final Row.R2<ULocale, Double> matchRow = getBestMatchInternal(language);
|
||||
final double weight = matchRow.get1() * languageList.getWeight(language);
|
||||
final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
|
||||
final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
|
||||
if (weight > bestWeight) {
|
||||
bestWeight = weight;
|
||||
bestTableMatch = matchRow.get0();
|
||||
bestTableMatch = matchLocale;
|
||||
}
|
||||
penalty += 0.07000001;
|
||||
}
|
||||
if (bestWeight < threshold) {
|
||||
bestTableMatch = defaultLanguage;
|
||||
@ -208,7 +215,7 @@ public class LocaleMatcher {
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public ULocale getBestMatch(ULocale ulocale) {
|
||||
return getBestMatchInternal(ulocale).get0();
|
||||
return getBestMatchInternal(ulocale, null);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -227,7 +234,7 @@ public class LocaleMatcher {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{" + defaultLanguage + ", "
|
||||
+ maximizedLanguageToWeight + "}";
|
||||
+ localeToMaxLocaleAndWeight + "}";
|
||||
}
|
||||
// ================= Privates =====================
|
||||
|
||||
@ -238,40 +245,93 @@ public class LocaleMatcher {
|
||||
* @return best matching language code and weight (as per
|
||||
* {@link #match(ULocale, ULocale)})
|
||||
*/
|
||||
private Row.R2<ULocale, Double> getBestMatchInternal(ULocale languageCode) {
|
||||
private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
|
||||
languageCode = canonicalize(languageCode);
|
||||
final ULocale maximized = addLikelySubtags(languageCode);
|
||||
if (DEBUG) {
|
||||
System.out.println("\n" + languageCode + ";\t" + maximized);
|
||||
System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
|
||||
}
|
||||
double bestWeight = 0;
|
||||
ULocale bestTableMatch = null;
|
||||
for (final ULocale tableKey : maximizedLanguageToWeight.keySet()) {
|
||||
R2<ULocale, Double> row = maximizedLanguageToWeight.get(tableKey);
|
||||
final double match = match(languageCode, maximized, tableKey, row.get0());
|
||||
if (DEBUG) {
|
||||
System.out.println("\t" + tableKey + ";\t" + row.toString() + ";\t" + match + "\n");
|
||||
}
|
||||
final double weight = match * row.get1();
|
||||
if (weight > bestWeight) {
|
||||
bestWeight = weight;
|
||||
bestTableMatch = tableKey;
|
||||
String baseLanguage = maximized.getLanguage();
|
||||
Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
|
||||
if (searchTable != null) { // we preprocessed the table so as to filter by language
|
||||
if (DEBUG) System.out.println("\tSearching: " + searchTable);
|
||||
for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
|
||||
ULocale tableKey = tableKeyValue.get0();
|
||||
ULocale maxLocale = tableKeyValue.get1();
|
||||
Double matchedWeight = tableKeyValue.get2();
|
||||
final double match = match(languageCode, maximized, tableKey, maxLocale);
|
||||
if (DEBUG) {
|
||||
System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
|
||||
}
|
||||
final double weight = match * matchedWeight;
|
||||
if (weight > bestWeight) {
|
||||
bestWeight = weight;
|
||||
bestTableMatch = tableKey;
|
||||
if (weight > 0.999d) { // bail on good enough match.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bestWeight < threshold) {
|
||||
bestTableMatch = defaultLanguage;
|
||||
}
|
||||
return Row.R2.of(bestTableMatch, bestWeight);
|
||||
if (outputWeight != null) {
|
||||
outputWeight.value = bestWeight; // only return the weight when needed
|
||||
}
|
||||
return bestTableMatch;
|
||||
}
|
||||
|
||||
public static class OutputDouble { // TODO, move to where OutputInt is
|
||||
double value;
|
||||
}
|
||||
|
||||
private void add(ULocale language, Double weight) {
|
||||
language = canonicalize(language);
|
||||
R2<ULocale, Double> row = Row.of(addLikelySubtags(language), weight);
|
||||
maximizedLanguageToWeight.put(language, row);
|
||||
R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
|
||||
row.freeze();
|
||||
localeToMaxLocaleAndWeight.add(row);
|
||||
}
|
||||
|
||||
Map<ULocale,Row.R2<ULocale, Double>> maximizedLanguageToWeight = new LinkedHashMap<ULocale, R2<ULocale, Double>>();
|
||||
/**
|
||||
* We preprocess the data to get just the possible matches for each desired base language.
|
||||
*/
|
||||
private void processMapping() {
|
||||
for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
|
||||
String desired = desiredToMatchingLanguages.getKey();
|
||||
Set<String> supported = desiredToMatchingLanguages.getValue();
|
||||
for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
|
||||
final ULocale key = localeToMaxAndWeight.get0();
|
||||
String lang = key.getLanguage();
|
||||
if (supported.contains(lang)) {
|
||||
addFiltered(desired, localeToMaxAndWeight);
|
||||
}
|
||||
}
|
||||
}
|
||||
// now put in the values directly, since languages always map to themselves
|
||||
for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
|
||||
final ULocale key = localeToMaxAndWeight.get0();
|
||||
String lang = key.getLanguage();
|
||||
addFiltered(lang, localeToMaxAndWeight);
|
||||
}
|
||||
}
|
||||
|
||||
private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
|
||||
Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
|
||||
if (map == null) {
|
||||
desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>());
|
||||
}
|
||||
map.add(localeToMaxAndWeight);
|
||||
if (DEBUG) {
|
||||
System.out.println(desired + ", " + localeToMaxAndWeight);
|
||||
}
|
||||
}
|
||||
|
||||
Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
|
||||
Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
|
||||
= new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
|
||||
|
||||
// =============== Special Mapping Information ==============
|
||||
|
||||
@ -279,11 +339,10 @@ public class LocaleMatcher {
|
||||
* We need to add another method to addLikelySubtags that doesn't return
|
||||
* null, but instead substitutes Zzzz and ZZ if unknown. There are also
|
||||
* a few cases where addLikelySubtags needs to have expanded data, to handle
|
||||
* all deprecated codes, and to update to CLDR 1.6.
|
||||
* all deprecated codes.
|
||||
* @param languageCode
|
||||
* @return "fixed" addLikelySubtags
|
||||
*/
|
||||
// TODO(markdavis): update the above when CLDR 1.6 is final.
|
||||
private ULocale addLikelySubtags(ULocale languageCode) {
|
||||
// max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
|
||||
// language would normally match English. But that would produce the counterintuitive results
|
||||
@ -302,11 +361,11 @@ public class LocaleMatcher {
|
||||
final String script = languageCode.getScript();
|
||||
final String region = languageCode.getCountry();
|
||||
return new ULocale((language.length()==0 ? "und"
|
||||
: language)
|
||||
+ "_"
|
||||
+ (script.length()==0 ? "Zzzz" : script)
|
||||
+ "_"
|
||||
+ (region.length()==0 ? "ZZ" : region));
|
||||
: language)
|
||||
+ "_"
|
||||
+ (script.length()==0 ? "Zzzz" : script)
|
||||
+ "_"
|
||||
+ (region.length()==0 ? "ZZ" : region));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -318,9 +377,9 @@ public class LocaleMatcher {
|
||||
private String region;
|
||||
private Level level;
|
||||
static Pattern pattern = Pattern.compile(
|
||||
"([a-z]{1,8}|\\*)"
|
||||
+ "(?:[_-]([A-Z][a-z]{3}|\\*))?"
|
||||
+ "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
|
||||
"([a-z]{1,8}|\\*)"
|
||||
+ "(?:[_-]([A-Z][a-z]{3}|\\*))?"
|
||||
+ "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
|
||||
|
||||
public LocalePatternMatcher(String toMatch) {
|
||||
Matcher matcher = pattern.matcher(toMatch);
|
||||
@ -382,29 +441,29 @@ public class LocaleMatcher {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#equals(java.lang.Object)
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LocalePatternMatcher other = (LocalePatternMatcher) obj;
|
||||
return Utility.objectEquals(level, other.level)
|
||||
&& Utility.objectEquals(lang, other.lang)
|
||||
&& Utility.objectEquals(script, other.script)
|
||||
&& Utility.objectEquals(region, other.region);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#hashCode()
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return level.ordinal()
|
||||
^ (lang == null ? 0 : lang.hashCode())
|
||||
^ (script == null ? 0 : script.hashCode())
|
||||
^ (region == null ? 0 : region.hashCode());
|
||||
}
|
||||
* @see java.lang.Object#equals(java.lang.Object)
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LocalePatternMatcher other = (LocalePatternMatcher) obj;
|
||||
return Utility.objectEquals(level, other.level)
|
||||
&& Utility.objectEquals(lang, other.lang)
|
||||
&& Utility.objectEquals(script, other.script)
|
||||
&& Utility.objectEquals(region, other.region);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#hashCode()
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return level.ordinal()
|
||||
^ (lang == null ? 0 : lang.hashCode())
|
||||
^ (script == null ? 0 : script.hashCode())
|
||||
^ (region == null ? 0 : region.hashCode());
|
||||
}
|
||||
}
|
||||
|
||||
enum Level {
|
||||
@ -443,75 +502,19 @@ public class LocaleMatcher {
|
||||
// lang_result.put(supported, result = new LinkedHashSet());
|
||||
// }
|
||||
// result.add(data);
|
||||
boolean added = scores.add(data);
|
||||
if (!added) {
|
||||
throw new ICUException("trying to add duplicate data: " + data);
|
||||
}
|
||||
boolean added = scores.add(data);
|
||||
if (!added) {
|
||||
throw new ICUException("trying to add duplicate data: " + data);
|
||||
}
|
||||
}
|
||||
|
||||
double getScore(ULocale desiredLocale, ULocale dMax, String desiredRaw, String desiredMax,
|
||||
ULocale supportedLocale, ULocale sMax, String supportedRaw, String supportedMax) {
|
||||
|
||||
/*
|
||||
* d, dm, s, sm
|
||||
* dc = d != dm
|
||||
* sc = s != sm
|
||||
* if dm != sm
|
||||
* rd = rd(dm,sm) // line 4
|
||||
* if dc != sc
|
||||
* rd *= 0.75 // lines 3,8
|
||||
* ef dc
|
||||
* rd *= 0.5 // lines 7
|
||||
* end
|
||||
* ef dc == sc
|
||||
* rd = 0 // line 6
|
||||
* else
|
||||
* rd = 0.25*StdRDiff // lines 2,5
|
||||
*/
|
||||
|
||||
// example: input en-GB, supported en en-GB
|
||||
// we want to have a closer match with
|
||||
|
||||
// boolean desiredChange = desiredRaw.equals(desiredMax);
|
||||
// boolean supportedChange = supportedRaw.equals(supportedMax);
|
||||
double getScore(ULocale dMax, String desiredRaw, String desiredMax,
|
||||
ULocale sMax, String supportedRaw, String supportedMax) {
|
||||
double distance = 0;
|
||||
if (!desiredMax.equals(supportedMax)) {
|
||||
// Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desiredMax);
|
||||
// if (lang_result == null) {
|
||||
// distance = worst;
|
||||
// } else {
|
||||
// Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supportedMax);
|
||||
// skip:
|
||||
// if (result == null) {
|
||||
// distance = worst;
|
||||
// } else {
|
||||
distance = getRawScore(dMax, sMax);
|
||||
// }
|
||||
// if (desiredChange == supportedChange) {
|
||||
// distance *= maxUnequal_changeEqual;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, changeD=changeS)\t" + distance);
|
||||
// }
|
||||
// } else if (desiredChange) {
|
||||
// distance *= maxUnequal_changeD_sameS;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, changeD, !changeS)\t" + distance);
|
||||
// }
|
||||
// } else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, !changeD, changeS)\t" + distance);
|
||||
// }
|
||||
// }
|
||||
} else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
|
||||
distance += 0.001;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\t\t\t" + level + " Distance (maxD=maxS, changeD=changeS)\t" + distance);
|
||||
// }
|
||||
} else { // maxes are equal, changes are different
|
||||
// distance = 0.25*level.worst;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\t\t\t" + level + " Distance (maxD=maxS, changeD≠changeS)\t" + distance);
|
||||
// }
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
@ -522,7 +525,7 @@ public class LocaleMatcher {
|
||||
}
|
||||
for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
|
||||
if (datum.get0().matches(desiredLocale)
|
||||
&& datum.get1().matches(supportedLocale)) {
|
||||
&& datum.get1().matches(supportedLocale)) {
|
||||
if (DEBUG) {
|
||||
System.out.println("\t\t\t\tFOUND\t" + datum);
|
||||
}
|
||||
@ -566,6 +569,19 @@ public class LocaleMatcher {
|
||||
public boolean isFrozen() {
|
||||
return frozen;
|
||||
}
|
||||
|
||||
public Relation<String,String> getMatchingLanguages() {
|
||||
Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class);
|
||||
for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
|
||||
LocalePatternMatcher desired = item.get0();
|
||||
LocalePatternMatcher supported = item.get1();
|
||||
if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
|
||||
desiredToSupported.put(desired.lang, supported.lang);
|
||||
}
|
||||
}
|
||||
desiredToSupported.freeze();
|
||||
return desiredToSupported;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -575,9 +591,12 @@ public class LocaleMatcher {
|
||||
*/
|
||||
@Deprecated
|
||||
public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
|
||||
ScoreData languageScores = new ScoreData(Level.language);
|
||||
ScoreData scriptScores = new ScoreData(Level.script);
|
||||
ScoreData regionScores = new ScoreData(Level.region);
|
||||
private ScoreData languageScores = new ScoreData(Level.language);
|
||||
private ScoreData scriptScores = new ScoreData(Level.script);
|
||||
private ScoreData regionScores = new ScoreData(Level.region);
|
||||
private Relation<String, String> matchingLanguages;
|
||||
private volatile boolean frozen = false;
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
@ -587,6 +606,15 @@ public class LocaleMatcher {
|
||||
public LanguageMatcherData() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public Relation<String, String> matchingLanguages() {
|
||||
return matchingLanguages;
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
@ -603,9 +631,12 @@ public class LocaleMatcher {
|
||||
@Deprecated
|
||||
public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
|
||||
double diff = 0;
|
||||
diff += languageScores.getScore(a, aMax, a.getLanguage(), aMax.getLanguage(), b, bMax, b.getLanguage(), bMax.getLanguage());
|
||||
diff += scriptScores.getScore(a, aMax, a.getScript(), aMax.getScript(), b, bMax, b.getScript(), bMax.getScript());
|
||||
diff += regionScores.getScore(a, aMax, a.getCountry(), aMax.getCountry(), b, bMax, b.getCountry(), bMax.getCountry());
|
||||
diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
|
||||
if (diff > 0.999d) { // with no language match, we bail
|
||||
return 0.0d;
|
||||
}
|
||||
diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
|
||||
diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
|
||||
|
||||
if (!a.getVariant().equals(b.getVariant())) {
|
||||
diff += 0.01;
|
||||
@ -656,11 +687,11 @@ public class LocaleMatcher {
|
||||
private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
|
||||
if (DEBUG) {
|
||||
System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
|
||||
" supported=\"" + supported + "\"" +
|
||||
" percent=\"" + percent + "\""
|
||||
+ (oneway ? " oneway=\"true\"" : "")
|
||||
+ "/>"
|
||||
+ (comment == null ? "" : "\t<!-- " + comment + " -->"));
|
||||
" supported=\"" + supported + "\"" +
|
||||
" percent=\"" + percent + "\""
|
||||
+ (oneway ? " oneway=\"true\"" : "")
|
||||
+ "/>"
|
||||
+ (comment == null ? "" : "\t<!-- " + comment + " -->"));
|
||||
// // .addDistance("nn", "nb", 4, true)
|
||||
// System.out.println(".addDistance(\"" + desired + "\"" +
|
||||
// ", \"" + supported + "\"" +
|
||||
@ -681,13 +712,13 @@ public class LocaleMatcher {
|
||||
}
|
||||
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
|
||||
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
|
||||
boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
|
||||
boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
|
||||
switch (desiredLen) {
|
||||
case language:
|
||||
String dlanguage = desiredMatcher.getLanguage();
|
||||
String slanguage = supportedMatcher.getLanguage();
|
||||
languageScores.addDataToScores(dlanguage, slanguage, data);
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
languageScores.addDataToScores(slanguage, dlanguage, data2);
|
||||
}
|
||||
break;
|
||||
@ -695,7 +726,7 @@ public class LocaleMatcher {
|
||||
String dscript = desiredMatcher.getScript();
|
||||
String sscript = supportedMatcher.getScript();
|
||||
scriptScores.addDataToScores(dscript, sscript, data);
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
scriptScores.addDataToScores(sscript, dscript, data2);
|
||||
}
|
||||
break;
|
||||
@ -703,7 +734,7 @@ public class LocaleMatcher {
|
||||
String dregion = desiredMatcher.getRegion();
|
||||
String sregion = supportedMatcher.getRegion();
|
||||
regionScores.addDataToScores(dregion, sregion, data);
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
if (!oneway && !desiredEqualsSupported) {
|
||||
regionScores.addDataToScores(sregion, dregion, data2);
|
||||
}
|
||||
break;
|
||||
@ -731,8 +762,6 @@ public class LocaleMatcher {
|
||||
}
|
||||
}
|
||||
|
||||
private volatile boolean frozen = false;
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @internal
|
||||
@ -740,6 +769,11 @@ public class LocaleMatcher {
|
||||
*/
|
||||
@Deprecated
|
||||
public LanguageMatcherData freeze() {
|
||||
languageScores.freeze();
|
||||
regionScores.freeze();
|
||||
scriptScores.freeze();
|
||||
matchingLanguages = languageScores.getMatchingLanguages();
|
||||
frozen = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -757,115 +791,19 @@ public class LocaleMatcher {
|
||||
LanguageMatcherData matcherData;
|
||||
|
||||
private static final LanguageMatcherData defaultWritten;
|
||||
// = new LanguageMatcherData()
|
||||
// // TODO get data from CLDR
|
||||
// .addDistance("no", "nb", 100, "The language no is normally taken as nb in content; we might alias this for lookup.")
|
||||
// .addDistance("nn", "nb", 96)
|
||||
// .addDistance("nn", "no", 96)
|
||||
// .addDistance("da", "no", 90, "Danish and norwegian are reasonably close.")
|
||||
// .addDistance("da", "nb", 90)
|
||||
// .addDistance("hr", "br", 96, "Serbo-croatian variants are all very close.")
|
||||
// .addDistance("sh", "br", 96)
|
||||
// .addDistance("sr", "br", 96)
|
||||
// .addDistance("sh", "hr", 96)
|
||||
// .addDistance("sr", "hr", 96)
|
||||
// .addDistance("sh", "sr", 96)
|
||||
// .addDistance("sr-Latn", "sr-Cyrl", 90, "Most serbs can read either script.")
|
||||
// .addDistance("*-Hans", "*-Hant", 85, true, "Readers of simplified can read traditional much better than reverse.")
|
||||
// .addDistance("*-Hant", "*-Hans", 75, true)
|
||||
// .addDistance("en-*-US", "en-*-*", 97, "Non-US English variants are closer to each other (written). Make en-US be further from everything else.")
|
||||
// .addDistance("en-*-*", "en-*-*", 99)
|
||||
// .addDistance("es-*-ES", "es-*-*", 97, "Latin American Spanishes are closer to each other. Make es-ES be further from everything else.")
|
||||
// .addDistance("es-*-419", "es-*-*", 99, "Have es-MX, es-AR, etc be closer to es-419 than to each other")
|
||||
// .addDistance("es-*-*", "es-*-*", 97)
|
||||
// .addDistance("*", "*", 1, "[Default value -- must be at end!] Normally there is no comprehension of different languages.")
|
||||
// .addDistance("*-*", "*-*", 20, "[Default value -- must be at end!] Normally there is little comprehension of different scripts.")
|
||||
// .addDistance("*-*-*", "*-*-*", 96, "[Default value -- must be at end!] Normally there are small differences across regions.")
|
||||
// .freeze();
|
||||
|
||||
private static HashMap<String,String> canonicalMap = new HashMap<String, String>();
|
||||
|
||||
static class DataHack implements Comparable<DataHack>{
|
||||
final String source;
|
||||
final String target;
|
||||
int percent;
|
||||
public DataHack(String source, String target, int percent) {
|
||||
this.source = source;
|
||||
this.target = target.equals("de_CH") ? "de" : target; // hack to fix bad data
|
||||
this.percent = percent;
|
||||
}
|
||||
static final Pattern STAR_KEEP = Pattern.compile("([^_]+)(?:_[^_]+(?:_[^_]+)?)?");
|
||||
public int compareTo(DataHack other) {
|
||||
// this is just a one-time hack so we don't need to optimize
|
||||
int diff = getUnderbars(source) - getUnderbars(other.source);
|
||||
if (0 != diff) {
|
||||
return diff;
|
||||
}
|
||||
String thisSource = source.replace('*', 'þ'); // just something after Z
|
||||
String otherSource = other.source.replace('*', 'þ'); // just something after Z
|
||||
diff = thisSource.compareTo(otherSource);
|
||||
if (0 != diff) {
|
||||
return diff;
|
||||
}
|
||||
String thisTarget = target.replace('*', 'þ'); // just something after Z
|
||||
String otherTarget = other.target.replace('*', 'þ'); // just something after Z
|
||||
diff = thisTarget.compareTo(otherTarget);
|
||||
|
||||
// Matcher matcher = STAR_KEEP.matcher(source);
|
||||
// matcher.matches();
|
||||
// String first = matcher.group(0);
|
||||
// String second = matcher.group(1);
|
||||
// String third = matcher.group(2);
|
||||
// Matcher matcherB = STAR_KEEP.matcher(source);
|
||||
// String firstB = matcher.group(0);
|
||||
// String secondB = matcher.group(1);
|
||||
// String thirdB = matcher.group(2);
|
||||
//
|
||||
// int diff = onlyStars.length() - onlyStarsOther.length();
|
||||
|
||||
if (0 != diff) {
|
||||
return diff;
|
||||
}
|
||||
diff = source.compareTo(other.source);
|
||||
if (0 != diff) {
|
||||
return diff;
|
||||
}
|
||||
return target.compareTo(other.target);
|
||||
}
|
||||
/**
|
||||
* @param source2
|
||||
*/
|
||||
private int getUnderbars(String source2) {
|
||||
int pos = source2.indexOf('_');
|
||||
if (pos < 0) {
|
||||
return 0;
|
||||
}
|
||||
pos = source2.indexOf('_',pos+1);
|
||||
return pos < 0 ? 1 : 2;
|
||||
}
|
||||
public String toString() {
|
||||
return source + ", " + target + " => " + percent;
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
// TODO get data from CLDR
|
||||
canonicalMap.put("iw", "he");
|
||||
canonicalMap.put("mo", "ro");
|
||||
canonicalMap.put("tl", "fil");
|
||||
|
||||
|
||||
ICUResourceBundle suppData = getICUSupplementalData();
|
||||
ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
|
||||
ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
|
||||
defaultWritten = new LanguageMatcherData();
|
||||
// HACK
|
||||
// The data coming from ICU may be old, and badly ordered.
|
||||
// TreeSet<DataHack> hack = new TreeSet<DataHack>();
|
||||
// defaultWritten.addDistance("en_*_US", "en_*_*", 97);
|
||||
// defaultWritten.addDistance("en_*_GB", "en_*_*", 98);
|
||||
// defaultWritten.addDistance("es_*_ES", "es_*_*", 97);
|
||||
// defaultWritten.addDistance("es_*_419", "es_*_*", 99);
|
||||
// defaultWritten.addDistance("es_*_*", "es_*_*", 98);
|
||||
|
||||
for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
|
||||
ICUResourceBundle item = (ICUResourceBundle) iter.next();
|
||||
@ -874,17 +812,13 @@ public class LocaleMatcher {
|
||||
"*_*_*",
|
||||
"96",
|
||||
*/
|
||||
// <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
|
||||
boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
|
||||
//hack.add(new DataHack(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2))));
|
||||
defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
|
||||
// <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
|
||||
boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
|
||||
defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
|
||||
}
|
||||
// for (DataHack dataHack : hack) {
|
||||
// defaultWritten.addDistance(dataHack.source, dataHack.target, dataHack.percent);
|
||||
// }
|
||||
defaultWritten.freeze();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
@ -892,9 +826,9 @@ public class LocaleMatcher {
|
||||
@Deprecated
|
||||
public static ICUResourceBundle getICUSupplementalData() {
|
||||
ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
|
||||
ICUResourceBundle.ICU_BASE_NAME,
|
||||
"supplementalData",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER);
|
||||
ICUResourceBundle.ICU_BASE_NAME,
|
||||
"supplementalData",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER);
|
||||
return suppData;
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ import java.util.TreeSet;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.serializable.SerializableTest;
|
||||
import com.ibm.icu.dev.util.CollectionUtilities;
|
||||
import com.ibm.icu.dev.util.Relation;
|
||||
import com.ibm.icu.impl.Relation;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.PluralRules;
|
||||
|
@ -0,0 +1,20 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2015, Google, Inc., International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
|
||||
|
||||
/**
 * Shim that supplies the {@code LanguageMatcherData} instance consumed by the
 * locale matcher tests. This version of {@link #load()} always returns
 * {@code null}.
 *
 * @author markdavis
 */
|
||||
public class LocaleMatcherShim {
|
||||
/**
 * Returns the matcher data the tests should run against.
 *
 * @return always {@code null} in this implementation
 */
public static LanguageMatcherData load() {
|
||||
// In CLDR, has different value
|
||||
return null;
|
||||
}
|
||||
}
|
@ -17,18 +17,97 @@ import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Test the LanguageMatcher.
|
||||
* Test the LocaleMatcher.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
|
||||
private static final ULocale ZH_MO = new ULocale("zh_MO");
|
||||
private static final ULocale ZH_HK = new ULocale("zh_HK");
|
||||
static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
|
||||
return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
|
||||
return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
|
||||
return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(String string) {
|
||||
return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
|
||||
}
|
||||
|
||||
// public LocaleMatcher(LocalePriorityList languagePriorityList,
|
||||
// LocaleMatcherData matcherData, double threshold)
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new LocaleMatcherTest().run(args);
|
||||
}
|
||||
|
||||
public void testParentLocales() {
|
||||
assertCloser("es_AR", "es_419", "es_ES");
|
||||
assertCloser("es_AR", "es_419", "es");
|
||||
|
||||
assertCloser("es_AR", "es_MX", "es");
|
||||
assertCloser("es_AR", "es_MX", "es");
|
||||
|
||||
assertCloser("en_AU", "en_GB", "en_US");
|
||||
assertCloser("en_AU", "en_GB", "en");
|
||||
|
||||
assertCloser("en_AU", "en_NZ", "en_US");
|
||||
assertCloser("en_AU", "en_NZ", "en");
|
||||
|
||||
assertCloser("pt_AO", "pt_PT", "pt_BR");
|
||||
assertCloser("pt_AO", "pt_PT", "pt");
|
||||
|
||||
assertCloser("zh_HK", "zh_MO", "zh_TW");
|
||||
assertCloser("zh_HK", "zh_MO", "zh_CN");
|
||||
assertCloser("zh_HK", "zh_MO", "zh");
|
||||
}
|
||||
|
||||
private void assertCloser(String a, String closer, String further) {
|
||||
LocaleMatcher matcher = newLocaleMatcher(further + ", " + closer);
|
||||
assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
|
||||
matcher = newLocaleMatcher(closer + ", " + further);
|
||||
assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
|
||||
}
|
||||
|
||||
// public void testParentLocales() {
|
||||
// // find all the regions that have a closer relation because of an explicit parent
|
||||
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// explicitParents.remove("root");
|
||||
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// for (String locale : explicitParents) {
|
||||
// while (true) {
|
||||
// locale = LocaleIDParser.getParent(locale);
|
||||
// if (locale == null || locale.equals("root")) {
|
||||
// break;
|
||||
// }
|
||||
// otherParents.add(locale);
|
||||
// }
|
||||
// }
|
||||
// otherParents.remove("root");
|
||||
//
|
||||
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
|
||||
// String parentId = LocaleIDParser.getParent(locale);
|
||||
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
|
||||
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
|
||||
// continue;
|
||||
// }
|
||||
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
|
||||
// }
|
||||
// }
|
||||
|
||||
public void testChinese() {
|
||||
LocaleMatcher matcher = new LocaleMatcher("zh_CN, zh_TW, iw");
|
||||
LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
|
||||
ULocale taiwanChinese = new ULocale("zh_TW");
|
||||
ULocale chinaChinese = new ULocale("zh_CN");
|
||||
assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, matcher.getBestMatch("zh_Hant_TW"));
|
||||
@ -41,7 +120,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
|
||||
public void testenGB() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("fr, en, en_GB, es_MX, es_419, es");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en, en_GB, es_MX, es_419, es");
|
||||
assertEquals("en_GB", matcher.getBestMatch("en_NZ").toString());
|
||||
assertEquals("es", matcher.getBestMatch("es_ES").toString());
|
||||
assertEquals("es_419", matcher.getBestMatch("es_AR").toString());
|
||||
@ -50,7 +129,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
public void testFallbacks() {
|
||||
LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
|
||||
final LocaleMatcher matcher = new LocaleMatcher(lpl, null, 0.09);
|
||||
final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
|
||||
assertEquals("hi", matcher.getBestMatch("sa").toString());
|
||||
}
|
||||
|
||||
@ -58,25 +137,25 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
double threshold = 0.05;
|
||||
LanguageMatcherData localeMatcherData = new LanguageMatcherData()
|
||||
.addDistance("br", "fr", 10, true)
|
||||
.addDistance("es", "cy", 10, true)
|
||||
;
|
||||
.addDistance("es", "cy", 10, true);
|
||||
logln(localeMatcherData.toString());
|
||||
|
||||
final LocaleMatcher matcher = new LocaleMatcher(
|
||||
LocalePriorityList
|
||||
.add(ULocale.ENGLISH)
|
||||
.add(ULocale.FRENCH)
|
||||
.add(ULocale.UK)
|
||||
.build(), localeMatcherData , threshold);
|
||||
final LocaleMatcher matcher = newLocaleMatcher(
|
||||
LocalePriorityList
|
||||
.add(ULocale.ENGLISH)
|
||||
.add(ULocale.FRENCH)
|
||||
.add(ULocale.UK)
|
||||
.build(), localeMatcherData, threshold);
|
||||
logln(matcher.toString());
|
||||
|
||||
assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
|
||||
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one way
|
||||
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
|
||||
// way
|
||||
}
|
||||
|
||||
public void testBasics() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
|
||||
.add(ULocale.ENGLISH).build());
|
||||
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
|
||||
.add(ULocale.ENGLISH).build());
|
||||
logln(matcher.toString());
|
||||
|
||||
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
|
||||
@ -87,7 +166,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
public void testFallback() {
|
||||
// check that script fallbacks are handled right
|
||||
final LocaleMatcher matcher = new LocaleMatcher("zh_CN, zh_TW, iw");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
|
||||
assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant"));
|
||||
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
|
||||
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
|
||||
@ -97,7 +176,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
public void testSpecials() {
|
||||
// check that nearby languages are handled
|
||||
final LocaleMatcher matcher = new LocaleMatcher("en, fil, ro, nn");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, fil, ro, nn");
|
||||
assertEquals(new ULocale("fil"), matcher.getBestMatch("tl"));
|
||||
assertEquals(new ULocale("ro"), matcher.getBestMatch("mo"));
|
||||
assertEquals(new ULocale("nn"), matcher.getBestMatch("nb"));
|
||||
@ -107,15 +186,23 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
public void testRegionalSpecials() {
|
||||
// verify that en_AU is closer to en_GB than to en (which is en_US)
|
||||
final LocaleMatcher matcher = new LocaleMatcher("en, en_GB, es, es_419");
|
||||
assertEquals("en_AU in {en, en_GB, es, es_419}", new ULocale("en_GB"), matcher.getBestMatch("en_AU"));
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, en_GB, es, es_419");
|
||||
assertEquals("es_MX in {en, en_GB, es, es_419}", new ULocale("es_419"), matcher.getBestMatch("es_MX"));
|
||||
assertEquals("en_AU in {en, en_GB, es, es_419}", new ULocale("en_GB"), matcher.getBestMatch("en_AU"));
|
||||
assertEquals("es_ES in {en, en_GB, es, es_419}", new ULocale("es"), matcher.getBestMatch("es_ES"));
|
||||
}
|
||||
|
||||
public void testHK() {
|
||||
// HK and MO are closer to each other for Hant than to TW
|
||||
final LocaleMatcher matcher = newLocaleMatcher("zh, zh_TW, zh_MO");
|
||||
assertEquals("zh_HK in {zh, zh_TW, zh_MO}", ZH_MO, matcher.getBestMatch("zh_HK"));
|
||||
final LocaleMatcher matcher2 = newLocaleMatcher("zh, zh_TW, zh_HK");
|
||||
assertEquals("zh_MO in {zh, zh_TW, zh_HK}", ZH_HK, matcher2.getBestMatch("zh_MO"));
|
||||
}
|
||||
|
||||
public void TestLocaleMatcherCoverage() {
|
||||
// Add tests for better code coverage
|
||||
LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
|
||||
LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
|
||||
logln(matcher.toString());
|
||||
|
||||
LanguageMatcherData data = new LanguageMatcherData();
|
||||
@ -127,19 +214,20 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
|
||||
if (data.isFrozen()) {
|
||||
errln("Error LanguageMatcherData is frozen!");
|
||||
errln("Error LocaleMatcherData is frozen!");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Two-argument convenience form: forwards to the three-argument
 * {@code assertEquals} with an empty message.
 *
 * @param expected the expected value
 * @param string the actual value to compare against {@code expected}
 */
private void assertEquals(Object expected, Object string) {
|
||||
assertEquals("", expected, string);
|
||||
}
|
||||
|
||||
/**
 * One-argument convenience form: forwards to the two-argument
 * {@code assertNull} with an empty message.
 *
 * @param bestMatch the value expected to be {@code null}
 */
private void assertNull(Object bestMatch) {
|
||||
assertNull("", bestMatch);
|
||||
}
|
||||
|
||||
public void testEmpty() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("");
|
||||
assertNull(matcher.getBestMatch(ULocale.FRENCH));
|
||||
}
|
||||
|
||||
@ -147,13 +235,13 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
public void testMatch_exact() {
|
||||
assertEquals(1.0,
|
||||
LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
|
||||
LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
|
||||
}
|
||||
|
||||
public void testMatch_none() {
|
||||
double match = LocaleMatcher.match(
|
||||
new ULocale("ar_MK"),
|
||||
ENGLISH_CANADA);
|
||||
new ULocale("ar_MK"),
|
||||
ENGLISH_CANADA);
|
||||
assertTrue("Actual < 0: " + match, 0 <= match);
|
||||
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
|
||||
}
|
||||
@ -164,75 +252,88 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
|
||||
double matchZhHant = LocaleMatcher.match(undTw, zhHant);
|
||||
assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
|
||||
") than to zh (" + matchZh + ")",
|
||||
matchZh < matchZhHant);
|
||||
") than to zh (" + matchZh + ")",
|
||||
matchZh < matchZhHant);
|
||||
double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
|
||||
zhHant);
|
||||
zhHant);
|
||||
assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
|
||||
") than to en_Hant_TW (" + matchEnHantTw + ")",
|
||||
matchEnHantTw < matchZhHant);
|
||||
") than to en_Hant_TW (" + matchEnHantTw + ")",
|
||||
matchEnHantTw < matchZhHant);
|
||||
assertTrue("zh should be closer to und_TW (" + matchZh +
|
||||
") than to en_Hant_TW (" + matchEnHantTw + ")",
|
||||
matchEnHantTw < matchZh);
|
||||
") than to en_Hant_TW (" + matchEnHantTw + ")",
|
||||
matchEnHantTw < matchZh);
|
||||
}
|
||||
|
||||
public void testMatchGrandfatheredCode() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("fr, i_klingon, en_Latn_US");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
|
||||
assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
|
||||
//assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());
|
||||
// assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForList_exactMatch() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
|
||||
assertEquals("ja", matcher.getBestMatch("ja, de").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForList_simpleVariantMatch() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
|
||||
// Intentionally avoiding a perfect_match or two candidates for variant matches.
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
|
||||
// Intentionally avoiding a perfect_match or two candidates for variant
|
||||
// matches.
|
||||
assertEquals("en_GB", matcher.getBestMatch("de, en_US").toString());
|
||||
// Fall back.
|
||||
assertEquals("fr", matcher.getBestMatch("de, zh").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForList_matchOnMaximized() {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("en, ja");
|
||||
//final LocaleMatcher matcher = new LocaleMatcher("fr, en, ja, es_ES, es_MX");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, ja");
|
||||
// final LocaleMatcher matcher =
|
||||
// newLocaleMatcher("fr, en, ja, es_ES, es_MX");
|
||||
// Check that if the preference is maximized already, it works as well.
|
||||
assertEquals("Match for ja_Jpan_JP (maximized already)",
|
||||
"ja", matcher.getBestMatch("ja_Jpan_JP, en-AU").toString());
|
||||
if (true) return;
|
||||
// ja_JP matches ja on likely subtags, and it's listed first, thus it wins over
|
||||
"ja", matcher.getBestMatch("ja_Jpan_JP, en-AU").toString());
|
||||
if (true)
|
||||
return;
|
||||
// ja_JP matches ja on likely subtags, and it's listed first, thus it
|
||||
// wins over
|
||||
// thus it wins over the second preference en_GB.
|
||||
assertEquals("Match for ja_JP, with likely region subtag",
|
||||
"ja", matcher.getBestMatch("ja_JP, en_US").toString());
|
||||
"ja", matcher.getBestMatch("ja_JP, en_US").toString());
|
||||
// Check that if the preference is maximized already, it works as well.
|
||||
assertEquals("Match for ja_Jpan_JP (maximized already)",
|
||||
"ja", matcher.getBestMatch("ja_Jpan_JP, en_US").toString());
|
||||
"ja", matcher.getBestMatch("ja_Jpan_JP, en_US").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForList_noMatchOnMaximized() {
|
||||
// Regression test for http://b/5714572 .
|
||||
final LocaleMatcher matcher = new LocaleMatcher("en, de, fr, ja");
|
||||
// de maximizes to de_DE. Pick the exact match for the secondary language instead.
|
||||
assertEquals("fr", matcher.getBestMatch("de_CH, fr").toString());
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, de, fr, ja");
|
||||
// de maximizes to de_DE. Pick the exact match for the secondary
|
||||
// language instead.
|
||||
assertEquals("de", matcher.getBestMatch("de_CH, fr").toString());
|
||||
}
|
||||
|
||||
public void testBestMatchForTraditionalChinese() {
|
||||
// Scenario: An application that only supports Simplified Chinese (and some other languages),
|
||||
// but does not support Traditional Chinese. zh_Hans_CN could be replaced with zh_CN, zh, or
|
||||
// Scenario: An application that only supports Simplified Chinese (and
|
||||
// some other languages),
|
||||
// but does not support Traditional Chinese. zh_Hans_CN could be
|
||||
// replaced with zh_CN, zh, or
|
||||
// zh_Hans, it wouldn't make much of a difference.
|
||||
final LocaleMatcher matcher = new LocaleMatcher("fr, zh_Hans_CN, en_US");
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, zh_Hans_CN, en_US");
|
||||
|
||||
// The script distance (simplified vs. traditional Han) is considered small enough
|
||||
// to be an acceptable match. The regional difference is considered almost insignificant.
|
||||
// The script distance (simplified vs. traditional Han) is considered
|
||||
// small enough
|
||||
// to be an acceptable match. The regional difference is considered
|
||||
// almost insignificant.
|
||||
assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_TW").toString());
|
||||
assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_Hant").toString());
|
||||
|
||||
// For geo_political reasons, you might want to avoid a zh_Hant -> zh_Hans match.
|
||||
// In this case, if zh_TW, zh_HK or a tag starting with zh_Hant is requested, you can
|
||||
// change your call to getBestMatch to include a 2nd language preference.
|
||||
// "en" is a better match since its distance to "en_US" is closer than the distance
|
||||
// For geo_political reasons, you might want to avoid a zh_Hant ->
|
||||
// zh_Hans match.
|
||||
// In this case, if zh_TW, zh_HK or a tag starting with zh_Hant is
|
||||
// requested, you can
|
||||
// change your call to getBestMatch to include a 2nd language
|
||||
// preference.
|
||||
// "en" is a better match since its distance to "en_US" is closer than
|
||||
// the distance
|
||||
// from "zh_TW" to "zh_CN" (script distance).
|
||||
assertEquals("en_US", matcher.getBestMatch("zh_TW, en").toString());
|
||||
assertEquals("en_US", matcher.getBestMatch("zh_Hant_CN, en").toString());
|
||||
@ -240,41 +341,46 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
|
||||
public void testUndefined() {
|
||||
// When the undefined language doesn't match anything in the list, getBestMatch returns
|
||||
// When the undefined language doesn't match anything in the list,
|
||||
// getBestMatch returns
|
||||
// the default, as usual.
|
||||
LocaleMatcher matcher = new LocaleMatcher("it,fr");
|
||||
LocaleMatcher matcher = newLocaleMatcher("it,fr");
|
||||
assertEquals("it", matcher.getBestMatch("und").toString());
|
||||
|
||||
// When it *does* occur in the list, BestMatch returns it, as expected.
|
||||
matcher = new LocaleMatcher("it,und");
|
||||
matcher = newLocaleMatcher("it,und");
|
||||
assertEquals("und", matcher.getBestMatch("und").toString());
|
||||
|
||||
// The unusual part:
|
||||
// max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
|
||||
// language would normally match English. But that would produce the counterintuitive results
|
||||
// max("und") = "en_Latn_US", and since matching is based on maximized
|
||||
// tags, the undefined
|
||||
// language would normally match English. But that would produce the
|
||||
// counterintuitive results
|
||||
// that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
|
||||
// getBestMatch("en", LocaleMatcher("it,und")) would be "und".
|
||||
//
|
||||
// To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
|
||||
// so that max("und")="und". That produces the following, more desirable results:
|
||||
matcher = new LocaleMatcher("it,en");
|
||||
// To avoid that, we change the matcher's definitions of max
|
||||
// (AddLikelySubtagsWithDefaults)
|
||||
// so that max("und")="und". That produces the following, more desirable
|
||||
// results:
|
||||
matcher = newLocaleMatcher("it,en");
|
||||
assertEquals("it", matcher.getBestMatch("und").toString());
|
||||
matcher = new LocaleMatcher("it,und");
|
||||
matcher = newLocaleMatcher("it,und");
|
||||
assertEquals("it", matcher.getBestMatch("en").toString());
|
||||
}
|
||||
|
||||
// public void testGetBestMatch_emptyList() {
|
||||
// final LocaleMatcher matcher = new LocaleMatcher(
|
||||
// new LocalePriorityList(new HashMap()));
|
||||
// assertNull(matcher.getBestMatch(ULocale.ENGLISH));
|
||||
// }
|
||||
// public void testGetBestMatch_emptyList() {
|
||||
// final LocaleMatcher matcher = newLocaleMatcher(
|
||||
// new LocalePriorityList(new HashMap()));
|
||||
// assertNull(matcher.getBestMatch(ULocale.ENGLISH));
|
||||
// }
|
||||
|
||||
public void testGetBestMatch_googlePseudoLocales() {
|
||||
// Google pseudo locales are primarily based on variant subtags.
|
||||
// See http://sites/intl_eng/pseudo_locales.
|
||||
// (See below for the region code based fall back options.)
|
||||
final LocaleMatcher matcher = new LocaleMatcher(
|
||||
"fr, pt");
|
||||
final LocaleMatcher matcher = newLocaleMatcher(
|
||||
"fr, pt");
|
||||
assertEquals("fr", matcher.getBestMatch("de").toString());
|
||||
assertEquals("fr", matcher.getBestMatch("en_US").toString());
|
||||
assertEquals("fr", matcher.getBestMatch("en").toString());
|
||||
@ -282,15 +388,15 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
|
||||
public void testGetBestMatch_regionDistance() {
|
||||
LocaleMatcher matcher = new LocaleMatcher("es_AR, es");
|
||||
LocaleMatcher matcher = newLocaleMatcher("es_AR, es");
|
||||
assertEquals("es_AR", matcher.getBestMatch("es_MX").toString());
|
||||
|
||||
matcher = new LocaleMatcher("fr, en, en_GB");
|
||||
matcher = newLocaleMatcher("fr, en, en_GB");
|
||||
assertEquals("en_GB", matcher.getBestMatch("en_CA").toString());
|
||||
|
||||
matcher = new LocaleMatcher("de_AT, de_DE, de_CH");
|
||||
matcher = newLocaleMatcher("de_AT, de_DE, de_CH");
|
||||
assertEquals("de_DE", matcher.getBestMatch("de").toString());
|
||||
|
||||
|
||||
showDistance(matcher, "en", "en_CA");
|
||||
showDistance(matcher, "en_CA", "en");
|
||||
showDistance(matcher, "en_US", "en_CA");
|
||||
@ -300,7 +406,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
showDistance(matcher, "en", "en_UM");
|
||||
showDistance(matcher, "en_UM", "en");
|
||||
}
|
||||
|
||||
|
||||
private void showDistance(LocaleMatcher matcher, String desired, String supported) {
|
||||
ULocale desired2 = new ULocale(desired);
|
||||
ULocale supported2 = new ULocale(supported);
|
||||
@ -308,13 +414,13 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
logln(desired + " to " + supported + " :\t" + distance);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* If all the base languages are the same, then each sublocale matches itself most closely
|
||||
* If all the base languages are the same, then each sublocale matches
|
||||
* itself most closely
|
||||
*/
|
||||
public void testExactMatches() {
|
||||
String lastBase = "";
|
||||
TreeSet<ULocale> sorted = new TreeSet();
|
||||
TreeSet<ULocale> sorted = new TreeSet<ULocale>();
|
||||
for (ULocale loc : ULocale.getAvailableLocales()) {
|
||||
String language = loc.getLanguage();
|
||||
if (!lastBase.equals(language)) {
|
||||
@ -337,16 +443,17 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
sorted.add(max);
|
||||
check2(sorted);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sorted
|
||||
*/
|
||||
private void check2(Set<ULocale> sorted) {
|
||||
// TODO Auto-generated method stub
|
||||
logln("Checking: " + sorted);
|
||||
LocaleMatcher matcher = new LocaleMatcher(
|
||||
LocalePriorityList.add(
|
||||
sorted.toArray(new ULocale[sorted.size()]))
|
||||
.build());
|
||||
LocaleMatcher matcher = newLocaleMatcher(
|
||||
LocalePriorityList.add(
|
||||
sorted.toArray(new ULocale[sorted.size()]))
|
||||
.build());
|
||||
for (ULocale loc : sorted) {
|
||||
String stringLoc = loc.toString();
|
||||
assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
|
||||
@ -363,21 +470,129 @@ public class LocaleMatcherTest extends TestFmwk {
|
||||
}
|
||||
|
||||
|
||||
// public void testComputeDistance_monkeyTest() {
|
||||
// RegionCode[] codes = RegionCode.values();
|
||||
// Random random = new Random();
|
||||
// for (int i = 0; i < 1000; ++i) {
|
||||
// RegionCode x = codes[random.nextInt(codes.length)];
|
||||
// RegionCode y = codes[random.nextInt(codes.length)];
|
||||
// double d = LocaleMatcher.getRegionDistance(x, y, null, null);
|
||||
// if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
|
||||
// assertEquals(LocaleMatcher.REGION_DISTANCE, d);
|
||||
// } else if (x == y) {
|
||||
// assertEquals(0.0, d);
|
||||
// } else {
|
||||
// assertTrue(d > 0);
|
||||
// assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// public void testComputeDistance_monkeyTest() {
|
||||
// RegionCode[] codes = RegionCode.values();
|
||||
// Random random = new Random();
|
||||
// for (int i = 0; i < 1000; ++i) {
|
||||
// RegionCode x = codes[random.nextInt(codes.length)];
|
||||
// RegionCode y = codes[random.nextInt(codes.length)];
|
||||
// double d = LocaleMatcher.getRegionDistance(x, y, null, null);
|
||||
// if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
|
||||
// assertEquals(LocaleMatcher.REGION_DISTANCE, d);
|
||||
// } else if (x == y) {
|
||||
// assertEquals(0.0, d);
|
||||
// } else {
|
||||
// assertTrue(d > 0);
|
||||
// assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public void testGetBestMatchForList_matchOnMaximized2() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
|
||||
// ja-JP matches ja on likely subtags, and it's listed first, thus it wins over
|
||||
// thus it wins over the second preference en-GB.
|
||||
assertEquals("Match for ja-JP, with likely region subtag",
|
||||
"ja", matcher.getBestMatch("ja-JP, en-GB").toString());
|
||||
// Check that if the preference is maximized already, it works as well.
|
||||
assertEquals("Match for ja-Jpan-JP (maximized already)",
|
||||
"ja", matcher.getBestMatch("ja-Jpan-JP, en-GB").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
|
||||
assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
|
||||
assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
|
||||
}
|
||||
|
||||
public void testGetBestMatchForPortuguese() {
|
||||
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
|
||||
final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
|
||||
final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
|
||||
// Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
|
||||
|
||||
final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
|
||||
// European user who prefers Spanish over Brazillian Portuguese as a fallback.
|
||||
|
||||
assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
|
||||
assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
|
||||
assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
|
||||
|
||||
// Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
|
||||
// The asymmetry between this case and above is because it's "pt_PT" that's missing between the
|
||||
// matchers as "pt_BR" is a much more common language.
|
||||
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
|
||||
assertEquals("pt", withPTImplicit.getBestMatch("pt, es_419, pt_PT").toString());
|
||||
assertEquals("pt_BR", withoutPT.getBestMatch("pt, es_419, pt_PT").toString());
|
||||
|
||||
// Code that adds the user's country can get "pt_US" for a user's language.
|
||||
// That should fall back to "pt_BR".
|
||||
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt_US, pt_PT").toString());
|
||||
assertEquals("pt", withPTImplicit.getBestMatch("pt_US, pt_PT").toString());
|
||||
}
|
||||
|
||||
public void testVariantWithScriptMatch() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
|
||||
assertEquals("en", matcher.getBestMatch("en-GB").toString());
|
||||
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
|
||||
}
|
||||
|
||||
public void testVariantWithScriptMatch2() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, sv");
|
||||
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
|
||||
}
|
||||
|
||||
public void testPerf() {
|
||||
if (LANGUAGE_MATCHER_DATA == null) {
|
||||
return; // skip except when testing data
|
||||
}
|
||||
final String desired = "sv, en";
|
||||
|
||||
final LocaleMatcher matcherShort = newLocaleMatcher(desired);
|
||||
final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
|
||||
final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, 
fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, 
sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
|
||||
|
||||
//LocaleMatcher.DEBUG = true;
|
||||
ULocale expected = new ULocale("sv");
|
||||
assertEquals(expected, matcherShort.getBestMatch(desired));
|
||||
assertEquals(expected, matcherLong.getBestMatch(desired));
|
||||
assertEquals(expected, matcherVeryLong.getBestMatch(desired));
|
||||
//LocaleMatcher.DEBUG = false;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 100000;
|
||||
boolean showMessage = i != 0;
|
||||
long timeShort = timeLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations, 0);
|
||||
long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
|
||||
long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
|
||||
}
|
||||
}
|
||||
|
||||
private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
|
||||
boolean showmessage, int iterations, long comparisonTime) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) warnln(title + (delta / iterations) + " nanos, "
|
||||
+ (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
|
||||
return delta;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2014, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -617,7 +617,9 @@ public final class CollectionUtilities {
|
||||
if (diff != 0) {
|
||||
return diff;
|
||||
}
|
||||
return compare(new TreeSet(o1), new TreeSet(o2));
|
||||
Collection<T> x1 = SortedSet.class.isInstance(o1) ? o1 : new TreeSet<T>(o1);
|
||||
Collection<T> x2 = SortedSet.class.isInstance(o2) ? o2 : new TreeSet<T>(o2);
|
||||
return compare(x1, x2);
|
||||
}
|
||||
|
||||
public static class SetComparator<T extends Comparable>
|
||||
|
Loading…
Reference in New Issue
Block a user