From 4544a84509b73c23fa21e471797b28ff8eb619bc Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Wed, 4 May 2016 21:12:46 +0000 Subject: [PATCH] ICU-12528 Committing Java version of CompactDecimalDataCache data sink. X-SVN-Rev: 38696 --- .../ibm/icu/text/CompactDecimalDataCache.java | 499 ++++++++++-------- 1 file changed, 274 insertions(+), 225 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CompactDecimalDataCache.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CompactDecimalDataCache.java index 7277b0037f..3311404d30 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CompactDecimalDataCache.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CompactDecimalDataCache.java @@ -14,6 +14,8 @@ import com.ibm.icu.impl.ICUCache; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.SimpleCache; +import com.ibm.icu.impl.UResource; +import com.ibm.icu.text.DecimalFormat.Unit; import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; @@ -28,11 +30,16 @@ class CompactDecimalDataCache { private static final String LONG_STYLE = "long"; private static final String SHORT_CURRENCY_STYLE = "shortCurrency"; private static final String NUMBER_ELEMENTS = "NumberElements"; - private static final String PATTERN_LONG_PATH = "patternsLong/decimalFormat"; - private static final String PATTERNS_SHORT_PATH = "patternsShort/decimalFormat"; - private static final String PATTERNS_SHORT_CURRENCY_PATH = "patternsShort/currencyFormat"; + private static final String PATTERNS_LONG = "patternsLong"; + private static final String PATTERNS_SHORT = "patternsShort"; + private static final String DECIMAL_FORMAT = "decimalFormat"; + private static final String CURRENCY_FORMAT = "currencyFormat"; + private static final String LATIN_NUMBERING_SYSTEM = "latn"; - static final String OTHER = "other"; + private static enum PatternsTableKey { PATTERNS_LONG, PATTERNS_SHORT }; + private static enum FormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT }; + + public static final String OTHER = "other"; /** * We can specify prefixes or suffixes for values with up to 15 digits, @@ -40,8 +47,6 @@ class CompactDecimalDataCache { */ static final int MAX_DIGITS = 15; - private static final String LATIN_NUMBERING_SYSTEM = "latn"; - private final ICUCache cache = new SimpleCache(); @@ -67,12 +72,17 @@ class CompactDecimalDataCache { static class Data { long[] divisors; Map units; + boolean fromLatin; Data(long[] divisors, Map units) { this.divisors = divisors; this.units = units; } + + public boolean isEmpty() { + return units == null || units.isEmpty(); + } } /** @@ -87,23 +97,177 @@ class CompactDecimalDataCache { Data longData; Data shortCurrencyData; - DataBundle(Data shortData, Data longData, Data shortCurrencyData) { + private DataBundle(Data shortData, Data longData, Data shortCurrencyData) { this.shortData = shortData; this.longData = longData; this.shortCurrencyData = shortCurrencyData; } - } - private static enum UResFlags { - ANY, // Any locale will do. - NOT_ROOT // Locale cannot be root. + private static DataBundle createEmpty() { + return new DataBundle( + new Data(new long[MAX_DIGITS], new HashMap()), + new Data(new long[MAX_DIGITS], new HashMap()), + new Data(new long[MAX_DIGITS], new HashMap()) + ); + } } - /** - * Fetch data for a particular locale. Clients must not modify any part - * of the returned data. Portions of returned data may be shared so modifying - * it will have unpredictable results. + * Sink for enumerating all of the compact decimal format patterns. + * + * More specific bundles (en_GB) are enumerated before their parents (en_001, en, root): + * Only store a value if it is still missing, that is, it has not been overridden. + */ + private static final class CompactDecimalDataSink extends UResource.Sink { + + private DataBundle dataBundle; // Where to save values when they are read + private ULocale locale; // The locale we are traversing (for exception messages) + private boolean isLatin; // Whether or not we are traversing the Latin table + + /* + * NumberElements{ <-- top (numbering system table) + * latn{ <-- patternsTable (one per numbering system) + * patternsLong{ <-- formatsTable (one per pattern) + * decimalFormat{ <-- powersOfTenTable (one per format) + * 1000{ <-- pluralVariantsTable (one per power of ten) + * one{"0 thousand"} <-- plural variant and template + */ + + public CompactDecimalDataSink(DataBundle dataBundle, ULocale locale) { + this.dataBundle = dataBundle; + this.locale = locale; + } + + @Override + public void put(UResource.Key key, UResource.Value value, boolean isRoot) { + // SPECIAL CASE: Skip root data while loading non-latn so that we fall back to latn then root. + if (!isLatin && isRoot) return; + + UResource.Table patternsTable = value.getTable(); + for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) { + + // patterns table: check for patternsShort or patternsLong + PatternsTableKey patternsTableKey; + if (key.contentEquals(PATTERNS_SHORT)) { + patternsTableKey = PatternsTableKey.PATTERNS_SHORT; + } else if (key.contentEquals(PATTERNS_LONG)) { + patternsTableKey = PatternsTableKey.PATTERNS_LONG; + } else { + continue; + } + + // traverse into the table of formats + UResource.Table formatsTable = value.getTable(); + for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) { + + // formats table: check for decimalFormat or currencyFormat + FormatsTableKey formatsTableKey; + if (key.contentEquals(DECIMAL_FORMAT)) { + formatsTableKey = FormatsTableKey.DECIMAL_FORMAT; + } else if (key.contentEquals(CURRENCY_FORMAT)) { + formatsTableKey = FormatsTableKey.CURRENCY_FORMAT; + } else { + continue; + } + + // Set the current style and destination based on the lvl1 and lvl2 keys + String style = null; + Data destination = null; + if (patternsTableKey == PatternsTableKey.PATTERNS_LONG + && formatsTableKey == FormatsTableKey.DECIMAL_FORMAT) { + style = LONG_STYLE; + destination = dataBundle.longData; + } else if (patternsTableKey == PatternsTableKey.PATTERNS_SHORT + && formatsTableKey == FormatsTableKey.DECIMAL_FORMAT) { + style = SHORT_STYLE; + destination = dataBundle.shortData; + } else if (patternsTableKey == PatternsTableKey.PATTERNS_SHORT + && formatsTableKey == FormatsTableKey.CURRENCY_FORMAT) { + style = SHORT_CURRENCY_STYLE; + destination = dataBundle.shortCurrencyData; + } else { + // Silently ignore this case + continue; + } + + // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE: + // 1) Don't consume any data if we're in Latin and it was already + // populated from before + // 2) Don't consume longData if shortData was consumed from the non-Latin + // locale numbering system + // 3) Don't consume longData if this is the root bundle in Latin + // and shortData is already populated, perhaps from deeper in Latin + if (isLatin + && !destination.isEmpty()) { + continue; + } + if (isLatin + && style == LONG_STYLE + && dataBundle.shortData.fromLatin) { + continue; + } + if (isLatin + && style == LONG_STYLE + && isRoot + && !dataBundle.shortData.isEmpty()) { + continue; + } + + // Set the "fromLatin" flag on the data object + destination.fromLatin = isLatin; + + // traverse into the table of powers of ten + UResource.Table powersOfTenTable = value.getTable(); + for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { + + // This value will always be some even power of 10. e.g 10000. + long power10 = Long.parseLong(key.toString()); + int log10Value = (int) Math.log10(power10); + + // Silently ignore divisors that are too big. + if (log10Value >= MAX_DIGITS) continue; + + // Iterate over the plural variants ("one", "other", etc) + UResource.Table pluralVariantsTable = value.getTable(); + for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { + // TODO: Use StandardPlural rather than String. + String pluralVariant = key.toString(); + String template = value.toString(); + + // Copy the data into the in-memory data bundle (do not overwrite + // existing values) + int numZeros = populatePrefixSuffix( + pluralVariant, log10Value, template, locale, style, destination, false); + + // If populatePrefixSuffix returns -1, it means that this key has been + // encountered already. + if (numZeros < 0) { + continue; + } + + // Set the divisor, which is based on the number of zeros in the template + // string. If the divisor from here is different from the one previously + // stored, it means that the number of zeros in different plural variants + // differs; throw an exception. + long divisor = calculateDivisor(power10, numZeros); + if (destination.divisors[log10Value] != 0L + && destination.divisors[log10Value] != divisor) { + throw new IllegalArgumentException("Plural variant '" + pluralVariant + + "' template '" + template + + "' for 10^" + log10Value + + " has wrong number of zeros in " + localeAndStyle(locale, style)); + } + destination.divisors[log10Value] = divisor; + } + } + } + } + } + } + + /** + * Fetch data for a particular locale. Clients must not modify any part of the returned data. Portions of returned + * data may be shared so modifying it will have unpredictable results. */ DataBundle get(ULocale locale) { DataBundle result = cache.get(locale); @@ -114,217 +278,40 @@ class CompactDecimalDataCache { return result; } - /** - * Loads the "patternsShort" and "patternsLong" data for a particular locale. - * We look for both of them in 3 places in this order:
    - *
  1. local numbering system no ROOT fallback
  2. - *
  3. latin numbering system no ROOT fallback
  4. - *
  5. latin numbering system ROOT locale.
  6. - *
- * If we find "patternsShort" data before finding "patternsLong" data, we - * make the "patternsLong" data be the same as "patternsShort." - * @param ulocale the locale for which we are loading the data. - * @return The returned data, never null. - */ - private static DataBundle load(ULocale ulocale) { - NumberingSystem ns = NumberingSystem.getInstance(ulocale); - ICUResourceBundle r = (ICUResourceBundle)UResourceBundle. - getBundleInstance(ICUData.ICU_BASE_NAME, ulocale); - r = r.getWithFallback(NUMBER_ELEMENTS); - String numberingSystemName = ns.getName(); + private static DataBundle load(ULocale ulocale) throws MissingResourceException { + DataBundle dataBundle = DataBundle.createEmpty(); + String nsName = NumberingSystem.getInstance(ulocale).getName(); + ICUResourceBundle r = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, + ulocale); + CompactDecimalDataSink sink = new CompactDecimalDataSink(dataBundle, ulocale); - ICUResourceBundle shortDataBundle = null; - ICUResourceBundle longDataBundle = null; - ICUResourceBundle shortCurrencyDataBundle = null; - if (!LATIN_NUMBERING_SYSTEM.equals(numberingSystemName)) { - ICUResourceBundle bundle = findWithFallback(r, numberingSystemName, UResFlags.NOT_ROOT); - shortDataBundle = findWithFallback(bundle, PATTERNS_SHORT_PATH, UResFlags.NOT_ROOT); - longDataBundle = findWithFallback(bundle, PATTERN_LONG_PATH, UResFlags.NOT_ROOT); - shortCurrencyDataBundle = findWithFallback(bundle, PATTERNS_SHORT_CURRENCY_PATH, UResFlags.NOT_ROOT); + // First load the number elements data from nsName if nsName is not Latin. + if (!nsName.equals(LATIN_NUMBERING_SYSTEM)) { + sink.isLatin = false; + r.getAllItemsWithFallback(NUMBER_ELEMENTS + "/" + nsName, sink); } - // If we haven't found, look in latin numbering system. - if (shortDataBundle == null) { - ICUResourceBundle bundle = getWithFallback(r, LATIN_NUMBERING_SYSTEM, UResFlags.ANY); - shortDataBundle = getWithFallback(bundle, PATTERNS_SHORT_PATH, UResFlags.ANY); - if (longDataBundle == null) { - longDataBundle = findWithFallback(bundle, PATTERN_LONG_PATH, UResFlags.ANY); - if (longDataBundle != null && isRoot(longDataBundle) && !isRoot(shortDataBundle)) { - longDataBundle = null; - } - } - } - if (shortCurrencyDataBundle == null) { - ICUResourceBundle bundle = getWithFallback(r, LATIN_NUMBERING_SYSTEM, UResFlags.ANY); - shortCurrencyDataBundle = getWithFallback(bundle, PATTERNS_SHORT_CURRENCY_PATH, UResFlags.ANY); - } - Data shortData = loadStyle(shortDataBundle, ulocale, SHORT_STYLE); - Data longData; - if (longDataBundle == null) { - longData = shortData; - } else { - longData = loadStyle(longDataBundle, ulocale, LONG_STYLE); - } - Data shortCurrencyData = loadStyle(shortCurrencyDataBundle, ulocale, SHORT_CURRENCY_STYLE); - return new DataBundle(shortData, longData, shortCurrencyData); - } + // Now load Latin, which will fill in things that were left out from above. + sink.isLatin = true; + r.getAllItemsWithFallback(NUMBER_ELEMENTS + "/" + LATIN_NUMBERING_SYSTEM, sink); - /** - * findWithFallback finds a sub-resource bundle within r. - * @param r a resource bundle. It may be null in which case sub-resource bundle - * won't be found. - * @param path the path relative to r - * @param flags ANY or NOT_ROOT for locale of found sub-resource bundle. - * @return The sub-resource bundle or NULL if none found. - */ - private static ICUResourceBundle findWithFallback( - ICUResourceBundle r, String path, UResFlags flags) { - if (r == null) { - return null; - } - ICUResourceBundle result = r.findWithFallback(path); - if (result == null) { - return null; - } - switch (flags) { - case NOT_ROOT: - return isRoot(result) ? null : result; - case ANY: - return result; - default: - throw new IllegalArgumentException(); - } - } - - /** - * Like findWithFallback but throws MissingResourceException if no - * resource found instead of returning null. - */ - private static ICUResourceBundle getWithFallback( - ICUResourceBundle r, String path, UResFlags flags) { - ICUResourceBundle result = findWithFallback(r, path, flags); - if (result == null) { - throw new MissingResourceException( - "Cannot find " + path, - ICUResourceBundle.class.getName(), path); - - } - return result; - } - - /** - * isRoot returns true if r is in root locale or false otherwise. - */ - private static boolean isRoot(ICUResourceBundle r) { - ULocale bundleLocale = r.getULocale(); - // Note: bundleLocale for root should be ULocale.ROOT, which is equivalent to new ULocale(""). - // However, resource bundle might be initialized with locale ID "root", which should be - // actually normalized to "" in ICUResourceBundle. For now, this logic also compare to - // "root", not just ULocale.ROOT. - return bundleLocale.equals(ULocale.ROOT) || bundleLocale.toString().equals("root"); - } - - /** - * Loads the data - * @param r the main resource bundle. - * @param numberingSystemName The namespace name. - * @param allowNullResult If true, returns null if no data can be found - * for particular locale and style. If false, throws a runtime exception - * if data cannot be found. - * @return The loaded data or possibly null if allowNullResult is true. - */ - private static Data loadStyle(ICUResourceBundle r, ULocale locale, String style) { - int size = r.getSize(); - Data result = new Data( - new long[MAX_DIGITS], - new HashMap()); - for (int i = 0; i < size; i++) { - populateData(r.get(i), locale, style, result); - } - fillInMissing(result); - return result; - } - - /** - * Populates Data object with data for a particular divisor from resource bundle. - * @param divisorData represents the rules for numbers of a particular size. - * This may look like: - *
-     *   10000{
-     *       few{"00K"}
-     *       many{"00K"}
-     *       one{"00 xnb"}
-     *       other{"00 xnb"}
-     *   }
-     * 
- * @param locale the locale - * @param style the style - * @param result rule stored here. - * - */ - private static void populateData( - UResourceBundle divisorData, ULocale locale, String style, Data result) { - // This value will always be some even pwoer of 10. e.g 10000. - long magnitude = Long.parseLong(divisorData.getKey()); - int thisIndex = (int) Math.log10(magnitude); - - // Silently ignore divisors that are too big. - if (thisIndex >= MAX_DIGITS) { - return; + // If longData is empty, default it to be equal to shortData + if (dataBundle.longData.isEmpty()) { + dataBundle.longData = dataBundle.shortData; } - int size = divisorData.getSize(); + // Check for "other" variants in each of the three data classes + checkForOtherVariants(dataBundle.longData, ulocale, LONG_STYLE); + checkForOtherVariants(dataBundle.shortData, ulocale, SHORT_STYLE); + checkForOtherVariants(dataBundle.shortCurrencyData, ulocale, SHORT_CURRENCY_STYLE); - // keep track of how many zeros are used in the plural variants. - // For "00K" this would be 2. This number must be the same for all - // plural variants. If they differ, we throw a runtime exception as - // such an anomaly is unrecoverable. We expect at least one zero. - int numZeros = 0; + // Resolve missing elements + fillInMissing(dataBundle.longData); + fillInMissing(dataBundle.shortData); + fillInMissing(dataBundle.shortCurrencyData); - // Keep track if this block defines "other" variant. If a block - // fails to define the "other" variant, we must immediately throw - // an exception as it is assumed that "other" variants are always - // defined. - boolean otherVariantDefined = false; - - // Loop over all the plural variants. e.g one, other. - for (int i = 0; i < size; i++) { - UResourceBundle pluralVariantData = divisorData.get(i); - String pluralVariant = pluralVariantData.getKey(); - String template = pluralVariantData.getString(); - if (pluralVariant.equals(OTHER)) { - otherVariantDefined = true; - } - int nz = populatePrefixSuffix( - pluralVariant, thisIndex, template, locale, style, result); - if (nz != numZeros) { - if (numZeros != 0) { - throw new IllegalArgumentException( - "Plural variant '" + pluralVariant + "' template '" + - template + "' for 10^" + thisIndex + - " has wrong number of zeros in " + localeAndStyle(locale, style)); - } - numZeros = nz; - } - } - - if (!otherVariantDefined) { - throw new IllegalArgumentException( - "No 'other' plural variant defined for 10^" + thisIndex + - "in " +localeAndStyle(locale, style)); - } - - // We craft our divisor such that when we divide by it, we get a - // number with the same number of digits as zeros found in the - // plural variant templates. If our magnitude is 10000 and we have - // two 0's in our plural variants, then we want a divisor of 1000. - // Note that if we have 43560 which is of same magnitude as 10000. - // When we divide by 1000 we a quotient which rounds to 44 (2 digits) - long divisor = magnitude; - for (int i = 1; i < numZeros; i++) { - divisor /= 10; - } - result.divisors[thisIndex] = divisor; + // Return the data bundle + return dataBundle; } @@ -336,12 +323,12 @@ class CompactDecimalDataCache { * @param template e.g "00K" * @param locale the locale * @param style the style - * @param result Extracted prefix and suffix stored here. - * @return number of zeros found before any decimal point in template. + * @param destination Extracted prefix and suffix stored here. + * @return number of zeros found before any decimal point in template, or -1 if it was not saved. */ private static int populatePrefixSuffix( String pluralVariant, int idx, String template, ULocale locale, String style, - Data result) { + Data destination, boolean overwrite) { int firstIdx = template.indexOf("0"); int lastIdx = template.lastIndexOf("0"); if (firstIdx == -1) { @@ -352,7 +339,12 @@ class CompactDecimalDataCache { } String prefix = template.substring(0, firstIdx); String suffix = template.substring(lastIdx + 1); - saveUnit(new DecimalFormat.Unit(prefix, suffix), pluralVariant, idx, result.units); + + // Save the unit, and return -1 if it was not saved + boolean saved = saveUnit(new DecimalFormat.Unit(prefix, suffix), pluralVariant, idx, destination.units, overwrite); + if (!saved) { + return -1; + } // If there is effectively no prefix or suffix, ignore the actual // number of 0's and act as if the number of 0's matches the size @@ -369,6 +361,27 @@ class CompactDecimalDataCache { return i - firstIdx; } + /** + * Calculate a divisor based on the magnitude and number of zeros in the + * template string. + * @param power10 + * @param numZeros + * @return + */ + private static long calculateDivisor(long power10, int numZeros) { + // We craft our divisor such that when we divide by it, we get a + // number with the same number of digits as zeros found in the + // plural variant templates. If our magnitude is 10000 and we have + // two 0's in our plural variants, then we want a divisor of 1000. + // Note that if we have 43560 which is of same magnitude as 10000. + // When we divide by 1000 we a quotient which rounds to 44 (2 digits) + long divisor = power10; + for (int i = 1; i < numZeros; i++) { + divisor /= 10; + } + return divisor; + } + /** * Returns locale and style. Used to form useful messages in thrown @@ -380,6 +393,34 @@ class CompactDecimalDataCache { return "locale '" + locale + "' style '" + style + "'"; } + /** + * Checks to make sure that an "other" variant is present in all powers of 10. + * @param data + */ + private static void checkForOtherVariants(Data data, ULocale locale, String style) { + DecimalFormat.Unit[] otherByBase = data.units.get(OTHER); + + if (otherByBase == null) { + throw new IllegalArgumentException("No 'other' plural variants defined in " + + localeAndStyle(locale, style)); + } + + // Check all other plural variants, and make sure that if any of them are populated, then + // other is also populated + for (Map.Entry entry : data.units.entrySet()) { + if (entry.getKey() == OTHER) continue; + DecimalFormat.Unit[] variantByBase = entry.getValue(); + for (int log10Value = 0; log10Value < MAX_DIGITS; log10Value++) { + if (variantByBase[log10Value] != null && otherByBase[log10Value] == null) { + throw new IllegalArgumentException( + "No 'other' plural variant defined for 10^" + log10Value + + " but a '" + entry.getKey() + "' variant is defined" + + " in " +localeAndStyle(locale, style)); + } + } + } + } + /** * After reading information from resource bundle into a Data object, there * is guarantee that it is complete. @@ -432,16 +473,24 @@ class CompactDecimalDataCache { } } - private static void saveUnit( + private static boolean saveUnit( DecimalFormat.Unit unit, String pluralVariant, int idx, - Map units) { + Map units, + boolean overwrite) { DecimalFormat.Unit[] byBase = units.get(pluralVariant); if (byBase == null) { byBase = new DecimalFormat.Unit[MAX_DIGITS]; units.put(pluralVariant, byBase); } - byBase[idx] = unit; + // Don't overwrite a pre-existing value unless the "overwrite" flag is true. + if (!overwrite && byBase[idx] != null) { + return false; + } + + // Save the value and return + byBase[idx] = unit; + return true; } /**