diff --git a/icu4c/source/common/static_unicode_sets.cpp b/icu4c/source/common/static_unicode_sets.cpp index 5d598a0e33..5dab3931a7 100644 --- a/icu4c/source/common/static_unicode_sets.cpp +++ b/icu4c/source/common/static_unicode_sets.cpp @@ -23,7 +23,7 @@ using namespace icu::unisets; namespace { -UnicodeSet* gUnicodeSets[COUNT] = {}; +UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {}; // Save the empty instance in static memory to have well-defined behavior if a // regular UnicodeSet cannot be allocated. @@ -97,14 +97,28 @@ class ParseDataSink : public ResourceSink { saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); } else if (str.indexOf(u'+') != -1) { saveSet(PLUS_SIGN, str, status); - } else if (str.indexOf(u'‒') != -1) { + } else if (str.indexOf(u'-') != -1) { saveSet(MINUS_SIGN, str, status); } else if (str.indexOf(u'$') != -1) { saveSet(DOLLAR_SIGN, str, status); } else if (str.indexOf(u'£') != -1) { saveSet(POUND_SIGN, str, status); - } else if (str.indexOf(u'₨') != -1) { + } else if (str.indexOf(u'₹') != -1) { saveSet(RUPEE_SIGN, str, status); + } else if (str.indexOf(u'¥') != -1) { + saveSet(YEN_SIGN, str, status); + } else if (str.indexOf(u'₩') != -1) { + saveSet(WON_SIGN, str, status); + } else if (str.indexOf(u'%') != -1) { + saveSet(PERCENT_SIGN, str, status); + } else if (str.indexOf(u'‰') != -1) { + saveSet(PERMILLE_SIGN, str, status); + } else if (str.indexOf(u'’') != -1) { + saveSet(APOSTROPHE_SIGN, str, status); + } else { + // Unknown class of parse lenients + // TODO(ICU-20428): Make ICU automatically accept new classes? + U_ASSERT(FALSE); } if (U_FAILURE(status)) { return; } } @@ -122,7 +136,7 @@ UBool U_CALLCONV cleanupNumberParseUniSets() { reinterpret_cast(gEmptyUnicodeSet)->~UnicodeSet(); gEmptyUnicodeSetInitialized = FALSE; } - for (int32_t i = 0; i < COUNT; i++) { + for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) { delete gUnicodeSets[i]; gUnicodeSets[i] = nullptr; } @@ -155,27 +169,35 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); U_ASSERT(gUnicodeSets[PERIOD] != nullptr); U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); + U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr); - gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet( - u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status); + LocalPointer otherGrouping(new UnicodeSet( + u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", + status + ), status); + if (U_FAILURE(status)) { return; } + otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]); + gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan(); gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr); - gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status); - gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status); - gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status); + gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); + if (U_FAILURE(status)) { return; } U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); - gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status); + U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr); gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); - + if (U_FAILURE(status)) { return; } gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); diff --git a/icu4c/source/common/static_unicode_sets.h b/icu4c/source/common/static_unicode_sets.h index 0332ee6637..5d90ce5908 100644 --- a/icu4c/source/common/static_unicode_sets.h +++ b/icu4c/source/common/static_unicode_sets.h @@ -45,6 +45,7 @@ enum Key { PERIOD, STRICT_COMMA, STRICT_PERIOD, + APOSTROPHE_SIGN, OTHER_GROUPING_SEPARATORS, ALL_SEPARATORS, STRICT_ALL_SEPARATORS, @@ -54,13 +55,14 @@ enum Key { PLUS_SIGN, PERCENT_SIGN, PERMILLE_SIGN, - INFINITY_KEY, // INFINITY is defined in cmath + INFINITY_SIGN, // Currency Symbols DOLLAR_SIGN, POUND_SIGN, RUPEE_SIGN, - YEN_SIGN, // not in CLDR data, but Currency.java wants it + YEN_SIGN, + WON_SIGN, // Other DIGITS, @@ -70,7 +72,7 @@ enum Key { DIGITS_OR_STRICT_ALL_SEPARATORS, // The number of elements in the enum. - COUNT + UNISETS_KEY_COUNT }; /** @@ -126,8 +128,9 @@ static const struct { } kCurrencyEntries[] = { {DOLLAR_SIGN, u'$'}, {POUND_SIGN, u'£'}, - {RUPEE_SIGN, u'₨'}, + {RUPEE_SIGN, u'₹'}, {YEN_SIGN, u'¥'}, + {WON_SIGN, u'₩'}, }; } // namespace unisets diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp index 9ccceec847..e0daab9374 100644 --- a/icu4c/source/i18n/numparse_symbols.cpp +++ b/icu4c/source/i18n/numparse_symbols.cpp @@ -90,7 +90,7 @@ void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const { InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs) - : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY_KEY) { + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY_SIGN) { } bool InfinityMatcher::isDisabled(const ParsedNumber& result) const { diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index e391f5904e..53c527cc06 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -14,8 +14,6 @@ #include #include -using icu::unisets::get; - void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { if (exec) { logln("TestSuite NumberParserTest: "); diff --git a/icu4c/source/test/intltest/static_unisets_test.cpp b/icu4c/source/test/intltest/static_unisets_test.cpp index bfe6996352..5cc946bc22 100644 --- a/icu4c/source/test/intltest/static_unisets_test.cpp +++ b/icu4c/source/test/intltest/static_unisets_test.cpp @@ -34,7 +34,10 @@ void StaticUnicodeSetsTest::runIndexedTest(int32_t index, UBool exec, const char logln("TestSuite StaticUnicodeSetsTest: "); } TESTCASE_AUTO_BEGIN; - TESTCASE_AUTO(testSetCoverage); + if (!quick) { + // Slow test: run in exhaustive mode only + TESTCASE_AUTO(testSetCoverage); + } TESTCASE_AUTO(testNonEmpty); TESTCASE_AUTO_END; } @@ -64,7 +67,7 @@ void StaticUnicodeSetsTest::testSetCoverage() { const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN); const UnicodeSet &percent = *get(unisets::PERCENT_SIGN); const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN); - const UnicodeSet &infinity = *get(unisets::INFINITY_KEY); + const UnicodeSet &infinity = *get(unisets::INFINITY_SIGN); int32_t localeCount; const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount); @@ -87,7 +90,7 @@ void StaticUnicodeSetsTest::testSetCoverage() { } void StaticUnicodeSetsTest::testNonEmpty() { - for (int32_t i=0; i unicodeSets = new EnumMap(Key.class); + private static final Map unicodeSets = new EnumMap<>(Key.class); /** * Gets the static-allocated UnicodeSet according to the provided key. @@ -126,6 +128,8 @@ public class StaticUnicodeSets { return Key.RUPEE_SIGN; } else if (get(Key.YEN_SIGN).contains(str)) { return Key.YEN_SIGN; + } else if (get(Key.WON_SIGN).contains(str)) { + return Key.WON_SIGN; } else { return null; } @@ -197,14 +201,27 @@ public class StaticUnicodeSets { saveSet(isLenient ? Key.COMMA : Key.STRICT_COMMA, str); } else if (str.indexOf('+') != -1) { saveSet(Key.PLUS_SIGN, str); - } else if (str.indexOf('‒') != -1) { + } else if (str.indexOf('-') != -1) { saveSet(Key.MINUS_SIGN, str); } else if (str.indexOf('$') != -1) { saveSet(Key.DOLLAR_SIGN, str); } else if (str.indexOf('£') != -1) { saveSet(Key.POUND_SIGN, str); - } else if (str.indexOf('₨') != -1) { + } else if (str.indexOf('₹') != -1) { saveSet(Key.RUPEE_SIGN, str); + } else if (str.indexOf('¥') != -1) { + saveSet(Key.YEN_SIGN, str); + } else if (str.indexOf('₩') != -1) { + saveSet(Key.WON_SIGN, str); + } else if (str.indexOf('%') != -1) { + saveSet(Key.PERCENT_SIGN, str); + } else if (str.indexOf('‰') != -1) { + saveSet(Key.PERMILLE_SIGN, str); + } else if (str.indexOf('’') != -1) { + saveSet(Key.APOSTROPHE_SIGN, str); + } else { + // TODO(ICU-20428): Make ICU automatically accept new classes? + throw new AssertionError("Unknown class of parse lenients: " + str); } } } @@ -230,9 +247,12 @@ public class StaticUnicodeSets { assert unicodeSets.containsKey(Key.STRICT_COMMA); assert unicodeSets.containsKey(Key.PERIOD); assert unicodeSets.containsKey(Key.STRICT_PERIOD); + assert unicodeSets.containsKey(Key.APOSTROPHE_SIGN); - unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS, - new UnicodeSet("['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]").freeze()); + UnicodeSet otherGrouping = new UnicodeSet( + "[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]"); + otherGrouping.addAll(unicodeSets.get(Key.APOSTROPHE_SIGN)); + unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS, otherGrouping.freeze()); unicodeSets.put(Key.ALL_SEPARATORS, computeUnion(Key.COMMA, Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS)); unicodeSets.put(Key.STRICT_ALL_SEPARATORS, @@ -240,15 +260,16 @@ public class StaticUnicodeSets { assert unicodeSets.containsKey(Key.MINUS_SIGN); assert unicodeSets.containsKey(Key.PLUS_SIGN); + assert unicodeSets.containsKey(Key.PERCENT_SIGN); + assert unicodeSets.containsKey(Key.PERMILLE_SIGN); - unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%٪]").freeze()); - unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[‰؉]").freeze()); - unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze()); + unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze()); assert unicodeSets.containsKey(Key.DOLLAR_SIGN); assert unicodeSets.containsKey(Key.POUND_SIGN); assert unicodeSets.containsKey(Key.RUPEE_SIGN); - unicodeSets.put(Key.YEN_SIGN, new UnicodeSet("[¥\\uffe5]").freeze()); + assert unicodeSets.containsKey(Key.YEN_SIGN); + assert unicodeSets.containsKey(Key.WON_SIGN); unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze()); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java index 0aa915aca6..54d683acee 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java @@ -30,7 +30,7 @@ public class InfinityMatcher extends SymbolMatcher { } private InfinityMatcher() { - super(StaticUnicodeSets.Key.INFINITY); + super(StaticUnicodeSets.Key.INFINITY_SIGN); } @Override diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java index 8813857517..450f08ce72 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java @@ -50,7 +50,7 @@ public class ExhaustiveNumberTest extends TestFmwk { UnicodeSet minusSign = get(Key.MINUS_SIGN); UnicodeSet percent = get(Key.PERCENT_SIGN); UnicodeSet permille = get(Key.PERMILLE_SIGN); - UnicodeSet infinity = get(Key.INFINITY); + UnicodeSet infinity = get(Key.INFINITY_SIGN); for (ULocale locale : ULocale.getAvailableLocales()) { DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);