ICU-20108 Adding new parseLenients from CLDR 34 to ICU.
This commit is contained in:
parent
35b182767f
commit
0ef0629736
@ -23,7 +23,7 @@ using namespace icu::unisets;
|
||||
|
||||
namespace {
|
||||
|
||||
UnicodeSet* gUnicodeSets[COUNT] = {};
|
||||
UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};
|
||||
|
||||
// Save the empty instance in static memory to have well-defined behavior if a
|
||||
// regular UnicodeSet cannot be allocated.
|
||||
@ -97,14 +97,28 @@ class ParseDataSink : public ResourceSink {
|
||||
saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
|
||||
} else if (str.indexOf(u'+') != -1) {
|
||||
saveSet(PLUS_SIGN, str, status);
|
||||
} else if (str.indexOf(u'‒') != -1) {
|
||||
} else if (str.indexOf(u'-') != -1) {
|
||||
saveSet(MINUS_SIGN, str, status);
|
||||
} else if (str.indexOf(u'$') != -1) {
|
||||
saveSet(DOLLAR_SIGN, str, status);
|
||||
} else if (str.indexOf(u'£') != -1) {
|
||||
saveSet(POUND_SIGN, str, status);
|
||||
} else if (str.indexOf(u'₨') != -1) {
|
||||
} else if (str.indexOf(u'₹') != -1) {
|
||||
saveSet(RUPEE_SIGN, str, status);
|
||||
} else if (str.indexOf(u'¥') != -1) {
|
||||
saveSet(YEN_SIGN, str, status);
|
||||
} else if (str.indexOf(u'₩') != -1) {
|
||||
saveSet(WON_SIGN, str, status);
|
||||
} else if (str.indexOf(u'%') != -1) {
|
||||
saveSet(PERCENT_SIGN, str, status);
|
||||
} else if (str.indexOf(u'‰') != -1) {
|
||||
saveSet(PERMILLE_SIGN, str, status);
|
||||
} else if (str.indexOf(u'’') != -1) {
|
||||
saveSet(APOSTROPHE_SIGN, str, status);
|
||||
} else {
|
||||
// Unknown class of parse lenients
|
||||
// TODO(ICU-20428): Make ICU automatically accept new classes?
|
||||
U_ASSERT(FALSE);
|
||||
}
|
||||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
@ -122,7 +136,7 @@ UBool U_CALLCONV cleanupNumberParseUniSets() {
|
||||
reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet();
|
||||
gEmptyUnicodeSetInitialized = FALSE;
|
||||
}
|
||||
for (int32_t i = 0; i < COUNT; i++) {
|
||||
for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) {
|
||||
delete gUnicodeSets[i];
|
||||
gUnicodeSets[i] = nullptr;
|
||||
}
|
||||
@ -155,27 +169,35 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
|
||||
U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
|
||||
|
||||
gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
|
||||
u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
|
||||
LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
|
||||
u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
|
||||
status
|
||||
), status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]);
|
||||
gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
|
||||
gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
|
||||
gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
|
||||
STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
|
||||
|
||||
U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
|
||||
|
||||
gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
|
||||
gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
|
||||
gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);
|
||||
gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
|
||||
U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
|
||||
gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status);
|
||||
U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
|
||||
|
||||
gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
|
||||
|
||||
if (U_FAILURE(status)) { return; }
|
||||
gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
|
||||
gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
|
||||
|
||||
|
@ -45,6 +45,7 @@ enum Key {
|
||||
PERIOD,
|
||||
STRICT_COMMA,
|
||||
STRICT_PERIOD,
|
||||
APOSTROPHE_SIGN,
|
||||
OTHER_GROUPING_SEPARATORS,
|
||||
ALL_SEPARATORS,
|
||||
STRICT_ALL_SEPARATORS,
|
||||
@ -54,13 +55,14 @@ enum Key {
|
||||
PLUS_SIGN,
|
||||
PERCENT_SIGN,
|
||||
PERMILLE_SIGN,
|
||||
INFINITY_KEY, // INFINITY is defined in cmath
|
||||
INFINITY_SIGN,
|
||||
|
||||
// Currency Symbols
|
||||
DOLLAR_SIGN,
|
||||
POUND_SIGN,
|
||||
RUPEE_SIGN,
|
||||
YEN_SIGN, // not in CLDR data, but Currency.java wants it
|
||||
YEN_SIGN,
|
||||
WON_SIGN,
|
||||
|
||||
// Other
|
||||
DIGITS,
|
||||
@ -70,7 +72,7 @@ enum Key {
|
||||
DIGITS_OR_STRICT_ALL_SEPARATORS,
|
||||
|
||||
// The number of elements in the enum.
|
||||
COUNT
|
||||
UNISETS_KEY_COUNT
|
||||
};
|
||||
|
||||
/**
|
||||
@ -126,8 +128,9 @@ static const struct {
|
||||
} kCurrencyEntries[] = {
|
||||
{DOLLAR_SIGN, u'$'},
|
||||
{POUND_SIGN, u'£'},
|
||||
{RUPEE_SIGN, u'₨'},
|
||||
{RUPEE_SIGN, u'₹'},
|
||||
{YEN_SIGN, u'¥'},
|
||||
{WON_SIGN, u'₩'},
|
||||
};
|
||||
|
||||
} // namespace unisets
|
||||
|
@ -90,7 +90,7 @@ void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {
|
||||
|
||||
|
||||
InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs)
|
||||
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY_KEY) {
|
||||
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY_SIGN) {
|
||||
}
|
||||
|
||||
bool InfinityMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
|
@ -14,8 +14,6 @@
|
||||
#include <cmath>
|
||||
#include <numparse_affixes.h>
|
||||
|
||||
using icu::unisets::get;
|
||||
|
||||
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
|
||||
if (exec) {
|
||||
logln("TestSuite NumberParserTest: ");
|
||||
|
@ -34,7 +34,10 @@ void StaticUnicodeSetsTest::runIndexedTest(int32_t index, UBool exec, const char
|
||||
logln("TestSuite StaticUnicodeSetsTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testSetCoverage);
|
||||
if (!quick) {
|
||||
// Slow test: run in exhaustive mode only
|
||||
TESTCASE_AUTO(testSetCoverage);
|
||||
}
|
||||
TESTCASE_AUTO(testNonEmpty);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
@ -64,7 +67,7 @@ void StaticUnicodeSetsTest::testSetCoverage() {
|
||||
const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
|
||||
const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
|
||||
const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
|
||||
const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);
|
||||
const UnicodeSet &infinity = *get(unisets::INFINITY_SIGN);
|
||||
|
||||
int32_t localeCount;
|
||||
const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
|
||||
@ -87,7 +90,7 @@ void StaticUnicodeSetsTest::testSetCoverage() {
|
||||
}
|
||||
|
||||
void StaticUnicodeSetsTest::testNonEmpty() {
|
||||
for (int32_t i=0; i<unisets::COUNT; i++) {
|
||||
for (int32_t i=0; i<unisets::UNISETS_KEY_COUNT; i++) {
|
||||
if (i == unisets::EMPTY) {
|
||||
continue;
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ public class StaticUnicodeSets {
|
||||
PERIOD,
|
||||
STRICT_COMMA,
|
||||
STRICT_PERIOD,
|
||||
APOSTROPHE_SIGN,
|
||||
OTHER_GROUPING_SEPARATORS,
|
||||
ALL_SEPARATORS,
|
||||
STRICT_ALL_SEPARATORS,
|
||||
@ -48,13 +49,14 @@ public class StaticUnicodeSets {
|
||||
PLUS_SIGN,
|
||||
PERCENT_SIGN,
|
||||
PERMILLE_SIGN,
|
||||
INFINITY,
|
||||
INFINITY_SIGN,
|
||||
|
||||
// Currency Symbols
|
||||
DOLLAR_SIGN,
|
||||
POUND_SIGN,
|
||||
RUPEE_SIGN,
|
||||
YEN_SIGN, // not in CLDR data, but Currency.java wants it
|
||||
YEN_SIGN,
|
||||
WON_SIGN,
|
||||
|
||||
// Other
|
||||
DIGITS,
|
||||
@ -64,7 +66,7 @@ public class StaticUnicodeSets {
|
||||
DIGITS_OR_STRICT_ALL_SEPARATORS,
|
||||
};
|
||||
|
||||
private static final Map<Key, UnicodeSet> unicodeSets = new EnumMap<Key, UnicodeSet>(Key.class);
|
||||
private static final Map<Key, UnicodeSet> unicodeSets = new EnumMap<>(Key.class);
|
||||
|
||||
/**
|
||||
* Gets the static-allocated UnicodeSet according to the provided key.
|
||||
@ -126,6 +128,8 @@ public class StaticUnicodeSets {
|
||||
return Key.RUPEE_SIGN;
|
||||
} else if (get(Key.YEN_SIGN).contains(str)) {
|
||||
return Key.YEN_SIGN;
|
||||
} else if (get(Key.WON_SIGN).contains(str)) {
|
||||
return Key.WON_SIGN;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
@ -197,14 +201,27 @@ public class StaticUnicodeSets {
|
||||
saveSet(isLenient ? Key.COMMA : Key.STRICT_COMMA, str);
|
||||
} else if (str.indexOf('+') != -1) {
|
||||
saveSet(Key.PLUS_SIGN, str);
|
||||
} else if (str.indexOf('‒') != -1) {
|
||||
} else if (str.indexOf('-') != -1) {
|
||||
saveSet(Key.MINUS_SIGN, str);
|
||||
} else if (str.indexOf('$') != -1) {
|
||||
saveSet(Key.DOLLAR_SIGN, str);
|
||||
} else if (str.indexOf('£') != -1) {
|
||||
saveSet(Key.POUND_SIGN, str);
|
||||
} else if (str.indexOf('₨') != -1) {
|
||||
} else if (str.indexOf('₹') != -1) {
|
||||
saveSet(Key.RUPEE_SIGN, str);
|
||||
} else if (str.indexOf('¥') != -1) {
|
||||
saveSet(Key.YEN_SIGN, str);
|
||||
} else if (str.indexOf('₩') != -1) {
|
||||
saveSet(Key.WON_SIGN, str);
|
||||
} else if (str.indexOf('%') != -1) {
|
||||
saveSet(Key.PERCENT_SIGN, str);
|
||||
} else if (str.indexOf('‰') != -1) {
|
||||
saveSet(Key.PERMILLE_SIGN, str);
|
||||
} else if (str.indexOf('’') != -1) {
|
||||
saveSet(Key.APOSTROPHE_SIGN, str);
|
||||
} else {
|
||||
// TODO(ICU-20428): Make ICU automatically accept new classes?
|
||||
throw new AssertionError("Unknown class of parse lenients: " + str);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -230,9 +247,12 @@ public class StaticUnicodeSets {
|
||||
assert unicodeSets.containsKey(Key.STRICT_COMMA);
|
||||
assert unicodeSets.containsKey(Key.PERIOD);
|
||||
assert unicodeSets.containsKey(Key.STRICT_PERIOD);
|
||||
assert unicodeSets.containsKey(Key.APOSTROPHE_SIGN);
|
||||
|
||||
unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS,
|
||||
new UnicodeSet("['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]").freeze());
|
||||
UnicodeSet otherGrouping = new UnicodeSet(
|
||||
"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]");
|
||||
otherGrouping.addAll(unicodeSets.get(Key.APOSTROPHE_SIGN));
|
||||
unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS, otherGrouping.freeze());
|
||||
unicodeSets.put(Key.ALL_SEPARATORS,
|
||||
computeUnion(Key.COMMA, Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS));
|
||||
unicodeSets.put(Key.STRICT_ALL_SEPARATORS,
|
||||
@ -240,15 +260,16 @@ public class StaticUnicodeSets {
|
||||
|
||||
assert unicodeSets.containsKey(Key.MINUS_SIGN);
|
||||
assert unicodeSets.containsKey(Key.PLUS_SIGN);
|
||||
assert unicodeSets.containsKey(Key.PERCENT_SIGN);
|
||||
assert unicodeSets.containsKey(Key.PERMILLE_SIGN);
|
||||
|
||||
unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%٪]").freeze());
|
||||
unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[‰؉]").freeze());
|
||||
unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
|
||||
unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze());
|
||||
|
||||
assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
|
||||
assert unicodeSets.containsKey(Key.POUND_SIGN);
|
||||
assert unicodeSets.containsKey(Key.RUPEE_SIGN);
|
||||
unicodeSets.put(Key.YEN_SIGN, new UnicodeSet("[¥\\uffe5]").freeze());
|
||||
assert unicodeSets.containsKey(Key.YEN_SIGN);
|
||||
assert unicodeSets.containsKey(Key.WON_SIGN);
|
||||
|
||||
unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
|
||||
|
||||
|
@ -30,7 +30,7 @@ public class InfinityMatcher extends SymbolMatcher {
|
||||
}
|
||||
|
||||
private InfinityMatcher() {
|
||||
super(StaticUnicodeSets.Key.INFINITY);
|
||||
super(StaticUnicodeSets.Key.INFINITY_SIGN);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -50,7 +50,7 @@ public class ExhaustiveNumberTest extends TestFmwk {
|
||||
UnicodeSet minusSign = get(Key.MINUS_SIGN);
|
||||
UnicodeSet percent = get(Key.PERCENT_SIGN);
|
||||
UnicodeSet permille = get(Key.PERMILLE_SIGN);
|
||||
UnicodeSet infinity = get(Key.INFINITY);
|
||||
UnicodeSet infinity = get(Key.INFINITY_SIGN);
|
||||
|
||||
for (ULocale locale : ULocale.getAvailableLocales()) {
|
||||
DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
|
||||
|
Loading…
Reference in New Issue
Block a user