// Source path: scuffed-code/icu4c/source/test/intltest/numbertest_parse.cpp
//
// Note: this file intentionally contains Unicode characters that can be confused with
// other characters (for example, the mathematical bold digits used as parser input).
// They are test data and must be preserved exactly.

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "numbertest.h"
#include "numparse_impl.h"
#include "static_unicode_sets.h"
#include "unicode/dcfmtsym.h"
#include "unicode/testlog.h"
#include <cmath>
#include <numparse_affixes.h>
// Test-suite entry point for NumberParserTest.
//
// When `exec` is set, logs the suite banner. The test methods of this class are then
// listed through the TESTCASE_AUTO* macros, which are defined outside this file
// (presumably they map `index` to one test method and report its name through `name`
// -- confirm against the intltest framework headers).
// The trailing unnamed `char*` parameter is not used here.
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
if (exec) {
logln("TestSuite NumberParserTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO(testCombinedCurrencyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
TESTCASE_AUTO(test20360_BidiOverflow);
TESTCASE_AUTO(testInfiniteRecursion);
TESTCASE_AUTO_END;
}
/**
 * Table-driven check of parsers built by NumberParserImpl::createSimpleParser() for
 * locale "en".
 *
 * Each row supplies an input string, the pattern used to build the parser, the expected
 * number of UTF-16 code units consumed (ParsedNumber::charEnd) and the expected double.
 * `flags` is a bit set selecting which code paths are exercised for the row:
 *   0x01  greedy parse
 *   0x02  non-greedy ("slow") parse
 *   0x04  greedy parse with PARSE_FLAG_STRICT_GROUPING_SIZE added to the parser flags
 */
void NumberParserTest::testBasic() {
    IcuTestErrorCode status(*this, "testBasic");

    static const struct TestCase {
        int32_t flags;
        const char16_t* inputString;
        const char16_t* patternString;
        int32_t expectedCharsConsumed;
        double expectedResultDouble;
    } cases[] = {{3, u"51423", u"0", 5, 51423.},
                 {3, u"51423x", u"0", 5, 51423.},
                 {3, u" 51423", u"0", 6, 51423.},
                 {3, u"51423 ", u"0", 5, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"0", 10, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.},
                 {3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.},
                 {7, u"51,423", u"#,##,##0", 6, 51423.},
                 {7, u" 51,423", u"#,##,##0", 7, 51423.},
                 {7, u"51,423 ", u"#,##,##0", 6, 51423.},
                 {7, u"51,423,", u"#,##,##0", 6, 51423.},
                 {7, u"51,423,,", u"#,##,##0", 6, 51423.},
                 {7, u"51,423.5", u"#,##,##0", 8, 51423.5},
                 {7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
                 {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
                 {7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
                 {7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
                 {7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
                 {7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
                 {7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
                 {7, u"𝟳𝟴,𝟬𝟬𝟬", u"#,##,##0", 11, 78000.},
                 {7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
                 {7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
                 {7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
                 {7, u"1,", u"#,##,##0", 1, 1.},
                 {7, u"1,,", u"#,##,##0", 1, 1.},
                 {7, u"1.,", u"#,##,##0", 2, 1.},
                 {3, u"1,.", u"#,##,##0", 3, 1.},
                 {7, u"1..", u"#,##,##0", 2, 1.},
                 {3, u",1", u"#,##,##0", 2, 1.},
                 {3, u"1,1", u"#,##,##0", 1, 1.},
                 {3, u"1,1,", u"#,##,##0", 1, 1.},
                 {3, u"1,1,,", u"#,##,##0", 1, 1.},
                 {3, u"1,1,1", u"#,##,##0", 1, 1.},
                 {3, u"1,1,1,", u"#,##,##0", 1, 1.},
                 {3, u"1,1,1,,", u"#,##,##0", 1, 1.},
                 {3, u"-51423", u"0", 6, -51423.},
                 {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
                 {3, u"+51423", u"0", 6, 51423.},
                 {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
                 {3, u"%51423", u"0", 6, 51423.},
                 {3, u"51423%", u"0", 6, 51423.},
                 {3, u"51423%%", u"0", 6, 51423.},
                 {3, u"‰51423", u"0", 6, 51423.},
                 {3, u"51423‰", u"0", 6, 51423.},
                 {3, u"51423‰‰", u"0", 6, 51423.},
                 // A single infinity sign: one code unit consumed (an empty input could not
                 // consume anything, so the literal must not be empty).
                 {3, u"∞", u"0", 1, INFINITY},
                 {3, u"-∞", u"0", 2, -INFINITY},
                 {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
                 {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
                 {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
                 {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
                 {3, u"514.23 USD", u"¤0", 10, 514.23},
                 {3, u"514.23 GBP", u"¤0", 10, 514.23},
                 {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
                 {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
                 {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
                 {3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
                 {3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
                 {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
                 {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
                 {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
                 {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
                 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
                 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
                 {3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
                 {3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
                 {3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
                 {3, u"5.142e+3", u"0", 8, 5142.0 },
                 {3, u"5.142\u200Ee+3", u"0", 9, 5142.0},
                 {3, u"5.142e\u200E+3", u"0", 9, 5142.0},
                 {3, u"5.142e+\u200E3", u"0", 9, 5142.0},
                 {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
                 {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
                 {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
                 {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
                 {3, u".00", u"0", 3, 0.0},
                 // 30 leading spaces (three adjacent 10-space literals) + "1,234" = 35 code
                 // units, matching expectedCharsConsumed. Should not hang.
                 {3, u"          " u"          " u"          " u"1,234", u"a0", 35, 1234.},
                 {3, u"NaN", u"0", 3, NAN},
                 {3, u"NaN E5", u"0", 6, NAN},
                 {3, u"0", u"0", 1, 0.0}};

    parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
    for (auto& cas : cases) {
        UnicodeString inputString(cas.inputString);
        UnicodeString patternString(cas.patternString);
        LocalPointer<const NumberParserImpl> parser(
                NumberParserImpl::createSimpleParser(
                        Locale("en"), patternString, parseFlags, status));
        if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
            continue;
        }
        // The message is built once from the non-strict parser and reused by all three paths.
        UnicodeString message =
                UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();

        if (0 != (cas.flags & 0x01)) {
            // Test greedy code path
            ParsedNumber resultObject;
            parser->parse(inputString, true, resultObject, status);
            assertTrue("Greedy Parse failed: " + message, resultObject.success());
            assertEquals("Greedy Parse failed: " + message,
                cas.expectedCharsConsumed, resultObject.charEnd);
            assertEquals("Greedy Parse failed: " + message,
                cas.expectedResultDouble, resultObject.getDouble(status));
        }

        if (0 != (cas.flags & 0x02)) {
            // Test slow code path
            ParsedNumber resultObject;
            parser->parse(inputString, false, resultObject, status);
            assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
            assertEquals(
                    "Non-Greedy Parse failed: " + message,
                    cas.expectedCharsConsumed,
                    resultObject.charEnd);
            assertEquals(
                    "Non-Greedy Parse failed: " + message,
                    cas.expectedResultDouble,
                    resultObject.getDouble(status));
        }

        if (0 != (cas.flags & 0x04)) {
            // Test with strict separators
            parser.adoptInstead(
                    NumberParserImpl::createSimpleParser(
                            Locale("en"),
                            patternString,
                            parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
                            status));
            // Same guard as for the first parser: do not use a parser whose creation failed.
            if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
                continue;
            }
            ParsedNumber resultObject;
            parser->parse(inputString, true, resultObject, status);
            assertTrue("Strict Parse failed: " + message, resultObject.success());
            assertEquals("Strict Parse failed: " + message,
                cas.expectedCharsConsumed, resultObject.charEnd);
            assertEquals("Strict Parse failed: " + message,
                cas.expectedResultDouble, resultObject.getDouble(status));
        }
    }
}
void NumberParserTest::testSeriesMatcher() {
IcuTestErrorCode status(*this, "testSeriesMatcher");
DecimalFormatSymbols symbols("en", status);
if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
return;
}
PlusSignMatcher m0(symbols, false);
MinusSignMatcher m1(symbols, false);
IgnorablesMatcher m2(0);
PercentMatcher m3(symbols);
IgnorablesMatcher m4(0);
ArraySeriesMatcher::MatcherArray matchers(5);
matchers[0] = &m0;
matchers[1] = &m1;
matchers[2] = &m2;
matchers[3] = &m3;
matchers[4] = &m4;
ArraySeriesMatcher series(matchers, 5);
assertFalse("", series.smokeTest(StringSegment(u"x", false)));
assertFalse("", series.smokeTest(StringSegment(u"-", false)));
assertTrue("", series.smokeTest(StringSegment(u"+", false)));
static const struct TestCase {
const char16_t* input;
int32_t expectedOffset;
bool expectedMaybeMore;
} cases[] = {{u"", 0, true},
{u" ", 0, false},
{u"$", 0, false},
{u"+", 0, true},
{u" +", 0, false},
{u"+-", 0, true},
{u"+ -", 0, false},
{u"+- ", 0, true},
{u"+- $", 0, false},
{u"+-%", 3, true},
{u" +- % ", 0, false},
{u"+- % ", 7, true},
{u"+-%$", 3, false}};
for (auto& cas : cases) {
UnicodeString input(cas.input);
StringSegment segment(input, false);
ParsedNumber result;
bool actualMaybeMore = series.match(segment, result, status);
int actualOffset = segment.getOffset();
assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
}
}
void NumberParserTest::testCombinedCurrencyMatcher() {
IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
IgnorablesMatcher ignorables(0);
Locale locale = Locale::getEnglish();
DecimalFormatSymbols dfs(locale, status);
if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
return;
}
dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, {"en", status}, ignorables, "en", 0};
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
NumberParseMatcher& matcher = warehouse.currency(status);
affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
static const struct TestCase {
const char16_t* input;
const char16_t* expectedCurrencyCode;
const char16_t* expectedNoForeignCurrencyCode;
} cases[]{{u"", u"", u""},
{u"FOO", u"", u""},
{u"USD", u"USD", u""},
{u"$", u"USD", u""},
{u"US dollars", u"USD", u""},
{u"eu", u"", u""},
{u"euros", u"EUR", u""},
{u"ICU", u"ICU", u"ICU"},
{u"IU$", u"ICU", u"ICU"}};
for (auto& cas : cases) {
UnicodeString input(cas.input);
{
StringSegment segment(input, false);
ParsedNumber result;
matcher.match(segment, result, status);
assertEquals(
"Parsing " + input,
cas.expectedCurrencyCode,
result.currencyCode);
assertEquals(
"Whole string on " + input,
cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
result.charEnd);
}
{
StringSegment segment(input, false);
ParsedNumber result;
matcherNoForeign.match(segment, result, status);
assertEquals(
"[no foreign] Parsing " + input,
cas.expectedNoForeignCurrencyCode,
result.currencyCode);
assertEquals(
"[no foreign] Whole string on " + input,
cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
result.charEnd);
}
}
}
void NumberParserTest::testAffixPatternMatcher() {
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
Locale locale = Locale::getEnglish();
IgnorablesMatcher ignorables(0);
DecimalFormatSymbols dfs(locale, status);
dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, {"en", status}, ignorables, "en", 0};
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
static const struct TestCase {
bool exactMatch;
const char16_t* affixPattern;
int32_t expectedMatcherLength;
const char16_t* sampleParseableString;
} cases[] = {{false, u"-", 1, u"-"},
{false, u"+-%", 5, u"+-%"},
{true, u"+-%", 3, u"+-%"},
{false, u"ab c", 5, u"a bc"},
{true, u"abc", 3, u"abc"},
{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}};
for (auto& cas : cases) {
UnicodeString affixPattern(cas.affixPattern);
UnicodeString sampleParseableString(cas.sampleParseableString);
int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
bool success;
AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
affixPattern, warehouse, parseFlags, &success, status);
if (!status.errDataIfFailureAndReset("Creation should be successful")) {
// Check that the matcher has the expected number of children
assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
// Check that the matcher works on a sample string
StringSegment segment(sampleParseableString, false);
ParsedNumber result;
matcher.match(segment, result, status);
assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
}
}
}
void NumberParserTest::test20360_BidiOverflow() {
IcuTestErrorCode status(*this, "test20360_BidiOverflow");
UnicodeString inputString;
inputString.append(u'-');
for (int32_t i=0; i<100000; i++) {
inputString.append(u'\u061C');
}
inputString.append(u'5');
LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
return;
}
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Greedy Parse, success", resultObject.success());
assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
resultObject.clear();
parser->parse(inputString, false, resultObject, status);
assertFalse("Non-Greedy Parse, success", resultObject.success());
assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
}
/**
 * Non-greedy parsing of '-' followed by 200 copies of U+061C and a final '5'.
 *
 * With a parser created with no flags, the parse is expected not to succeed and to
 * report charEnd == 1. With PARSE_FLAG_ALLOW_INFINITE_RECURSION, the same input is
 * expected to parse completely (202 code units) to -5.0.
 */
void NumberParserTest::testInfiniteRecursion() {
    IcuTestErrorCode status(*this, "testInfiniteRecursion");

    UnicodeString inputString;
    inputString.append(u'-');
    for (int32_t i = 0; i < 200; i++) {
        inputString.append(u'\u061C');
    }
    inputString.append(u'5');

    LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
    if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
        return;
    }

    ParsedNumber resultObject;
    parser->parse(inputString, false, resultObject, status);
    assertFalse("Default recursion limit, success", resultObject.success());
    assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);

    parser.adoptInstead(NumberParserImpl::createSimpleParser(
            "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
    // Same guard as for the first parser: do not use a parser whose creation failed.
    if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
        return;
    }
    resultObject.clear();
    parser->parse(inputString, false, resultObject, status);
    assertTrue("Unlimited recursion, success", resultObject.success());
    assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
    assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
}
#endif