ICU-21071 Fix lenient parse rules

- Check non-lenient rules before calling lenient parsing
- Remove logKnownIssue 9503 from test code
- Adjust TestAllLocales test on ICU4C
- Add lenient checks on ICU4J
This commit is contained in:
Robert Melo 2020-04-21 18:25:46 -03:00
parent 5944e1856c
commit 440cef61a7
6 changed files with 156 additions and 59 deletions

View File

@ -1297,6 +1297,10 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
#if !UCONFIG_NO_COLLATION
// go through all this grief if we're in lenient-parse mode
if (formatter->isLenient()) {
// Check if non-lenient rule finds the text before call lenient parsing
if (str.startsWith(prefix)) {
return prefix.length();
}
// get the formatter's collator and use it to create two
// collation element iterators, one over the target string
// and another over the prefix (right now, we'll throw an
@ -1505,9 +1509,15 @@ NFRule::findText(const UnicodeString& str,
return str.indexOf(key, startingAt);
}
else {
// but if lenient parsing is turned ON, we've got some work
// ahead of us
return findTextLenient(str, key, startingAt, length);
// Check if non-lenient rule finds the text before call lenient parsing
*length = key.length();
int32_t pos = str.indexOf(key, startingAt);
if(pos >= 0) {
return pos;
} else {
// but if lenient parsing is turned ON, we've got some work ahead of us
return findTextLenient(str, key, startingAt, length);
}
}
}

View File

@ -549,9 +549,15 @@ void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLeni
UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
if (rbnfLenientScanner != NULL) {
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
int32_t length = -1;
currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
// Check if non-lenient rule finds the text before call lenient parsing
int32_t tempIndex = source.indexOf(currArg, startingAt);
if (tempIndex >= 0) {
currMatchIndex = tempIndex;
} else {
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
int32_t length = -1;
currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
}
}
else {
currMatchIndex = source.indexOf(currArg, startingAt);

View File

@ -1889,16 +1889,19 @@ IntlTestRBNF::TestAllLocales()
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* f = new RuleBasedNumberFormat((URBNFRuleSetTag)j, *loc, status);
if (status == U_USING_DEFAULT_WARNING || status == U_USING_FALLBACK_WARNING) {
// Skip it.
delete f;
break;
}
if (U_FAILURE(status)) {
errln(UnicodeString(loc->getName()) + names[j]
+ "ERROR could not instantiate -> " + u_errorName(status));
continue;
}
Locale actualLocale = f->getLocale(ULOC_ACTUAL_LOCALE, status);
if (actualLocale != *loc) {
// Skip the redundancy
delete f;
break;
}
#if !UCONFIG_NO_COLLATION
for (unsigned int numidx = 0; numidx < UPRV_LENGTHOF(numbers); numidx++) {
double n = numbers[numidx];
@ -1936,28 +1939,26 @@ IntlTestRBNF::TestAllLocales()
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
}
}
if (!quick && !logKnownIssue("9503") ) {
// lenient parse
status = U_ZERO_ERROR;
f->setLenient(TRUE);
f->parse(str, num, status);
if (U_FAILURE(status)) {
// lenient parse
status = U_ZERO_ERROR;
f->setLenient(TRUE);
f->parse(str, num, status);
if (U_FAILURE(status)) {
errln(UnicodeString(loc->getName()) + names[j]
+ "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
}
// We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
if (j == 0) {
if (num.getType() == Formattable::kLong && num.getLong() != n) {
errln(UnicodeString(loc->getName()) + names[j]
+ "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
+ UnicodeString("ERROR could not roundtrip ") + n
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
}
// We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
if (j == 0) {
if (num.getType() == Formattable::kLong && num.getLong() != n) {
errln(UnicodeString(loc->getName()) + names[j]
+ UnicodeString("ERROR could not roundtrip ") + n
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
}
else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
// The epsilon difference is too high.
errln(UnicodeString(loc->getName()) + names[j]
+ UnicodeString("ERROR could not roundtrip ") + n
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
}
else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
// The epsilon difference is too high.
errln(UnicodeString(loc->getName()) + names[j]
+ UnicodeString("ERROR could not roundtrip ") + n
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
}
}
}

View File

@ -1241,6 +1241,10 @@ final class NFRule {
RbnfLenientScanner scanner = formatter.getLenientScanner();
if (scanner != null) {
// Check if non-lenient rule finds the text before call lenient parsing
if (str.startsWith(prefix)) {
return prefix.length();
}
return scanner.prefixLength(str, prefix);
}
@ -1290,9 +1294,14 @@ final class NFRule {
}
if (scanner != null) {
// if lenient parsing is turned ON, we've got some work
// ahead of us
return scanner.findText(str, key, startingAt);
// Check if non-lenient rule finds the text before call lenient parsing
int pos[] = new int[] { str.indexOf(key, startingAt), key.length() };
if (pos[0] >= 0) {
return pos;
} else {
// if lenient parsing is turned ON, we've got some work ahead of us
return scanner.findText(str, key, startingAt);
}
}
// if lenient parsing is turned off, this is easy. Just call
// String.indexOf() and we're done

View File

@ -760,9 +760,15 @@ public class PluralFormat extends UFormat {
String currArg = pattern.substring(partStart.getLimit(), partLimit.getIndex());
if (scanner != null) {
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
int[] scannerMatchResult = scanner.findText(source, currArg, startingAt);
currMatchIndex = scannerMatchResult[0];
// Check if non-lenient rule finds the text before call lenient parsing
int tempPos = source.indexOf(currArg, startingAt);
if (tempPos >= 0) {
currMatchIndex = tempPos;
} else {
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
int[] scannerMatchResult = scanner.findText(source, currArg, startingAt);
currMatchIndex = scannerMatchResult[0];
}
}
else {
currMatchIndex = source.indexOf(currArg, startingAt);

View File

@ -296,6 +296,17 @@ public class RbnfTest extends TestFmwk {
};
doTest(formatter, testData, true);
String[][] testDataLenient = {
{ "fifty-7", "57" },
{ " fifty-7", "57" },
{ " fifty-7", "57" },
{ "2 thousand six HUNDRED fifty-7", "2,657" },
{ "fifteen hundred and zero", "1,500" },
{ "FOurhundred thiRTY six", "436" }
};
doParsingTest(formatter, testDataLenient, true);
}
/**
@ -350,6 +361,12 @@ public class RbnfTest extends TestFmwk {
};
doTest(formatter, testData, true);
String[][] testDataLenient = {
{ "2-51-33", "10,293" },
};
doParsingTest(formatter, testDataLenient, true);
}
/**
@ -425,6 +442,13 @@ public class RbnfTest extends TestFmwk {
};
doTest(formatter, testData, true);
String[][] testDataLenient = {
{ "trente-et-un", "31" },
{ "un cent quatre vingt dix huit", "198" },
};
doParsingTest(formatter, testDataLenient, true);
}
/**
@ -529,6 +553,12 @@ public class RbnfTest extends TestFmwk {
};
doTest(formatter, testData, true);
String[][] testDataLenient = {
{ "ein Tausend sechs Hundert fuenfunddreissig", "1,635" },
};
doParsingTest(formatter, testDataLenient, true);
}
/**
@ -1117,6 +1147,10 @@ public class RbnfTest extends TestFmwk {
" (ordinal) "
//" (duration) " // English only
};
boolean[] lenientMode = {
false, // non-lenient mode
true // lenient mode
};
double[] numbers = {45.678, 1, 2, 10, 11, 100, 110, 200, 1000, 1111, -1111};
int count = numbers.length;
Random r = (count <= numbers.length ? null : createRandom());
@ -1142,25 +1176,25 @@ public class RbnfTest extends TestFmwk {
logln(loc.getName() + names[j] + "success format: " + n + " -> " + s);
}
try {
// RBNF parse is extremely slow when lenient option is enabled.
// non-lenient parse
fmt.setLenientParseMode(false);
Number num = fmt.parse(s);
if (isVerbose()) {
logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num);
for (int k = 0; k < lenientMode.length; k++) {
try {
fmt.setLenientParseMode(lenientMode[k]);
Number num = fmt.parse(s);
if (isVerbose()) {
logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num);
}
if (j != 0) {
// TODO: Fix the ordinal rules.
continue;
}
if (n != num.doubleValue()) {
errors.append("\n" + loc + names[j] + "got " + num + " expected " + n);
}
} catch (ParseException pe) {
String msg = loc.getName() + names[j] + "ERROR:" + pe.getMessage();
logln(msg);
errors.append("\n" + msg);
}
if (j != 0) {
// TODO: Fix the ordinal rules.
continue;
}
if (n != num.doubleValue()) {
errors.append("\n" + loc + names[j] + "got " + num + " expected " + n);
}
} catch (ParseException pe) {
String msg = loc.getName() + names[j] + "ERROR:" + pe.getMessage();
logln(msg);
errors.append("\n" + msg);
}
}
}
@ -1170,10 +1204,12 @@ public class RbnfTest extends TestFmwk {
}
}
void doTest(RuleBasedNumberFormat formatter, String[][] testData,
boolean testParsing) {
// NumberFormat decFmt = NumberFormat.getInstance(Locale.US);
NumberFormat decFmt = new DecimalFormat("#,###.################");
/**
 * Creates the decimal formatter the parsing helpers use to turn the
 * expected-value strings of a test table (e.g. "2,657") into numbers.
 *
 * @return a new DecimalFormat built from a grouped pattern that allows up to
 *         sixteen fraction digits
 */
NumberFormat createDecimalFormatter() {
    final String expectedValuePattern = "#,###.################";
    return new DecimalFormat(expectedValuePattern);
}
void doTest(RuleBasedNumberFormat formatter, String[][] testData, boolean testParsing) {
NumberFormat decFmt = createDecimalFormatter();
try {
for (int i = 0; i < testData.length; i++) {
String number = testData[i][0];
@ -1207,6 +1243,35 @@ public class RbnfTest extends TestFmwk {
}
}
/**
 * Parses every spelled-out input of a test table with the given formatter
 * and reports an error whenever the parsed value differs from the expected one.
 *
 * @param formatter the rule-based formatter under test
 * @param testData  rows of { text to parse, expected value written as a decimal string }
 * @param lenient   when true, lenient parse mode is switched on before parsing;
 *                  when false, the formatter's current parse mode is left untouched
 */
void doParsingTest(RuleBasedNumberFormat formatter, String[][] testData, boolean lenient) {
    final NumberFormat expectedValueParser = createDecimalFormatter();

    if (lenient) {
        formatter.setLenientParseMode(true);
    }

    for (int row = 0; row < testData.length; ++row) {
        try {
            final String[] testCase = testData[row];
            final String spelledOut = testCase[0];
            // The expected value is written as a decimal string, so it is parsed
            // with the plain decimal formatter rather than the formatter under test.
            final Number expected = expectedValueParser.parse(testCase[1]);
            if (isVerbose()) {
                logln("test[" + row + "] spellout value: (" + spelledOut + ") target: " + expected);
            }

            final Number parsed = formatter.parse(spelledOut);
            if (isVerbose()) {
                logln("success parse: (" + spelledOut + ") -> " + parsed);
            }

            // Values are compared as doubles, whatever Number subtype each parser returned.
            final double expectedValue = expected.doubleValue();
            final double parsedValue = parsed.doubleValue();
            if (expectedValue != parsedValue) {
                errln("\nParsing (" + spelledOut + ") failed: got " + parsed + " expected " + expected);
            }
        } catch (Throwable failure) {
            // Any failure in one row is reported and the remaining rows are still attempted.
            failure.printStackTrace();
            errln("Test failed with exception: " + failure);
        }
    }
}
/* Tests the method
* public boolean equals(Object that)
*/