ICU-21071 Fix lenient parse rules
- Check non-lenient rules before call lenint parsing - Remove logKnownIssue 9503 from test code - Adjust TestAllLocales test on ICU4C - Add lenient checks on ICU4J
This commit is contained in:
parent
5944e1856c
commit
440cef61a7
@ -1297,6 +1297,10 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
// go through all this grief if we're in lenient-parse mode
|
||||
if (formatter->isLenient()) {
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
if (str.startsWith(prefix)) {
|
||||
return prefix.length();
|
||||
}
|
||||
// get the formatter's collator and use it to create two
|
||||
// collation element iterators, one over the target string
|
||||
// and another over the prefix (right now, we'll throw an
|
||||
@ -1505,9 +1509,15 @@ NFRule::findText(const UnicodeString& str,
|
||||
return str.indexOf(key, startingAt);
|
||||
}
|
||||
else {
|
||||
// but if lenient parsing is turned ON, we've got some work
|
||||
// ahead of us
|
||||
return findTextLenient(str, key, startingAt, length);
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
*length = key.length();
|
||||
int32_t pos = str.indexOf(key, startingAt);
|
||||
if(pos >= 0) {
|
||||
return pos;
|
||||
} else {
|
||||
// but if lenient parsing is turned ON, we've got some work ahead of us
|
||||
return findTextLenient(str, key, startingAt, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -549,9 +549,15 @@ void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLeni
|
||||
|
||||
UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
|
||||
if (rbnfLenientScanner != NULL) {
|
||||
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
|
||||
int32_t length = -1;
|
||||
currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
int32_t tempIndex = source.indexOf(currArg, startingAt);
|
||||
if (tempIndex >= 0) {
|
||||
currMatchIndex = tempIndex;
|
||||
} else {
|
||||
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
|
||||
int32_t length = -1;
|
||||
currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
|
||||
}
|
||||
}
|
||||
else {
|
||||
currMatchIndex = source.indexOf(currArg, startingAt);
|
||||
|
@ -1889,16 +1889,19 @@ IntlTestRBNF::TestAllLocales()
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* f = new RuleBasedNumberFormat((URBNFRuleSetTag)j, *loc, status);
|
||||
|
||||
if (status == U_USING_DEFAULT_WARNING || status == U_USING_FALLBACK_WARNING) {
|
||||
// Skip it.
|
||||
delete f;
|
||||
break;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ "ERROR could not instantiate -> " + u_errorName(status));
|
||||
continue;
|
||||
}
|
||||
|
||||
Locale actualLocale = f->getLocale(ULOC_ACTUAL_LOCALE, status);
|
||||
if (actualLocale != *loc) {
|
||||
// Skip the redundancy
|
||||
delete f;
|
||||
break;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
for (unsigned int numidx = 0; numidx < UPRV_LENGTHOF(numbers); numidx++) {
|
||||
double n = numbers[numidx];
|
||||
@ -1936,28 +1939,26 @@ IntlTestRBNF::TestAllLocales()
|
||||
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
|
||||
}
|
||||
}
|
||||
if (!quick && !logKnownIssue("9503") ) {
|
||||
// lenient parse
|
||||
status = U_ZERO_ERROR;
|
||||
f->setLenient(TRUE);
|
||||
f->parse(str, num, status);
|
||||
if (U_FAILURE(status)) {
|
||||
// lenient parse
|
||||
status = U_ZERO_ERROR;
|
||||
f->setLenient(TRUE);
|
||||
f->parse(str, num, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
|
||||
}
|
||||
// We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
|
||||
if (j == 0) {
|
||||
if (num.getType() == Formattable::kLong && num.getLong() != n) {
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
|
||||
+ UnicodeString("ERROR could not roundtrip ") + n
|
||||
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
|
||||
}
|
||||
// We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
|
||||
if (j == 0) {
|
||||
if (num.getType() == Formattable::kLong && num.getLong() != n) {
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ UnicodeString("ERROR could not roundtrip ") + n
|
||||
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
|
||||
}
|
||||
else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
|
||||
// The epsilon difference is too high.
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ UnicodeString("ERROR could not roundtrip ") + n
|
||||
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
|
||||
}
|
||||
else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
|
||||
// The epsilon difference is too high.
|
||||
errln(UnicodeString(loc->getName()) + names[j]
|
||||
+ UnicodeString("ERROR could not roundtrip ") + n
|
||||
+ UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1241,6 +1241,10 @@ final class NFRule {
|
||||
|
||||
RbnfLenientScanner scanner = formatter.getLenientScanner();
|
||||
if (scanner != null) {
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
if (str.startsWith(prefix)) {
|
||||
return prefix.length();
|
||||
}
|
||||
return scanner.prefixLength(str, prefix);
|
||||
}
|
||||
|
||||
@ -1290,9 +1294,14 @@ final class NFRule {
|
||||
}
|
||||
|
||||
if (scanner != null) {
|
||||
// if lenient parsing is turned ON, we've got some work
|
||||
// ahead of us
|
||||
return scanner.findText(str, key, startingAt);
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
int pos[] = new int[] { str.indexOf(key, startingAt), key.length() };
|
||||
if (pos[0] >= 0) {
|
||||
return pos;
|
||||
} else {
|
||||
// if lenient parsing is turned ON, we've got some work ahead of us
|
||||
return scanner.findText(str, key, startingAt);
|
||||
}
|
||||
}
|
||||
// if lenient parsing is turned off, this is easy. Just call
|
||||
// String.indexOf() and we're done
|
||||
|
@ -760,9 +760,15 @@ public class PluralFormat extends UFormat {
|
||||
|
||||
String currArg = pattern.substring(partStart.getLimit(), partLimit.getIndex());
|
||||
if (scanner != null) {
|
||||
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
|
||||
int[] scannerMatchResult = scanner.findText(source, currArg, startingAt);
|
||||
currMatchIndex = scannerMatchResult[0];
|
||||
// Check if non-lenient rule finds the text before call lenient parsing
|
||||
int tempPos = source.indexOf(currArg, startingAt);
|
||||
if (tempPos >= 0) {
|
||||
currMatchIndex = tempPos;
|
||||
} else {
|
||||
// If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
|
||||
int[] scannerMatchResult = scanner.findText(source, currArg, startingAt);
|
||||
currMatchIndex = scannerMatchResult[0];
|
||||
}
|
||||
}
|
||||
else {
|
||||
currMatchIndex = source.indexOf(currArg, startingAt);
|
||||
|
@ -296,6 +296,17 @@ public class RbnfTest extends TestFmwk {
|
||||
};
|
||||
|
||||
doTest(formatter, testData, true);
|
||||
|
||||
String[][] testDataLenient = {
|
||||
{ "fifty-7", "57" },
|
||||
{ " fifty-7", "57" },
|
||||
{ " fifty-7", "57" },
|
||||
{ "2 thousand six HUNDRED fifty-7", "2,657" },
|
||||
{ "fifteen hundred and zero", "1,500" },
|
||||
{ "FOurhundred thiRTY six", "436" }
|
||||
};
|
||||
|
||||
doParsingTest(formatter, testDataLenient, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -350,6 +361,12 @@ public class RbnfTest extends TestFmwk {
|
||||
};
|
||||
|
||||
doTest(formatter, testData, true);
|
||||
|
||||
String[][] testDataLenient = {
|
||||
{ "2-51-33", "10,293" },
|
||||
};
|
||||
|
||||
doParsingTest(formatter, testDataLenient, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -425,6 +442,13 @@ public class RbnfTest extends TestFmwk {
|
||||
};
|
||||
|
||||
doTest(formatter, testData, true);
|
||||
|
||||
String[][] testDataLenient = {
|
||||
{ "trente-et-un", "31" },
|
||||
{ "un cent quatre vingt dix huit", "198" },
|
||||
};
|
||||
|
||||
doParsingTest(formatter, testDataLenient, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -529,6 +553,12 @@ public class RbnfTest extends TestFmwk {
|
||||
};
|
||||
|
||||
doTest(formatter, testData, true);
|
||||
|
||||
String[][] testDataLenient = {
|
||||
{ "ein Tausend sechs Hundert fuenfunddreissig", "1,635" },
|
||||
};
|
||||
|
||||
doParsingTest(formatter, testDataLenient, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1117,6 +1147,10 @@ public class RbnfTest extends TestFmwk {
|
||||
" (ordinal) "
|
||||
//" (duration) " // English only
|
||||
};
|
||||
boolean[] lenientMode = {
|
||||
false, // non-lenient mode
|
||||
true // lenient mode
|
||||
};
|
||||
double[] numbers = {45.678, 1, 2, 10, 11, 100, 110, 200, 1000, 1111, -1111};
|
||||
int count = numbers.length;
|
||||
Random r = (count <= numbers.length ? null : createRandom());
|
||||
@ -1142,25 +1176,25 @@ public class RbnfTest extends TestFmwk {
|
||||
logln(loc.getName() + names[j] + "success format: " + n + " -> " + s);
|
||||
}
|
||||
|
||||
try {
|
||||
// RBNF parse is extremely slow when lenient option is enabled.
|
||||
// non-lenient parse
|
||||
fmt.setLenientParseMode(false);
|
||||
Number num = fmt.parse(s);
|
||||
if (isVerbose()) {
|
||||
logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num);
|
||||
for (int k = 0; k < lenientMode.length; k++) {
|
||||
try {
|
||||
fmt.setLenientParseMode(lenientMode[k]);
|
||||
Number num = fmt.parse(s);
|
||||
if (isVerbose()) {
|
||||
logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num);
|
||||
}
|
||||
if (j != 0) {
|
||||
// TODO: Fix the ordinal rules.
|
||||
continue;
|
||||
}
|
||||
if (n != num.doubleValue()) {
|
||||
errors.append("\n" + loc + names[j] + "got " + num + " expected " + n);
|
||||
}
|
||||
} catch (ParseException pe) {
|
||||
String msg = loc.getName() + names[j] + "ERROR:" + pe.getMessage();
|
||||
logln(msg);
|
||||
errors.append("\n" + msg);
|
||||
}
|
||||
if (j != 0) {
|
||||
// TODO: Fix the ordinal rules.
|
||||
continue;
|
||||
}
|
||||
if (n != num.doubleValue()) {
|
||||
errors.append("\n" + loc + names[j] + "got " + num + " expected " + n);
|
||||
}
|
||||
} catch (ParseException pe) {
|
||||
String msg = loc.getName() + names[j] + "ERROR:" + pe.getMessage();
|
||||
logln(msg);
|
||||
errors.append("\n" + msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1170,10 +1204,12 @@ public class RbnfTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
void doTest(RuleBasedNumberFormat formatter, String[][] testData,
|
||||
boolean testParsing) {
|
||||
// NumberFormat decFmt = NumberFormat.getInstance(Locale.US);
|
||||
NumberFormat decFmt = new DecimalFormat("#,###.################");
|
||||
NumberFormat createDecimalFormatter() {
|
||||
return new DecimalFormat("#,###.################");
|
||||
}
|
||||
|
||||
void doTest(RuleBasedNumberFormat formatter, String[][] testData, boolean testParsing) {
|
||||
NumberFormat decFmt = createDecimalFormatter();
|
||||
try {
|
||||
for (int i = 0; i < testData.length; i++) {
|
||||
String number = testData[i][0];
|
||||
@ -1207,6 +1243,35 @@ public class RbnfTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
void doParsingTest(RuleBasedNumberFormat formatter, String[][] testData, boolean lenient) {
|
||||
NumberFormat decFmt = createDecimalFormatter();
|
||||
|
||||
if (lenient) {
|
||||
formatter.setLenientParseMode(true);
|
||||
}
|
||||
for (int i = 0; i < testData.length; i++) {
|
||||
try {
|
||||
String s = testData[i][0];
|
||||
Number expectedNumber = decFmt.parse(testData[i][1]);
|
||||
if (isVerbose()) {
|
||||
logln("test[" + i + "] spellout value: (" + s + ") target: " + expectedNumber);
|
||||
}
|
||||
|
||||
Number num = formatter.parse(s);
|
||||
if (isVerbose()) {
|
||||
logln("success parse: (" + s + ") -> " + num);
|
||||
}
|
||||
|
||||
if (expectedNumber.doubleValue() != num.doubleValue()) {
|
||||
errln("\nParsing (" + s + ") failed: got " + num + " expected " + expectedNumber);
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
errln("Test failed with exception: " + e.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Tests the method
|
||||
* public boolean equals(Object that)
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user