ICU-5909 lenient number formatting, with tests

X-SVN-Rev: 23110
This commit is contained in:
Mark Davis 2007-12-18 23:04:19 +00:00
parent bebc001574
commit 7347bcee9c
4 changed files with 133 additions and 26 deletions

View File

@ -1110,19 +1110,21 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
/*6*/ "perr:", // <pattern or '-'> <invalid string>
/*7*/ "pat:", // <pattern or '-'> <exp. toPattern or '-' or 'err'>
/*8*/ "fpc:", // <loc or '-'> <curr.amt> <exp. string> <exp. curr.amt>
/*9*/ "strict=", // true or false
};
public void TestCases() {
String caseFileName = "NumberFormatTestCases.txt";
java.io.InputStream is = NumberFormatTest.class.getResourceAsStream(caseFileName);
ResourceReader reader = new ResourceReader(is, caseFileName);
ResourceReader reader = new ResourceReader(is, caseFileName, "utf-8");
TokenIterator tokens = new TokenIterator(reader);
Locale loc = new Locale("en", "US", "");
DecimalFormat ref = null, fmt = null;
MeasureFormat mfmt = null;
String pat = null, str = null, mloc = null;
boolean strict = false;
try {
for (;;) {
@ -1137,10 +1139,14 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
// ref= <reference pattern>
ref = new DecimalFormat(tokens.next(),
new DecimalFormatSymbols(Locale.US));
ref.setParseStrict(strict);
logln("Setting reference pattern to:\t" + ref);
break;
case 1:
// loc= <locale>
loc = LocaleUtility.getLocaleFromName(tokens.next());
pat = ((DecimalFormat) NumberFormat.getInstance(loc)).toPattern();
logln("Setting locale to:\t" + loc + ", \tand pattern to:\t" + pat);
break;
case 2: // f:
case 3: // fp:
@ -1149,18 +1155,19 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
tok = tokens.next();
if (!tok.equals("-")) {
pat = tok;
try {
fmt = new DecimalFormat(pat, new DecimalFormatSymbols(loc));
} catch (IllegalArgumentException iae) {
errln(where + "Pattern \"" + pat + '"');
iae.printStackTrace();
tokens.next(); // consume remaining tokens
tokens.next();
if (cmd == 3) tokens.next();
continue;
}
}
str = null;
try {
fmt = new DecimalFormat(pat, new DecimalFormatSymbols(loc));
fmt.setParseStrict(strict);
} catch (IllegalArgumentException iae) {
errln(where + "Pattern \"" + pat + '"');
iae.printStackTrace();
tokens.next(); // consume remaining tokens
//tokens.next();
if (cmd == 3) tokens.next();
continue;
}
str = null;
try {
if (cmd == 2 || cmd == 3 || cmd == 4) {
// f: <pattern or '-'> <number> <exp. string>
@ -1183,9 +1190,10 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
else {
str = tokens.next();
String expstr = tokens.next();
Number parsed = fmt.parse(str);
Number exp = (Number) ref.parse(expstr);
assertEquals(where + '"' + pat + "\".parse(\"" + str + "\")",
exp, fmt.parse(str));
exp, parsed);
}
} catch (ParseException e) {
errln(where + '"' + pat + "\".parse(\"" + str +
@ -1216,6 +1224,7 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
f = fmt;
} else {
f = new DecimalFormat(testpat);
f.setParseStrict(strict);
}
if (err) {
errln(where + "Invalid pattern \"" + testpat +
@ -1268,6 +1277,10 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
e.printStackTrace();
}
break;
case 9: // strict= true or false
strict = "true".equalsIgnoreCase(tokens.next());
logln("Setting strict to:\t" + strict);
break;
case -1:
errln("Unknown command \"" + tok + "\" at " + tokens.describePosition());
return;

View File

@ -1,4 +1,4 @@
######################################################################
######################################################################
# Copyright (c) 2004, International Business Machines
# Corporation and others. All Rights Reserved.
######################################################################
@ -73,7 +73,7 @@ rt: "" -123.456 "-123.456"
# Currency
fpc: "en_US" 1234.56/USD "$1,234.56" 1234.56/USD
fpc: - 1234.56/JPY "\u00A51,235" 1235/JPY
fpc: - 1234.56/JPY "¥1,235" 1235/JPY
# ISO codes that overlap display names (QQQ vs. Q)
fpc: - 123/QQQ "QQQ123.00" 123/QQQ # QQQ is fake
fpc: - 123/GTQ "Q123.00" 123/GTQ
@ -83,3 +83,38 @@ fpc: - 2/INR "Rs.2.00" 2/INR
# Display names with shared prefix (YDD vs. Y)
fpc: - 100/YDD "YDD100.00" 100/YDD
fpc: - 100/CNY "Y100.00" 100/CNY
# Lenient Tests
loc= "en"
p: - "1,234.56" 1234.56
p: - "1'234.56" 1234.56
p: - "1 234.56" 1234.56
p: - "1٬234.56" 1234.56
p: - "123456" 1234.56
p: - "1.234.56" 1.234
p: - "1、234。56" 1234.56
loc= "fr"
p: - "1.234,56" 1234.56
p: - "1'234,56" 1234.56
p: - "1 234,56" 1234.56
p: - "1,234,56" 1.234
p: - "1。234、56" 1234.56
loc= "ar"
p: - "1.234٫56" 1234.56
p: - "1'234،56" 1234.56
p: - "1٬234،56" 1234.56
p: - "1.234,56" 1234.56
p: - "1'234,56" 1234.56
p: - "1٬234,56" 1234.56
strict= true
loc= "en"
p: - "1、234。56" 1
loc= "fr"
p: - "1。234、56" 1
loc= "ar"
p: - "1'234،56" 1234
p: - "1٬234،56" 1234

View File

@ -81,16 +81,16 @@ public class ResourceReader {
_reset();
}
/**
* Construct a reader object for the input stream associated with
* the given resource name.
* @param is the input stream of the resource
* @param resourceName the name of the resource
*/
public ResourceReader(InputStream is, String resourceName) {
this.root = null;
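/**
* Construct a reader object for the input stream associated with
* the given resource name.
* @param is the input stream of the resource
* @param resourceName the name of the resource
* @param encoding the name of the encoding for this reader, or
* <code>null</code> when none is specified
*/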
public ResourceReader(InputStream is, String resourceName, String encoding) {
this.root = null;
this.resourceName = resourceName;
this.encoding = null;
this.encoding = encoding;
this.lineNo = -1;
try {
@ -105,6 +105,16 @@ public class ResourceReader {
}
}
/**
* Construct a reader object for the input stream associated with
* the given resource name, without naming an encoding.
* This is a convenience overload: it delegates to
* {@link #ResourceReader(InputStream, String, String)} and passes
* <code>null</code> as the encoding.
* @param is the input stream of the resource
* @param resourceName the name of the resource
*/
public ResourceReader(InputStream is, String resourceName) {
this(is, resourceName, null);
}
/**
* Construct a reader object for the text file of the given name
* in the given class's package, using the default encoding.

View File

@ -1709,6 +1709,24 @@ public class DecimalFormat extends NumberFormat {
private static final int STATUS_INFINITE = 0;
private static final int STATUS_POSITIVE = 1;
private static final int STATUS_LENGTH = 2;
// Separator equivalence sets consulted while parsing (see getSimilarDecimals
// and the decimalSet/groupingSet setup in the parse code). Each set is frozen
// right after construction, so the shared instances are never modified.
//
// Lenient set of characters treated as interchangeable with a '.' separator.
private static final UnicodeSet dotEquivalents =(UnicodeSet) new UnicodeSet(
"[.\u2024\u3002\uFE12\uFE52\uFF0E\uFF61]").freeze();
// Lenient set of characters treated as interchangeable with a ',' separator.
private static final UnicodeSet commaEquivalents = (UnicodeSet) new UnicodeSet(
"[,\u060C\u066B\u3001\uFE10\uFE11\uFE50\uFE51\uFF0C\uFF64]").freeze();
// Further characters accepted as grouping separators in lenient mode: an
// escaped space, the apostrophe, and the space-like/quote-like code points
// listed in the pattern.
private static final UnicodeSet otherGroupingSeparators = (UnicodeSet) new UnicodeSet(
"[\\ '\u00A0\u066C\u2000-\u200A\u2018\u2019\u202F\u205F\u3000\uFF07]").freeze();
// Strict counterpart of dotEquivalents: the same pattern without U+3002 and U+FE12.
private static final UnicodeSet strictDotEquivalents =(UnicodeSet) new UnicodeSet(
"[.\u2024\uFE52\uFF0E\uFF61]").freeze();
// Strict counterpart of commaEquivalents: the same pattern without U+060C,
// U+3001, U+FE11, U+FE51 and U+FF64.
private static final UnicodeSet strictCommaEquivalents = (UnicodeSet) new UnicodeSet(
"[,\u066B\uFE10\uFE50\uFF0C]").freeze();
// Strict counterpart of otherGroupingSeparators; currently the identical pattern.
private static final UnicodeSet strictOtherGroupingSeparators = (UnicodeSet) new UnicodeSet(
"[\\ '\u00A0\u066C\u2000-\u200A\u2018\u2019\u202F\u205F\u3000\uFF07]").freeze();
// Union of the three lenient sets above. Static initializers run in textual
// order, so this must stay below the sets it combines.
private static final UnicodeSet defaultGroupingSeparators = (UnicodeSet) new UnicodeSet(
dotEquivalents).addAll(commaEquivalents).addAll(otherGroupingSeparators).freeze();
// Union of the three strict sets above (same ordering requirement).
private static final UnicodeSet strictDefaultGroupingSeparators = (UnicodeSet) new UnicodeSet(
strictDotEquivalents).addAll(strictCommaEquivalents).addAll(strictOtherGroupingSeparators).freeze();
/**
* <strong><font face=helvetica color=red>CHANGED</font></strong>
@ -1783,6 +1801,7 @@ public class DecimalFormat extends NumberFormat {
char decimal = isCurrencyFormat ?
symbols.getMonetaryDecimalSeparator() : symbols.getDecimalSeparator();
char grouping = symbols.getGroupingSeparator();
String exponentSep = symbols.getExponentSeparator();
boolean sawDecimal = false;
boolean sawExponent = false;
@ -1797,6 +1816,19 @@ public class DecimalFormat extends NumberFormat {
int lastGroup = -1; // where did we last see a grouping separator?
int prevGroup = -1; // where did we see the grouping separator before that?
int gs2 = groupingSize2 == 0 ? groupingSize : groupingSize2;
// equivalent grouping and decimal support
// TODO markdavis Cache these if it makes a difference in performance.
UnicodeSet decimalSet = new UnicodeSet(getSimilarDecimals(decimal, strictParse));
UnicodeSet groupingSet = new UnicodeSet(strictParse ? strictDefaultGroupingSeparators : defaultGroupingSeparators)
.add(grouping).removeAll(decimalSet);
// we are guaranteed that
// decimalSet contains the decimal, and
// groupingSet contains the groupingSeparator
// (unless decimal and grouping are the same, which should never happen. But in that case, groupingSet will just be empty.)
// We have to track digitCount ourselves, because digits.count will
// pin when the maximum allowable digits is reached.
@ -1894,7 +1926,7 @@ public class DecimalFormat extends NumberFormat {
// Cancel out backup setting (see grouping handler below)
backup = -1;
}
else if (!isExponent && ch == decimal)
else if (!isExponent && decimalSet.contains(ch))
{
if (strictParse) {
if (backup != -1 ||
@ -1910,7 +1942,7 @@ public class DecimalFormat extends NumberFormat {
sawDecimal = true;
leadingZero = false; // a single leading zero before a decimal is ok
}
else if (!isExponent && ch == grouping && isGroupingUsed())
else if (!isExponent && isGroupingUsed() && groupingSet.contains(ch))
{
if (sawDecimal) {
break;
@ -2067,6 +2099,23 @@ public class DecimalFormat extends NumberFormat {
return true;
}
/**
 * Returns the set of characters to accept wherever the given decimal
 * separator may appear in parsed text.
 * <p>
 * Membership is tested against the equivalence sets of the requested mode,
 * so the result always contains <code>decimal</code> itself. The strict sets
 * are subsets of the lenient ones; testing against the lenient set and then
 * returning the strict one could yield a set that omits the separator being
 * looked up (for example U+3002 or U+060C), making that separator
 * unparseable in strict mode.
 *
 * @param decimal the decimal separator character in effect for the parse
 * @param strictParse <code>true</code> to use the strict equivalence sets,
 *        <code>false</code> to use the lenient ones
 * @return the shared, frozen dot or comma equivalence set for the requested
 *         mode when it contains <code>decimal</code>; otherwise a new set
 *         holding only <code>decimal</code>. The shared sets are frozen, so
 *         callers that want to modify the result must copy it first.
 */
private UnicodeSet getSimilarDecimals(char decimal, boolean strictParse) {
    UnicodeSet dots = strictParse ? strictDotEquivalents : dotEquivalents;
    if (dots.contains(decimal)) {
        return dots;
    }
    UnicodeSet commas = strictParse ? strictCommaEquivalents : commaEquivalents;
    if (commas.contains(decimal)) {
        return commas;
    }
    // No equivalence class applies in this mode: accept only the character itself.
    return new UnicodeSet().add(decimal);
}
/**
* Starting at position, advance past a run of pad characters, if any.
* Return the index of the first character after position that is not a pad