ICU-5909 lenient number formatting, with tests
X-SVN-Rev: 23110
This commit is contained in:
parent
bebc001574
commit
7347bcee9c
@ -1110,19 +1110,21 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
/*6*/ "perr:", // <pattern or '-'> <invalid string>
|
||||
/*7*/ "pat:", // <pattern or '-'> <exp. toPattern or '-' or 'err'>
|
||||
/*8*/ "fpc:", // <loc or '-'> <curr.amt> <exp. string> <exp. curr.amt>
|
||||
/*9*/ "strict=", // true or false
|
||||
};
|
||||
|
||||
public void TestCases() {
|
||||
String caseFileName = "NumberFormatTestCases.txt";
|
||||
java.io.InputStream is = NumberFormatTest.class.getResourceAsStream(caseFileName);
|
||||
|
||||
ResourceReader reader = new ResourceReader(is, caseFileName);
|
||||
ResourceReader reader = new ResourceReader(is, caseFileName, "utf-8");
|
||||
TokenIterator tokens = new TokenIterator(reader);
|
||||
|
||||
Locale loc = new Locale("en", "US", "");
|
||||
DecimalFormat ref = null, fmt = null;
|
||||
MeasureFormat mfmt = null;
|
||||
String pat = null, str = null, mloc = null;
|
||||
boolean strict = false;
|
||||
|
||||
try {
|
||||
for (;;) {
|
||||
@ -1137,10 +1139,14 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
// ref= <reference pattern>
|
||||
ref = new DecimalFormat(tokens.next(),
|
||||
new DecimalFormatSymbols(Locale.US));
|
||||
ref.setParseStrict(strict);
|
||||
logln("Setting reference pattern to:\t" + ref);
|
||||
break;
|
||||
case 1:
|
||||
// loc= <locale>
|
||||
loc = LocaleUtility.getLocaleFromName(tokens.next());
|
||||
pat = ((DecimalFormat) NumberFormat.getInstance(loc)).toPattern();
|
||||
logln("Setting locale to:\t" + loc + ", \tand pattern to:\t" + pat);
|
||||
break;
|
||||
case 2: // f:
|
||||
case 3: // fp:
|
||||
@ -1149,18 +1155,19 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
tok = tokens.next();
|
||||
if (!tok.equals("-")) {
|
||||
pat = tok;
|
||||
try {
|
||||
fmt = new DecimalFormat(pat, new DecimalFormatSymbols(loc));
|
||||
} catch (IllegalArgumentException iae) {
|
||||
errln(where + "Pattern \"" + pat + '"');
|
||||
iae.printStackTrace();
|
||||
tokens.next(); // consume remaining tokens
|
||||
tokens.next();
|
||||
if (cmd == 3) tokens.next();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
str = null;
|
||||
try {
|
||||
fmt = new DecimalFormat(pat, new DecimalFormatSymbols(loc));
|
||||
fmt.setParseStrict(strict);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
errln(where + "Pattern \"" + pat + '"');
|
||||
iae.printStackTrace();
|
||||
tokens.next(); // consume remaining tokens
|
||||
//tokens.next();
|
||||
if (cmd == 3) tokens.next();
|
||||
continue;
|
||||
}
|
||||
str = null;
|
||||
try {
|
||||
if (cmd == 2 || cmd == 3 || cmd == 4) {
|
||||
// f: <pattern or '-'> <number> <exp. string>
|
||||
@ -1183,9 +1190,10 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
else {
|
||||
str = tokens.next();
|
||||
String expstr = tokens.next();
|
||||
Number parsed = fmt.parse(str);
|
||||
Number exp = (Number) ref.parse(expstr);
|
||||
assertEquals(where + '"' + pat + "\".parse(\"" + str + "\")",
|
||||
exp, fmt.parse(str));
|
||||
exp, parsed);
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
errln(where + '"' + pat + "\".parse(\"" + str +
|
||||
@ -1216,6 +1224,7 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
f = fmt;
|
||||
} else {
|
||||
f = new DecimalFormat(testpat);
|
||||
f.setParseStrict(strict);
|
||||
}
|
||||
if (err) {
|
||||
errln(where + "Invalid pattern \"" + testpat +
|
||||
@ -1268,6 +1277,10 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
e.printStackTrace();
|
||||
}
|
||||
break;
|
||||
case 9: // strict= true or false
|
||||
strict = "true".equalsIgnoreCase(tokens.next());
|
||||
logln("Setting strict to:\t" + strict);
|
||||
break;
|
||||
case -1:
|
||||
errln("Unknown command \"" + tok + "\" at " + tokens.describePosition());
|
||||
return;
|
||||
|
@ -1,4 +1,4 @@
|
||||
######################################################################
|
||||
######################################################################
|
||||
# Copyright (c) 2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
######################################################################
|
||||
@ -73,7 +73,7 @@ rt: "" -123.456 "-123.456"
|
||||
|
||||
# Currency
|
||||
fpc: "en_US" 1234.56/USD "$1,234.56" 1234.56/USD
|
||||
fpc: - 1234.56/JPY "\u00A51,235" 1235/JPY
|
||||
fpc: - 1234.56/JPY "¥1,235" 1235/JPY
|
||||
# ISO codes that overlap display names (QQQ vs. Q)
|
||||
fpc: - 123/QQQ "QQQ123.00" 123/QQQ # QQQ is fake
|
||||
fpc: - 123/GTQ "Q123.00" 123/GTQ
|
||||
@ -83,3 +83,38 @@ fpc: - 2/INR "Rs.2.00" 2/INR
|
||||
# Display names with shared prefix (YDD vs. Y)
|
||||
fpc: - 100/YDD "YDD100.00" 100/YDD
|
||||
fpc: - 100/CNY "Y100.00" 100/CNY
|
||||
|
||||
# Lenient Tests
|
||||
|
||||
loc= "en"
|
||||
p: - "1,234.56" 1234.56
|
||||
p: - "1'234.56" 1234.56
|
||||
p: - "1 234.56" 1234.56
|
||||
p: - "1٬234.56" 1234.56
|
||||
p: - "1,234.56" 1234.56
|
||||
p: - "1.234.56" 1.234
|
||||
p: - "1、234。56" 1234.56
|
||||
|
||||
loc= "fr"
|
||||
p: - "1.234,56" 1234.56
|
||||
p: - "1'234,56" 1234.56
|
||||
p: - "1 234,56" 1234.56
|
||||
p: - "1,234,56" 1.234
|
||||
p: - "1。234、56" 1234.56
|
||||
|
||||
loc= "ar"
|
||||
p: - "1.234٫56" 1234.56
|
||||
p: - "1'234،56" 1234.56
|
||||
p: - "1٬234،56" 1234.56
|
||||
p: - "1.234,56" 1234.56
|
||||
p: - "1'234,56" 1234.56
|
||||
p: - "1٬234,56" 1234.56
|
||||
|
||||
strict= true
|
||||
loc= "en"
|
||||
p: - "1、234。56" 1
|
||||
loc= "fr"
|
||||
p: - "1。234、56" 1
|
||||
loc= "ar"
|
||||
p: - "1'234،56" 1234
|
||||
p: - "1٬234،56" 1234
|
||||
|
@ -81,16 +81,16 @@ public class ResourceReader {
|
||||
_reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a reader object for the input stream associated with
|
||||
* the given resource name.
|
||||
* @param is the input stream of the resource
|
||||
* @param resourceName the name of the resource
|
||||
*/
|
||||
public ResourceReader(InputStream is, String resourceName) {
|
||||
this.root = null;
|
||||
/**
|
||||
* Construct a reader object for the input stream associated with
|
||||
* the given resource name.
|
||||
* @param is the input stream of the resource
|
||||
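* @param resourceName the name of the resource
* @param encoding the encoding name stored in this reader's encoding field, e.g. "utf-8"; the two-argument constructor passes null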
|
||||
*/
|
||||
public ResourceReader(InputStream is, String resourceName, String encoding) {
|
||||
this.root = null;
|
||||
this.resourceName = resourceName;
|
||||
this.encoding = null;
|
||||
this.encoding = encoding;
|
||||
|
||||
this.lineNo = -1;
|
||||
try {
|
||||
@ -105,6 +105,16 @@ public class ResourceReader {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a reader object for the input stream associated with
|
||||
* the given resource name, delegating to the three-argument constructor with a null encoding.
|
||||
* @param is the input stream of the resource
|
||||
* @param resourceName the name of the resource
|
||||
*/
|
||||
public ResourceReader(InputStream is, String resourceName) {
|
||||
this(is, resourceName, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a reader object for the text file of the given name
|
||||
* in the given class's package, using the default encoding.
|
||||
|
@ -1709,6 +1709,24 @@ public class DecimalFormat extends NumberFormat {
|
||||
private static final int STATUS_INFINITE = 0;
|
||||
private static final int STATUS_POSITIVE = 1;
|
||||
private static final int STATUS_LENGTH = 2;
|
||||
// Lenient equivalence classes. getSimilarDecimals() hands back dotEquivalents or
// commaEquivalents when the decimal separator belongs to one of them and strictParse is false.
// Every set in this group is frozen after construction.
private static final UnicodeSet dotEquivalents =(UnicodeSet) new UnicodeSet(
|
||||
"[.\u2024\u3002\uFE12\uFE52\uFF0E\uFF61]").freeze();
|
||||
private static final UnicodeSet commaEquivalents = (UnicodeSet) new UnicodeSet(
|
||||
"[,\u060C\u066B\u3001\uFE10\uFE11\uFE50\uFE51\uFF0C\uFF64]").freeze();
|
||||
// Further characters (escaped space, apostrophe and the listed code points) that are
// merged into defaultGroupingSeparators below.
private static final UnicodeSet otherGroupingSeparators = (UnicodeSet) new UnicodeSet(
|
||||
"[\\ '\u00A0\u066C\u2000-\u200A\u2018\u2019\u202F\u205F\u3000\uFF07]").freeze();
|
||||

||||
// Strict counterparts, selected when strictParse is true. The dot and comma sets list
// fewer characters than the lenient ones; the "other" set uses the same pattern as above.
private static final UnicodeSet strictDotEquivalents =(UnicodeSet) new UnicodeSet(
|
||||
"[.\u2024\uFE52\uFF0E\uFF61]").freeze();
|
||||
private static final UnicodeSet strictCommaEquivalents = (UnicodeSet) new UnicodeSet(
|
||||
"[,\u066B\uFE10\uFE50\uFF0C]").freeze();
|
||||
private static final UnicodeSet strictOtherGroupingSeparators = (UnicodeSet) new UnicodeSet(
|
||||
"[\\ '\u00A0\u066C\u2000-\u200A\u2018\u2019\u202F\u205F\u3000\uFF07]").freeze();
|
||||

||||
// Union of the three lenient sets. The parse code copies it, adds the symbols' grouping
// separator and removes the decimal set to obtain the grouping characters it accepts.
private static final UnicodeSet defaultGroupingSeparators = (UnicodeSet) new UnicodeSet(
|
||||
dotEquivalents).addAll(commaEquivalents).addAll(otherGroupingSeparators).freeze();
|
||||
// Union of the three strict sets, used in the same way when strictParse is true.
private static final UnicodeSet strictDefaultGroupingSeparators = (UnicodeSet) new UnicodeSet(
|
||||
strictDotEquivalents).addAll(strictCommaEquivalents).addAll(strictOtherGroupingSeparators).freeze();
|
||||
|
||||
/**
|
||||
* <strong><font face=helvetica color=red>CHANGED</font></strong>
|
||||
@ -1783,6 +1801,7 @@ public class DecimalFormat extends NumberFormat {
|
||||
char decimal = isCurrencyFormat ?
|
||||
symbols.getMonetaryDecimalSeparator() : symbols.getDecimalSeparator();
|
||||
char grouping = symbols.getGroupingSeparator();
|
||||
|
||||
String exponentSep = symbols.getExponentSeparator();
|
||||
boolean sawDecimal = false;
|
||||
boolean sawExponent = false;
|
||||
@ -1797,6 +1816,19 @@ public class DecimalFormat extends NumberFormat {
|
||||
int lastGroup = -1; // where did we last see a grouping separator?
|
||||
int prevGroup = -1; // where did we see the grouping separator before that?
|
||||
int gs2 = groupingSize2 == 0 ? groupingSize : groupingSize2;
|
||||
|
||||
// equivalent grouping and decimal support
|
||||
|
||||
// TODO markdavis Cache these if it makes a difference in performance.
|
||||
UnicodeSet decimalSet = new UnicodeSet(getSimilarDecimals(decimal, strictParse));
|
||||
UnicodeSet groupingSet = new UnicodeSet(strictParse ? strictDefaultGroupingSeparators : defaultGroupingSeparators)
|
||||
.add(grouping).removeAll(decimalSet);
|
||||
|
||||
// we are guaranteed that
|
||||
// decimalSet contains the decimal, and
|
||||
// groupingSet contains the groupingSeparator
|
||||
// (unless decimal and grouping are the same, which should never happen. But in that case, groupingSet will just be empty.)
|
||||
|
||||
|
||||
// We have to track digitCount ourselves, because digits.count will
|
||||
// pin when the maximum allowable digits is reached.
|
||||
@ -1894,7 +1926,7 @@ public class DecimalFormat extends NumberFormat {
|
||||
// Cancel out backup setting (see grouping handler below)
|
||||
backup = -1;
|
||||
}
|
||||
else if (!isExponent && ch == decimal)
|
||||
else if (!isExponent && decimalSet.contains(ch))
|
||||
{
|
||||
if (strictParse) {
|
||||
if (backup != -1 ||
|
||||
@ -1910,7 +1942,7 @@ public class DecimalFormat extends NumberFormat {
|
||||
sawDecimal = true;
|
||||
leadingZero = false; // a single leading zero before a decimal is ok
|
||||
}
|
||||
else if (!isExponent && ch == grouping && isGroupingUsed())
|
||||
else if (!isExponent && isGroupingUsed() && groupingSet.contains(ch))
|
||||
{
|
||||
if (sawDecimal) {
|
||||
break;
|
||||
@ -2067,6 +2099,23 @@ public class DecimalFormat extends NumberFormat {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return characters that are used where this decimal is used.
|
||||
* @param decimal
|
||||
* @param strictParse
|
||||
* @return
|
||||
*/
|
||||
private UnicodeSet getSimilarDecimals(char decimal, boolean strictParse) {
|
||||
if (dotEquivalents.contains(decimal)) {
|
||||
return strictParse ? strictDotEquivalents : dotEquivalents;
|
||||
}
|
||||
if (commaEquivalents.contains(decimal)) {
|
||||
return strictParse ? strictCommaEquivalents : commaEquivalents;
|
||||
}
|
||||
// if there is no match, return the character itself
|
||||
return new UnicodeSet().add(decimal);
|
||||
}
|
||||
|
||||
/**
|
||||
* Starting at position, advance past a run of pad characters, if any.
|
||||
* Return the index of the first character after position that is not a pad
|
||||
|
Loading…
Reference in New Issue
Block a user