ICU-13084 Update the set of ignorable control characters to [:DI:] (Default_Ignorable_Code_Point).
X-SVN-Rev: 41002
This commit is contained in:
parent
9759fc597f
commit
9ae7e8eba1
@ -63,8 +63,9 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
|
|||||||
|
|
||||||
gUnicodeSets[EMPTY] = new UnicodeSet();
|
gUnicodeSets[EMPTY] = new UnicodeSet();
|
||||||
|
|
||||||
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
|
// These characters are skipped over and ignored at any point in the string, even in strict mode.
|
||||||
gUnicodeSets[BIDI] = new UnicodeSet(u"[[\\u200E\\u200F\\u061C]]", status);
|
// See ticket #13084.
|
||||||
|
gUnicodeSets[BIDI] = new UnicodeSet(u"[[:DI:]]", status);
|
||||||
|
|
||||||
// This set was decided after discussion with icu-design@. See ticket #13309.
|
// This set was decided after discussion with icu-design@. See ticket #13309.
|
||||||
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
||||||
|
@ -81,8 +81,9 @@ public class UnicodeSetStaticCache {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
|
// These characters are skipped over and ignored at any point in the string, even in strict mode.
|
||||||
unicodeSets.put(Key.BIDI, new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze());
|
// See ticket #13084.
|
||||||
|
unicodeSets.put(Key.BIDI, new UnicodeSet("[[:DI:]]").freeze());
|
||||||
|
|
||||||
// This set was decided after discussion with icu-design@. See ticket #13309.
|
// This set was decided after discussion with icu-design@. See ticket #13309.
|
||||||
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
||||||
|
@ -198,7 +198,7 @@ import com.ibm.icu.util.ULocale.Category;
|
|||||||
* example, a formatter instance gotten from NumberFormat.getInstance(ULocale,
|
* example, a formatter instance gotten from NumberFormat.getInstance(ULocale,
|
||||||
* NumberFormat.CURRENCYSTYLE) can parse both "USD1.00" and "3.00 US dollars".
|
* NumberFormat.CURRENCYSTYLE) can parse both "USD1.00" and "3.00 US dollars".
|
||||||
*
|
*
|
||||||
* <p>Whitespace characters (lenient mode) and bidi control characters (lenient and strict mode),
|
* <p>Whitespace characters (lenient mode) and control characters (lenient and strict mode),
|
||||||
* collectively called "ignorables", do not need to match in identity or quantity between the
|
* collectively called "ignorables", do not need to match in identity or quantity between the
|
||||||
* pattern string and the input string. For example, the pattern "# %" matches "35 %" (with a single
|
* pattern string and the input string. For example, the pattern "# %" matches "35 %" (with a single
|
||||||
* space), "35%" (with no space), "35 %" (with a non-breaking space), and "35 %" (with
|
* space), "35%" (with no space), "35 %" (with a non-breaking space), and "35 %" (with
|
||||||
@ -206,6 +206,7 @@ import com.ibm.icu.util.ULocale.Category;
|
|||||||
* number: prefix, number, exponent separator, and suffix. Ignorable whitespace characters are those
|
* number: prefix, number, exponent separator, and suffix. Ignorable whitespace characters are those
|
||||||
* having the Unicode "blank" property for regular expressions, defined in UTS #18 Annex C, which is
|
* having the Unicode "blank" property for regular expressions, defined in UTS #18 Annex C, which is
|
||||||
* "horizontal" whitespace, like spaces and tabs, but not "vertical" whitespace, like line breaks.
|
* "horizontal" whitespace, like spaces and tabs, but not "vertical" whitespace, like line breaks.
|
||||||
|
* Ignorable control characters are those in the Unicode set [:Default_Ignorable_Code_Point:].
|
||||||
*
|
*
|
||||||
* <p>If {@link #parse(String, ParsePosition)} fails to parse a string, it returns <code>null</code>
|
* <p>If {@link #parse(String, ParsePosition)} fails to parse a string, it returns <code>null</code>
|
||||||
* and leaves the parse position unchanged. The convenience method {@link #parse(String)} indicates
|
* and leaves the parse position unchanged. The convenience method {@link #parse(String)} indicates
|
||||||
|
@ -1722,11 +1722,32 @@ public class NumberFormatTest extends TestFmwk {
|
|||||||
// Test all characters in the UTS 18 "blank" set stated in the API docstring.
|
// Test all characters in the UTS 18 "blank" set stated in the API docstring.
|
||||||
UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
|
UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
|
||||||
for (String space : blanks) {
|
for (String space : blanks) {
|
||||||
String str = "a " + space + " b1234";
|
String str = "a " + space + " b1234c ";
|
||||||
|
expect(fmt, str, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Arbitrary whitespace is not accepted in strict mode.
|
||||||
|
fmt.setParseStrict(true);
|
||||||
|
for (String space : blanks) {
|
||||||
|
String str = "a " + space + " b1234c ";
|
||||||
|
expectParseException(fmt, str, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test default ignorable characters. These should work in both lenient and strict.
|
||||||
|
UnicodeSet defaultIgnorables = new UnicodeSet("[[:Default_Ignorable_Code_Point:]]").freeze();
|
||||||
|
fmt.setParseStrict(false);
|
||||||
|
for (String ignorable : defaultIgnorables) {
|
||||||
|
String str = "a b " + ignorable + "1234c ";
|
||||||
|
expect(fmt, str, n);
|
||||||
|
}
|
||||||
|
fmt.setParseStrict(true);
|
||||||
|
for (String ignorable : defaultIgnorables) {
|
||||||
|
String str = "a b " + ignorable + "1234c ";
|
||||||
expect(fmt, str, n);
|
expect(fmt, str, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test that other whitespace characters do not work
|
// Test that other whitespace characters do not work
|
||||||
|
fmt.setParseStrict(false);
|
||||||
UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
|
UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
|
||||||
for (String space : otherWhitespace) {
|
for (String space : otherWhitespace) {
|
||||||
String str = "a " + space + " b1234";
|
String str = "a " + space + " b1234";
|
||||||
|
Loading…
Reference in New Issue
Block a user