ICU-13513 Refactoring MutablePatternModifier's LDML parsing logic into a more general place in the code. Adding proper support for sign-always-shown to strict parsing.

X-SVN-Rev: 40826
This commit is contained in:
Shane Carr 2018-01-30 03:50:12 +00:00
parent 10e7c07765
commit 79014240bb
10 changed files with 246 additions and 157 deletions

View File

@ -30,8 +30,7 @@ import com.ibm.icu.util.Currency;
* pattern modifier by calling {@link MutablePatternModifier#createImmutable}, in effect treating this
* instance as a builder for the immutable variant.
*/
public class MutablePatternModifier
implements Modifier, SymbolProvider, CharSequence, MicroPropsGenerator {
public class MutablePatternModifier implements Modifier, SymbolProvider, MicroPropsGenerator {
// Modifier details
final boolean isStrong;
@ -54,12 +53,8 @@ public class MutablePatternModifier
// QuantityChain details
MicroPropsGenerator parent;
// Transient CharSequence fields
boolean inCharSequenceMode;
int flags;
int length;
boolean prependSign;
boolean plusReplacesMinusSign;
// Transient fields for rendering
StringBuilder currentAffix;
/**
* @param isStrong
@ -111,7 +106,7 @@ public class MutablePatternModifier
Currency currency,
UnitWidth unitWidth,
PluralRules rules) {
//assert (rules != null) == needsPlurals();
// assert (rules != null) == needsPlurals();
this.symbols = symbols;
this.currency = currency;
this.unitWidth = unitWidth;
@ -288,21 +283,18 @@ public class MutablePatternModifier
@Override
public int getPrefixLength() {
// Resolve the prefix pattern, then count its code points after unescaping.
enterCharSequenceMode(true);
int result = AffixUtils.unescapedCodePointCount(this, this); // prefix length
exitCharSequenceMode();
prepareAffix(true);
int result = AffixUtils.unescapedCodePointCount(currentAffix, this); // prefix length
return result;
}
@Override
public int getCodePointCount() {
// Resolve the prefix and then the suffix pattern, summing their code point counts after unescaping.
enterCharSequenceMode(true);
int result = AffixUtils.unescapedCodePointCount(this, this); // prefix length
exitCharSequenceMode();
enterCharSequenceMode(false);
result += AffixUtils.unescapedCodePointCount(this, this); // suffix length
exitCharSequenceMode();
prepareAffix(true);
int result = AffixUtils.unescapedCodePointCount(currentAffix, this); // prefix length
prepareAffix(false);
result += AffixUtils.unescapedCodePointCount(currentAffix, this); // suffix length
return result;
}
@ -312,19 +304,37 @@ public class MutablePatternModifier
}
private int insertPrefix(NumberStringBuilder sb, int position) {
enterCharSequenceMode(true);
int length = AffixUtils.unescape(this, sb, position, this);
exitCharSequenceMode();
prepareAffix(true);
int length = AffixUtils.unescape(currentAffix, sb, position, this);
return length;
}
private int insertSuffix(NumberStringBuilder sb, int position) {
enterCharSequenceMode(false);
int length = AffixUtils.unescape(this, sb, position, this);
exitCharSequenceMode();
prepareAffix(false);
int length = AffixUtils.unescape(currentAffix, sb, position, this);
return length;
}
/**
* Pre-processes the prefix or suffix into the currentAffix field, creating that field on first use
* and overwriting whatever it held from a previous call. Calls down to
* {@link PatternStringUtils#patternInfoToStringBuilder}.
*
* <p>
* Because the same buffer is reused for both affixes, callers must finish reading currentAffix
* before calling this method again.
*
* @param isPrefix
* true to prepare the prefix; false to prepare the suffix.
*/
private void prepareAffix(boolean isPrefix) {
// Lazily allocate the shared buffer; it is reused across calls.
if (currentAffix == null) {
currentAffix = new StringBuilder();
}
// Render the affix using the sign, plural, and permille state currently set on this modifier.
PatternStringUtils.patternInfoToStringBuilder(patternInfo,
isPrefix,
signum,
signDisplay,
plural,
perMilleReplacesPercent,
currentAffix);
}
/**
* Returns the string that substitutes a given symbol type in a pattern.
*/
@ -367,86 +377,4 @@ public class MutablePatternModifier
throw new AssertionError();
}
}
/** This method contains the heart of the logic for rendering LDML affix strings. */
private void enterCharSequenceMode(boolean isPrefix) {
assert !inCharSequenceMode;
inCharSequenceMode = true;
// Should the output render '+' where '-' would normally appear in the pattern?
plusReplacesMinusSign = signum != -1
&& (signDisplay == SignDisplay.ALWAYS
|| signDisplay == SignDisplay.ACCOUNTING_ALWAYS
|| (signum == 1
&& (signDisplay == SignDisplay.EXCEPT_ZERO
|| signDisplay == SignDisplay.ACCOUNTING_EXCEPT_ZERO)))
&& patternInfo.positiveHasPlusSign() == false;
// Should we use the affix from the negative subpattern? (If not, we will use the positive
// subpattern.)
boolean useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
&& (signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
// Resolve the flags for the affix pattern.
flags = 0;
if (useNegativeAffixPattern) {
flags |= AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN;
}
if (isPrefix) {
flags |= AffixPatternProvider.Flags.PREFIX;
}
if (plural != null) {
assert plural.ordinal() == (AffixPatternProvider.Flags.PLURAL_MASK & plural.ordinal());
flags |= plural.ordinal();
}
// Should we prepend a sign to the pattern?
if (!isPrefix || useNegativeAffixPattern) {
prependSign = false;
} else if (signum == -1) {
prependSign = signDisplay != SignDisplay.NEVER;
} else {
prependSign = plusReplacesMinusSign;
}
// Finally, compute the length of the affix pattern.
length = patternInfo.length(flags) + (prependSign ? 1 : 0);
}
private void exitCharSequenceMode() {
assert inCharSequenceMode;
inCharSequenceMode = false;
}
@Override
public int length() {
assert inCharSequenceMode;
return length;
}
@Override
public char charAt(int index) {
assert inCharSequenceMode;
char candidate;
if (prependSign && index == 0) {
candidate = '-';
} else if (prependSign) {
candidate = patternInfo.charAt(flags, index - 1);
} else {
candidate = patternInfo.charAt(flags, index);
}
if (plusReplacesMinusSign && candidate == '-') {
return '+';
}
if (perMilleReplacesPercent && candidate == '%') {
return '‰';
}
return candidate;
}
@Override
public CharSequence subSequence(int start, int end) {
// Never called by AffixUtils
throw new AssertionError();
}
}

View File

@ -4,7 +4,9 @@ package com.ibm.icu.impl.number;
import java.math.BigDecimal;
import com.ibm.icu.impl.StandardPlural;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.number.NumberFormatter.SignDisplay;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -398,4 +400,79 @@ public class PatternStringUtils {
return result.toString();
}
/**
* This method contains the heart of the logic for rendering LDML affix strings. It handles
* sign-always-shown resolution, whether to use the positive or negative subpattern, permille
* substitution, and plural forms for CurrencyPluralInfo.
*
* <p>
* The resolved affix is written into {@code output}; the previous contents of {@code output} are
* discarded.
*
* @param patternInfo
* the provider of the affix patterns; it is queried for its sign properties and then read
* one char at a time.
* @param isPrefix
* true to render the prefix; false to render the suffix.
* @param signum
* -1 for a negative quantity and 1 for a positive one; any other value is treated as neither
* (presumably 0 for zero; confirm against callers).
* @param signDisplay
* the sign display strategy; it decides whether a plus sign is rendered for non-negative
* quantities and whether a minus sign is prepended for negative ones.
* @param plural
* the plural form whose ordinal is folded into the lookup flags, or null to leave the plural
* bits of the flags unset.
* @param perMilleReplacesPercent
* true to emit '‰' wherever the resolved pattern contains '%'.
* @param output
* the builder that receives the result; it is cleared before anything is appended.
*/
public static void patternInfoToStringBuilder(
AffixPatternProvider patternInfo,
boolean isPrefix,
int signum,
SignDisplay signDisplay,
StandardPlural plural,
boolean perMilleReplacesPercent,
StringBuilder output) {
// Should the output render '+' where '-' would normally appear in the pattern?
// Only for non-negative quantities, only when the sign display asks for it (the EXCEPT_ZERO
// variants additionally require signum == 1), and only when the positive subpattern does not
// already carry its own plus sign.
boolean plusReplacesMinusSign = signum != -1
&& (signDisplay == SignDisplay.ALWAYS
|| signDisplay == SignDisplay.ACCOUNTING_ALWAYS
|| (signum == 1
&& (signDisplay == SignDisplay.EXCEPT_ZERO
|| signDisplay == SignDisplay.ACCOUNTING_EXCEPT_ZERO)))
&& patternInfo.positiveHasPlusSign() == false;
// Should we use the affix from the negative subpattern? (If not, we will use the positive
// subpattern.)
// The negative subpattern is also borrowed for a plus-signed quantity when it contains a minus
// sign, since that minus sign is rewritten to '+' in the loop below.
boolean useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
&& (signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
// Resolve the flags for the affix pattern.
int flags = 0;
if (useNegativeAffixPattern) {
flags |= AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN;
}
if (isPrefix) {
flags |= AffixPatternProvider.Flags.PREFIX;
}
if (plural != null) {
// The plural ordinal must fit entirely inside the plural bits of the flags.
assert plural.ordinal() == (AffixPatternProvider.Flags.PLURAL_MASK & plural.ordinal());
flags |= plural.ordinal();
}
// Should we prepend a sign to the pattern?
// Never on a suffix, and never when the negative subpattern was selected. Otherwise a negative
// quantity gets a sign unless the display is NEVER, and a non-negative quantity gets one exactly
// when a plus sign is being rendered.
boolean prependSign;
if (!isPrefix || useNegativeAffixPattern) {
prependSign = false;
} else if (signum == -1) {
prependSign = signDisplay != SignDisplay.NEVER;
} else {
prependSign = plusReplacesMinusSign;
}
// Compute the length of the affix pattern.
// The extra slot, when present, holds the prepended sign at index 0.
int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
// Finally, set the result into the StringBuilder.
output.setLength(0);
for (int index = 0; index < length; index++) {
char candidate;
if (prependSign && index == 0) {
// The prepended sign is always written as '-'; it becomes '+' below when required.
candidate = '-';
} else if (prependSign) {
// Shift by one to skip over the slot taken by the prepended sign.
candidate = patternInfo.charAt(flags, index - 1);
} else {
candidate = patternInfo.charAt(flags, index);
}
// NOTE(review): this rewrites every '-' char produced above, not only the prepended one;
// whether that can include quoted literals depends on what patternInfo.charAt returns.
if (plusReplacesMinusSign && candidate == '-') {
candidate = '+';
}
if (perMilleReplacesPercent && candidate == '%') {
candidate = '‰';
}
output.append(candidate);
}
}
}

View File

@ -7,8 +7,11 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.Objects;
import com.ibm.icu.impl.StandardPlural;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.PatternStringUtils;
import com.ibm.icu.number.NumberFormatter.SignDisplay;
import com.ibm.icu.text.UnicodeSet;
/**
@ -41,13 +44,10 @@ public class AffixMatcher implements NumberParseMatcher {
}
};
public static void newGenerate(
private static boolean isInteresting(
AffixPatternProvider patternInfo,
NumberParserImpl output,
MatcherFactory factory,
IgnorablesMatcher ignorables,
int parseFlags) {
String posPrefixString = patternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX);
String posSuffixString = patternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX);
String negPrefixString = null;
@ -70,44 +70,78 @@ public class AffixMatcher implements NumberParseMatcher {
&& !AffixUtils.containsType(negSuffixString, AffixUtils.TYPE_MINUS_SIGN)) {
// The affixes contain only symbols and ignorables.
// No need to generate affix matchers.
return false;
}
return true;
}
public static void newGenerate(
AffixPatternProvider patternInfo,
NumberParserImpl output,
MatcherFactory factory,
IgnorablesMatcher ignorables,
int parseFlags) {
if (!isInteresting(patternInfo, ignorables, parseFlags)) {
return;
}
// The affixes have interesting characters, or we are in strict mode.
// Use initial capacity of 6, the highest possible number of AffixMatchers.
StringBuilder sb = new StringBuilder();
ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6);
boolean includeUnpaired = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
SignDisplay signDisplay = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED))
? SignDisplay.ALWAYS
: SignDisplay.NEVER;
AffixPatternMatcher posPrefix = AffixPatternMatcher
.fromAffixPattern(posPrefixString, factory, parseFlags);
AffixPatternMatcher posSuffix = AffixPatternMatcher
.fromAffixPattern(posSuffixString, factory, parseFlags);
AffixPatternMatcher posPrefix = null;
AffixPatternMatcher posSuffix = null;
// Note: it is indeed possible for posPrefix and posSuffix to both be null.
// We still need to add that matcher for strict mode to work.
matchers.add(getInstance(posPrefix, posSuffix, 0));
if (includeUnpaired && posPrefix != null && posSuffix != null) {
matchers.add(getInstance(posPrefix, null, 0));
matchers.add(getInstance(null, posSuffix, 0));
}
// Pre-process the affix strings to resolve LDML rules like sign display.
for (int signum = 1; signum >= -1; signum--) {
// Generate Prefix
PatternStringUtils.patternInfoToStringBuilder(patternInfo,
true,
signum,
signDisplay,
StandardPlural.OTHER,
false,
sb);
AffixPatternMatcher prefix = AffixPatternMatcher
.fromAffixPattern(sb.toString(), factory, parseFlags);
if (patternInfo.hasNegativeSubpattern()) {
AffixPatternMatcher negPrefix = AffixPatternMatcher
.fromAffixPattern(negPrefixString, factory, parseFlags);
AffixPatternMatcher negSuffix = AffixPatternMatcher
.fromAffixPattern(negSuffixString, factory, parseFlags);
// Generate Suffix
PatternStringUtils.patternInfoToStringBuilder(patternInfo,
false,
signum,
signDisplay,
StandardPlural.OTHER,
false,
sb);
AffixPatternMatcher suffix = AffixPatternMatcher
.fromAffixPattern(sb.toString(), factory, parseFlags);
if (Objects.equals(negPrefix, posPrefix) && Objects.equals(negSuffix, posSuffix)) {
// No-op: favor the positive AffixMatcher
} else {
matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG_NEGATIVE));
if (includeUnpaired && negPrefix != null && negSuffix != null) {
if (!negPrefix.equals(posPrefix)) {
matchers.add(getInstance(negPrefix, null, ParsedNumber.FLAG_NEGATIVE));
}
if (!negSuffix.equals(posSuffix)) {
matchers.add(getInstance(null, negSuffix, ParsedNumber.FLAG_NEGATIVE));
}
if (signum == 1) {
posPrefix = prefix;
posSuffix = suffix;
} else if (Objects.equals(prefix, posPrefix) && Objects.equals(suffix, posSuffix)) {
// Skip adding these matchers (we already have equivalents)
continue;
}
// Flags for setting in the ParsedNumber
int flags = (signum == -1) ? ParsedNumber.FLAG_NEGATIVE : 0;
// Note: it is indeed possible for posPrefix and posSuffix to both be null.
// We still need to add that matcher for strict mode to work.
matchers.add(getInstance(prefix, suffix, flags));
if (includeUnpaired && prefix != null && suffix != null) {
// The following if statements are designed to prevent adding two identical matchers.
if (signum == 1 || !Objects.equals(prefix, posPrefix)) {
matchers.add(getInstance(prefix, null, flags));
}
if (signum == 1 || !Objects.equals(suffix, posSuffix)) {
matchers.add(getInstance(null, suffix, flags));
}
}
}

View File

@ -174,9 +174,14 @@ public class NumberParserImpl {
if (properties.getParseIntegerOnly()) {
parseFlags |= ParsingUtils.PARSE_FLAG_INTEGER_ONLY;
}
if (properties.getSignAlwaysShown()) {
parseFlags |= ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED;
}
if (isStrict) {
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES;
parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX;
} else {
parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
}
@ -217,15 +222,17 @@ public class NumberParserImpl {
/// OTHER STANDARD MATCHERS ///
///////////////////////////////
if (!isStrict
|| patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)
|| properties.getSignAlwaysShown()) {
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
if (!isStrict) {
if (!isStrict
|| patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)
|| properties.getSignAlwaysShown()) {
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
}
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(PercentMatcher.getInstance(symbols));
parser.addMatcher(PermilleMatcher.getInstance(symbols));
}
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(PercentMatcher.getInstance(symbols));
parser.addMatcher(PermilleMatcher.getInstance(symbols));
parser.addMatcher(InfinityMatcher.getInstance(symbols));
String padString = properties.getPadString();
if (padString != null && !ignorables.getSet().contains(padString)) {

View File

@ -21,6 +21,7 @@ public class ParsingUtils {
public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080;
public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100;
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {

View File

@ -10,7 +10,7 @@ public class RequireAffixMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
if ((result.prefix == null) != (result.suffix == null)) {
if (result.prefix == null || result.suffix == null) {
// The prefix, the suffix, or both are missing. Fail the parse.
result.flags |= ParsedNumber.FLAG_FAIL;
}

View File

@ -831,7 +831,7 @@ parse output breaks
// JDK does allow separators in the wrong place and parses as -5347.25
(53,47.25) fail K
// strict requires prefix or suffix, except in C
65,347.25 fail P
65,347.25 fail
+3.52E4 35200
(34.8E-3) -0.0348
(3425E-1) -342.5
@ -1310,11 +1310,11 @@ set locale en_US
set lenient 0
begin
parse output outputCurrency breaks
$53.45 53.45 USD
$53.45 53.45 USD P
53.45 USD 53.45 USD
USD 53.45 fail USD
53.45USD fail USD
USD53.45 53.45 USD
USD53.45 53.45 USD P
(7.92) USD -7.92 USD
(7.92) EUR -7.92 EUR
(7.926) USD -7.926 USD
@ -1332,9 +1332,9 @@ US Dollars 53.45 fail USD
53.45 US Dollars 53.45 USD
US Dollar 53.45 fail USD
53.45 US Dollar 53.45 USD
US Dollars53.45 53.45 USD
US Dollars53.45 53.45 USD P
53.45US Dollars fail USD
US Dollar53.45 53.45 USD
US Dollar53.45 53.45 USD P
US Dollat53.45 fail USD
53.45US Dollar fail USD
US Dollars (53.45) fail USD
@ -1362,13 +1362,12 @@ set decimalPatternMatchRequired 1
begin
pattern parse output breaks
// K doesn't support this feature.
// P stops parsing when it sees the decimal separator, but doesn't fail.
0 123 123
0 123. fail CJKP
0 1.23 fail CJKP
0 123. fail CJK
0 1.23 fail CJK
0 -513 -513
0 -513. fail CJKP
0 -5.13 fail CJKP
0 -513. fail CJK
0 -5.13 fail CJK
0.0 123 fail K
0.0 123. 123 C
0.0 1.23 1.23 C
@ -1581,6 +1580,34 @@ begin
parse output breaks
9223372036854775807% 92233720368547758.07
test sign always shown
set locale en
set pattern 0
set signAlwaysShown 1
begin
format output breaks
// J and K do not support this feature
42 +42 JK
0 +0 JK
-42 -42
test parse strict with plus sign
set locale en
set pattern 0
set signAlwaysShown 1
begin
lenient parse output breaks
1 42 42
1 -42 -42
1 +42 42 JK
1 0 0
1 +0 0 JK
0 42 fail CJK
0 -42 -42
0 +42 42 JK
0 0 fail CJK
0 +0 0 JK

View File

@ -108,6 +108,7 @@ public class DataDrivenNumberFormatTestData {
public String positiveSuffix = null;
public String negativePrefix = null;
public String negativeSuffix = null;
public Integer signAlwaysShown = null;
public String localizedPattern = null;
public String toPattern = null;
public String toLocalizedPattern = null;
@ -213,6 +214,7 @@ public class DataDrivenNumberFormatTestData {
"positiveSuffix",
"negativePrefix",
"negativeSuffix",
"signAlwaysShown",
"localizedPattern",
"toPattern",
"toLocalizedPattern",
@ -378,6 +380,10 @@ public class DataDrivenNumberFormatTestData {
negativeSuffix = value;
}
/**
* Sets the signAlwaysShown field from its textual form in the test data file.
*
* @param value
* a decimal integer; the test driver treats any non-zero value as "enabled".
* @throws NumberFormatException
* if {@code value} cannot be parsed as an integer.
*/
public void setSignAlwaysShown(String value) {
signAlwaysShown = Integer.valueOf(value);
}
public void setLocalizedPattern(String value) {
localizedPattern = value;
}

View File

@ -273,7 +273,7 @@ public class DataDrivenNumberFormatTestUtility extends TestFmwk {
tuple.setField(name, Utility.unescape(value));
return true;
} catch (Exception e) {
showError("No such field: " + name + ", or bad value: " + value);
showError("No such field: " + name + ", or bad value: " + value + ": " + e);
return false;
}
}

View File

@ -225,6 +225,9 @@ public class NumberFormatDataDrivenTest {
if (tuple.negativeSuffix != null) {
fmt.setNegativeSuffix(tuple.negativeSuffix);
}
if (tuple.signAlwaysShown != null) {
// Not supported.
}
if (tuple.localizedPattern != null) {
fmt.applyLocalizedPattern(tuple.localizedPattern);
}
@ -406,6 +409,9 @@ public class NumberFormatDataDrivenTest {
if (tuple.negativeSuffix != null) {
fmt.setNegativeSuffix(tuple.negativeSuffix);
}
if (tuple.signAlwaysShown != null) {
// Not supported.
}
if (tuple.localizedPattern != null) {
fmt.applyLocalizedPattern(tuple.localizedPattern);
}
@ -510,6 +516,9 @@ public class NumberFormatDataDrivenTest {
if (tuple.negativeSuffix != null) {
properties.setNegativeSuffix(tuple.negativeSuffix);
}
if (tuple.signAlwaysShown != null) {
properties.setSignAlwaysShown(tuple.signAlwaysShown != 0);
}
if (tuple.localizedPattern != null) {
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(tuple.locale);
String converted = PatternStringUtils.convertLocalized(tuple.localizedPattern, symbols, false);