ICU-8610 Full support for skeletons in ICU4J. Needs a few more tests.

X-SVN-Rev: 41038
This commit is contained in:
Shane Carr 2018-03-01 09:24:37 +00:00
parent 59e4fc5172
commit c5e86f87c8
3 changed files with 580 additions and 205 deletions

View File

@ -12,7 +12,11 @@ import java.util.concurrent.ConcurrentHashMap;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.MacroProps;
import com.ibm.icu.number.NumberFormatter.DecimalSeparatorDisplay;
import com.ibm.icu.number.NumberFormatter.GroupingStrategy;
import com.ibm.icu.number.NumberFormatter.SignDisplay;
import com.ibm.icu.number.NumberFormatter.UnitWidth;
import com.ibm.icu.text.NumberingSystem;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.Currency.CurrencyUsage;
import com.ibm.icu.util.MeasureUnit;
@ -25,7 +29,25 @@ import com.ibm.icu.util.NoUnit;
class NumberSkeletonImpl {
static enum StemType {
OTHER, ROUNDER, FRACTION_ROUNDER, MAYBE_INCREMENT_ROUNDER, CURRENCY_ROUNDER, MEASURE_UNIT, UNIT_WIDTH
OTHER,
COMPACT_NOTATION,
SCIENTIFIC_NOTATION,
SIMPLE_NOTATION,
NO_UNIT,
CURRENCY,
MEASURE_UNIT,
PER_MEASURE_UNIT,
ROUNDER,
FRACTION_ROUNDER,
MAYBE_INCREMENT_ROUNDER,
CURRENCY_ROUNDER,
GROUPING,
INTEGER_WIDTH,
LATIN,
NUMBERING_SYSTEM,
UNIT_WIDTH,
SIGN_DISPLAY,
DECIMAL_DISPLAY
}
static class SkeletonDataStructure {
@ -61,6 +83,16 @@ class NumberSkeletonImpl {
static final SkeletonDataStructure skeletonData = new SkeletonDataStructure();
static {
skeletonData.put(StemType.COMPACT_NOTATION, "compact-short", Notation.compactShort());
skeletonData.put(StemType.COMPACT_NOTATION, "compact-long", Notation.compactLong());
skeletonData.put(StemType.SCIENTIFIC_NOTATION, "scientific", Notation.scientific());
skeletonData.put(StemType.SCIENTIFIC_NOTATION, "engineering", Notation.engineering());
skeletonData.put(StemType.SIMPLE_NOTATION, "simple-notation", Notation.simple());
skeletonData.put(StemType.NO_UNIT, "base-unit", NoUnit.BASE);
skeletonData.put(StemType.NO_UNIT, "percent", NoUnit.PERCENT);
skeletonData.put(StemType.NO_UNIT, "permille", NoUnit.PERMILLE);
skeletonData.put(StemType.ROUNDER, "round-integer", Rounder.integer());
skeletonData.put(StemType.ROUNDER, "round-unlimited", Rounder.unlimited());
skeletonData.put(StemType.ROUNDER,
@ -68,11 +100,32 @@ class NumberSkeletonImpl {
Rounder.currency(CurrencyUsage.STANDARD));
skeletonData.put(StemType.ROUNDER, "round-currency-cash", Rounder.currency(CurrencyUsage.CASH));
skeletonData.put(StemType.GROUPING, "group-off", GroupingStrategy.OFF);
skeletonData.put(StemType.GROUPING, "group-min2", GroupingStrategy.MIN2);
skeletonData.put(StemType.GROUPING, "group-auto", GroupingStrategy.AUTO);
skeletonData.put(StemType.GROUPING, "group-on-aligned", GroupingStrategy.ON_ALIGNED);
skeletonData.put(StemType.GROUPING, "group-thousands", GroupingStrategy.THOUSANDS);
skeletonData.put(StemType.LATIN, "latin", NumberingSystem.LATIN);
skeletonData.put(StemType.UNIT_WIDTH, "unit-width-narrow", UnitWidth.NARROW);
skeletonData.put(StemType.UNIT_WIDTH, "unit-width-short", UnitWidth.SHORT);
skeletonData.put(StemType.UNIT_WIDTH, "unit-width-full-name", UnitWidth.FULL_NAME);
skeletonData.put(StemType.UNIT_WIDTH, "unit-width-iso-code", UnitWidth.ISO_CODE);
skeletonData.put(StemType.UNIT_WIDTH, "unit-width-hidden", UnitWidth.HIDDEN);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-auto", SignDisplay.AUTO);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-always", SignDisplay.ALWAYS);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-never", SignDisplay.NEVER);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-accounting", SignDisplay.ACCOUNTING);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-accounting-always", SignDisplay.ACCOUNTING_ALWAYS);
skeletonData.put(StemType.SIGN_DISPLAY, "sign-except-zero", SignDisplay.EXCEPT_ZERO);
skeletonData.put(StemType.SIGN_DISPLAY,
"sign-accounting-except-zero",
SignDisplay.ACCOUNTING_EXCEPT_ZERO);
skeletonData.put(StemType.DECIMAL_DISPLAY, "decimal-auto", DecimalSeparatorDisplay.AUTO);
skeletonData.put(StemType.DECIMAL_DISPLAY, "decimal-always", DecimalSeparatorDisplay.ALWAYS);
}
private static final Map<String, UnlocalizedNumberFormatter> cache = new ConcurrentHashMap<String, UnlocalizedNumberFormatter>();
@ -183,6 +236,10 @@ class NumberSkeletonImpl {
switch (stem) {
case MAYBE_INCREMENT_ROUNDER:
case MEASURE_UNIT:
case PER_MEASURE_UNIT:
case CURRENCY:
case INTEGER_WIDTH:
case NUMBERING_SYSTEM:
throw new SkeletonSyntaxException("Stem requires an option", segment);
default:
break;
@ -200,14 +257,40 @@ class NumberSkeletonImpl {
if (stem != null) {
Object value = skeletonData.stemToValue(content);
switch (stem) {
case COMPACT_NOTATION:
case SCIENTIFIC_NOTATION:
case SIMPLE_NOTATION:
checkNull(macros.notation, content);
macros.notation = (Notation) value;
break;
case NO_UNIT:
checkNull(macros.unit, content);
macros.unit = (NoUnit) value;
break;
case ROUNDER:
checkNull(macros.rounder, content);
macros.rounder = (Rounder) value;
break;
case GROUPING:
checkNull(macros.grouping, content);
macros.grouping = value;
break;
case LATIN:
checkNull(macros.symbols, content);
macros.symbols = value;
break;
case UNIT_WIDTH:
checkNull(macros.unitWidth, content);
macros.unitWidth = (UnitWidth) value;
break;
case SIGN_DISPLAY:
checkNull(macros.sign, content);
macros.sign = (SignDisplay) value;
break;
case DECIMAL_DISPLAY:
checkNull(macros.decimal, content);
macros.decimal = (DecimalSeparatorDisplay) value;
break;
default:
assert false;
}
@ -216,19 +299,35 @@ class NumberSkeletonImpl {
// Second try: literal stems that require an option
if (content.equals("round-increment")) {
checkNull(macros.rounder, content);
return StemType.MAYBE_INCREMENT_ROUNDER;
} else if (content.equals("measure-unit")) {
checkNull(macros.unit, content);
return StemType.MEASURE_UNIT;
} else if (content.equals("per-measure-unit")) {
checkNull(macros.perUnit, content);
return StemType.PER_MEASURE_UNIT;
} else if (content.equals("currency")) {
checkNull(macros.unit, content);
return StemType.CURRENCY;
} else if (content.equals("integer-width")) {
checkNull(macros.integerWidth, content);
return StemType.INTEGER_WIDTH;
} else if (content.equals("numbering-system")) {
checkNull(macros.symbols, content);
return StemType.NUMBERING_SYSTEM;
}
// Second try: stem "blueprint" syntax
// Third try: stem "blueprint" syntax
switch (content.charAt(0)) {
case '.':
stem = StemType.FRACTION_ROUNDER;
checkNull(macros.rounder, content);
parseFractionStem(content, macros);
break;
case '@':
stem = StemType.ROUNDER;
checkNull(macros.rounder, content);
parseDigitsStem(content, macros);
break;
}
@ -241,6 +340,43 @@ class NumberSkeletonImpl {
}
private static StemType parseOption(StemType stem, CharSequence content, MacroProps macros) {
///// Required options: /////
switch (stem) {
case CURRENCY:
parseCurrencyOption(content, macros);
return StemType.OTHER;
case MEASURE_UNIT:
parseMeasureUnitOption(content, macros);
return StemType.OTHER;
case PER_MEASURE_UNIT:
parseMeasurePerUnitOption(content, macros);
return StemType.OTHER;
case MAYBE_INCREMENT_ROUNDER:
parseIncrementOption(content, macros);
return StemType.ROUNDER;
case INTEGER_WIDTH:
parseIntegerWidthOption(content, macros);
return StemType.OTHER;
case NUMBERING_SYSTEM:
parseNumberingSystemOption(content, macros);
return StemType.OTHER;
}
///// Non-required options: /////
// Scientific options
switch (stem) {
case SCIENTIFIC_NOTATION:
if (parseExponentWidthOption(content, macros)) {
return StemType.SCIENTIFIC_NOTATION;
}
if (parseExponentSignOption(content, macros)) {
return StemType.SCIENTIFIC_NOTATION;
}
}
// Frac-sig option
switch (stem) {
case FRACTION_ROUNDER:
@ -249,51 +385,61 @@ class NumberSkeletonImpl {
}
}
// Increment option
switch (stem) {
case MAYBE_INCREMENT_ROUNDER:
// The increment option is required.
parseIncrementOption(content, macros);
return StemType.ROUNDER;
}
// Rounding mode option
switch (stem) {
case ROUNDER:
case FRACTION_ROUNDER:
case CURRENCY_ROUNDER:
if (parseRoundingModeOption(content, macros)) {
break;
return StemType.ROUNDER;
}
}
// Measure unit option
switch (stem) {
case MEASURE_UNIT:
// The measure unit option is required.
parseMeasureUnitOption(content, macros);
return StemType.OTHER;
}
// Unknown option
throw new SkeletonSyntaxException("Unknown option", content);
}
/////
private static void generateSkeleton(MacroProps macros, StringBuilder sb) {
if (macros.rounder != null) {
generateRoundingValue(macros, sb);
if (macros.notation != null) {
generateNotationValue(macros, sb);
sb.append(' ');
}
if (macros.unit != null) {
generateUnitValue(macros, sb);
sb.append(' ');
}
if (macros.perUnit != null) {
generatePerUnitValue(macros, sb);
sb.append(' ');
}
if (macros.rounder != null) {
generateRoundingValue(macros, sb);
sb.append(' ');
}
if (macros.grouping != null) {
generateGroupingValue(macros, sb);
sb.append(' ');
}
if (macros.integerWidth != null) {
generateIntegerWidthValue(macros, sb);
sb.append(' ');
}
if (macros.symbols != null) {
generateSymbolsValue(macros, sb);
sb.append(' ');
}
if (macros.unitWidth != null) {
generateUnitWidthValue(macros, sb);
sb.append(' ');
}
if (macros.sign != null) {
generateSignValue(macros, sb);
sb.append(' ');
}
if (macros.decimal != null) {
generateDecimalValue(macros, sb);
sb.append(' ');
}
// Remove the trailing space
if (sb.length() > 0) {
@ -303,6 +449,90 @@ class NumberSkeletonImpl {
/////
/**
 * Tries to read {@code content} as an exponent-width option: a leading '+' followed by
 * nothing but 'e' characters. On a match, the current notation (cast to
 * {@code ScientificNotation}) is replaced by the result of
 * {@code withMinExponentDigits(count of 'e')}.
 *
 * @param content the option text to examine
 * @param macros  the properties whose {@code notation} is updated on a match
 * @return {@code true} if the option had this form and was applied, {@code false} otherwise
 */
private static boolean parseExponentWidthOption(CharSequence content, MacroProps macros) {
    if (content.charAt(0) != '+') {
        return false;
    }

    // Walk over the run of 'e' characters that follows the '+'.
    int pos = 1;
    while (pos < content.length() && content.charAt(pos) == 'e') {
        pos++;
    }

    // Anything left over means this is some other option.
    if (pos != content.length()) {
        return false;
    }

    // Every character after the '+' was an 'e'.
    int exponentDigits = pos - 1;

    // Use the public APIs to enforce bounds checking
    ScientificNotation current = (ScientificNotation) macros.notation;
    macros.notation = current.withMinExponentDigits(exponentDigits);
    return true;
}
/**
 * Writes the exponent-width option: a '+' and then whatever {@code appendMultiple} emits for
 * the character 'e' and the count {@code minInt} (presumably 'e' repeated {@code minInt}
 * times; confirm against appendMultiple). {@code maxInt} is accepted but not read here.
 */
private static void generateExponentWidthOption(int minInt, int maxInt, StringBuilder sb) {
sb.append('+');
appendMultiple(sb, 'e', minInt);
}
/**
 * Tries to read {@code content} as an exponent sign-display option by looking it up in the
 * literal stem table. If the lookup yields a {@code SignDisplay}, the current notation (cast
 * to {@code ScientificNotation}) is replaced by the result of
 * {@code withExponentSignDisplay}.
 *
 * @param content the option text to examine
 * @param macros  the properties whose {@code notation} is updated on a match
 * @return {@code true} if the option was a sign-display literal and was applied
 */
private static boolean parseExponentSignOption(CharSequence content, MacroProps macros) {
    Object candidate = skeletonData.stemToValue(content);
    // instanceof is false for null, so a missing stem is rejected here as well.
    if (!(candidate instanceof SignDisplay)) {
        return false;
    }
    ScientificNotation current = (ScientificNotation) macros.notation;
    macros.notation = current.withExponentSignDisplay((SignDisplay) candidate);
    return true;
}
/**
 * Writes the currency option by appending the value of {@code currency.getCurrencyCode()}
 * to {@code sb}.
 */
private static void generateCurrencyOption(Currency currency, StringBuilder sb) {
sb.append(currency.getCurrencyCode());
}
/**
 * Parses the option of the "currency" stem and stores the resulting {@code Currency} in
 * {@code macros.unit}.
 *
 * @param content the option text, passed as the code to {@code Currency.getInstance}
 * @param macros  the properties whose {@code unit} is set
 * @throws SkeletonSyntaxException if {@code Currency.getInstance} rejects the code with an
 *         {@code IllegalArgumentException}; the original exception is kept as the cause
 */
private static void parseCurrencyOption(CharSequence content, MacroProps macros) {
    // Materialize exactly the characters of the option as a String.
    final String isoCode = content.subSequence(0, content.length()).toString();
    final Currency parsed;
    try {
        parsed = Currency.getInstance(isoCode);
    } catch (IllegalArgumentException cause) {
        throw new SkeletonSyntaxException("Invalid currency", content, cause);
    }
    macros.unit = parsed;
}
/**
 * Parses the option of the "measure-unit" stem, which has the form {@code type-subtype}, and
 * stores the matching unit in {@code macros.unit}.
 *
 * <p>The text before the first hyphen is the type; everything after it is the subtype. The
 * unit is found by scanning {@code MeasureUnit.getAvailable(type)} for an equal subtype.
 *
 * @param content the option text
 * @param macros  the properties whose {@code unit} is set on success
 * @throws SkeletonSyntaxException if the option contains no hyphen, or if no available unit
 *         of the given type has the given subtype
 */
private static void parseMeasureUnitOption(CharSequence content, MacroProps macros) {
    // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
    // http://unicode.org/reports/tr35/#Validity_Data
    final int length = content.length();
    int hyphenIndex = -1;
    for (int i = 0; i < length; i++) {
        if (content.charAt(i) == '-') {
            hyphenIndex = i;
            break;
        }
    }
    if (hyphenIndex < 0) {
        throw new SkeletonSyntaxException("Invalid measure unit option", content);
    }

    final String category = content.subSequence(0, hyphenIndex).toString();
    final String unitName = content.subSequence(hyphenIndex + 1, length).toString();
    for (MeasureUnit candidate : MeasureUnit.getAvailable(category)) {
        if (unitName.equals(candidate.getSubtype())) {
            macros.unit = candidate;
            return;
        }
    }

    throw new SkeletonSyntaxException("Unknown measure unit", content);
}
/**
 * Writes the measure-unit option in the form {@code type-subtype}, using the unit's
 * {@code getType()} and {@code getSubtype()} values.
 */
private static void generateMeasureUnitOption(MeasureUnit unit, StringBuilder sb) {
    final String category = unit.getType();
    final String unitName = unit.getSubtype();
    sb.append(category).append("-").append(unitName);
}
/**
 * Parses the option of the "per-measure-unit" stem and stores the unit in
 * {@code macros.perUnit}, leaving {@code macros.unit} as it was before the call.
 *
 * @param content the option text, in the same form accepted by parseMeasureUnitOption
 * @param macros  the properties whose {@code perUnit} is set
 */
private static void parseMeasurePerUnitOption(CharSequence content, MacroProps macros) {
    // A little bit of a hack: save the current unit (numerator), reuse the main measure unit
    // parser (which writes into macros.unit), then move the parsed unit into per-unit and
    // restore the numerator.
    final MeasureUnit savedNumerator = macros.unit;
    parseMeasureUnitOption(content, macros);
    final MeasureUnit denominator = macros.unit;
    macros.perUnit = denominator;
    macros.unit = savedNumerator;
}
private static void parseFractionStem(CharSequence content, MacroProps macros) {
assert content.charAt(0) == '.';
int offset = 1;
@ -412,7 +642,7 @@ class NumberSkeletonImpl {
}
FractionRounder oldRounder = (FractionRounder) macros.rounder;
// A little bit of a hack: parse the option as a digits stem, and extract the min/max sig from
// the new Rounder saved into the macros
// the new Rounder saved into the macros.
parseDigitsStem(content, macros);
Rounder.SignificantRounderImpl intermediate = (Rounder.SignificantRounderImpl) macros.rounder;
if (intermediate.maxSig == -1) {
@ -455,31 +685,134 @@ class NumberSkeletonImpl {
sb.append(mode.toString());
}
private static void parseMeasureUnitOption(CharSequence content, MacroProps macros) {
// NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
// http://unicode.org/reports/tr35/#Validity_Data
int firstHyphen = 0;
while (firstHyphen < content.length() && content.charAt(firstHyphen) != '-') {
firstHyphen++;
private static void parseIntegerWidthOption(CharSequence content, MacroProps macros) {
int offset = 0;
int minInt = 0;
int maxInt;
if (content.charAt(0) == '+') {
maxInt = -1;
offset++;
} else {
maxInt = 0;
}
String type = content.subSequence(0, firstHyphen).toString();
String subType = content.subSequence(firstHyphen + 1, content.length()).toString();
Set<MeasureUnit> units = MeasureUnit.getAvailable(type);
for (MeasureUnit unit : units) {
if (subType.equals(unit.getSubtype())) {
macros.unit = unit;
return;
for (; offset < content.length(); offset++) {
if (content.charAt(offset) == '#') {
maxInt++;
} else {
break;
}
}
throw new SkeletonSyntaxException("Unknown unit", content);
if (offset < content.length()) {
for (; offset < content.length(); offset++) {
if (content.charAt(offset) == '0') {
minInt++;
} else {
break;
}
}
}
if (maxInt != -1) {
maxInt += minInt;
}
if (offset < content.length()) {
throw new SkeletonSyntaxException("Invalid integer width stem", content);
}
// Use the public APIs to enforce bounds checking
if (maxInt == -1) {
macros.integerWidth = IntegerWidth.zeroFillTo(minInt);
} else {
macros.integerWidth = IntegerWidth.zeroFillTo(minInt).truncateAt(maxInt);
}
}
private static void generateMeasureUnitOption(MeasureUnit unit, StringBuilder sb) {
sb.append(unit.getType() + "-" + unit.getSubtype());
/**
 * Writes the integer-width option. When {@code maxInt} is -1 a '+' is written; otherwise
 * '#' is passed to {@code appendMultiple} with the count {@code maxInt - minInt}. In both
 * cases '0' is then passed to {@code appendMultiple} with the count {@code minInt}.
 */
private static void generateIntegerWidthOption(int minInt, int maxInt, StringBuilder sb) {
    final boolean hasMaximum = (maxInt != -1);
    if (hasMaximum) {
        appendMultiple(sb, '#', maxInt - minInt);
    } else {
        sb.append('+');
    }
    appendMultiple(sb, '0', minInt);
}
/**
 * Parses the option of the "numbering-system" stem and stores the resolved
 * {@code NumberingSystem} in {@code macros.symbols}.
 *
 * @param content the option text, used as the name for
 *                {@code NumberingSystem.getInstanceByName}
 * @param macros  the properties whose {@code symbols} is set on success
 * @throws SkeletonSyntaxException if the lookup returns {@code null}
 */
private static void parseNumberingSystemOption(CharSequence content, MacroProps macros) {
    final String name = content.subSequence(0, content.length()).toString();
    final NumberingSystem resolved = NumberingSystem.getInstanceByName(name);
    if (resolved != null) {
        macros.symbols = resolved;
        return;
    }
    throw new SkeletonSyntaxException("Unknown numbering system", content);
}
/**
 * Writes the numbering-system option by appending the value of {@code ns.getName()} to
 * {@code sb}.
 */
private static void generateNumberingSystemOption(NumberingSystem ns, StringBuilder sb) {
sb.append(ns.getName());
}
/////
/**
 * Writes the skeleton token for {@code macros.notation}.
 *
 * <p>If the notation is one of the registered literals, its stem is written as-is. Otherwise:
 * <ul>
 * <li>a {@code CompactNotation} writes nothing (custom compact data is not supported in
 * skeletons);</li>
 * <li>a {@code ScientificNotation} writes "engineering" when its engineering interval is 3,
 * else "scientific", followed by an exponent-width option when the minimum exponent digits
 * exceed 1 and a sign-display option when the exponent sign display is not AUTO;</li>
 * <li>anything else is asserted to be a {@code SimpleNotation} and writes the same
 * "simple-notation" stem that the literal stem table registers for it, so that the output
 * can be parsed back.</li>
 * </ul>
 *
 * @param macros the properties whose {@code notation} is written
 * @param sb     the builder receiving the skeleton text
 */
private static void generateNotationValue(MacroProps macros, StringBuilder sb) {
    // Check for literals
    String literal = skeletonData.valueToStem(macros.notation);
    if (literal != null) {
        sb.append(literal);
        return;
    }

    // Generate the stem
    if (macros.notation instanceof CompactNotation) {
        // Compact notation generated from custom data (not supported in skeleton)
        // The other compact notations are literals
    } else if (macros.notation instanceof ScientificNotation) {
        ScientificNotation impl = (ScientificNotation) macros.notation;
        if (impl.engineeringInterval == 3) {
            sb.append("engineering");
        } else {
            sb.append("scientific");
        }
        if (impl.minExponentDigits > 1) {
            sb.append('/');
            generateExponentWidthOption(impl.minExponentDigits, -1, sb);
        }
        if (impl.exponentSignDisplay != SignDisplay.AUTO) {
            sb.append('/');
            sb.append(skeletonData.valueToStem(impl.exponentSignDisplay));
        }
    } else {
        assert macros.notation instanceof SimpleNotation;
        // Must match the stem registered for Notation.simple() in the literal table;
        // any other spelling would not round-trip through the parser.
        sb.append("simple-notation");
    }
}
/**
 * Writes the skeleton token for {@code macros.unit}.
 *
 * <p>A registered literal is written as-is. Otherwise a {@code Currency} is written as
 * "currency/" plus its option, a {@code NoUnit} trips an assertion (those are expected to be
 * literals) and writes nothing, and any other unit is written as "measure-unit/" plus its
 * option.
 *
 * @param macros the properties whose {@code unit} is written
 * @param sb     the builder receiving the skeleton text
 */
private static void generateUnitValue(MacroProps macros, StringBuilder sb) {
    // Literal stems take priority.
    final String stem = skeletonData.valueToStem(macros.unit);
    if (stem != null) {
        sb.append(stem);
        return;
    }

    if (macros.unit instanceof Currency) {
        sb.append("currency/");
        generateCurrencyOption((Currency) macros.unit, sb);
        return;
    }

    if (macros.unit instanceof NoUnit) {
        // This should be taken care of by the literals.
        assert false;
        return;
    }

    sb.append("measure-unit/");
    generateMeasureUnitOption(macros.unit, sb);
}
/**
 * Writes the skeleton token for {@code macros.perUnit} as "per-measure-unit/" plus the
 * measure-unit option.
 *
 * <p>The type check is made on the per-unit itself, not on the numerator in
 * {@code macros.unit}: a currency or NoUnit numerator combined with a measure-unit
 * denominator is still written, while a per-unit that is a {@code Currency} or {@code NoUnit}
 * trips the assertion and writes nothing.
 *
 * @param macros the properties whose {@code perUnit} is written
 * @param sb     the builder receiving the skeleton text
 */
private static void generatePerUnitValue(MacroProps macros, StringBuilder sb) {
    // Per-units are currently expected to be only MeasureUnits.
    if (macros.perUnit instanceof Currency || macros.perUnit instanceof NoUnit) {
        assert false;
    } else {
        sb.append("per-measure-unit/");
        generateMeasureUnitOption(macros.perUnit, sb);
    }
}
private static void generateRoundingValue(MacroProps macros, StringBuilder sb) {
// Check for literals
String literal = skeletonData.valueToStem(macros.rounder);
@ -527,30 +860,39 @@ class NumberSkeletonImpl {
}
}
private static void generateUnitValue(MacroProps macros, StringBuilder sb) {
// Check for literals
String literal = skeletonData.valueToStem(macros.unit);
if (literal != null) {
sb.append(literal);
return;
}
/** Writes the skeleton token for {@code macros.grouping} via appendExpectedLiteral. */
private static void generateGroupingValue(MacroProps macros, StringBuilder sb) {
appendExpectedLiteral(macros.grouping, sb);
}
// Generate the stem
if (macros.unit instanceof Currency) {
// TODO
} else if (macros.unit instanceof NoUnit) {
// TODO
/**
 * Writes the skeleton token for {@code macros.integerWidth}: the "integer-width/" stem
 * followed by the option generated from the width's {@code minInt} and {@code maxInt}.
 */
private static void generateIntegerWidthValue(MacroProps macros, StringBuilder sb) {
sb.append("integer-width/");
generateIntegerWidthOption(macros.integerWidth.minInt, macros.integerWidth.maxInt, sb);
}
private static void generateSymbolsValue(MacroProps macros, StringBuilder sb) {
if (macros.symbols instanceof NumberingSystem) {
NumberingSystem ns = (NumberingSystem) macros.symbols;
if (ns.getName().equals("latn")) {
sb.append("latin");
} else {
sb.append("numbering-system/");
generateNumberingSystemOption(ns, sb);
}
} else {
sb.append("measure-unit/");
generateMeasureUnitOption(macros.unit, sb);
// DecimalFormatSymbols (not supported in skeleton)
}
}
private static void generateUnitWidthValue(MacroProps macros, StringBuilder sb) {
// There should be a literal.
String literal = skeletonData.valueToStem(macros.unitWidth);
assert literal != null;
sb.append(literal);
appendExpectedLiteral(macros.unitWidth, sb);
}
/** Writes the skeleton token for {@code macros.sign} via appendExpectedLiteral. */
private static void generateSignValue(MacroProps macros, StringBuilder sb) {
appendExpectedLiteral(macros.sign, sb);
}
/** Writes the skeleton token for {@code macros.decimal} via appendExpectedLiteral. */
private static void generateDecimalValue(MacroProps macros, StringBuilder sb) {
appendExpectedLiteral(macros.decimal, sb);
}
/////
@ -566,4 +908,10 @@ class NumberSkeletonImpl {
sb.appendCodePoint(cp);
}
}
/**
 * Looks up the literal stem registered for {@code value} and appends it to {@code sb}.
 * The lookup is asserted to succeed; callers use this only for values that are expected to
 * have a literal stem.
 *
 * @param value the value whose stem is looked up in the literal stem table
 * @param sb    the builder receiving the stem
 */
private static void appendExpectedLiteral(Object value, StringBuilder sb) {
    final String stem = skeletonData.valueToStem(value);
    assert stem != null;
    sb.append(stem);
}
}

View File

@ -38,7 +38,11 @@ public class NumberSkeletonTest {
"@#+",
"round-increment/xxx",
"round-increment/0.1.2",
};
"currency/dummy",
"measure-unit/foo",
"integer-width/xxx",
"integer-width/0+",
"integer-width/+0#", };
for (String cas : cases) {
try {
@ -51,19 +55,33 @@ public class NumberSkeletonTest {
}
@Test
public void stemsRequiringOption() {
String[] cases = {
"round-increment",
"round-increment/",
"round-increment scientific",
};
public void unknownTokens() {
String[] cases = { "measure-unit/foo-bar", "numbering-system/dummy" };
for (String cas : cases) {
try {
NumberFormatter.fromSkeleton(cas);
fail();
} catch (SkeletonSyntaxException expected) {
assertTrue(expected.getMessage(), expected.getMessage().contains("requires an option"));
assertTrue(expected.getMessage(), expected.getMessage().contains("Unknown"));
}
}
}
/**
 * Checks that every stem the parser treats as option-requiring is rejected with a
 * "requires an option" error when the option is missing or empty, whether the stem ends the
 * skeleton or is followed by another token.
 */
@Test
public void stemsRequiringOption() {
    // Keep in sync with the stems for which the parser throws "Stem requires an option".
    String[] stems = {
            "round-increment",
            "currency",
            "measure-unit",
            "per-measure-unit",
            "integer-width",
            "numbering-system", };
    String[] suffixes = { "", "/", " scientific", "/ scientific" };

    for (String stem : stems) {
        for (String suffix : suffixes) {
            String skeleton = stem + suffix;
            try {
                NumberFormatter.fromSkeleton(skeleton);
                // Name the offending skeleton so a failure is diagnosable.
                fail("Expected SkeletonSyntaxException for skeleton: \"" + skeleton + "\"");
            } catch (SkeletonSyntaxException expected) {
                assertTrue(expected.getMessage(),
                        expected.getMessage().contains("requires an option"));
            }
        }
    }
}