ICU-8319 merge MessageFormat 2011q1 work into trunk, from icu4j/branches/markus/msg48 -r 29385:29881
X-SVN-Rev: 29885
This commit is contained in:
parent
73c400496f
commit
fb5332c296
@ -0,0 +1,123 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* created on: 2010aug21
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.demo.messagepattern;
|
||||
|
||||
import com.ibm.icu.text.MessagePattern;
|
||||
|
||||
/**
|
||||
* Demo code for MessagePattern class.
|
||||
* Pretty-prints the list of MessagePattern Parts and uses the MiniMessageFormatter
|
||||
* with a few patterns.
|
||||
* @author Markus Scherer
|
||||
* @since 2010-aug-21
|
||||
*/
|
||||
public final class MessagePatternDemo {
|
||||
private static final String manySpaces=" ";
|
||||
|
||||
private static final void printParts(MessagePattern msg) {
|
||||
String autoQA=msg.autoQuoteApostropheDeep();
|
||||
if(!autoQA.equals(msg.getPatternString())) {
|
||||
System.out.println("autoQA: "+autoQA);
|
||||
}
|
||||
String indent="";
|
||||
StringBuilder explanation=new StringBuilder();
|
||||
MessagePattern.Part prevPart=null;
|
||||
int count=msg.countParts();
|
||||
for(int i=0; i<count; ++i) {
|
||||
explanation.delete(0, 0x7fffffff);
|
||||
MessagePattern.Part part=msg.getPart(i);
|
||||
assert prevPart==null || prevPart.getLimit()<=part.getIndex();
|
||||
String partString=part.toString();
|
||||
MessagePattern.Part.Type type=part.getType();
|
||||
if(type==MessagePattern.Part.Type.MSG_START) {
|
||||
indent=manySpaces.substring(0, part.getValue()*2);
|
||||
}
|
||||
if(part.getLength()>0) {
|
||||
explanation.append("=\"").append(msg.getSubstring(part)).append('"');
|
||||
}
|
||||
if(type.hasNumericValue()) {
|
||||
explanation.append('=').append(msg.getNumericValue(part));
|
||||
}
|
||||
System.out.format("%2d: %s%s%s\n", i, indent, partString, explanation);
|
||||
if(type==MessagePattern.Part.Type.MSG_LIMIT) {
|
||||
int nestingLevel=part.getValue();
|
||||
if(nestingLevel>1) {
|
||||
indent=manySpaces.substring(0, (nestingLevel-1)*2); // outdent
|
||||
} else {
|
||||
indent="";
|
||||
}
|
||||
}
|
||||
prevPart=part;
|
||||
}
|
||||
}
|
||||
|
||||
private static final MessagePattern print(String s) {
|
||||
System.out.println("message: "+s);
|
||||
try {
|
||||
MessagePattern msg=new MessagePattern(s);
|
||||
printParts(msg);
|
||||
return msg;
|
||||
} catch(Exception e) {
|
||||
System.out.println("Exception: "+e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static final void printFormat(String s, Object... args) {
|
||||
MessagePattern msg=print(s);
|
||||
if(msg!=null) {
|
||||
System.out.println(new MiniMessageFormatter(msg).format(new StringBuilder(), args));
|
||||
}
|
||||
}
|
||||
|
||||
private static final void printFormatWithNamedArgs(String s, Object... args) {
|
||||
MessagePattern msg=print(s);
|
||||
if(msg!=null) {
|
||||
System.out.println(new MiniMessageFormatter(msg).format(
|
||||
new StringBuilder(), MiniMessageFormatter.mapFromNameValuePairs(args)));
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] argv) {
|
||||
print("Hello!");
|
||||
print("Hel'lo!");
|
||||
print("Hel'{o");
|
||||
print("Hel'{'o");
|
||||
// double apostrophe inside quoted literal text still encodes a single apostrophe
|
||||
printFormat("a'{bc''de'f");
|
||||
print("a'{bc''de'f{0,number,g'hi''jk'l#}");
|
||||
print("abc{0}def");
|
||||
print("abc{ arg }def");
|
||||
print("abc{1}def{arg}ghi");
|
||||
print("abc{2, number}ghi{3, select, xx {xxx} other {ooo}} xyz");
|
||||
print("abc{gender,select,"+
|
||||
"other{His name is {person,XML,<entry name=\"PERSON\">{$PERSON}</entry>}.}}xyz");
|
||||
print("abc{num_people, plural, offset:17 few{fff} other {oooo}}xyz");
|
||||
print("abc{ num , plural , offset: 2 =1 {1} =-1 {-1} =3.14 {3.14} other {oo} }xyz");
|
||||
print("I don't {a,plural,other{w'{'on't #'#'}} and "+
|
||||
"{b,select,other{shan't'}'}} '{'''know'''}' and "+
|
||||
"{c,choice,0#can't'|'}"+
|
||||
"{z,number,#'#'###.00'}'}.");
|
||||
print("a_{0,choice,-∞ #-inf| 5≤ five | 99 # ninety'|'nine }_z");
|
||||
print("a_{0,plural,other{num=#'#'=#'#'={1,number,##}!}}_z");
|
||||
print("}}}{0}}"); // yes, unmatched '}' are ok in ICU MessageFormat
|
||||
printFormat("Hello {0}!", "Alice");
|
||||
String msg="++{0, select, female{{1} calls you her friend}"+
|
||||
"other{{1} calls you '{their}' friend}"+
|
||||
"male{{1} calls you his friend}}--";
|
||||
printFormat(msg, "female", "Alice");
|
||||
printFormat(msg, "male", "Bob");
|
||||
printFormat(msg, "unknown", "sushifan3");
|
||||
msg="_'__{gender, select, female{Her n'ame is {person_name}.}"+
|
||||
"other{His n'ame is {person_name}.}}__'_";
|
||||
printFormatWithNamedArgs(msg, "gender", "female", "person_name", "Alice");
|
||||
printFormatWithNamedArgs(msg, "gender", "male", "person_name", "Bob");
|
||||
}
|
||||
}
|
@ -0,0 +1,186 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* created on: 2010aug21
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.demo.messagepattern;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.text.MessagePattern;
|
||||
import com.ibm.icu.text.MessagePattern.ArgType;
|
||||
import com.ibm.icu.text.MessagePattern.Part;
|
||||
import com.ibm.icu.util.Freezable;
|
||||
|
||||
/**
|
||||
* Mini message formatter for a small subset of the ICU MessageFormat syntax.
|
||||
* Supports only string substitution and select formatting.
|
||||
* @author Markus Scherer
|
||||
* @since 2010-aug-21
|
||||
*/
|
||||
public final class MiniMessageFormatter implements Freezable<MiniMessageFormatter> {
|
||||
public MiniMessageFormatter() {
|
||||
this.msg=new MessagePattern();
|
||||
}
|
||||
|
||||
public MiniMessageFormatter(MessagePattern msg) {
|
||||
this.msg=(MessagePattern)msg.clone();
|
||||
}
|
||||
|
||||
public MiniMessageFormatter(String msg) {
|
||||
this.msg=new MessagePattern(msg);
|
||||
}
|
||||
|
||||
public MiniMessageFormatter applyPattern(String msg) {
|
||||
this.msg.parse(msg);
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getPatternString() {
|
||||
return msg.getPatternString();
|
||||
}
|
||||
|
||||
public boolean hasNamedArguments() {
|
||||
return msg.hasNamedArguments();
|
||||
}
|
||||
|
||||
public boolean hasNumberedArguments() {
|
||||
return msg.hasNumberedArguments();
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats the parsed message with positional arguments.
|
||||
* Supports only string substitution (e.g., {3}) and select format.
|
||||
* @param dest gets the formatted message appended
|
||||
* @param args positional arguments
|
||||
* @return dest
|
||||
*/
|
||||
public Appendable format(Appendable dest, Object... args) {
|
||||
if(msg.hasNamedArguments()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Formatting message with named arguments using positional argument values.");
|
||||
}
|
||||
format(0, dest, args, null);
|
||||
return dest;
|
||||
}
|
||||
|
||||
public static final String format(String msg, Object... args) {
|
||||
return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), args).toString();
|
||||
}
|
||||
|
||||
public Appendable format(Appendable dest, Map<String, Object> argsMap) {
|
||||
if(msg.hasNumberedArguments()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Formatting message with numbered arguments using named argument values.");
|
||||
}
|
||||
format(0, dest, null, argsMap);
|
||||
return dest;
|
||||
}
|
||||
|
||||
public static final String format(String msg, Map<String, Object> argsMap) {
|
||||
return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), argsMap).toString();
|
||||
}
|
||||
|
||||
private int format(int msgStart, Appendable dest, Object[] args, Map<String, Object> argsMap) {
|
||||
try {
|
||||
String msgString=msg.getPatternString();
|
||||
int prevIndex=msg.getPart(msgStart).getLimit();
|
||||
for(int i=msgStart+1;; ++i) {
|
||||
Part part=msg.getPart(i);
|
||||
Part.Type type=part.getType();
|
||||
int index=part.getIndex();
|
||||
dest.append(msgString, prevIndex, index);
|
||||
if(type==Part.Type.MSG_LIMIT) {
|
||||
return i;
|
||||
}
|
||||
if(type==Part.Type.SKIP_SYNTAX || type==Part.Type.INSERT_CHAR) {
|
||||
prevIndex=part.getLimit();
|
||||
continue;
|
||||
}
|
||||
assert type==Part.Type.ARG_START : "Unexpected Part "+part+" in parsed message.";
|
||||
int argLimit=msg.getLimitPartIndex(i);
|
||||
ArgType argType=part.getArgType();
|
||||
part=msg.getPart(++i);
|
||||
Object arg;
|
||||
if(args!=null) {
|
||||
try {
|
||||
arg=args[part.getValue()]; // args[ARG_NUMBER]
|
||||
} catch(IndexOutOfBoundsException e) {
|
||||
throw new IndexOutOfBoundsException(
|
||||
"No argument at index "+part.getValue());
|
||||
}
|
||||
} else {
|
||||
arg=argsMap.get(msg.getSubstring(part)); // args[ARG_NAME]
|
||||
if(arg==null) {
|
||||
throw new IndexOutOfBoundsException(
|
||||
"No argument for name "+msg.getSubstring(part));
|
||||
}
|
||||
}
|
||||
String argValue=arg.toString();
|
||||
++i;
|
||||
if(argType==ArgType.NONE) {
|
||||
dest.append(argValue);
|
||||
} else if(argType==ArgType.SELECT) {
|
||||
// Similar to SelectFormat.findSubMessage().
|
||||
int subMsgStart=0;
|
||||
for(;; ++i) { // (ARG_SELECTOR, message) pairs until ARG_LIMIT
|
||||
part=msg.getPart(i++);
|
||||
if(part.getType()==Part.Type.ARG_LIMIT) {
|
||||
assert subMsgStart!=0; // The parser made sure this is the case.
|
||||
break;
|
||||
// else: part is an ARG_SELECTOR followed by a message
|
||||
} else if(msg.partSubstringMatches(part, argValue)) {
|
||||
// keyword matches
|
||||
subMsgStart=i;
|
||||
break;
|
||||
} else if(subMsgStart==0 && msg.partSubstringMatches(part, "other")) {
|
||||
subMsgStart=i;
|
||||
}
|
||||
i=msg.getLimitPartIndex(i);
|
||||
}
|
||||
format(subMsgStart, dest, args, argsMap);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unsupported argument type "+argType);
|
||||
}
|
||||
prevIndex=msg.getPart(argLimit).getLimit();
|
||||
i=argLimit;
|
||||
}
|
||||
} catch(IOException e) { // Appendable throws IOException
|
||||
throw new RuntimeException(e); // We do not want a throws clause.
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Presents an array of (String, Object) pairs as a Map.
|
||||
* Only for temporary use for formatting with named arguments.
|
||||
*/
|
||||
public static Map<String, Object> mapFromNameValuePairs(Object[] args) {
|
||||
HashMap<String, Object> argsMap = new HashMap<String, Object>();
|
||||
for(int i=0; i<args.length; i+=2) {
|
||||
argsMap.put((String)args[i], args[i+1]);
|
||||
}
|
||||
return argsMap;
|
||||
}
|
||||
|
||||
public MiniMessageFormatter cloneAsThawed() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
public MiniMessageFormatter freeze() {
|
||||
msg.freeze();
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isFrozen() {
|
||||
return msg.isFrozen();
|
||||
}
|
||||
|
||||
private final MessagePattern msg;
|
||||
}
|
@ -22,6 +22,12 @@ com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
|
||||
#
|
||||
com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
|
||||
|
||||
# Sets the default MessageFormat apostrophe-quoting behavior.
|
||||
# See the com.ibm.icu.text.MessagePattern.ApostropheMode enum documentation.
|
||||
# Values: DOUBLE_OPTIONAL or DOUBLE_REQUIRED.
|
||||
# This is new in ICU 4.8.
|
||||
# DOUBLE_OPTIONAL is the ICU default behavior.
|
||||
com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
|
||||
|
||||
#
|
||||
# [Internal Use Only]
|
||||
|
264
icu4j/main/classes/core/src/com/ibm/icu/impl/PatternProps.java
Normal file
264
icu4j/main/classes/core/src/com/ibm/icu/impl/PatternProps.java
Normal file
@ -0,0 +1,264 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* created on: 2011feb25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
/**
|
||||
* Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
|
||||
* Hardcodes these properties, does not load data, does not depend on other ICU classes.
|
||||
* <p>
|
||||
* Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
|
||||
* and both properties only include BMP code points (no supplementary ones).
|
||||
* Pattern_Syntax includes some unassigned code points.
|
||||
* <p>
|
||||
* [:Pattern_White_Space:] =
|
||||
* [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
|
||||
* <p>
|
||||
* [:Pattern_Syntax:] =
|
||||
* [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
|
||||
* \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
|
||||
* \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
|
||||
* \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
|
||||
* \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
|
||||
* @author mscherer
|
||||
*/
|
||||
public final class PatternProps {
    /**
     * Tests for the Pattern_Syntax property using the hardcoded tables.
     *
     * @param c code point to test
     * @return true if c is a Pattern_Syntax code point.
     */
    public static boolean isSyntax(int c) {
        if(c<0) {
            return false;
        } else if(c<=0xff) {
            return latin1[c]==3;
        } else if(c<0x2010) {
            return false;
        } else if(c<=0x3030) {
            // One index byte per 32 code points selects a 32-bit word; test the bit for c.
            int bits=syntax2000[index2000[(c-0x2000)>>5]];
            return ((bits>>(c&0x1f))&1)!=0;
        } else if(0xfd3e<=c && c<=0xfe46) {
            // Only FD3E, FD3F, FE45, FE46 in this range.
            return c<=0xfd3f || 0xfe45<=c;
        } else {
            return false;
        }
    }

    /**
     * Tests for either of the two Pattern properties using the hardcoded tables.
     *
     * @param c code point to test
     * @return true if c is a Pattern_Syntax or Pattern_White_Space code point.
     */
    public static boolean isSyntaxOrWhiteSpace(int c) {
        if(c<0) {
            return false;
        } else if(c<=0xff) {
            return latin1[c]!=0;
        } else if(c<0x200e) {
            return false;
        } else if(c<=0x3030) {
            int bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
            return ((bits>>(c&0x1f))&1)!=0;
        } else if(0xfd3e<=c && c<=0xfe46) {
            // Only FD3E, FD3F, FE45, FE46 in this range.
            return c<=0xfd3f || 0xfe45<=c;
        } else {
            return false;
        }
    }

    /**
     * Tests for the Pattern_White_Space property.
     *
     * @param c code point to test
     * @return true if c is a Pattern_White_Space character.
     */
    public static boolean isWhiteSpace(int c) {
        if(c<0) {
            return false;
        } else if(c<=0xff) {
            return latin1[c]==5;
        } else if(0x200e<=c && c<=0x2029) {
            // Only 200E, 200F, 2028, 2029 in this range.
            return c<=0x200f || 0x2028<=c;
        } else {
            return false;
        }
    }

    /**
     * Skips over Pattern_White_Space starting at index i of the CharSequence.
     *
     * @param s the text
     * @param i the index at which to start
     * @return The smallest index at or after i with a non-white space character.
     */
    public static int skipWhiteSpace(CharSequence s, int i) {
        int length=s.length();
        while(i<length && isWhiteSpace(s.charAt(i))) {
            ++i;
        }
        return i;
    }

    /**
     * Removes leading and trailing Pattern_White_Space.
     *
     * @param s the string to trim
     * @return s except with leading and trailing Pattern_White_Space removed.
     */
    public static String trimWhiteSpace(String s) {
        int limit=s.length();
        // Fast path: nothing to trim, return the same String object.
        if(limit==0 || (!isWhiteSpace(s.charAt(0)) && !isWhiteSpace(s.charAt(limit-1)))) {
            return s;
        }
        int start=0;
        while(start<limit && isWhiteSpace(s.charAt(start))) {
            ++start;
        }
        if(start<limit) {
            // There is non-white space at start; we will not move limit below that,
            // so we need not test start<limit in the loop.
            while(isWhiteSpace(s.charAt(limit-1))) {
                --limit;
            }
        }
        return s.substring(start, limit);
    }

    /**
     * Tests whether the CharSequence contains a "pattern identifier", that is,
     * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
     *
     * @param s the text to test
     * @return true if s is not empty and there are no Pattern_White_Space
     *         or Pattern_Syntax characters in s.
     */
    public static boolean isIdentifier(CharSequence s) {
        int limit=s.length();
        if(limit==0) {
            return false;
        }
        for(int i=0; i<limit; ++i) {
            if(isSyntaxOrWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the CharSequence contains a "pattern identifier", that is,
     * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
     *
     * @param s the text to test
     * @param start the index of the first character to test
     * @param limit the index after the last character to test
     * @return true if start&lt;limit and there are no Pattern_White_Space or
     *         Pattern_Syntax characters in s between start and (exclusive) limit.
     */
    public static boolean isIdentifier(CharSequence s, int start, int limit) {
        if(start>=limit) {
            return false;
        }
        for(int i=start; i<limit; ++i) {
            if(isSyntaxOrWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Skips over a "pattern identifier" starting at index i of the CharSequence.
     *
     * @param s the text
     * @param i the index at which to start
     * @return The smallest index at or after i with
     *         a Pattern_White_Space or Pattern_Syntax character.
     */
    public static int skipIdentifier(CharSequence s, int i) {
        int length=s.length();
        while(i<length && !isSyntaxOrWhiteSpace(s.charAt(i))) {
            ++i;
        }
        return i;
    }

    /*
     * One byte per Latin-1 character.
     * Bit 0 is set if either Pattern property is true,
     * bit 1 if Pattern_Syntax is true,
     * bit 2 if Pattern_White_Space is true.
     * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
     */
    private static final byte[] latin1=new byte[] {  // 256
        // WS: 9..D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // WS: 20  Syntax: 21..2F
        5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // Syntax: 3A..40
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Syntax: 5B..5E
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
        // Syntax: 60
        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Syntax: 7B..7E
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
        // WS: 85
        0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Syntax: A1..A7, A9, AB, AC, AE
        0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
        // Syntax: B0, B1, B6, BB, BF
        3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Syntax: D7
        0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Syntax: F7
        0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /*
     * One byte per 32 characters from U+2000..U+303F indexing into
     * a small table of 32-bit data words.
     * The first two data words are all-zeros and all-ones.
     */
    private static final byte[] index2000=new byte[] {  // 130
        2, 3, 4, 0, 0, 0, 0, 0,  // 20xx
        0, 0, 0, 0, 5, 1, 1, 1,  // 21xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 22xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 23xx
        1, 1, 1, 0, 0, 0, 0, 0,  // 24xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 25xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 26xx
        1, 1, 1, 6, 7, 1, 1, 1,  // 27xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 28xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 29xx
        1, 1, 1, 1, 1, 1, 1, 1,  // 2Axx
        1, 1, 1, 1, 1, 1, 1, 1,  // 2Bxx
        0, 0, 0, 0, 0, 0, 0, 0,  // 2Cxx
        0, 0, 0, 0, 0, 0, 0, 0,  // 2Dxx
        1, 1, 1, 1, 0, 0, 0, 0,  // 2Exx
        0, 0, 0, 0, 0, 0, 0, 0,  // 2Fxx
        8, 9  // 3000..303F
    };

    /*
     * One 32-bit integer per 32 characters. Ranges of all-false and all-true
     * are mapped to the first two values, other ranges map to appropriate bit patterns.
     */
    private static final int[] syntax2000=new int[] {
        0,
        -1,
        0xffff0000,  // 2: 2010..201F
        0x7fff00ff,  // 3: 2020..2027, 2030..203E
        0x7feffffe,  // 4: 2041..2053, 2055..205E
        0xffff0000,  // 5: 2190..219F
        0x003fffff,  // 6: 2760..2775
        0xfff00000,  // 7: 2794..279F
        0xffffff0e,  // 8: 3001..3003, 3008..301F
        0x00010001   // 9: 3020, 3030
    };

    /*
     * Same as syntax2000, but with additional bits set for the
     * Pattern_White_Space characters 200E 200F 2028 2029.
     */
    private static final int[] syntaxOrWhiteSpace2000=new int[] {
        0,
        -1,
        0xffffc000,  // 2: 200E..201F
        0x7fff03ff,  // 3: 2020..2029, 2030..203E
        0x7feffffe,  // 4: 2041..2053, 2055..205E
        0xffff0000,  // 5: 2190..219F
        0x003fffff,  // 6: 2760..2775
        0xfff00000,  // 7: 2794..279F
        0xffffff0e,  // 8: 3001..3003, 3008..301F
        0x00010001   // 9: 3020, 3030
    };
}
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -737,18 +737,11 @@ public final class UCharacterProperty
|
||||
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
* @param c codepoint to check
|
||||
* @return true if c is an ICU white space
|
||||
* @deprecated use PatternProps.isWhiteSpace(c)
|
||||
*/
|
||||
public static boolean isRuleWhiteSpace(int c)
|
||||
{
|
||||
/* "white space" in the sense of ICU rule parsers
|
||||
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
|
||||
See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
|
||||
Equivalent to test for Pattern_White_Space Unicode property.
|
||||
*/
|
||||
return (c >= 0x0009 && c <= 0x2029 &&
|
||||
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||
|
||||
c == 0x200E || c == 0x200F || c >= 0x2028));
|
||||
return PatternProps.isWhiteSpace(c);
|
||||
}
|
||||
|
||||
/**
|
||||
|
File diff suppressed because it is too large
Load Diff
1643
icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java
Normal file
1643
icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,19 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2007-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 2007-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
@ -22,7 +21,7 @@ import com.ibm.icu.util.ULocale;
|
||||
* <code>PluralFormat</code> supports the creation of internationalized
|
||||
* messages with plural inflection. It is based on <i>plural
|
||||
* selection</i>, i.e. the caller specifies messages for each
|
||||
* plural case that can appear in the users language and the
|
||||
* plural case that can appear in the user's language and the
|
||||
* <code>PluralFormat</code> selects the appropriate message based on
|
||||
* the number.
|
||||
* </p>
|
||||
@ -36,7 +35,7 @@ import com.ibm.icu.util.ULocale;
|
||||
* each message and selects the message whose interval contains a
|
||||
* given number. This can only handle a finite number of
|
||||
* intervals. But in some languages, like Polish, one plural case
|
||||
* applies to infinitely many intervals (e.g., paucal applies to
|
||||
* applies to infinitely many intervals (e.g., the paucal case applies to
|
||||
* numbers ending with 2, 3, or 4 except those ending with 12, 13, or
|
||||
* 14). Thus <code>ChoiceFormat</code> is not adequate.
|
||||
* </p><p>
|
||||
@ -47,17 +46,20 @@ import com.ibm.icu.util.ULocale;
|
||||
* conditions for a plural case than just a single interval. These plural
|
||||
* rules define both what plural cases exist in a language, and to
|
||||
* which numbers these cases apply.
|
||||
* <li>It provides predefined plural rules for many locales. Thus, the programmer
|
||||
* need not worry about the plural cases of a language. On the flip side,
|
||||
* the localizer does not have to specify the plural cases; he can simply
|
||||
* <li>It provides predefined plural rules for many languages. Thus, the programmer
|
||||
* need not worry about the plural cases of a language and
|
||||
* does not have to define the plural cases; they can simply
|
||||
* use the predefined keywords. The whole plural formatting of messages can
|
||||
* be done using localized patterns from resource bundles. For predefined plural
|
||||
* rules, see CLDR <i>Language Plural Rules</i> page at
|
||||
* rules, see the CLDR <i>Language Plural Rules</i> page at
|
||||
* http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
|
||||
* </ul>
|
||||
* </p>
|
||||
* <h4>Usage of <code>PluralFormat</code></h4>
|
||||
* <p>
|
||||
* <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
|
||||
* with a <code>plural</code> argument type,
|
||||
* rather than using a stand-alone <code>PluralFormat</code>.
|
||||
* </p><p>
|
||||
* This discussion assumes that you use <code>PluralFormat</code> with
|
||||
* a predefined set of plural rules. You can create one using one of
|
||||
* the constructors that takes a <code>ULocale</code> object. To
|
||||
@ -70,72 +72,46 @@ import com.ibm.icu.util.ULocale;
|
||||
* <h5>Patterns and Their Interpretation</h5>
|
||||
* <p>
|
||||
* The pattern text defines the message output for each plural case of the
|
||||
* used locale. The pattern is a sequence of
|
||||
* <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
|
||||
* space characters. Each clause assigns the message <code><i>message</i></code>
|
||||
* to the plural case identified by <code><i>caseKeyword</i></code>.
|
||||
* specified locale. Syntax:
|
||||
* <blockquote><pre>
|
||||
* pluralStyle = [offsetValue] (selector '{' message '}')+
|
||||
* offsetValue = "offset:" number
|
||||
* selector = explicitValue | keyword
|
||||
* explicitValue = '=' number // adjacent, no white space in between
|
||||
* keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
|
||||
* message: see {@link MessageFormat}
|
||||
* </pre></blockquote>
|
||||
* Pattern_White_Space between syntax elements is ignored, except
|
||||
* between the {curly braces} and their sub-message,
|
||||
* and between the '=' and the number of an explicitValue.
|
||||
*
|
||||
* </p><p>
|
||||
* There are 6 predefined case keywords in ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* 'other'. You always have to define a message text for the default plural case
|
||||
* "<code>other</code>" which is contained in every rule set. If the plural
|
||||
* rules of the <code>PluralFormat</code> object do not contain a plural case
|
||||
* identified by <code><i>caseKeyword</i></code>, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
* "<code>other</code>" which is contained in every rule set.
|
||||
* If you do not specify a message text for a particular plural case, the
|
||||
* message text of the plural case "<code>other</code>" gets assigned to this
|
||||
* plural case. If you specify more than one message for the same plural case,
|
||||
* an <code>IllegalArgumentException</code> is thrown.
|
||||
* <br/>
|
||||
* Spaces between <code><i>caseKeyword</i></code> and
|
||||
* <code><i>message</i></code> will be ignored; spaces within
|
||||
* <code><i>message</i></code> will be preserved.
|
||||
* plural case.
|
||||
* </p><p>
|
||||
* The message text for a particular plural case may contain other message
|
||||
* format patterns. <code>PluralFormat</code> preserves these so that you
|
||||
* can use the strings produced by <code>PluralFormat</code> with other
|
||||
* formatters. If you are using <code>PluralFormat</code> inside a
|
||||
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
|
||||
* automatically evaluate the resulting format pattern.<br/>
|
||||
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
|
||||
* in message texts to define a nested format pattern.<br/>
|
||||
* The pound sign (<code>#</code>) will be interpreted as the number placeholder
|
||||
* in the message text, if it is not contained in curly braces (to preserve
|
||||
* <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
|
||||
* replace each of those pound signs by the number passed to the
|
||||
* <code>format()</code> method. It will be formatted using a
|
||||
* When formatting, the input number is first matched against the explicitValue clauses.
|
||||
* If there is no exact-number match, then a keyword is selected by calling
|
||||
* the <code>PluralRules</code> with the input number <em>minus the offset</em>.
|
||||
* (The offset defaults to 0 if it is omitted from the pattern string.)
|
||||
* If there is no clause with that keyword, then the "other" clause is returned.
|
||||
* </p><p>
|
||||
* An unquoted pound sign (<code>#</code>) in the selected sub-message
|
||||
* itself (i.e., outside of arguments nested in the sub-message)
|
||||
* is replaced by the input number minus the offset.
|
||||
* The number-minus-offset value is formatted using a
|
||||
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
|
||||
* need special number formatting, you have to explicitly specify a
|
||||
* <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
|
||||
* </p>
|
||||
* Example
|
||||
* <pre>
|
||||
* MessageFormat msgFmt = new MessageFormat("{0, plural, " +
|
||||
* "one{{0, number, C''''est #,##0.0# fichier}} " +
|
||||
* "other {Ce sont # fichiers}} dans la liste.",
|
||||
* new ULocale("fr"));
|
||||
* Object args[] = {new Long(0)};
|
||||
* System.out.println(msgFmt.format(args));
|
||||
* args = {new Long(3)};
|
||||
* System.out.println(msgFmt.format(args));
|
||||
* </pre>
|
||||
* Produces the output:<br />
|
||||
* <code>C'est 0,0 fichier dans la liste.</code><br />
|
||||
* <code>Ce sont 3 fichiers dans la liste."</code>
|
||||
* <p>
|
||||
* <strong>Note:</strong><br />
|
||||
* Currently <code>PluralFormat</code>
|
||||
* does not make use of quotes like <code>MessageFormat</code>.
|
||||
* If you use plural format strings with <code>MessageFormat</code> and want
|
||||
* to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
|
||||
* <code>MessageFormat</code> unquotes this pattern and passes the unquoted
|
||||
* pattern to <code>PluralFormat</code>. It's a bit trickier if you use
|
||||
* nested formats that do quoting. In the example above, we wanted to insert
|
||||
* "<code>'</code>" in the number format pattern. Since
|
||||
* <code>NumberFormat</code> supports quotes, we had to insert
|
||||
* "<code>''</code>". But since <code>MessageFormat</code> unquotes the
|
||||
* pattern before it gets passed to <code>PluralFormat</code>, we have to
|
||||
* double these quotes, i.e. write "<code>''''</code>".
|
||||
* need special number formatting, you have to use a <code>MessageFormat</code>
|
||||
* and explicitly specify a <code>NumberFormat</code> argument.
|
||||
* <strong>Note:</strong> That argument is formatted without subtracting the offset!
|
||||
* If you need a custom format and have a non-zero offset, then you need to pass the
|
||||
* number-minus-offset value as a separate parameter.
|
||||
* </p>
|
||||
* For a usage example, see the {@link MessageFormat} class documentation.
|
||||
*
|
||||
* <h4>Defining Custom Plural Rules</h4>
|
||||
* <p>If you need to use <code>PluralFormat</code> with custom rules, you can
|
||||
* create a <code>PluralRules</code> object and pass it to
|
||||
@ -153,35 +129,51 @@ import com.ibm.icu.util.ULocale;
|
||||
public class PluralFormat extends UFormat {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/*
|
||||
/**
|
||||
* The locale used for standard number formatting and getting the predefined
|
||||
* plural rules (if they were not defined explicitly).
|
||||
* @serial
|
||||
*/
|
||||
private ULocale ulocale = null;
|
||||
|
||||
/*
|
||||
/**
|
||||
* The plural rules used for plural selection.
|
||||
* @serial
|
||||
*/
|
||||
private PluralRules pluralRules = null;
|
||||
|
||||
/*
|
||||
/**
|
||||
* The applied pattern string.
|
||||
* @serial
|
||||
*/
|
||||
private String pattern = null;
|
||||
|
||||
/*
|
||||
/**
|
||||
* The MessagePattern which contains the parsed structure of the pattern string.
|
||||
*/
|
||||
transient private MessagePattern msgPattern;
|
||||
|
||||
/**
|
||||
* Obsolete with use of MessagePattern since ICU 4.8. Used to be:
|
||||
* The format messages for each plural case. It is a mapping:
|
||||
* <code>String</code>(plural case keyword) --> <code>String</code>
|
||||
* (message for this plural case).
|
||||
* @serial
|
||||
*/
|
||||
private Map<String, String> parsedValues = null;
|
||||
|
||||
/*
|
||||
/**
|
||||
* This <code>NumberFormat</code> is used for the standard formatting of
|
||||
* the number inserted into the message.
|
||||
* @serial
|
||||
*/
|
||||
private NumberFormat numberFormat = null;
|
||||
|
||||
/**
|
||||
* The offset to subtract before invoking plural rules.
|
||||
*/
|
||||
transient private double offset = 0;
|
||||
|
||||
/**
|
||||
* Creates a new <code>PluralFormat</code> for the default locale.
|
||||
* This locale will be used to get the set of plural rules and for standard
|
||||
@ -306,106 +298,40 @@ public class PluralFormat extends UFormat {
|
||||
ulocale = locale;
|
||||
pluralRules = (rules == null) ? PluralRules.forLocale(ulocale)
|
||||
: rules;
|
||||
parsedValues = null;
|
||||
pattern = null;
|
||||
resetPattern();
|
||||
numberFormat = NumberFormat.getInstance(ulocale);
|
||||
}
|
||||
|
||||
private void resetPattern() {
|
||||
pattern = null;
|
||||
if(msgPattern != null) {
|
||||
msgPattern.clear();
|
||||
}
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the pattern used by this plural format.
|
||||
* The method parses the pattern and creates a map of format strings
|
||||
* for the plural rules.
|
||||
* Patterns and their interpretation are specified in the class description.
|
||||
*
|
||||
* @param pttrn the pattern for this plural format.
|
||||
* @param pattern the pattern for this plural format.
|
||||
* @throws IllegalArgumentException if the pattern is invalid.
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
public void applyPattern(String pttrn) {
|
||||
pttrn = pttrn.trim();
|
||||
|
||||
this.pattern = pttrn;
|
||||
int braceStack = 0;
|
||||
Set<String> ruleNames = pluralRules.getKeywords();
|
||||
parsedValues = new HashMap<String, String>();
|
||||
|
||||
// Format string has to include keywords.
|
||||
// states:
|
||||
// 0: Reading keyword.
|
||||
// 1: Reading value for preceding keyword.
|
||||
int state = 0;
|
||||
StringBuilder token = new StringBuilder();
|
||||
String currentKeyword = null;
|
||||
boolean readSpaceAfterKeyword = false;
|
||||
for (int i = 0; i < pttrn.length(); ++i) {
|
||||
char ch = pttrn.charAt(i);
|
||||
switch (state) {
|
||||
case 0: // Reading value.
|
||||
if (token.length() == 0) {
|
||||
readSpaceAfterKeyword = false;
|
||||
}
|
||||
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
|
||||
if (token.length() > 0) {
|
||||
readSpaceAfterKeyword = true;
|
||||
}
|
||||
// Skip leading and trailing whitespaces.
|
||||
break;
|
||||
}
|
||||
if (ch == '{') { // End of keyword definition reached.
|
||||
currentKeyword = token.toString().toLowerCase(
|
||||
Locale.ENGLISH);
|
||||
if (!ruleNames.contains(currentKeyword)) {
|
||||
parsingFailure("Malformed formatting expression. "
|
||||
+ "Unknown keyword \"" + currentKeyword
|
||||
+ "\" at position " + i + ".");
|
||||
}
|
||||
if (parsedValues.get(currentKeyword) != null) {
|
||||
parsingFailure("Malformed formatting expression. "
|
||||
+ "Text for case \"" + currentKeyword
|
||||
+ "\" at position " + i + " already defined!");
|
||||
}
|
||||
token.delete(0, token.length());
|
||||
braceStack++;
|
||||
state = 1;
|
||||
break;
|
||||
}
|
||||
if (readSpaceAfterKeyword) {
|
||||
parsingFailure("Malformed formatting expression. " +
|
||||
"Invalid keyword definition. Character \"" + ch +
|
||||
"\" at position " + i + " not expected!");
|
||||
}
|
||||
token.append(ch);
|
||||
break;
|
||||
case 1: // Reading value.
|
||||
switch (ch) {
|
||||
case '{':
|
||||
braceStack++;
|
||||
token.append(ch);
|
||||
break;
|
||||
case '}':
|
||||
braceStack--;
|
||||
if (braceStack == 0) { // End of value reached.
|
||||
parsedValues.put(currentKeyword, token.toString());
|
||||
token.delete(0, token.length());
|
||||
state = 0;
|
||||
} else if (braceStack < 0) {
|
||||
parsingFailure("Malformed formatting expression. "
|
||||
+ "Braces do not match.");
|
||||
} else { // braceStack > 0
|
||||
token.append(ch);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
token.append(ch);
|
||||
}
|
||||
break;
|
||||
} // switch state
|
||||
} // for loop.
|
||||
if (braceStack != 0) {
|
||||
parsingFailure(
|
||||
"Malformed formatting expression. Braces do not match.");
|
||||
public void applyPattern(String pattern) {
|
||||
this.pattern = pattern;
|
||||
if (msgPattern == null) {
|
||||
msgPattern = new MessagePattern();
|
||||
}
|
||||
try {
|
||||
msgPattern.parsePluralStyle(pattern);
|
||||
offset = msgPattern.getPluralOffset(0);
|
||||
} catch(RuntimeException e) {
|
||||
resetPattern();
|
||||
throw e;
|
||||
}
|
||||
checkSufficientDefinition();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -418,6 +344,129 @@ public class PluralFormat extends UFormat {
|
||||
return pattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
|
||||
* @param pattern A MessagePattern.
|
||||
* @param partIndex the index of the first PluralFormat argument style part.
|
||||
* @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
|
||||
* @param number a number to be matched to one of the PluralFormat argument's explicit values,
|
||||
* or mapped via the PluralSelector.
|
||||
* @return the sub-message start part index.
|
||||
*/
|
||||
/*package*/ static int findSubMessage(
|
||||
MessagePattern pattern, int partIndex,
|
||||
PluralSelector selector, double number) {
|
||||
int count=pattern.countParts();
|
||||
double offset;
|
||||
MessagePattern.Part part=pattern.getPart(partIndex);
|
||||
if(part.getType().hasNumericValue()) {
|
||||
offset=pattern.getNumericValue(part);
|
||||
++partIndex;
|
||||
} else {
|
||||
offset=0;
|
||||
}
|
||||
// The keyword is null until we need to match against non-explicit, not-"other" value.
|
||||
// Then we get the keyword from the selector.
|
||||
// (In other words, we never call the selector if we match against an explicit value,
|
||||
// or if the only non-explicit keyword is "other".)
|
||||
String keyword=null;
|
||||
// When we find a match, we set msgStart>0 and also set this boolean to true
|
||||
// to avoid matching the keyword again (duplicates are allowed)
|
||||
// while we continue to look for an explicit-value match.
|
||||
boolean haveKeywordMatch=false;
|
||||
// msgStart is 0 until we find any appropriate sub-message.
|
||||
// We remember the first "other" sub-message if we have not seen any
|
||||
// appropriate sub-message before.
|
||||
// We remember the first matching-keyword sub-message if we have not seen
|
||||
// one of those before.
|
||||
// (The parser allows [does not check for] duplicate keywords.
|
||||
// We just have to make sure to take the first one.)
|
||||
// We avoid matching the keyword twice by also setting haveKeywordMatch=true
|
||||
// at the first keyword match.
|
||||
// We keep going until we find an explicit-value match or reach the end of the plural style.
|
||||
int msgStart=0;
|
||||
// Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
|
||||
// until ARG_LIMIT or end of plural-only pattern.
|
||||
do {
|
||||
part=pattern.getPart(partIndex++);
|
||||
MessagePattern.Part.Type type=part.getType();
|
||||
if(type==MessagePattern.Part.Type.ARG_LIMIT) {
|
||||
break;
|
||||
}
|
||||
assert type==MessagePattern.Part.Type.ARG_SELECTOR;
|
||||
// part is an ARG_SELECTOR followed by an optional explicit value, and then a message
|
||||
if(pattern.getPartType(partIndex).hasNumericValue()) {
|
||||
// explicit value like "=2"
|
||||
part=pattern.getPart(partIndex++);
|
||||
if(number==pattern.getNumericValue(part)) {
|
||||
// matches explicit value
|
||||
return partIndex;
|
||||
}
|
||||
} else if(!haveKeywordMatch) {
|
||||
// plural keyword like "few" or "other"
|
||||
// Compare "other" first and call the selector if this is not "other".
|
||||
if(pattern.partSubstringMatches(part, "other")) {
|
||||
if(msgStart==0) {
|
||||
msgStart=partIndex;
|
||||
if(keyword!=null && keyword.equals("other")) {
|
||||
// This is the first "other" sub-message,
|
||||
// and the selected keyword is also "other".
|
||||
// Do not match "other" again.
|
||||
haveKeywordMatch=true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(keyword==null) {
|
||||
keyword=selector.select(number-offset);
|
||||
if(msgStart!=0 && keyword.equals("other")) {
|
||||
// We have already seen an "other" sub-message.
|
||||
// Do not match "other" again.
|
||||
haveKeywordMatch=true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(pattern.partSubstringMatches(part, keyword)) {
|
||||
// keyword matches
|
||||
msgStart=partIndex;
|
||||
// Do not match this keyword again.
|
||||
haveKeywordMatch=true;
|
||||
}
|
||||
}
|
||||
}
|
||||
partIndex=pattern.getLimitPartIndex(partIndex);
|
||||
} while(++partIndex<count);
|
||||
return msgStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface for selecting PluralFormat keywords for numbers.
|
||||
* The PluralRules class was intended to implement this interface,
|
||||
* but there is no public API that uses a PluralSelector,
|
||||
* only MessageFormat and PluralFormat have PluralSelector implementations.
|
||||
* Therefore, PluralRules is not marked to implement this non-public interface,
|
||||
* to avoid confusing users.
|
||||
* @internal
|
||||
*/
|
||||
/*package*/ interface PluralSelector {
    /**
     * Maps a number to the PluralFormat keyword that should be used for it.
     *
     * @param number The number to be plural-formatted.
     * @return The selected PluralFormat keyword.
     */
    String select(double number);
}
|
||||
|
||||
// See PluralSelector:
|
||||
// We could avoid this adapter class if we made PluralSelector public
|
||||
// (or at least publicly visible) and had PluralRules implement PluralSelector.
|
||||
private final class PluralSelectorAdapter implements PluralSelector {
|
||||
public String select(double number) {
|
||||
return pluralRules.select(number);
|
||||
}
|
||||
}
|
||||
transient private PluralSelectorAdapter pluralRulesWrapper = new PluralSelectorAdapter();
|
||||
|
||||
/**
|
||||
* Formats a plural message for a given number.
|
||||
*
|
||||
@ -430,20 +479,50 @@ public class PluralFormat extends UFormat {
|
||||
*/
|
||||
public final String format(double number) {
|
||||
// If no pattern was applied, return the formatted number.
|
||||
if (parsedValues == null) {
|
||||
if (msgPattern == null || msgPattern.countParts() == 0) {
|
||||
return numberFormat.format(number);
|
||||
}
|
||||
|
||||
// Get appropriate format pattern.
|
||||
String selectedRule = pluralRules.select(number);
|
||||
String selectedPattern = parsedValues.get(selectedRule);
|
||||
if (selectedPattern == null) { // Fallback to others.
|
||||
selectedPattern = parsedValues.get(PluralRules.KEYWORD_OTHER);
|
||||
// Get the appropriate sub-message.
|
||||
int partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number);
|
||||
// Replace syntactic # signs in the top level of this sub-message
|
||||
// (not in nested arguments) with the formatted number-offset.
|
||||
number -= offset;
|
||||
StringBuilder result = null;
|
||||
int prevIndex = msgPattern.getPart(partIndex).getLimit();
|
||||
for (;;) {
|
||||
MessagePattern.Part part = msgPattern.getPart(++partIndex);
|
||||
MessagePattern.Part.Type type = part.getType();
|
||||
int index = part.getIndex();
|
||||
if (type == MessagePattern.Part.Type.MSG_LIMIT) {
|
||||
if (result == null) {
|
||||
return pattern.substring(prevIndex, index);
|
||||
} else {
|
||||
return result.append(pattern, prevIndex, index).toString();
|
||||
}
|
||||
} else if (type == MessagePattern.Part.Type.REPLACE_NUMBER ||
|
||||
// JDK compatibility mode: Remove SKIP_SYNTAX.
|
||||
(type == MessagePattern.Part.Type.SKIP_SYNTAX && msgPattern.jdkAposMode())) {
|
||||
if (result == null) {
|
||||
result = new StringBuilder();
|
||||
}
|
||||
result.append(pattern, prevIndex, index);
|
||||
if (type == MessagePattern.Part.Type.REPLACE_NUMBER) {
|
||||
result.append(numberFormat.format(number));
|
||||
}
|
||||
prevIndex = part.getLimit();
|
||||
} else if (type == MessagePattern.Part.Type.ARG_START) {
|
||||
if (result == null) {
|
||||
result = new StringBuilder();
|
||||
}
|
||||
result.append(pattern, prevIndex, index);
|
||||
prevIndex = index;
|
||||
partIndex = msgPattern.getLimitPartIndex(partIndex);
|
||||
index = msgPattern.getPart(partIndex).getLimit();
|
||||
MessagePattern.appendReducedApostrophes(pattern, prevIndex, index, result);
|
||||
prevIndex = index;
|
||||
}
|
||||
}
|
||||
// Get formatted number and insert it into String.
|
||||
// Will replace all '#' which are not inside curly braces by the
|
||||
// formatted number.
|
||||
return insertFormattedNumber(number, selectedPattern);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -469,8 +548,7 @@ public class PluralFormat extends UFormat {
|
||||
toAppendTo.append(format(((Number) number).doubleValue()));
|
||||
return toAppendTo;
|
||||
}
|
||||
throw new IllegalArgumentException("'" + number +
|
||||
"' is not a Number");
|
||||
throw new IllegalArgumentException("'" + number + "' is not a Number");
|
||||
}
|
||||
|
||||
/**
|
||||
@ -531,83 +609,24 @@ public class PluralFormat extends UFormat {
|
||||
numberFormat = format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if the applied pattern provided enough information,
|
||||
* i.e., if the attribute <code>parsedValues</code> stores enough
|
||||
* information for plural formatting.
|
||||
* Will be called at the end of pattern parsing.
|
||||
* @throws IllegalArgumentException if there's not sufficient information
|
||||
* provided.
|
||||
*/
|
||||
private void checkSufficientDefinition() {
|
||||
// Check that at least the default rule is defined.
|
||||
if (parsedValues.get(PluralRules.KEYWORD_OTHER) == null) {
|
||||
parsingFailure("Malformed formatting expression.\n"
|
||||
+ "Value for case \"" + PluralRules.KEYWORD_OTHER
|
||||
+ "\" was not defined.");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper method that resets the <code>PluralFormat</code> object and throws
|
||||
* an <code>IllegalArgumentException</code> with a given error text.
|
||||
* @param errorText the error text of the exception message.
|
||||
* @throws IllegalArgumentException will always be thrown by this method.
|
||||
*/
|
||||
private void parsingFailure(String errorText) {
|
||||
// Set PluralFormat to a valid state.
|
||||
init(null, ULocale.getDefault());
|
||||
throw new IllegalArgumentException(errorText);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper method that is called during formatting.
|
||||
* It replaces the character '#' by the number used for plural selection in
|
||||
* a message text. Only '#' are replaced, that are not written inside curly
|
||||
* braces. This allows the use of nested number formats.
|
||||
* The number will be formatted using the attribute
|
||||
* <code>numberformat</code>.
|
||||
* @param number the number used for plural selection.
|
||||
* @param message is the text in which '#' will be replaced.
|
||||
* @return the text with inserted numbers.
|
||||
*/
|
||||
private String insertFormattedNumber(double number, String message) {
|
||||
if (message == null) {
|
||||
return "";
|
||||
}
|
||||
String formattedNumber = numberFormat.format(number);
|
||||
StringBuilder result = new StringBuilder();
|
||||
int braceStack = 0;
|
||||
int startIndex = 0;
|
||||
for (int i = 0; i < message.length(); ++i) {
|
||||
switch (message.charAt(i)) {
|
||||
case '{':
|
||||
++braceStack;
|
||||
break;
|
||||
case '}':
|
||||
--braceStack;
|
||||
break;
|
||||
case '#':
|
||||
if (braceStack == 0) {
|
||||
result.append(message.substring(startIndex,i));
|
||||
startIndex = i + 1;
|
||||
result.append(formattedNumber);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (startIndex < message.length()) {
|
||||
result.append(message.substring(startIndex, message.length()));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object rhs) {
|
||||
return rhs instanceof PluralFormat && equals((PluralFormat) rhs);
|
||||
if(this == rhs) {
|
||||
return true;
|
||||
}
|
||||
if(rhs == null || getClass() != rhs.getClass()) {
|
||||
return false;
|
||||
}
|
||||
PluralFormat pf = (PluralFormat)rhs;
|
||||
return
|
||||
Utility.objectEquals(ulocale, pf.ulocale) &&
|
||||
Utility.objectEquals(pluralRules, pf.pluralRules) &&
|
||||
Utility.objectEquals(msgPattern, pf.msgPattern) &&
|
||||
Utility.objectEquals(numberFormat, pf.numberFormat);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -617,31 +636,40 @@ public class PluralFormat extends UFormat {
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
public boolean equals(PluralFormat rhs) {
|
||||
return pluralRules.equals(rhs.pluralRules) &&
|
||||
parsedValues.equals(rhs.parsedValues) &&
|
||||
numberFormat.equals(rhs.numberFormat);
|
||||
return equals((Object)rhs);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return pluralRules.hashCode() ^ parsedValues.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* For debugging purposes only
|
||||
* @return a text representation of the format data.
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append("locale=" + ulocale);
|
||||
buf.append(", rules='" + pluralRules + "'");
|
||||
buf.append(", pattern='" + pattern + "'");
|
||||
buf.append(", parsedValues='" + parsedValues + "'");
|
||||
buf.append(", format='" + numberFormat + "'");
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
pluralRulesWrapper = new PluralSelectorAdapter();
|
||||
// Ignore the parsedValues from an earlier class version (before ICU 4.8)
|
||||
// and rebuild the msgPattern.
|
||||
parsedValues = null;
|
||||
if (pattern != null) {
|
||||
applyPattern(pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.impl.PatternProps;
|
||||
import com.ibm.icu.impl.PluralRulesLoader;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
@ -61,7 +62,7 @@ import com.ibm.icu.util.ULocale;
|
||||
* Syntax:<pre>
|
||||
* rules = rule (';' rule)*
|
||||
* rule = keyword ':' condition
|
||||
* keyword = <identifier>
|
||||
* keyword = <identifier>
|
||||
* condition = and_condition ('or' and_condition)*
|
||||
* and_condition = relation ('and' relation)*
|
||||
* relation = is_relation | in_relation | within_relation | 'n' <EOL>
|
||||
@ -75,6 +76,9 @@ import com.ibm.icu.util.ULocale;
|
||||
* range = value'..'value
|
||||
* </pre></p>
|
||||
* <p>
|
||||
* An "identifier" is a sequence of characters that do not have the
|
||||
* Unicode Pattern_Syntax or Pattern_White_Space properties.
|
||||
* <p>
|
||||
* The difference between 'in' and 'within' is that 'in' only includes
|
||||
* integers in the specified range, while 'within' includes all values.
|
||||
* Using 'within' with a range_list consisting entirely of values
|
||||
@ -139,19 +143,6 @@ public class PluralRules implements Serializable {
|
||||
*/
|
||||
public static final double NO_UNIQUE_VALUE = -0.00123456777;
|
||||
|
||||
/*
|
||||
* The set of all characters a valid keyword can start with.
|
||||
*/
|
||||
private static final UnicodeSet START_CHARS =
|
||||
new UnicodeSet("[[:ID_Start:][_]]");
|
||||
|
||||
/*
|
||||
* The set of all characters a valid keyword can contain after
|
||||
* the first character.
|
||||
*/
|
||||
private static final UnicodeSet CONT_CHARS =
|
||||
new UnicodeSet("[:ID_Continue:]");
|
||||
|
||||
/*
|
||||
* The default constraint that is always satisfied.
|
||||
*/
|
||||
@ -827,17 +818,9 @@ public class PluralRules implements Serializable {
|
||||
* @param token the token to be checked
|
||||
* @return true if the token is a valid keyword.
|
||||
*/
|
||||
private static boolean isValidKeyword(String token) {
|
||||
if (token.length() > 0 && START_CHARS.contains(token.charAt(0))) {
|
||||
for (int i = 1; i < token.length(); ++i) {
|
||||
if (!CONT_CHARS.contains(token.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
private static boolean isValidKeyword(String token) {
|
||||
return PatternProps.isIdentifier(token);
|
||||
}
|
||||
|
||||
/*
|
||||
* Creates a new <code>PluralRules</code> object. Immutable.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 2004-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 2009 , Yahoo! Inc. *
|
||||
*******************************************************************************
|
||||
@ -12,8 +12,8 @@ import java.io.ObjectInputStream;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.Format;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.impl.PatternProps;
|
||||
|
||||
/**
|
||||
* <p><code>SelectFormat</code> supports the creation of internationalized
|
||||
@ -25,6 +25,10 @@ import java.util.Map;
|
||||
*
|
||||
* <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
|
||||
*
|
||||
* <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
|
||||
* with a <code>select</code> argument type,
|
||||
* rather than using a stand-alone <code>SelectFormat</code>.</p>
|
||||
*
|
||||
* <p>The main use case for the select format is gender based inflection.
|
||||
* When names or nouns are inserted into sentences, their gender can affect pronouns,
|
||||
* verb forms, articles, and adjectives. Special care needs to be
|
||||
@ -58,6 +62,9 @@ import java.util.Map;
|
||||
* but similar in grammatical use.
|
||||
* Some African languages have around 20 noun classes.</p>
|
||||
*
|
||||
* <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
|
||||
* we usually need to distinguish only between female, male and other/unknown.</p>
|
||||
*
|
||||
* <p>To enable localizers to create sentence patterns that take their
|
||||
* language's gender dependencies into consideration, software has to provide
|
||||
* information about the gender associated with a noun or name to
|
||||
@ -66,8 +73,8 @@ import java.util.Map;
|
||||
*
|
||||
* <ul>
|
||||
* <li>For people, natural gender information should be maintained for each person.
|
||||
* The keywords "male", "female", "mixed" (for groups of people)
|
||||
* and "unknown" are used.
|
||||
* Keywords like "male", "female", "mixed" (for groups of people)
|
||||
* and "unknown" could be used.
|
||||
*
|
||||
* <li>For nouns, grammatical gender information should be maintained for
|
||||
* each noun and per language, e.g., in resource bundles.
|
||||
@ -85,6 +92,11 @@ import java.util.Map;
|
||||
*
|
||||
* <pre>{0} went to {2}.</pre>
|
||||
*
|
||||
* <p><b>Note:</b> The entire sentence should be included (and partially repeated)
|
||||
* inside each phrase. Otherwise translators would have to be trained on how to
|
||||
* move bits of the sentence in and out of the select argument of a message.
|
||||
* (The examples below do not follow this recommendation!)</p>
|
||||
*
|
||||
* <p>The sentence pattern for French, where the gender of the person affects
|
||||
* the form of the participle, uses a select format based on argument 1:</p>
|
||||
*
|
||||
@ -104,39 +116,24 @@ import java.util.Map;
|
||||
*
|
||||
* <h4>Patterns and Their Interpretation</h4>
|
||||
*
|
||||
* <p>The <code>SelectFormat</code> pattern text defines the phrase output
|
||||
* <p>The <code>SelectFormat</code> pattern string defines the phrase output
|
||||
* for each user-defined keyword.
|
||||
* The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
|
||||
* clauses, separated by white space characters.
|
||||
* Each clause assigns the phrase <code><i>phrase</i></code>
|
||||
* to the user-defined <code><i>keyword</i></code>.</p>
|
||||
* The pattern is a sequence of (keyword, message) pairs.
|
||||
* A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
|
||||
*
|
||||
* <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
|
||||
* that don't match this pattern result in the error code
|
||||
* <code>U_ILLEGAL_CHARACTER</code>.
|
||||
* You always have to define a phrase for the default keyword
|
||||
* <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
|
||||
*
|
||||
* <p>You always have to define a phrase for the default keyword
|
||||
* <code>other</code>; this phrase is returned when the keyword
|
||||
* provided to
|
||||
* the <code>format</code> method matches no other keyword.
|
||||
* If a pattern does not provide a phrase for <code>other</code>, the method
|
||||
* it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
|
||||
* If a pattern provides more than one phrase for the same keyword, the
|
||||
* error <code>U_DUPLICATE_KEYWORD</code> is returned.
|
||||
* <br/>
|
||||
* Spaces between <code><i>keyword</i></code> and
|
||||
* <code>{<i>phrase</i>}</code> will be ignored; spaces within
|
||||
* <code>{<i>phrase</i>}</code> will be preserved.</p>
|
||||
* Pattern_White_Space between keywords and messages is ignored.
|
||||
* Pattern_White_Space within a message is preserved and output.</p>
|
||||
*
|
||||
* <p>The phrase for a particular select case may contain other message
|
||||
* format patterns. <code>SelectFormat</code> preserves these so that you
|
||||
* can use the strings produced by <code>SelectFormat</code> with other
|
||||
* formatters. If you are using <code>SelectFormat</code> inside a
|
||||
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
|
||||
* automatically evaluate the resulting format pattern.
|
||||
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
|
||||
* in phrases to define a nested format pattern.</p>
|
||||
*
|
||||
* <pre>Example:
|
||||
* <p><pre>Example:
|
||||
* MessageFormat msgFmt = new MessageFormat("{0} est " +
|
||||
* "{1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris.",
|
||||
* new ULocale("fr"));
|
||||
@ -160,106 +157,27 @@ public class SelectFormat extends Format{
|
||||
*/
|
||||
private String pattern = null;
|
||||
|
||||
/*
|
||||
* The format messages for each select case. It is a mapping:
|
||||
* <code>String</code>(select case keyword) --> <code>String</code>
|
||||
* (message for this select case).
|
||||
*/
|
||||
transient private Map<String, String> parsedValues = null;
|
||||
|
||||
/**
|
||||
* Common name for the default select form. This name is returned
|
||||
* for values to which no other form in the rule applies. It
|
||||
* can additionally be assigned rules of its own.
|
||||
* @stable ICU 4.4
|
||||
* The MessagePattern which contains the parsed structure of the pattern string.
|
||||
*/
|
||||
private static final String KEYWORD_OTHER = "other";
|
||||
|
||||
/*
|
||||
* The types of character classifications
|
||||
*/
|
||||
private enum CharacterClass {
|
||||
T_START_KEYWORD, T_CONTINUE_KEYWORD, T_LEFT_BRACE,
|
||||
T_RIGHT_BRACE, T_SPACE, T_OTHER
|
||||
};
|
||||
|
||||
/*
|
||||
* The different states needed in state machine
|
||||
* in applyPattern method.
|
||||
*/
|
||||
private enum State {
|
||||
START_STATE, KEYWORD_STATE,
|
||||
PAST_KEYWORD_STATE, PHRASE_STATE
|
||||
};
|
||||
|
||||
transient private MessagePattern msgPattern;
|
||||
|
||||
/**
|
||||
* Creates a new <code>SelectFormat</code> for a given pattern string.
|
||||
* @param pattern the pattern for this <code>SelectFormat</code>.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public SelectFormat(String pattern) {
|
||||
init();
|
||||
applyPattern(pattern);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initializes the <code>SelectFormat</code> object.
|
||||
* Postcondition:<br/>
|
||||
* <code>parsedValues</code>: is <code>null</code><br/>
|
||||
* <code>pattern</code>: is <code>null</code><br/>
|
||||
* Resets the <code>SelectFormat</code> object.
|
||||
*/
|
||||
private void init() {
|
||||
parsedValues = null;
|
||||
private void reset() {
|
||||
pattern = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies the characters
|
||||
*/
|
||||
private boolean checkValidKeyword(String argKeyword) {
|
||||
int len = argKeyword.length();
|
||||
if (len < 1) {
|
||||
return false;
|
||||
};
|
||||
if (classifyCharacter(argKeyword.charAt(0)) != CharacterClass.T_START_KEYWORD) {
|
||||
return false;
|
||||
};
|
||||
for (int i = 1; i < len; i++) {
|
||||
CharacterClass type = classifyCharacter(argKeyword.charAt(i));
|
||||
if (type != CharacterClass.T_START_KEYWORD &&
|
||||
type != CharacterClass.T_CONTINUE_KEYWORD) {
|
||||
return false;
|
||||
};
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies the characters.
|
||||
*/
|
||||
private CharacterClass classifyCharacter(char ch) {
|
||||
if ((ch >= 'A') && (ch <= 'Z')) {
|
||||
return CharacterClass.T_START_KEYWORD;
|
||||
}
|
||||
if ((ch >= 'a') && (ch <= 'z')) {
|
||||
return CharacterClass.T_START_KEYWORD;
|
||||
}
|
||||
if ((ch >= '0') && (ch <= '9')) {
|
||||
return CharacterClass.T_CONTINUE_KEYWORD;
|
||||
}
|
||||
switch (ch) {
|
||||
case '{':
|
||||
return CharacterClass.T_LEFT_BRACE;
|
||||
case '}':
|
||||
return CharacterClass.T_RIGHT_BRACE;
|
||||
case ' ':
|
||||
case '\t':
|
||||
return CharacterClass.T_SPACE;
|
||||
case '-':
|
||||
case '_':
|
||||
return CharacterClass.T_CONTINUE_KEYWORD;
|
||||
default :
|
||||
return CharacterClass.T_OTHER;
|
||||
if(msgPattern != null) {
|
||||
msgPattern.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@ -272,129 +190,16 @@ public class SelectFormat extends Format{
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public void applyPattern(String pattern) {
|
||||
parsedValues = null;
|
||||
this.pattern = pattern;
|
||||
|
||||
//Initialization
|
||||
StringBuilder keyword = new StringBuilder();
|
||||
StringBuilder phrase = new StringBuilder();
|
||||
int braceCount = 0;
|
||||
|
||||
parsedValues = new HashMap<String, String>();
|
||||
|
||||
//Process the state machine
|
||||
State state = State.START_STATE;
|
||||
for (int i = 0; i < pattern.length(); i++ ){
|
||||
//Get the character and check its type
|
||||
char ch = pattern.charAt(i);
|
||||
CharacterClass type = classifyCharacter(ch);
|
||||
|
||||
//Process the state machine
|
||||
switch (state) {
|
||||
//At the start of pattern
|
||||
case START_STATE:
|
||||
switch (type) {
|
||||
case T_SPACE:
|
||||
break ;
|
||||
case T_START_KEYWORD:
|
||||
state = State.KEYWORD_STATE;
|
||||
keyword.append(ch);
|
||||
break ;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default :
|
||||
parsingFailure("Pattern syntax error.");
|
||||
}//end of switch(type)
|
||||
break ;
|
||||
|
||||
//Handle the keyword state
|
||||
case KEYWORD_STATE:
|
||||
switch (type) {
|
||||
case T_SPACE:
|
||||
state = State.PAST_KEYWORD_STATE;
|
||||
break ;
|
||||
case T_START_KEYWORD:
|
||||
case T_CONTINUE_KEYWORD:
|
||||
keyword.append(ch);
|
||||
break ;
|
||||
case T_LEFT_BRACE:
|
||||
state = State.PHRASE_STATE;
|
||||
break ;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default :
|
||||
parsingFailure("Pattern syntax error.");
|
||||
}//end of switch(type)
|
||||
break ;
|
||||
|
||||
//Handle the pastkeyword state
|
||||
case PAST_KEYWORD_STATE:
|
||||
switch (type) {
|
||||
case T_SPACE:
|
||||
break ;
|
||||
case T_LEFT_BRACE:
|
||||
state = State.PHRASE_STATE;
|
||||
break ;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default :
|
||||
parsingFailure("Pattern syntax error.");
|
||||
}//end of switch(type)
|
||||
break ;
|
||||
|
||||
//Handle the phrase state
|
||||
case PHRASE_STATE:
|
||||
switch (type) {
|
||||
case T_LEFT_BRACE:
|
||||
braceCount++;
|
||||
phrase.append(ch);
|
||||
break ;
|
||||
case T_RIGHT_BRACE:
|
||||
//Matching keyword, phrase pair found
|
||||
if (braceCount == 0){
|
||||
//Check validity of keyword
|
||||
if (parsedValues.get(keyword.toString()) != null) {
|
||||
parsingFailure("Duplicate keyword error.");
|
||||
}
|
||||
if (keyword.length() == 0) {
|
||||
parsingFailure("Pattern syntax error.");
|
||||
}
|
||||
|
||||
//Store the keyword, phrase pair in hashTable
|
||||
parsedValues.put( keyword.toString(), phrase.toString());
|
||||
|
||||
//Reinitialize
|
||||
keyword.setLength(0);
|
||||
phrase.setLength(0);
|
||||
state = State.START_STATE;
|
||||
}
|
||||
|
||||
if (braceCount > 0){
|
||||
braceCount-- ;
|
||||
phrase.append(ch);
|
||||
}
|
||||
break ;
|
||||
default :
|
||||
phrase.append(ch);
|
||||
}//end of switch(type)
|
||||
break ;
|
||||
|
||||
//Handle the default case of switch(state)
|
||||
default :
|
||||
parsingFailure("Pattern syntax error.");
|
||||
|
||||
}//end of switch(state)
|
||||
if (msgPattern == null) {
|
||||
msgPattern = new MessagePattern();
|
||||
}
|
||||
|
||||
//Check if the state machine is back to START_STATE
|
||||
if ( state != State.START_STATE){
|
||||
parsingFailure("Pattern syntax error.");
|
||||
try {
|
||||
msgPattern.parseSelectStyle(pattern);
|
||||
} catch(RuntimeException e) {
|
||||
reset();
|
||||
throw e;
|
||||
}
|
||||
|
||||
//Check if "other" keyword is present
|
||||
if ( !checkSufficientDefinition() ) {
|
||||
parsingFailure("Pattern syntax error. "
|
||||
+ "Value for case \"" + KEYWORD_OTHER
|
||||
+ "\" was not defined. ");
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -407,41 +212,103 @@ public class SelectFormat extends Format{
|
||||
return pattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
|
||||
* @param pattern A MessagePattern.
|
||||
* @param partIndex the index of the first SelectFormat argument style part.
|
||||
* @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
|
||||
* @return the sub-message start part index.
|
||||
*/
|
||||
/*package*/ static int findSubMessage(MessagePattern pattern, int partIndex, String keyword) {
|
||||
int count=pattern.countParts();
|
||||
int msgStart=0;
|
||||
// Iterate over (ARG_SELECTOR, message) pairs until ARG_LIMIT or end of select-only pattern.
|
||||
do {
|
||||
MessagePattern.Part part=pattern.getPart(partIndex++);
|
||||
MessagePattern.Part.Type type=part.getType();
|
||||
if(type==MessagePattern.Part.Type.ARG_LIMIT) {
|
||||
break;
|
||||
}
|
||||
assert type==MessagePattern.Part.Type.ARG_SELECTOR;
|
||||
// part is an ARG_SELECTOR followed by a message
|
||||
if(pattern.partSubstringMatches(part, keyword)) {
|
||||
// keyword matches
|
||||
return partIndex;
|
||||
} else if(msgStart==0 && pattern.partSubstringMatches(part, "other")) {
|
||||
msgStart=partIndex;
|
||||
}
|
||||
partIndex=pattern.getLimitPartIndex(partIndex);
|
||||
} while(++partIndex<count);
|
||||
return msgStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects the phrase for the given keyword.
|
||||
*
|
||||
* @param keyword a keyword for which the select message should be formatted.
|
||||
* @param keyword a phrase selection keyword.
|
||||
* @return the string containing the formatted select message.
|
||||
* @throws IllegalArgumentException when the given keyword is not available in the select format pattern
|
||||
* @throws IllegalArgumentException when the given keyword is not a "pattern identifier"
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public final String format(String keyword) {
|
||||
//Check for the validity of the keyword
|
||||
if( !checkValidKeyword(keyword) ){
|
||||
if (!PatternProps.isIdentifier(keyword)) {
|
||||
throw new IllegalArgumentException("Invalid formatting argument.");
|
||||
}
|
||||
|
||||
// If no pattern was applied, throw an exception
|
||||
if (parsedValues == null) {
|
||||
if (msgPattern == null || msgPattern.countParts() == 0) {
|
||||
throw new IllegalStateException("Invalid format error.");
|
||||
}
|
||||
|
||||
// Get appropriate format pattern.
|
||||
String selectedPattern = parsedValues.get(keyword);
|
||||
if (selectedPattern == null) { // Fallback to others.
|
||||
selectedPattern = parsedValues.get(KEYWORD_OTHER);
|
||||
// Get the appropriate sub-message.
|
||||
int msgStart = findSubMessage(msgPattern, 0, keyword);
|
||||
if (!msgPattern.jdkAposMode()) {
|
||||
int msgLimit = msgPattern.getLimitPartIndex(msgStart);
|
||||
return msgPattern.getPatternString().substring(msgPattern.getPart(msgStart).getLimit(),
|
||||
msgPattern.getPatternIndex(msgLimit));
|
||||
}
|
||||
// JDK compatibility mode: Remove SKIP_SYNTAX.
|
||||
StringBuilder result = null;
|
||||
int prevIndex = msgPattern.getPart(msgStart).getLimit();
|
||||
for (int i = msgStart;;) {
|
||||
MessagePattern.Part part = msgPattern.getPart(++i);
|
||||
MessagePattern.Part.Type type = part.getType();
|
||||
int index = part.getIndex();
|
||||
if (type == MessagePattern.Part.Type.MSG_LIMIT) {
|
||||
if (result == null) {
|
||||
return pattern.substring(prevIndex, index);
|
||||
} else {
|
||||
return result.append(pattern, prevIndex, index).toString();
|
||||
}
|
||||
} else if (type == MessagePattern.Part.Type.SKIP_SYNTAX) {
|
||||
if (result == null) {
|
||||
result = new StringBuilder();
|
||||
}
|
||||
result.append(pattern, prevIndex, index);
|
||||
prevIndex = part.getLimit();
|
||||
} else if (type == MessagePattern.Part.Type.ARG_START) {
|
||||
if (result == null) {
|
||||
result = new StringBuilder();
|
||||
}
|
||||
result.append(pattern, prevIndex, index);
|
||||
prevIndex = index;
|
||||
i = msgPattern.getLimitPartIndex(i);
|
||||
index = msgPattern.getPart(i).getLimit();
|
||||
MessagePattern.appendReducedApostrophes(pattern, prevIndex, index, result);
|
||||
prevIndex = index;
|
||||
}
|
||||
}
|
||||
return selectedPattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects the phrase for the given keyword.
|
||||
* and appends the formatted message to the given <code>StringBuffer</code>.
|
||||
* @param keyword a keyword for which the select message should be formatted.
|
||||
* @param toAppendTo the formatted message will be appended to this
|
||||
* @param keyword a phrase selection keyword.
|
||||
* @param toAppendTo the selected phrase will be appended to this
|
||||
* <code>StringBuffer</code>.
|
||||
* @param pos will be ignored by this method.
|
||||
* @throws IllegalArgumentException when the given keyword is not available in the select format pattern
|
||||
* @throws IllegalArgumentException when the given keyword is not a String
|
||||
* or not a "pattern identifier"
|
||||
* @return the string buffer passed in as toAppendTo, with formatted text
|
||||
* appended.
|
||||
* @stable ICU 4.4
|
||||
@ -470,45 +337,27 @@ public class SelectFormat extends Format{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if the applied pattern provided enough information,
|
||||
* i.e., if the attribute <code>parsedValues</code> stores enough
|
||||
* information for select formatting.
|
||||
* Will be called at the end of pattern parsing.
|
||||
*/
|
||||
private boolean checkSufficientDefinition() {
|
||||
// Check that at least the default rule is defined.
|
||||
return parsedValues.get(KEYWORD_OTHER) != null;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper method that resets the <code>SelectFormat</code> object and throws
|
||||
* an <code>IllegalArgumentException</code> with a given error text.
|
||||
* @param errorText the error text of the exception message.
|
||||
* @throws IllegalArgumentException will always be thrown by this method.
|
||||
*/
|
||||
private void parsingFailure(String errorText) {
|
||||
// Set SelectFormat to a valid state.
|
||||
init();
|
||||
throw new IllegalArgumentException(errorText);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!(obj instanceof SelectFormat)) {
|
||||
if(this == obj) {
|
||||
return true;
|
||||
}
|
||||
if(obj == null || getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
SelectFormat sf = (SelectFormat) obj;
|
||||
return pattern == null ? sf.pattern == null : pattern.equals(sf.pattern);
|
||||
return msgPattern == null ? sf.msgPattern == null : msgPattern.equals(sf.msgPattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (pattern != null) {
|
||||
return pattern.hashCode();
|
||||
@ -517,16 +366,12 @@ public class SelectFormat extends Format{
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string representation of the object
|
||||
* @return a text representation of the format object.
|
||||
* The result string includes the class name and
|
||||
* the pattern string returned by <code>toPattern()</code>.
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append("pattern='" + pattern + "'");
|
||||
return buf.toString();
|
||||
return "pattern='" + pattern + "'";
|
||||
}
|
||||
|
||||
private void readObject(ObjectInputStream in)
|
||||
|
@ -15,6 +15,7 @@ import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.BMPSet;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.PatternProps;
|
||||
import com.ibm.icu.impl.RuleCharacterIterator;
|
||||
import com.ibm.icu.impl.SortedSetRelation;
|
||||
import com.ibm.icu.impl.UBiDiProps;
|
||||
@ -115,7 +116,7 @@ import com.ibm.icu.util.VersionInfo;
|
||||
* </blockquote>
|
||||
*
|
||||
* Any character may be preceded by a backslash in order to remove any special
|
||||
* meaning. White space characters, as defined by UCharacterProperty.isRuleWhiteSpace(), are
|
||||
* meaning. White space characters, as defined by the Unicode Pattern_White_Space property, are
|
||||
* ignored, unless they are escaped.
|
||||
*
|
||||
* <p>Property patterns specify a set of characters having a certain
|
||||
@ -424,8 +425,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
* Constructs a set from the given pattern. See the class description
|
||||
* for the syntax of the pattern language.
|
||||
* @param pattern a string specifying what characters are in the set
|
||||
* @param ignoreWhitespace if true, ignore characters for which
|
||||
* UCharacterProperty.isRuleWhiteSpace() returns true
|
||||
* @param ignoreWhitespace if true, ignore Unicode Pattern_White_Space characters
|
||||
* @exception java.lang.IllegalArgumentException if the pattern contains
|
||||
* a syntax error.
|
||||
* @stable ICU 2.0
|
||||
@ -548,8 +548,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
* optionally ignoring whitespace.
|
||||
* See the class description for the syntax of the pattern language.
|
||||
* @param pattern a string specifying what characters are in the set
|
||||
* @param ignoreWhitespace if true then characters for which
|
||||
* UCharacterProperty.isRuleWhiteSpace() returns true are ignored
|
||||
* @param ignoreWhitespace if true then Unicode Pattern_White_Space characters are ignored
|
||||
* @exception java.lang.IllegalArgumentException if the pattern
|
||||
* contains a syntax error.
|
||||
* @stable ICU 2.0
|
||||
@ -628,7 +627,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (UCharacterProperty.isRuleWhiteSpace(c)) {
|
||||
if (PatternProps.isWhiteSpace(c)) {
|
||||
buf.append('\\');
|
||||
}
|
||||
break;
|
||||
@ -3189,30 +3188,27 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
|
||||
|
||||
/**
|
||||
* Remove leading and trailing rule white space and compress
|
||||
* internal rule white space to a single space character.
|
||||
*
|
||||
* @see UCharacterProperty#isRuleWhiteSpace
|
||||
* Remove leading and trailing Pattern_White_Space and compress
|
||||
* internal Pattern_White_Space to a single space character.
|
||||
*/
|
||||
private static String mungeCharName(String source) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for (int i=0; i<source.length(); ) {
|
||||
int ch = UTF16.charAt(source, i);
|
||||
i += UTF16.getCharCount(ch);
|
||||
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
|
||||
if (buf.length() == 0 ||
|
||||
buf.charAt(buf.length() - 1) == ' ') {
|
||||
source = PatternProps.trimWhiteSpace(source);
|
||||
StringBuilder buf = null;
|
||||
for (int i=0; i<source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (PatternProps.isWhiteSpace(ch)) {
|
||||
if (buf == null) {
|
||||
buf = new StringBuilder().append(source, 0, i);
|
||||
} else if (buf.charAt(buf.length() - 1) == ' ') {
|
||||
continue;
|
||||
}
|
||||
ch = ' '; // convert to ' '
|
||||
}
|
||||
UTF16.append(buf, ch);
|
||||
if (buf != null) {
|
||||
buf.append(ch);
|
||||
}
|
||||
}
|
||||
if (buf.length() != 0 &&
|
||||
buf.charAt(buf.length() - 1) == ' ') {
|
||||
buf.setLength(buf.length() - 1);
|
||||
}
|
||||
return buf.toString();
|
||||
return buf == null ? source : buf.toString();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
@ -3603,8 +3599,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
|
||||
/**
|
||||
* Bitmask for constructor and applyPattern() indicating that
|
||||
* white space should be ignored. If set, ignore characters for
|
||||
* which UCharacterProperty.isRuleWhiteSpace() returns true,
|
||||
* white space should be ignored. If set, ignore Unicode Pattern_White_Space characters,
|
||||
* unless they are quoted or escaped. This may be ORed together
|
||||
* with other selectors.
|
||||
* @stable ICU 3.8
|
||||
|
@ -44,6 +44,8 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.text.MessageFormat;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
|
||||
@ -101,15 +103,19 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
|
||||
/* @bug 4058973
|
||||
* MessageFormat.toPattern has weird rounding behavior.
|
||||
*
|
||||
* ICU 4.8: This test is commented out because toPattern() has been changed to return
|
||||
* the original pattern string, rather than reconstituting a new (equivalent) one.
|
||||
* This trivially eliminates issues with rounding or any other pattern string differences.
|
||||
*/
|
||||
public void Test4058973() {
|
||||
/*public void Test4058973() {
|
||||
|
||||
MessageFormat fmt = new MessageFormat("{0,choice,0#no files|1#one file|1< {0,number,integer} files}");
|
||||
String pat = fmt.toPattern();
|
||||
if (!pat.equals("{0,choice,0.0#no files|1.0#one file|1.0< {0,number,integer} files}")) {
|
||||
errln("MessageFormat.toPattern failed");
|
||||
}
|
||||
}
|
||||
}*/
|
||||
/* @bug 4031438
|
||||
* More robust message formats.
|
||||
*/
|
||||
@ -143,11 +149,11 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
logln("Apply with pattern : " + pattern2);
|
||||
messageFormatter.applyPattern(pattern2);
|
||||
tempBuffer = messageFormatter.format(paramArray);
|
||||
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {1} test plus other {2} stuff."))
|
||||
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {1} test plus 'other {2} stuff'."))
|
||||
errln("quote format test (w/ params) failed.");
|
||||
logln("Formatted with params : " + tempBuffer);
|
||||
tempBuffer = messageFormatter.format(null);
|
||||
if (!tempBuffer.equals("Double ' Quotes {0} test and quoted {1} test plus other {2} stuff."))
|
||||
if (!tempBuffer.equals("Double ' Quotes {0} test and quoted {1} test plus 'other {2} stuff'."))
|
||||
errln("quote format test (w/ null) failed.");
|
||||
logln("Formatted with null : " + tempBuffer);
|
||||
logln("toPattern : " + messageFormatter.toPattern());
|
||||
@ -285,12 +291,12 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
{
|
||||
String originalPattern = "initial pattern";
|
||||
MessageFormat mf = new MessageFormat(originalPattern);
|
||||
String illegalPattern = "ab { '}' de";
|
||||
try {
|
||||
String illegalPattern = "ab { '}' de";
|
||||
mf.applyPattern(illegalPattern);
|
||||
errln("illegal pattern: \"" + illegalPattern + "\"");
|
||||
} catch (IllegalArgumentException foo) {
|
||||
if (!originalPattern.equals(mf.toPattern()))
|
||||
if (illegalPattern.equals(mf.toPattern()))
|
||||
errln("pattern after: \"" + mf.toPattern() + "\"");
|
||||
}
|
||||
}
|
||||
@ -368,7 +374,7 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
mf.applyPattern(illegalPattern);
|
||||
errln("Should have thrown IllegalArgumentException for pattern : " + illegalPattern);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (!originalPattern.equals(mf.toPattern()))
|
||||
if (illegalPattern.equals(mf.toPattern()))
|
||||
errln("pattern after: \"" + mf.toPattern() + "\"");
|
||||
}
|
||||
}
|
||||
@ -596,7 +602,7 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
*/
|
||||
public void Test4169959() {
|
||||
// This works
|
||||
logln(MessageFormat.format("This will {0}", "work"));
|
||||
logln(MessageFormat.format("This will {0}", new Object[]{"work"}));
|
||||
|
||||
// This fails
|
||||
logln(MessageFormat.format("This will {0}", new Object[]{ null }));
|
||||
@ -670,11 +676,11 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
paramsMap.clear();
|
||||
paramsMap.put("ARG_ZERO", new Integer(7));
|
||||
tempBuffer = messageFormatter.format(paramsMap);
|
||||
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {ARG_ONE} test plus other {ARG_TWO} stuff."))
|
||||
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {ARG_ONE} test plus 'other {ARG_TWO} stuff'."))
|
||||
errln("quote format test (w/ params) failed.");
|
||||
logln("Formatted with params : " + tempBuffer);
|
||||
tempBuffer = messageFormatter.format(null);
|
||||
if (!tempBuffer.equals("Double ' Quotes {ARG_ZERO} test and quoted {ARG_ONE} test plus other {ARG_TWO} stuff."))
|
||||
if (!tempBuffer.equals("Double ' Quotes {ARG_ZERO} test and quoted {ARG_ONE} test plus 'other {ARG_TWO} stuff'."))
|
||||
errln("quote format test (w/ null) failed.");
|
||||
logln("Formatted with null : " + tempBuffer);
|
||||
logln("toPattern : " + messageFormatter.toPattern());
|
||||
@ -833,5 +839,42 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private MessageFormat serializeAndDeserialize(MessageFormat original) {
|
||||
try {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
ObjectOutputStream ostream = new ObjectOutputStream(baos);
|
||||
ostream.writeObject(original);
|
||||
ostream.flush();
|
||||
byte bytes[] = baos.toByteArray();
|
||||
|
||||
ObjectInputStream istream = new ObjectInputStream(new ByteArrayInputStream(bytes));
|
||||
MessageFormat reconstituted = (MessageFormat)istream.readObject();
|
||||
return reconstituted;
|
||||
} catch(IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void TestSerialization() {
|
||||
MessageFormat format1 = null;
|
||||
MessageFormat format2 = null;
|
||||
|
||||
format1 = new MessageFormat("", ULocale.GERMAN);
|
||||
format2 = serializeAndDeserialize(format1);
|
||||
assertEquals("MessageFormats (empty pattern) before and after serialization are not equal", format1, format2);
|
||||
|
||||
format1.applyPattern("ab{1}cd{0,number}ef{3,date}gh");
|
||||
format1.setFormat(2, null);
|
||||
format1.setFormatByArgumentIndex(1, NumberFormat.getInstance(ULocale.ENGLISH));
|
||||
format2 = serializeAndDeserialize(format1);
|
||||
assertEquals("MessageFormats (with custom formats) before and after serialization are not equal", format1, format2);
|
||||
assertEquals(
|
||||
"MessageFormat (with custom formats) does not "+
|
||||
"format correctly after serialization",
|
||||
"ab3.3cd4,4ef***gh",
|
||||
format2.format(new Object[] { 4.4, 3.3, "+++", "***" }));
|
||||
}
|
||||
}
|
||||
|
@ -92,19 +92,15 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
plfOddAndEven.format(i));
|
||||
}
|
||||
|
||||
// Check that double definition results in an exception.
|
||||
try {
|
||||
PluralFormat plFmt = new PluralFormat(oddAndEven);
|
||||
plFmt.applyPattern("odd{foo} odd{bar} other{foobar}");
|
||||
errln("Double definition of a plural case message should " +
|
||||
"provoke an exception but did not.");
|
||||
}catch (IllegalArgumentException e){}
|
||||
try {
|
||||
PluralFormat plFmt = new PluralFormat(oddAndEven);
|
||||
plFmt.applyPattern("odd{foo} other{bar} other{foobar}");
|
||||
errln("Double definition of a plural case message should " +
|
||||
"provoke an exception but did not.");
|
||||
}catch (IllegalArgumentException e){}
|
||||
// ICU 4.8 does not check for duplicate keywords any more.
|
||||
PluralFormat pf = new PluralFormat(ULocale.ENGLISH, oddAndEven,
|
||||
"odd{foo} odd{bar} other{foobar}");
|
||||
assertEquals("should use first occurrence of the 'odd' keyword", "foo", pf.format(1));
|
||||
pf.applyPattern("odd{foo} other{bar} other{foobar}");
|
||||
assertEquals("should use first occurrence of the 'other' keyword", "bar", pf.format(2));
|
||||
// This sees the first "other" before calling the PluralSelector which then selects "other".
|
||||
pf.applyPattern("other{foo} odd{bar} other{foobar}");
|
||||
assertEquals("should use first occurrence of the 'other' keyword", "foo", pf.format(2));
|
||||
}
|
||||
// omit other keyword.
|
||||
try {
|
||||
@ -114,20 +110,18 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
"exception but did not.");
|
||||
}catch (IllegalArgumentException e){}
|
||||
|
||||
// Test unknown keyword.
|
||||
try {
|
||||
PluralFormat plFmt = new PluralFormat(oddAndEven);
|
||||
plFmt.applyPattern("otto{foo} other{bar}");
|
||||
errln("Defining a message for an unknown keyword should result in" +
|
||||
"an exception but did not.");
|
||||
}catch (IllegalArgumentException e){}
|
||||
// ICU 4.8 does not check for unknown keywords any more.
|
||||
{
|
||||
PluralFormat pf = new PluralFormat(ULocale.ENGLISH, oddAndEven, "otto{foo} other{bar}");
|
||||
assertEquals("should ignore unknown keywords", "bar", pf.format(1));
|
||||
}
|
||||
|
||||
// Test invalid keyword.
|
||||
try {
|
||||
PluralFormat plFmt = new PluralFormat(oddAndEven);
|
||||
plFmt.applyPattern("1odd{foo} other{bar}");
|
||||
errln("Defining a message for an invalid keyword should result in" +
|
||||
"an exception but did not.");
|
||||
plFmt.applyPattern("*odd{foo} other{bar}");
|
||||
errln("Defining a message for an invalid keyword should result in " +
|
||||
"an exception but did not.");
|
||||
}catch (IllegalArgumentException e){}
|
||||
|
||||
// Test invalid syntax
|
||||
@ -170,12 +164,12 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
// Check that a pound sign in curly braces is preserved.
|
||||
{
|
||||
PluralFormat plFmt = new PluralFormat(oddAndEven);
|
||||
plFmt.applyPattern("odd{The number {#} is odd.}" +
|
||||
"other{The number {#} is even.}");
|
||||
plFmt.applyPattern("odd{The number {1,number,#} is odd.}" +
|
||||
"other{The number {2,number,#} is even.}");
|
||||
for (int i = 1; i < 3; ++i) {
|
||||
assertEquals("format did not preserve # inside curly braces.",
|
||||
((i % 2 == 1) ? "The number {#} is odd."
|
||||
: "The number {#} is even."),
|
||||
((i % 2 == 1) ? "The number {1,number,#} is odd."
|
||||
: "The number {2,number,#} is even."),
|
||||
plFmt.format(i));
|
||||
}
|
||||
|
||||
@ -223,14 +217,9 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
plFmt.format(5));
|
||||
|
||||
// Check that rules got updated.
|
||||
try {
|
||||
plFmt.applyPattern("odd__{odd} other{even}");
|
||||
errln("SetLocale should reset rules but did not.");
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage().indexOf("Unknown keyword") < 0){
|
||||
errln("Wrong exception thrown");
|
||||
}
|
||||
}
|
||||
plFmt.applyPattern("odd__{odd} other{even}");
|
||||
assertEquals("SetLocale should reset rules but did not.", "even", plFmt.format(1));
|
||||
|
||||
plFmt.applyPattern("one{one} other{not one}");
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
assertEquals("Wrong ruleset loaded by setLocale()",
|
||||
@ -261,12 +250,11 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
Object[] args = { "acme", null };
|
||||
|
||||
{
|
||||
PluralFormat pf = new PluralFormat(" one {one ''widget} other {# widgets} ");
|
||||
String pat = pf.toPattern();
|
||||
logln("pf pattern: '" + pat + "'");
|
||||
|
||||
assertEquals("no leading spaces", "o", pat.substring(0, 1));
|
||||
assertEquals("no trailing spaces", "}", pat.substring(pat.length() - 1));
|
||||
// ICU 4.8 PluralFormat does not trim() its pattern any more.
|
||||
// None of the other *Format classes do.
|
||||
String pat = " one {one ''widget} other {# widgets} ";
|
||||
PluralFormat pf = new PluralFormat(pat);
|
||||
assertEquals("should not trim() the pattern", pat, pf.toPattern());
|
||||
}
|
||||
|
||||
MessageFormat pfmt = new MessageFormat("The disk ''{0}'' contains {1, plural, one {one ''''{1, number, #.0}'''' widget} other {# widgets}}.");
|
||||
@ -275,10 +263,60 @@ public class PluralFormatUnitTest extends TestFmwk {
|
||||
args[1] = new Integer(i);
|
||||
logln(pfmt.format(args));
|
||||
}
|
||||
/* ICU 4.8 returns null instead of a choice/plural/select Format object
|
||||
* (because it does not create an object for any "complex" argument).
|
||||
PluralFormat pf = (PluralFormat)pfmt.getFormatsByArgumentIndex()[1];
|
||||
logln(pf.toPattern());
|
||||
*/
|
||||
logln(pfmt.toPattern());
|
||||
MessageFormat pfmt2 = new MessageFormat(pfmt.toPattern());
|
||||
assertEquals("message formats are equal", pfmt, pfmt2);
|
||||
}
|
||||
|
||||
public void TestExtendedPluralFormat() {
|
||||
String[] targets = {
|
||||
"There are no widgets.",
|
||||
"There is one widget.",
|
||||
"There is a bling widget and one other widget.",
|
||||
"There is a bling widget and 2 other widgets.",
|
||||
"There is a bling widget and 3 other widgets.",
|
||||
"Widgets, five (5-1=4) there be.",
|
||||
"There is a bling widget and 5 other widgets.",
|
||||
"There is a bling widget and 6 other widgets.",
|
||||
};
|
||||
PluralFormat pf = new PluralFormat(
|
||||
ULocale.ENGLISH,
|
||||
"offset:1.0 "
|
||||
+ "=0 {There are no widgets.} "
|
||||
+ "=1.0 {There is one widget.} "
|
||||
+ "=5 {Widgets, five (5-1=#) there be.} "
|
||||
+ "one {There is a bling widget and one other widget.} "
|
||||
+ "other {There is a bling widget and # other widgets.}");
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
String result = pf.format(i);
|
||||
assertEquals("value = " + i, targets[i], result);
|
||||
}
|
||||
|
||||
// Try explicit values after keywords.
|
||||
pf.applyPattern("other{zz}other{yy}one{xx}one{ww}=1{vv}=1{uu}");
|
||||
assertEquals("should find first matching *explicit* value", "vv", pf.format(1));
|
||||
}
|
||||
|
||||
public void TestExtendedPluralFormatParsing() {
|
||||
String[] failures = {
|
||||
"offset:1..0 =0 {Foo}",
|
||||
"offset:1.0 {Foo}",
|
||||
"=0= {Foo}",
|
||||
"=0 {Foo} =0.0 {Bar}",
|
||||
" = {Foo}",
|
||||
};
|
||||
for (String fmt : failures) {
|
||||
try {
|
||||
new PluralFormat(fmt);
|
||||
fail("expected exception when parsing '" + fmt + "'");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (c) 2004-2010, International Business Machines
|
||||
* Copyright (c) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
*******************************************************************************
|
||||
@ -27,10 +27,8 @@ public class SelectFormatUnitTest extends TestFmwk {
|
||||
*/
|
||||
public void TestPatternSyntax() {
|
||||
String checkSyntaxData[] = {
|
||||
"odd{foo} odd{bar} other{foobar}",
|
||||
"odd{foo} other{bar} other{foobar}",
|
||||
"odd{foo}",
|
||||
"1odd{foo} other{bar}",
|
||||
"*odd{foo} other{bar}",
|
||||
"odd{foo},other{bar}",
|
||||
"od d{foo} other{bar}",
|
||||
"odd{foo}{foobar}other{foo}",
|
||||
@ -39,19 +37,6 @@ public class SelectFormatUnitTest extends TestFmwk {
|
||||
"odd{fo{o1}other{foo2}}"
|
||||
};
|
||||
|
||||
String expectedErrorMsgs[] = {
|
||||
"Duplicate keyword error.",
|
||||
"Duplicate keyword error.",
|
||||
"Pattern syntax error. Value for case \"other\" was not defined. ",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error.",
|
||||
"Pattern syntax error. Value for case \"other\" was not defined. ",
|
||||
};
|
||||
|
||||
//Test SelectFormat pattern syntax
|
||||
SelectFormat selFmt = new SelectFormat(SIMPLE_PATTERN);
|
||||
for (int i=0; i<checkSyntaxData.length; ++i) {
|
||||
@ -60,26 +45,34 @@ public class SelectFormatUnitTest extends TestFmwk {
|
||||
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed "
|
||||
+ "to detect syntax error with pattern: "+checkSyntaxData[i]);
|
||||
} catch (IllegalArgumentException e){
|
||||
assertEquals("Error:TestPatternSyntax failed with unexpected"
|
||||
+ " error message for pattern: " + checkSyntaxData[i] ,
|
||||
expectedErrorMsgs[i], e.getMessage() );
|
||||
// ok
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// ICU 4.8 does not check for duplicate keywords any more.
|
||||
selFmt.applyPattern("odd{foo} odd{bar} other{foobar}");
|
||||
assertEquals("should use first occurrence of the 'odd' keyword", "foo", selFmt.format("odd"));
|
||||
selFmt.applyPattern("odd{foo} other{bar} other{foobar}");
|
||||
assertEquals("should use first occurrence of the 'other' keyword", "bar", selFmt.format("other"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Unit tests for invalid keywords
|
||||
*/
|
||||
public void TestInvalidKeyword() {
|
||||
//Test formatting with invalid keyword
|
||||
// Test formatting with invalid keyword:
|
||||
// one which contains Pattern_Syntax or Pattern_White_Space.
|
||||
String keywords[] = {
|
||||
"9Keyword-_", //Starts with a digit
|
||||
"-Keyword-_", //Starts with a hyphen
|
||||
"_Keyword-_", //Starts with an underscore
|
||||
"\\u00E9Keyword-_", //Starts with non-ASCII character
|
||||
"Key*word-_", //Contains a sepial character not allowed
|
||||
"*Keyword-_" //Starts with a sepial character not allowed
|
||||
"9Keyword-_",
|
||||
"-Keyword-_",
|
||||
"_Keyword-_",
|
||||
"\\u00E9Keyword-_",
|
||||
"Key word",
|
||||
" Keyword",
|
||||
"Keyword ",
|
||||
"Key*word-_",
|
||||
"*Keyword-_"
|
||||
};
|
||||
|
||||
String expected = "Invalid formatting argument.";
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004-2010, International Business Machines
|
||||
* Copyright (c) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
@ -28,6 +28,7 @@ import com.ibm.icu.text.DateFormat;
|
||||
import com.ibm.icu.text.DecimalFormat;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.MessageFormat;
|
||||
import com.ibm.icu.text.MessagePattern;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.SimpleDateFormat;
|
||||
import com.ibm.icu.text.UFormat;
|
||||
@ -158,7 +159,9 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
"'{1,number,#,##}' {1,number,#,##}",
|
||||
};
|
||||
|
||||
String testResultPatterns[] = {
|
||||
// ICU 4.8 returns the original pattern (testCases)
|
||||
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
|
||||
/*String testResultPatterns[] = {
|
||||
"Quotes '', '{', a {0} '{'0}",
|
||||
"Quotes '', '{', a {0,number} '{'0}",
|
||||
"'{'1,number,#,##} {1,number,'#'#,##}",
|
||||
@ -168,12 +171,12 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
"'{'1,date,full}, {1,date,full},",
|
||||
"'{'3,date,full}, {3,date,full},",
|
||||
"'{'1,number,#,##} {1,number,#,##}"
|
||||
};
|
||||
};*/
|
||||
|
||||
String testResultStrings[] = {
|
||||
"Quotes ', {, a 1 {0}",
|
||||
"Quotes ', {, a 1 {0}",
|
||||
"{1,number,#,##} #34,56",
|
||||
"Quotes ', {, 'a' 1 {0}",
|
||||
"Quotes ', {, 'a' 1 {0}",
|
||||
"{1,number,'#',##} #34,56",
|
||||
"There are 3,456 files on Disk at 1/12/70 5:46 AM.",
|
||||
"On Disk, there are 3,456 files, with $1.00.",
|
||||
"{1,number,percent}, 345,600%,",
|
||||
@ -193,7 +196,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
errln("MessageFormat for " + testCases[i] + " creation failed.");
|
||||
continue;
|
||||
}
|
||||
assertEquals("\"" + testCases[i] + "\".toPattern()", testResultPatterns[i], form.toPattern());
|
||||
// ICU 4.8 returns the original pattern (testCases)
|
||||
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
|
||||
// assertEquals("\"" + testCases[i] + "\".toPattern()", testResultPatterns[i], form.toPattern());
|
||||
assertEquals("\"" + testCases[i] + "\".toPattern()", testCases[i], form.toPattern());
|
||||
// Note: An alternative test would be to build MessagePattern objects for
|
||||
// both the input and output patterns and compare them, taking SKIP_SYNTAX etc.
|
||||
// into account.
|
||||
// (Too much trouble...)
|
||||
|
||||
//it_out << "Pat out: " << form.toPattern(buffer));
|
||||
StringBuffer result = new StringBuffer();
|
||||
@ -644,7 +654,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
try {
|
||||
msg.toPattern();
|
||||
errln("msg.setFormat().toPattern() does not throw an IllegalStateException");
|
||||
} catch(IllegalStateException e) {
|
||||
// ok
|
||||
}
|
||||
|
||||
for (i = 0; i < formatsAct.length; i++) {
|
||||
a = formatsAct[i];
|
||||
@ -685,7 +702,8 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
msg.setFormats( formatsToAdopt ); // function to test
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
|
||||
formatsAct = msg.getFormats();
|
||||
if (formatsAct==null || (formatsAct.length <=0) || (formatsAct.length != formatsCmp.length)) {
|
||||
@ -735,7 +753,8 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
|
||||
|
||||
formatsAct = msg.getFormats();
|
||||
if (formatsAct==null || (formatsAct.length <=0) || (formatsAct.length != formatsCmp.length)) {
|
||||
@ -1124,22 +1143,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
|
||||
public void testNamedArguments() {
|
||||
// Ensure that mixed argument types are not allowed.
|
||||
// Either all arguments have to be numeric or valid identifiers.
|
||||
try {
|
||||
new MessageFormat("Number of files in folder {0}: {numfiles}");
|
||||
errln("Creating a MessageFormat with mixed argument types " +
|
||||
"(named and numeric) should throw an " +
|
||||
"IllegalArgumentException but did not!");
|
||||
} catch (IllegalArgumentException e) {}
|
||||
|
||||
try {
|
||||
new MessageFormat("Number of files in folder {folder}: {1}");
|
||||
errln("Creating a MessageFormat with mixed argument types " +
|
||||
"(named and numeric) should throw an " +
|
||||
"IllegalArgumentException but did not!");
|
||||
} catch (IllegalArgumentException e) {}
|
||||
|
||||
// ICU 4.8 allows mixing named and numbered arguments.
|
||||
assertTrue(
|
||||
"has some named arguments",
|
||||
new MessageFormat("Number of files in folder {0}: {numfiles}").usesNamedArguments());
|
||||
assertTrue(
|
||||
"has some named arguments",
|
||||
new MessageFormat("Number of files in folder {folder}: {1}").usesNamedArguments());
|
||||
|
||||
// Test named arguments.
|
||||
MessageFormat mf = new MessageFormat("Number of files in folder {folder}: {numfiles}");
|
||||
if (!mf.usesNamedArguments()) {
|
||||
@ -1151,19 +1162,21 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
|
||||
// Test argument names with invalid start characters.
|
||||
// Modified: ICU 4.8 allows all characters except for Pattern_White_Space and Pattern_Syntax.
|
||||
try {
|
||||
new MessageFormat("Wavelength: {_\u028EValue\uFF14}");
|
||||
new MessageFormat("Wavelength: {^\u028EValue\uFF14}");
|
||||
errln("Creating a MessageFormat with invalid argument names " +
|
||||
"should throw an IllegalArgumentException but did not!");
|
||||
} catch (IllegalArgumentException e) {}
|
||||
|
||||
try {
|
||||
new MessageFormat("Wavelength: {\uFF14\u028EValue}");
|
||||
new MessageFormat("Wavelength: {\uFE45\u028EValue}");
|
||||
errln("Creating a MessageFormat with invalid argument names " +
|
||||
"should throw an IllegalArgumentException but did not!");
|
||||
} catch (IllegalArgumentException e) {}
|
||||
|
||||
// Test argument names with invalid continue characters.
|
||||
// Modified: ICU 4.8 allows all characters except for Pattern_White_Space and Pattern_Syntax.
|
||||
try {
|
||||
new MessageFormat("Wavelength: {Value@\uFF14}");
|
||||
errln("Creating a MessageFormat with invalid argument names " +
|
||||
@ -1240,7 +1253,7 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
public void testNestedFormatsInPluralFormat() {
|
||||
try {
|
||||
MessageFormat msgFmt = new MessageFormat(
|
||||
"{0, plural, one {{0, number,C''''est #,##0.0# fichier}} " +
|
||||
"{0, plural, one {{0, number,C''est #,##0.0# fichier}} " +
|
||||
"other {Ce sont # fichiers}} dans la liste.",
|
||||
new ULocale("fr"));
|
||||
Object objArray[] = {new Long(0)};
|
||||
@ -1304,6 +1317,19 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
public void testApostropheInPluralAndSelect() {
|
||||
MessageFormat fmt = new MessageFormat(
|
||||
"abc_{0,plural,other{#'#'#'{'#''}}_def_{1,select,other{sel'}'ect''}}_xyz",
|
||||
Locale.ENGLISH);
|
||||
String expected = "abc_3#3{3'_def_sel}ect'_xyz";
|
||||
String result = fmt.format(new Object[] { 3, "x" });
|
||||
if (!result.equals(expected)) {
|
||||
errln("MessageFormat with apostrophes in plural/select arguments failed:\n" +
|
||||
"Expected "+expected+"\n" +
|
||||
"Got "+result);
|
||||
}
|
||||
}
|
||||
|
||||
// Test toPattern when there is a PluralFormat
|
||||
public void testPluralFormatToPattern() {
|
||||
String[] patterns = {
|
||||
@ -1681,4 +1707,129 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
|
||||
+ "to return an null if argumentName was not found.");
|
||||
}
|
||||
}
|
||||
|
||||
public String getPatternAndSkipSyntax(MessagePattern pattern) {
|
||||
StringBuilder sb = new StringBuilder(pattern.getPatternString());
|
||||
int count = pattern.countParts();
|
||||
for (int i = count; i > 0;) {
|
||||
MessagePattern.Part part = pattern.getPart(--i);
|
||||
if (part.getType() == MessagePattern.Part.Type.SKIP_SYNTAX) {
|
||||
sb.delete(part.getIndex(), part.getLimit());
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public void TestApostropheMode() {
|
||||
MessagePattern ado_mp = new MessagePattern(MessagePattern.ApostropheMode.DOUBLE_OPTIONAL);
|
||||
MessagePattern adr_mp = new MessagePattern(MessagePattern.ApostropheMode.DOUBLE_REQUIRED);
|
||||
assertEquals("wrong value",
|
||||
MessagePattern.ApostropheMode.DOUBLE_OPTIONAL,
|
||||
ado_mp.getApostropheMode());
|
||||
assertEquals("wrong value",
|
||||
MessagePattern.ApostropheMode.DOUBLE_REQUIRED,
|
||||
adr_mp.getApostropheMode());
|
||||
assertNotEquals("MessagePatterns with different ApostropheMode (no pattern)", ado_mp, adr_mp);
|
||||
assertNotEquals("MessagePatterns with different ApostropheMode (a)",
|
||||
ado_mp.parse("a"), adr_mp.parse("a"));
|
||||
|
||||
String[] tuples = new String[] {
|
||||
// Desired output
|
||||
// DOUBLE_OPTIONAL pattern
|
||||
// DOUBLE_REQUIRED pattern (null=same as DOUBLE_OPTIONAL)
|
||||
"I see {many}", "I see '{many}'", null,
|
||||
"I said {'Wow!'}", "I said '{''Wow!''}'", null,
|
||||
"I dont know", "I dont know", "I don't know",
|
||||
"I don't know", "I don't know", "I don''t know",
|
||||
"I don't know", "I don''t know", "I don''t know",
|
||||
};
|
||||
for (int i = 0; i < tuples.length; i += 3) {
|
||||
String desired = tuples[i];
|
||||
String ado_pattern = tuples[i + 1];
|
||||
assertEquals("DOUBLE_OPTIONAL failure", desired,
|
||||
getPatternAndSkipSyntax(ado_mp.parse(ado_pattern)));
|
||||
String adr_pattern = tuples[i + 2];
|
||||
if (adr_pattern == null) {
|
||||
adr_pattern = ado_pattern;
|
||||
}
|
||||
assertEquals("DOUBLE_REQUIRED failure", desired,
|
||||
getPatternAndSkipSyntax(adr_mp.parse(adr_pattern)));
|
||||
}
|
||||
}
|
||||
|
||||
// Compare behavior of JDK and ICU's DOUBLE_REQUIRED compatibility mode.
|
||||
public void TestCompatibleApostrophe() {
|
||||
// Message with choice argument which does not contain another argument.
|
||||
// The JDK performs only one apostrophe-quoting pass on this pattern.
|
||||
String pattern = "ab{0,choice,0#1'2''3'''4''''.}yz";
|
||||
java.text.MessageFormat jdkMsg =
|
||||
new java.text.MessageFormat(pattern, Locale.ENGLISH);
|
||||
|
||||
MessageFormat compMsg = new MessageFormat("", Locale.ENGLISH);
|
||||
compMsg.applyPattern(pattern, MessagePattern.ApostropheMode.DOUBLE_REQUIRED);
|
||||
assertEquals("wrong value",
|
||||
MessagePattern.ApostropheMode.DOUBLE_REQUIRED,
|
||||
compMsg.getApostropheMode());
|
||||
|
||||
MessageFormat icuMsg = new MessageFormat("", Locale.ENGLISH);
|
||||
icuMsg.applyPattern(pattern, MessagePattern.ApostropheMode.DOUBLE_OPTIONAL);
|
||||
assertEquals("wrong value",
|
||||
MessagePattern.ApostropheMode.DOUBLE_OPTIONAL,
|
||||
icuMsg.getApostropheMode());
|
||||
|
||||
Object[] zero0 = new Object[] { 0 };
|
||||
assertEquals("unexpected JDK MessageFormat apostrophe behavior",
|
||||
"ab12'3'4''.yz",
|
||||
jdkMsg.format(zero0));
|
||||
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
|
||||
"ab12'3'4''.yz",
|
||||
compMsg.format(zero0));
|
||||
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
|
||||
"ab1'2'3''4''.yz",
|
||||
icuMsg.format(zero0));
|
||||
|
||||
// Message with choice argument which contains a nested simple argument.
|
||||
// The JDK performs two apostrophe-quoting passes.
|
||||
pattern = "ab{0,choice,0#1'2''3'''4''''.{0,number,'#x'}}yz";
|
||||
jdkMsg.applyPattern(pattern);
|
||||
compMsg.applyPattern(pattern);
|
||||
icuMsg.applyPattern(pattern);
|
||||
assertEquals("unexpected JDK MessageFormat apostrophe behavior",
|
||||
"ab1234'.0xyz",
|
||||
jdkMsg.format(zero0));
|
||||
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
|
||||
"ab1234'.0xyz",
|
||||
compMsg.format(zero0));
|
||||
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
|
||||
"ab1'2'3''4''.#x0yz",
|
||||
icuMsg.format(zero0));
|
||||
|
||||
// Message with choice argument which contains a nested choice argument.
|
||||
// The JDK fails to parse this pattern.
|
||||
// jdkMsg.applyPattern("cd{0,choice,0#ef{0,choice,0#1'2''3'''4''''.}uv}wx");
|
||||
// For lack of comparison, we do not test ICU with this pattern.
|
||||
|
||||
// The JDK ChoiceFormat itself always performs one apostrophe-quoting pass.
|
||||
ChoiceFormat choice = new ChoiceFormat("0#1'2''3'''4''''.");
|
||||
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
|
||||
"12'3'4''.",
|
||||
choice.format(0));
|
||||
choice.applyPattern("0#1'2''3'''4''''.{0,number,'#x'}");
|
||||
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
|
||||
"12'3'4''.{0,number,#x}",
|
||||
choice.format(0));
|
||||
}
|
||||
|
||||
public void TestTrimArgumentName() {
|
||||
// ICU 4.8 allows and ignores white space around argument names and numbers.
|
||||
MessageFormat m = new MessageFormat("a { 0 , number , '#,#'#.0 } z", Locale.ENGLISH);
|
||||
assertEquals("trim-numbered-arg format() failed", "a #,#2.0 z", m.format(new Object[] { 2 }));
|
||||
|
||||
m.applyPattern("x { _oOo_ , number , integer } y");
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put("_oOo_", new Integer(3));
|
||||
StringBuffer result = new StringBuffer();
|
||||
assertEquals("trim-named-arg format() failed", "x 3 y",
|
||||
m.format(map, result, new FieldPosition(0)).toString());
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -15,6 +15,7 @@ import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.Normalizer2Impl;
|
||||
import com.ibm.icu.impl.PatternProps;
|
||||
import com.ibm.icu.impl.UCharacterName;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
@ -247,6 +248,46 @@ public final class UCharacterTest extends TestFmwk
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test various implementations of Pattern_Syntax & Pattern_White_Space.
|
||||
*/
|
||||
public void TestPatternProperties() {
|
||||
UnicodeSet syn_pp = new UnicodeSet();
|
||||
UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]");
|
||||
UnicodeSet syn_list = new UnicodeSet(
|
||||
"[!-/\\:-@\\[-\\^`\\{-~"+
|
||||
"\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+
|
||||
"\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+
|
||||
"\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]");
|
||||
UnicodeSet ws_pp = new UnicodeSet();
|
||||
UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]");
|
||||
UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]");
|
||||
UnicodeSet syn_ws_pp = new UnicodeSet();
|
||||
UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop);
|
||||
for(int c=0; c<=0xffff; ++c) {
|
||||
if(PatternProps.isSyntax(c)) {
|
||||
syn_pp.add(c);
|
||||
}
|
||||
if(PatternProps.isWhiteSpace(c)) {
|
||||
ws_pp.add(c);
|
||||
}
|
||||
if(PatternProps.isSyntaxOrWhiteSpace(c)) {
|
||||
syn_ws_pp.add(c);
|
||||
}
|
||||
}
|
||||
compareUSets(syn_pp, syn_prop,
|
||||
"PatternProps.isSyntax()", "[:Pattern_Syntax:]", true);
|
||||
compareUSets(syn_pp, syn_list,
|
||||
"PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true);
|
||||
compareUSets(ws_pp, ws_prop,
|
||||
"PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true);
|
||||
compareUSets(ws_pp, ws_list,
|
||||
"PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true);
|
||||
compareUSets(syn_ws_pp, syn_ws_prop,
|
||||
"PatternProps.isSyntaxOrWhiteSpace()",
|
||||
"[[:Pattern_Syntax:][:Pattern_White_Space:]]", true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for defined and undefined characters
|
||||
*/
|
||||
|
@ -130,6 +130,16 @@ public class CompatibilityTest extends TestFmwk
|
||||
{"ICU_3.8.1", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
|
||||
{"ICU_4.0", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
|
||||
{"ICU_4.2.1", "com.ibm.icu.text.CurrencyPluralInfo.dat"},
|
||||
// ICU 4.8 MessageFormat is not serialization-compatible with previous versions.
|
||||
{"ICU_3.6", "com.ibm.icu.text.MessageFormat.dat"},
|
||||
{"ICU_3.8.1", "com.ibm.icu.text.MessageFormat.dat"},
|
||||
{"ICU_4.0", "com.ibm.icu.text.MessageFormat.dat"},
|
||||
{"ICU_4.2.1", "com.ibm.icu.text.MessageFormat.dat"},
|
||||
{"ICU_4.4", "com.ibm.icu.text.MessageFormat.dat"},
|
||||
// RelativeDateFormat apparently uses and serializes a MessageFormat.
|
||||
{"ICU_4.0", "com.ibm.icu.impl.RelativeDateFormat.dat"},
|
||||
{"ICU_4.2.1", "com.ibm.icu.impl.RelativeDateFormat.dat"},
|
||||
{"ICU_4.4", "com.ibm.icu.impl.RelativeDateFormat.dat"},
|
||||
};
|
||||
|
||||
private Target getFileTargets(URL fileURL)
|
||||
|
@ -135,6 +135,10 @@ new features in this release. The list of API changes since the previous ICU4J
|
||||
is available
|
||||
<a href="http://source.icu-project.org/repos/icu/icu4j/tags/milestone-4-7-1/APIChangeReport.html">here</a>.
|
||||
</p>
|
||||
<h5>MessageFormat Changes</h5>
|
||||
<p>MessageFormat and related classes (choice/plural/select) have been reimplemented,
|
||||
with several improvements and some incompatible changes.
|
||||
See the <a href="http://site.icu-project.org/download/48">ICU 4.8 download</a> page for details.</p>
|
||||
<h3 class="doc"><a name="license"></a>License Information</h3>
|
||||
<p>
|
||||
The ICU projects (ICU4C and ICU4J) use the X license. The X
|
||||
|
Loading…
Reference in New Issue
Block a user