ICU-8319 merge MessageFormat 2011q1 work into trunk, from icu4j/branches/markus/msg48 -r 29385:29881

X-SVN-Rev: 29885
This commit is contained in:
Markus Scherer 2011-04-25 20:13:39 +00:00
parent 73c400496f
commit fb5332c296
18 changed files with 4279 additions and 1748 deletions

View File

@ -0,0 +1,123 @@
/*
*******************************************************************************
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* created on: 2010aug21
* created by: Markus W. Scherer
*/
package com.ibm.icu.dev.demo.messagepattern;
import com.ibm.icu.text.MessagePattern;
/**
* Demo code for MessagePattern class.
* Pretty-prints the list of MessagePattern Parts and uses the MiniMessageFormatter
* with a few patterns.
* @author Markus Scherer
* @since 2010-aug-21
*/
public final class MessagePatternDemo {
    /** Number of spaces added to the indentation per message nesting level. */
    private static final int INDENT_WIDTH=2;

    /**
     * Returns the indentation for the given nesting level.
     * The string is built on demand rather than cut out of a fixed-length
     * run of spaces, so no nesting depth can index past the end of a buffer.
     * @param nestingLevel the nesting level; 0 or less yields the empty string
     * @return INDENT_WIDTH spaces per nesting level
     */
    private static String indentFor(int nestingLevel) {
        StringBuilder spaces=new StringBuilder();
        for(int i=nestingLevel*INDENT_WIDTH; i>0; --i) {
            spaces.append(' ');
        }
        return spaces.toString();
    }

    /**
     * Prints one numbered line per Part of the parsed message.
     * Each line shows the Part's toString(), followed by its pattern substring
     * (if the Part has a non-zero length) and its numeric value (if the Part type has one).
     * Lines are indented according to the value of the enclosing MSG_START part;
     * a MSG_LIMIT part is printed at its message's indentation and then outdents by one level.
     * If the auto-quoted form of the pattern differs from the pattern string,
     * it is printed first.
     * @param msg the parsed message pattern
     */
    private static void printParts(MessagePattern msg) {
        String autoQA=msg.autoQuoteApostropheDeep();
        if(!autoQA.equals(msg.getPatternString())) {
            System.out.println("autoQA: "+autoQA);
        }
        String indent="";
        StringBuilder explanation=new StringBuilder();
        MessagePattern.Part prevPart=null;
        int count=msg.countParts();
        for(int i=0; i<count; ++i) {
            explanation.setLength(0);
            MessagePattern.Part part=msg.getPart(i);
            // Sanity check: consecutive parts must not overlap in the pattern string.
            assert prevPart==null || prevPart.getLimit()<=part.getIndex();
            String partString=part.toString();
            MessagePattern.Part.Type type=part.getType();
            if(type==MessagePattern.Part.Type.MSG_START) {
                // The value of a MSG_START part is used as the nesting level.
                indent=indentFor(part.getValue());
            }
            if(part.getLength()>0) {
                explanation.append("=\"").append(msg.getSubstring(part)).append('"');
            }
            if(type.hasNumericValue()) {
                explanation.append('=').append(msg.getNumericValue(part));
            }
            System.out.format("%2d: %s%s%s\n", i, indent, partString, explanation);
            if(type==MessagePattern.Part.Type.MSG_LIMIT) {
                // Outdent after printing; indentFor() yields "" for level 0 or below.
                indent=indentFor(part.getValue()-1);
            }
            prevPart=part;
        }
    }

    /**
     * Prints the pattern string, parses it and prints its parts.
     * A parsing or printing failure is reported on standard output rather than
     * propagated, so that the demo can continue with the next pattern.
     * @param s the message pattern string
     * @return the parsed MessagePattern, or null if an exception was thrown
     */
    private static MessagePattern print(String s) {
        System.out.println("message: "+s);
        try {
            MessagePattern msg=new MessagePattern(s);
            printParts(msg);
            return msg;
        } catch(Exception e) {
            System.out.println("Exception: "+e.getMessage());
            return null;
        }
    }

    /**
     * Prints the pattern and its parts, then the message formatted
     * with positional arguments by the MiniMessageFormatter.
     * Nothing is formatted if the pattern could not be parsed.
     * @param s the message pattern string
     * @param args positional argument values
     */
    private static void printFormat(String s, Object... args) {
        MessagePattern msg=print(s);
        if(msg!=null) {
            System.out.println(new MiniMessageFormatter(msg).format(new StringBuilder(), args));
        }
    }

    /**
     * Prints the pattern and its parts, then the message formatted
     * with named arguments by the MiniMessageFormatter.
     * Nothing is formatted if the pattern could not be parsed.
     * @param s the message pattern string
     * @param args alternating argument names and values
     */
    private static void printFormatWithNamedArgs(String s, Object... args) {
        MessagePattern msg=print(s);
        if(msg!=null) {
            System.out.println(new MiniMessageFormatter(msg).format(
                new StringBuilder(), MiniMessageFormatter.mapFromNameValuePairs(args)));
        }
    }

    /**
     * Runs the demo: parses and pretty-prints a series of sample patterns,
     * and formats some of them with sample arguments.
     * @param argv ignored
     */
    public static void main(String[] argv) {
        print("Hello!");
        print("Hel'lo!");
        print("Hel'{o");
        print("Hel'{'o");
        // double apostrophe inside quoted literal text still encodes a single apostrophe
        printFormat("a'{bc''de'f");
        print("a'{bc''de'f{0,number,g'hi''jk'l#}");
        print("abc{0}def");
        print("abc{ arg }def");
        print("abc{1}def{arg}ghi");
        print("abc{2, number}ghi{3, select, xx {xxx} other {ooo}} xyz");
        print("abc{gender,select,"+
            "other{His name is {person,XML,<entry name=\"PERSON\">{$PERSON}</entry>}.}}xyz");
        print("abc{num_people, plural, offset:17 few{fff} other {oooo}}xyz");
        print("abc{ num , plural , offset: 2 =1 {1} =-1 {-1} =3.14 {3.14} other {oo} }xyz");
        print("I don't {a,plural,other{w'{'on't #'#'}} and "+
            "{b,select,other{shan't'}'}} '{'''know'''}' and "+
            "{c,choice,0#can't'|'}"+
            "{z,number,#'#'###.00'}'}.");
        print("a_{0,choice,-∞ #-inf| 5≤ five | 99 # ninety'|'nine }_z");
        print("a_{0,plural,other{num=#'#'=#'#'={1,number,##}!}}_z");
        print("}}}{0}}"); // yes, unmatched '}' are ok in ICU MessageFormat
        printFormat("Hello {0}!", "Alice");
        String msg="++{0, select, female{{1} calls you her friend}"+
            "other{{1} calls you '{their}' friend}"+
            "male{{1} calls you his friend}}--";
        printFormat(msg, "female", "Alice");
        printFormat(msg, "male", "Bob");
        printFormat(msg, "unknown", "sushifan3");
        msg="_'__{gender, select, female{Her n'ame is {person_name}.}"+
            "other{His n'ame is {person_name}.}}__'_";
        printFormatWithNamedArgs(msg, "gender", "female", "person_name", "Alice");
        printFormatWithNamedArgs(msg, "gender", "male", "person_name", "Bob");
    }
}

View File

@ -0,0 +1,186 @@
/*
*******************************************************************************
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* created on: 2010aug21
* created by: Markus W. Scherer
*/
package com.ibm.icu.dev.demo.messagepattern;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import com.ibm.icu.text.MessagePattern;
import com.ibm.icu.text.MessagePattern.ArgType;
import com.ibm.icu.text.MessagePattern.Part;
import com.ibm.icu.util.Freezable;
/**
* Mini message formatter for a small subset of the ICU MessageFormat syntax.
* Supports only string substitution and select formatting.
* @author Markus Scherer
* @since 2010-aug-21
*/
public final class MiniMessageFormatter implements Freezable<MiniMessageFormatter> {
    /** The parsed message pattern that this formatter evaluates. */
    private final MessagePattern msg;

    /**
     * Creates a formatter with a new, default-constructed MessagePattern.
     * Call applyPattern() before formatting.
     */
    public MiniMessageFormatter() {
        this.msg=new MessagePattern();
    }

    /**
     * Creates a formatter for an already-parsed pattern.
     * @param msg the parsed pattern; the formatter stores the result of msg.clone()
     */
    public MiniMessageFormatter(MessagePattern msg) {
        this.msg=(MessagePattern)msg.clone();
    }

    /**
     * Creates a formatter by parsing a pattern string.
     * @param msg the message pattern string
     */
    public MiniMessageFormatter(String msg) {
        this.msg=new MessagePattern(msg);
    }

    /**
     * Parses a new pattern string into this formatter's MessagePattern.
     * @param msg the message pattern string
     * @return this
     */
    public MiniMessageFormatter applyPattern(String msg) {
        this.msg.parse(msg);
        return this;
    }

    /**
     * @return the pattern string as reported by the underlying MessagePattern
     */
    public String getPatternString() {
        return msg.getPatternString();
    }

    /**
     * @return true if the underlying MessagePattern reports named arguments
     */
    public boolean hasNamedArguments() {
        return msg.hasNamedArguments();
    }

    /**
     * @return true if the underlying MessagePattern reports numbered arguments
     */
    public boolean hasNumberedArguments() {
        return msg.hasNumberedArguments();
    }

    /**
     * Formats the parsed message with positional arguments.
     * Supports only string substitution (e.g., {3}) and select format.
     * @param dest gets the formatted message appended
     * @param args positional arguments
     * @return dest
     * @throws IllegalArgumentException if the pattern has named arguments
     */
    public Appendable format(Appendable dest, Object... args) {
        if(msg.hasNamedArguments()) {
            throw new IllegalArgumentException(
                "Formatting message with named arguments using positional argument values.");
        }
        format(0, dest, args, null);
        return dest;
    }

    /**
     * Convenience method: parses the pattern and formats it with positional arguments.
     * @param msg the message pattern string
     * @param args positional arguments
     * @return the formatted message
     */
    public static final String format(String msg, Object... args) {
        return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), args).toString();
    }

    /**
     * Formats the parsed message with named arguments.
     * Supports only string substitution and select format.
     * @param dest gets the formatted message appended
     * @param argsMap maps argument names to argument values
     * @return dest
     * @throws IllegalArgumentException if the pattern has numbered arguments
     */
    public Appendable format(Appendable dest, Map<String, Object> argsMap) {
        if(msg.hasNumberedArguments()) {
            throw new IllegalArgumentException(
                "Formatting message with numbered arguments using named argument values.");
        }
        format(0, dest, null, argsMap);
        return dest;
    }

    /**
     * Convenience method: parses the pattern and formats it with named arguments.
     * @param msg the message pattern string
     * @param argsMap maps argument names to argument values
     * @return the formatted message
     */
    public static final String format(String msg, Map<String, Object> argsMap) {
        return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), argsMap).toString();
    }

    /**
     * Formats the (sub-)message that starts at the given part index.
     * Copies the literal text between parts to dest, substitutes simple arguments
     * with their toString() value, and recurses into the selected sub-message
     * of select arguments.
     * Exactly one of args and argsMap is expected to be non-null.
     * @param msgStart index of the part that starts the message to format
     * @param dest gets the formatted text appended
     * @param args positional argument values, or null when formatting with named arguments
     * @param argsMap named argument values, used when args is null
     * @return the index of the MSG_LIMIT part that ends this message
     * @throws IndexOutOfBoundsException if an argument value is missing
     * @throws UnsupportedOperationException for argument types other than NONE and SELECT
     * @throws RuntimeException wrapping an IOException thrown by dest
     */
    private int format(int msgStart, Appendable dest, Object[] args, Map<String, Object> argsMap) {
        try {
            String msgString=msg.getPatternString();
            // Literal text is copied from prevIndex up to the start of the next part.
            int prevIndex=msg.getPart(msgStart).getLimit();
            for(int i=msgStart+1;; ++i) {
                Part part=msg.getPart(i);
                Part.Type type=part.getType();
                int index=part.getIndex();
                dest.append(msgString, prevIndex, index);
                if(type==Part.Type.MSG_LIMIT) {
                    return i;
                }
                if(type==Part.Type.SKIP_SYNTAX || type==Part.Type.INSERT_CHAR) {
                    // Skip over the part's own pattern text.
                    prevIndex=part.getLimit();
                    continue;
                }
                assert type==Part.Type.ARG_START : "Unexpected Part "+part+" in parsed message.";
                int argLimit=msg.getLimitPartIndex(i);
                ArgType argType=part.getArgType();
                // The part after ARG_START identifies the argument by number or name.
                part=msg.getPart(++i);
                Object arg;
                if(args!=null) {
                    try {
                        arg=args[part.getValue()]; // args[ARG_NUMBER]
                    } catch(IndexOutOfBoundsException e) {
                        throw new IndexOutOfBoundsException(
                            "No argument at index "+part.getValue());
                    }
                } else {
                    arg=argsMap.get(msg.getSubstring(part)); // args[ARG_NAME]
                    if(arg==null) {
                        throw new IndexOutOfBoundsException(
                            "No argument for name "+msg.getSubstring(part));
                    }
                }
                String argValue=arg.toString();
                ++i;
                if(argType==ArgType.NONE) {
                    dest.append(argValue);
                } else if(argType==ArgType.SELECT) {
                    // Similar to SelectFormat.findSubMessage().
                    int subMsgStart=0;
                    for(;; ++i) { // (ARG_SELECTOR, message) pairs until ARG_LIMIT
                        // After the post-increment, i is the index of the selector's message.
                        part=msg.getPart(i++);
                        if(part.getType()==Part.Type.ARG_LIMIT) {
                            assert subMsgStart!=0; // The parser made sure this is the case.
                            break;
                        // else: part is an ARG_SELECTOR followed by a message
                        } else if(msg.partSubstringMatches(part, argValue)) {
                            // keyword matches
                            subMsgStart=i;
                            break;
                        } else if(subMsgStart==0 && msg.partSubstringMatches(part, "other")) {
                            // Remember the first "other" message as the fallback.
                            subMsgStart=i;
                        }
                        // Jump to the end of this selector's message.
                        i=msg.getLimitPartIndex(i);
                    }
                    format(subMsgStart, dest, args, argsMap);
                } else {
                    throw new UnsupportedOperationException("Unsupported argument type "+argType);
                }
                // Continue after the whole argument.
                prevIndex=msg.getPart(argLimit).getLimit();
                i=argLimit;
            }
        } catch(IOException e) { // Appendable throws IOException
            throw new RuntimeException(e); // We do not want a throws clause.
        }
    }

    /**
     * Presents an array of (String, Object) pairs as a Map.
     * Only for temporary use for formatting with named arguments.
     * @param args alternating names (String) and values; must have an even length
     * @return a new map from each name to the value that follows it
     * @throws IllegalArgumentException if args has an odd number of elements
     */
    public static Map<String, Object> mapFromNameValuePairs(Object[] args) {
        // Reject a dangling name up front instead of running off the end of the array.
        if((args.length&1)!=0) {
            throw new IllegalArgumentException(
                "Expected (name, value) pairs but got an odd number of elements: "+args.length);
        }
        HashMap<String, Object> argsMap = new HashMap<String, Object>();
        for(int i=0; i<args.length; i+=2) {
            argsMap.put((String)args[i], args[i+1]);
        }
        return argsMap;
    }

    /**
     * Returns a new formatter for the same pattern.
     * The copy is built from msg.cloneAsThawed(), so that it does not share
     * this formatter's MessagePattern.
     * @return a new MiniMessageFormatter, never null
     */
    public MiniMessageFormatter cloneAsThawed() {
        return new MiniMessageFormatter(msg.cloneAsThawed());
    }

    /**
     * Freezes the underlying MessagePattern.
     * @return this
     */
    public MiniMessageFormatter freeze() {
        msg.freeze();
        return this;
    }

    /**
     * @return true if the underlying MessagePattern is frozen
     */
    public boolean isFrozen() {
        return msg.isFrozen();
    }
}

View File

@ -22,6 +22,12 @@ com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
#
com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
# Sets the default MessageFormat apostrophe-quoting behavior.
# See the com.ibm.icu.text.MessagePattern.ApostropheMode enum documentation.
# Values: DOUBLE_OPTIONAL or DOUBLE_REQUIRED.
# This is new in ICU 4.8.
# DOUBLE_OPTIONAL is the ICU default behavior.
com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
#
# [Internal Use Only]

View File

@ -0,0 +1,264 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* created on: 2011feb25
* created by: Markus W. Scherer
*/
package com.ibm.icu.impl;
/**
* Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
* Hardcodes these properties, does not load data, does not depend on other ICU classes.
* <p>
* Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
* and both properties only include BMP code points (no supplementary ones).
* Pattern_Syntax includes some unassigned code points.
* <p>
* [:Pattern_White_Space:] =
* [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
* <p>
* [:Pattern_Syntax:] =
* [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
* \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
* \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
* \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
* \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
* @author mscherer
*/
public final class PatternProps {
    /**
     * Tests for the Pattern_Syntax property.
     * @param c a code point
     * @return true if c is a Pattern_Syntax code point.
     */
    public static boolean isSyntax(int c) {
        if(c<0) {
            return false;
        }
        if(c<=0xff) {
            return latin1[c]==3;
        }
        if(c<0x2010) {
            return false;
        }
        if(c<=0x3030) {
            return testBit2000(syntax2000, c);
        }
        return isHighSyntax(c);
    }

    /**
     * Tests for either of the two Pattern properties.
     * @param c a code point
     * @return true if c is a Pattern_Syntax or Pattern_White_Space code point.
     */
    public static boolean isSyntaxOrWhiteSpace(int c) {
        if(c<0) {
            return false;
        }
        if(c<=0xff) {
            return latin1[c]!=0;
        }
        if(c<0x200e) {
            return false;
        }
        if(c<=0x3030) {
            return testBit2000(syntaxOrWhiteSpace2000, c);
        }
        return isHighSyntax(c);
    }

    /**
     * Tests for the Pattern_White_Space property.
     * @param c a code point
     * @return true if c is a Pattern_White_Space character.
     */
    public static boolean isWhiteSpace(int c) {
        if(c<0) {
            return false;
        }
        if(c<=0xff) {
            return latin1[c]==5;
        }
        // Above Latin-1 there are only two pairs: 200E..200F and 2028..2029.
        return c==0x200e || c==0x200f || c==0x2028 || c==0x2029;
    }

    /**
     * Skips over Pattern_White_Space starting at index i of the CharSequence.
     * @param s the text
     * @param i the index at which to start
     * @return The smallest index at or after i with a non-white space character.
     */
    public static int skipWhiteSpace(CharSequence s, int i) {
        for(; i<s.length(); ++i) {
            if(!isWhiteSpace(s.charAt(i))) {
                break;
            }
        }
        return i;
    }

    /**
     * Removes Pattern_White_Space from both ends of a string.
     * @param s the string
     * @return s except with leading and trailing Pattern_White_Space removed.
     */
    public static String trimWhiteSpace(String s) {
        int end=s.length();
        if(end==0) {
            return s;
        }
        // Fast path: nothing to trim at either end, return the string itself.
        if(!isWhiteSpace(s.charAt(0)) && !isWhiteSpace(s.charAt(end-1))) {
            return s;
        }
        int begin=0;
        while(begin<end && isWhiteSpace(s.charAt(begin))) {
            ++begin;
        }
        // If the string is all white space then begin==end and nothing is left to trim.
        while(begin<end && isWhiteSpace(s.charAt(end-1))) {
            --end;
        }
        return s.substring(begin, end);
    }

    /**
     * Tests whether the CharSequence contains a "pattern identifier", that is,
     * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
     * An empty sequence is not an identifier.
     * @param s the text
     * @return true if there are no Pattern_White_Space or Pattern_Syntax characters in s.
     */
    public static boolean isIdentifier(CharSequence s) {
        int length=s.length();
        if(length==0) {
            return false;
        }
        for(int i=0; i<length; ++i) {
            if(isSyntaxOrWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the CharSequence contains a "pattern identifier", that is,
     * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
     * An empty range is not an identifier.
     * @param s the text
     * @param start the index of the first character to test
     * @param limit the index after the last character to test
     * @return true if there are no Pattern_White_Space or Pattern_Syntax characters
     * in s between start and (exclusive) limit.
     */
    public static boolean isIdentifier(CharSequence s, int start, int limit) {
        if(start>=limit) {
            return false;
        }
        for(int i=start; i<limit; ++i) {
            if(isSyntaxOrWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Skips over a "pattern identifier" starting at index i of the CharSequence.
     * @param s the text
     * @param i the index at which to start
     * @return The smallest index at or after i with
     * a Pattern_White_Space or Pattern_Syntax character.
     */
    public static int skipIdentifier(CharSequence s, int i) {
        for(; i<s.length(); ++i) {
            if(isSyntaxOrWhiteSpace(s.charAt(i))) {
                break;
            }
        }
        return i;
    }

    /**
     * Looks up the bit for c in one of the U+2000..U+303F bit set tables.
     * @param table syntax2000 or syntaxOrWhiteSpace2000
     * @param c a code point in the range U+2000..U+303F
     * @return true if the bit for c is set
     */
    private static boolean testBit2000(int[] table, int c) {
        int word=table[index2000[(c-0x2000)>>5]];
        return (word&(1<<(c&0x1f)))!=0;
    }

    /**
     * Tests the code points above U+3030.
     * @param c a code point greater than U+3030
     * @return true if c is one of FD3E, FD3F, FE45, FE46
     */
    private static boolean isHighSyntax(int c) {
        return c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46;
    }

    /*
     * Latin-1 lookup, one byte per character U+0000..U+00FF.
     * Value 0: neither property.
     * Value 3 (bits 0 and 1): Pattern_Syntax.
     * Value 5 (bits 0 and 2): Pattern_White_Space.
     * Bit 0 is therefore set whenever either property is true.
     */
    private static final byte[] latin1={ // 256
        // 0x: white space 09..0D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
        // 1x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 2x: white space 20, syntax 21..2F
        5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // 3x: syntax 3A..3F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
        // 4x: syntax 40
        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 5x: syntax 5B..5E
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
        // 6x: syntax 60
        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 7x: syntax 7B..7E
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
        // 8x: white space 85
        0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 9x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Ax: syntax A1..A7, A9, AB, AC, AE
        0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
        // Bx: syntax B0, B1, B6, BB, BF
        3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
        // Cx
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Dx: syntax D7
        0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
        // Ex
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // Fx: syntax F7
        0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /*
     * Index table for U+2000..U+303F: one byte per block of 32 characters,
     * selecting a 32-bit word in syntax2000 / syntaxOrWhiteSpace2000.
     * Index 0 selects the all-zeros word and index 1 the all-ones word.
     */
    private static final byte[] index2000={ // 130
        2, 3, 4, 0, 0, 0, 0, 0, // 20xx
        0, 0, 0, 0, 5, 1, 1, 1, // 21xx
        1, 1, 1, 1, 1, 1, 1, 1, // 22xx
        1, 1, 1, 1, 1, 1, 1, 1, // 23xx
        1, 1, 1, 0, 0, 0, 0, 0, // 24xx
        1, 1, 1, 1, 1, 1, 1, 1, // 25xx
        1, 1, 1, 1, 1, 1, 1, 1, // 26xx
        1, 1, 1, 6, 7, 1, 1, 1, // 27xx
        1, 1, 1, 1, 1, 1, 1, 1, // 28xx
        1, 1, 1, 1, 1, 1, 1, 1, // 29xx
        1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
        1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
        0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
        0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
        1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
        0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
        8, 9 // 3000..303F
    };

    /*
     * Pattern_Syntax bit sets, one 32-bit word per block of 32 characters.
     * Words 0 and 1 serve all blocks that are entirely false or entirely true;
     * the remaining words hold the bit patterns of the mixed blocks.
     */
    private static final int[] syntax2000={
        0,
        -1,
        0xffff0000, // 2: 2010..201F
        0x7fff00ff, // 3: 2020..2027, 2030..203E
        0x7feffffe, // 4: 2041..2053, 2055..205E
        0xffff0000, // 5: 2190..219F
        0x003fffff, // 6: 2760..2775
        0xfff00000, // 7: 2794..279F
        0xffffff0e, // 8: 3001..3003, 3008..301F
        0x00010001 // 9: 3020, 3030
    };

    /*
     * Like syntax2000, with the bits for the Pattern_White_Space characters
     * 200E 200F 2028 2029 set as well.
     */
    private static final int[] syntaxOrWhiteSpace2000={
        0,
        -1,
        0xffffc000, // 2: 200E..201F
        0x7fff03ff, // 3: 2020..2029, 2030..203E
        0x7feffffe, // 4: 2041..2053, 2055..205E
        0xffff0000, // 5: 2190..219F
        0x003fffff, // 6: 2760..2775
        0xfff00000, // 7: 2794..279F
        0xffffff0e, // 8: 3001..3003, 3008..301F
        0x00010001 // 9: 3020, 3030
    };
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -737,18 +737,11 @@ public final class UCharacterProperty
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @param c codepoint to check
* @return true if c is a ICU white space
* @deprecated use PatternProps.isWhiteSpace(c)
*/
public static boolean isRuleWhiteSpace(int c)
{
/* "white space" in the sense of ICU rule parsers
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
Equivalent to test for Pattern_White_Space Unicode property.
*/
return (c >= 0x0009 && c <= 0x2029 &&
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||
c == 0x200E || c == 0x200F || c >= 0x2028));
return PatternProps.isWhiteSpace(c);
}
/**

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,19 @@
/*
*******************************************************************************
* Copyright (C) 2007-2010, International Business Machines Corporation and *
* Copyright (C) 2007-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.FieldPosition;
import java.text.ParsePosition;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.ULocale;
/**
@ -22,7 +21,7 @@ import com.ibm.icu.util.ULocale;
* <code>PluralFormat</code> supports the creation of internationalized
* messages with plural inflection. It is based on <i>plural
* selection</i>, i.e. the caller specifies messages for each
* plural case that can appear in the users language and the
* plural case that can appear in the user's language and the
* <code>PluralFormat</code> selects the appropriate message based on
* the number.
* </p>
@ -36,7 +35,7 @@ import com.ibm.icu.util.ULocale;
* each message and selects the message whose interval contains a
* given number. This can only handle a finite number of
* intervals. But in some languages, like Polish, one plural case
* applies to infinitely many intervals (e.g., paucal applies to
* applies to infinitely many intervals (e.g., the paucal case applies to
* numbers ending with 2, 3, or 4 except those ending with 12, 13, or
* 14). Thus <code>ChoiceFormat</code> is not adequate.
* </p><p>
@ -47,17 +46,20 @@ import com.ibm.icu.util.ULocale;
* conditions for a plural case than just a single interval. These plural
* rules define both what plural cases exist in a language, and to
* which numbers these cases apply.
* <li>It provides predefined plural rules for many locales. Thus, the programmer
* need not worry about the plural cases of a language. On the flip side,
* the localizer does not have to specify the plural cases; he can simply
* <li>It provides predefined plural rules for many languages. Thus, the programmer
* need not worry about the plural cases of a language and
* does not have to define the plural cases; they can simply
* use the predefined keywords. The whole plural formatting of messages can
* be done using localized patterns from resource bundles. For predefined plural
* rules, see CLDR <i>Language Plural Rules</i> page at
* rules, see the CLDR <i>Language Plural Rules</i> page at
* http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
* </ul>
* </p>
* <h4>Usage of <code>PluralFormat</code></h4>
* <p>
* <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
* with a <code>plural</code> argument type,
* rather than using a stand-alone <code>PluralFormat</code>.
* </p><p>
* This discussion assumes that you use <code>PluralFormat</code> with
* a predefined set of plural rules. You can create one using one of
* the constructors that takes a <code>ULocale</code> object. To
@ -70,72 +72,46 @@ import com.ibm.icu.util.ULocale;
* <h5>Patterns and Their Interpretation</h5>
* <p>
* The pattern text defines the message output for each plural case of the
* used locale. The pattern is a sequence of
* <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
* space characters. Each clause assigns the message <code><i>message</i></code>
* to the plural case identified by <code><i>caseKeyword</i></code>.
* specified locale. Syntax:
* <blockquote><pre>
* pluralStyle = [offsetValue] (selector '{' message '}')+
* offsetValue = "offset:" number
* selector = explicitValue | keyword
* explicitValue = '=' number // adjacent, no white space in between
* keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
* message: see {@link MessageFormat}
* </pre></blockquote>
* Pattern_White_Space between syntax elements is ignored, except
* between the {curly braces} and their sub-message,
* and between the '=' and the number of an explicitValue.
*
* </p><p>
* There are 6 predefined case keywords in ICU - 'zero', 'one', 'two', 'few', 'many' and
* There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
* 'other'. You always have to define a message text for the default plural case
* "<code>other</code>" which is contained in every rule set. If the plural
* rules of the <code>PluralFormat</code> object do not contain a plural case
* identified by <code><i>caseKeyword</i></code>, an
* <code>IllegalArgumentException</code> is thrown.
* "<code>other</code>" which is contained in every rule set.
* If you do not specify a message text for a particular plural case, the
* message text of the plural case "<code>other</code>" gets assigned to this
* plural case. If you specify more than one message for the same plural case,
* an <code>IllegalArgumentException</code> is thrown.
* <br/>
* Spaces between <code><i>caseKeyword</i></code> and
* <code><i>message</i></code> will be ignored; spaces within
* <code><i>message</i></code> will be preserved.
* plural case.
* </p><p>
* The message text for a particular plural case may contain other message
* format patterns. <code>PluralFormat</code> preserves these so that you
* can use the strings produced by <code>PluralFormat</code> with other
* formatters. If you are using <code>PluralFormat</code> inside a
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
* automatically evaluate the resulting format pattern.<br/>
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
* in message texts to define a nested format pattern.<br/>
* The pound sign (<code>#</code>) will be interpreted as the number placeholder
* in the message text, if it is not contained in curly braces (to preserve
* <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
* replace each of those pound signs by the number passed to the
* <code>format()</code> method. It will be formatted using a
* When formatting, the input number is first matched against the explicitValue clauses.
* If there is no exact-number match, then a keyword is selected by calling
* the <code>PluralRules</code> with the input number <em>minus the offset</em>.
* (The offset defaults to 0 if it is omitted from the pattern string.)
* If there is no clause with that keyword, then the "other" clause is returned.
* </p><p>
* An unquoted pound sign (<code>#</code>) in the selected sub-message
* itself (i.e., outside of arguments nested in the sub-message)
* is replaced by the input number minus the offset.
* The number-minus-offset value is formatted using a
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
* need special number formatting, you have to explicitly specify a
* <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
* </p>
* Example
* <pre>
* MessageFormat msgFmt = new MessageFormat("{0, plural, " +
* "one{{0, number, C''''est #,##0.0# fichier}} " +
* "other {Ce sont # fichiers}} dans la liste.",
* new ULocale("fr"));
* Object args[] = {new Long(0)};
* System.out.println(msgFmt.format(args));
* args = {new Long(3)};
* System.out.println(msgFmt.format(args));
* </pre>
* Produces the output:<br />
* <code>C'est 0,0 fichier dans la liste.</code><br />
* <code>Ce sont 3 fichiers dans la liste."</code>
* <p>
* <strong>Note:</strong><br />
* Currently <code>PluralFormat</code>
* does not make use of quotes like <code>MessageFormat</code>.
* If you use plural format strings with <code>MessageFormat</code> and want
* to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
* <code>MessageFormat</code> unquotes this pattern and passes the unquoted
* pattern to <code>PluralFormat</code>. It's a bit trickier if you use
* nested formats that do quoting. In the example above, we wanted to insert
* "<code>'</code>" in the number format pattern. Since
* <code>NumberFormat</code> supports quotes, we had to insert
* "<code>''</code>". But since <code>MessageFormat</code> unquotes the
* pattern before it gets passed to <code>PluralFormat</code>, we have to
* double these quotes, i.e. write "<code>''''</code>".
* need special number formatting, you have to use a <code>MessageFormat</code>
* and explicitly specify a <code>NumberFormat</code> argument.
* <strong>Note:</strong> That argument is formatted without subtracting the offset!
* If you need a custom format and have a non-zero offset, then you need to pass the
* number-minus-offset value as a separate parameter.
* </p>
* For a usage example, see the {@link MessageFormat} class documentation.
*
* <h4>Defining Custom Plural Rules</h4>
* <p>If you need to use <code>PluralFormat</code> with custom rules, you can
* create a <code>PluralRules</code> object and pass it to
@ -153,35 +129,51 @@ import com.ibm.icu.util.ULocale;
public class PluralFormat extends UFormat {
private static final long serialVersionUID = 1L;
/*
/**
* The locale used for standard number formatting and getting the predefined
* plural rules (if they were not defined explicitly).
* @serial
*/
private ULocale ulocale = null;
/*
/**
* The plural rules used for plural selection.
* @serial
*/
private PluralRules pluralRules = null;
/*
/**
* The applied pattern string.
* @serial
*/
private String pattern = null;
/*
/**
* The MessagePattern which contains the parsed structure of the pattern string.
*/
transient private MessagePattern msgPattern;
/**
* Obsolete with use of MessagePattern since ICU 4.8. Used to be:
* The format messages for each plural case. It is a mapping:
* <code>String</code>(plural case keyword) --&gt; <code>String</code>
* (message for this plural case).
* @serial
*/
private Map<String, String> parsedValues = null;
/*
/**
* This <code>NumberFormat</code> is used for the standard formatting of
* the number inserted into the message.
* @serial
*/
private NumberFormat numberFormat = null;
/**
* The offset to subtract before invoking plural rules.
*/
transient private double offset = 0;
/**
* Creates a new <code>PluralFormat</code> for the default locale.
* This locale will be used to get the set of plural rules and for standard
@ -306,106 +298,40 @@ public class PluralFormat extends UFormat {
ulocale = locale;
pluralRules = (rules == null) ? PluralRules.forLocale(ulocale)
: rules;
parsedValues = null;
pattern = null;
resetPattern();
numberFormat = NumberFormat.getInstance(ulocale);
}
private void resetPattern() {
pattern = null;
if(msgPattern != null) {
msgPattern.clear();
}
offset = 0;
}
/**
* Sets the pattern used by this plural format.
* The method parses the pattern and creates a map of format strings
* for the plural rules.
* Patterns and their interpretation are specified in the class description.
*
* @param pttrn the pattern for this plural format.
* @param pattern the pattern for this plural format.
* @throws IllegalArgumentException if the pattern is invalid.
* @stable ICU 3.8
*/
public void applyPattern(String pttrn) {
pttrn = pttrn.trim();
this.pattern = pttrn;
int braceStack = 0;
Set<String> ruleNames = pluralRules.getKeywords();
parsedValues = new HashMap<String, String>();
// Format string has to include keywords.
// states:
// 0: Reading keyword.
// 1: Reading value for preceding keyword.
int state = 0;
StringBuilder token = new StringBuilder();
String currentKeyword = null;
boolean readSpaceAfterKeyword = false;
for (int i = 0; i < pttrn.length(); ++i) {
char ch = pttrn.charAt(i);
switch (state) {
case 0: // Reading value.
if (token.length() == 0) {
readSpaceAfterKeyword = false;
}
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
if (token.length() > 0) {
readSpaceAfterKeyword = true;
}
// Skip leading and trailing whitespaces.
break;
}
if (ch == '{') { // End of keyword definition reached.
currentKeyword = token.toString().toLowerCase(
Locale.ENGLISH);
if (!ruleNames.contains(currentKeyword)) {
parsingFailure("Malformed formatting expression. "
+ "Unknown keyword \"" + currentKeyword
+ "\" at position " + i + ".");
}
if (parsedValues.get(currentKeyword) != null) {
parsingFailure("Malformed formatting expression. "
+ "Text for case \"" + currentKeyword
+ "\" at position " + i + " already defined!");
}
token.delete(0, token.length());
braceStack++;
state = 1;
break;
}
if (readSpaceAfterKeyword) {
parsingFailure("Malformed formatting expression. " +
"Invalid keyword definition. Character \"" + ch +
"\" at position " + i + " not expected!");
}
token.append(ch);
break;
case 1: // Reading value.
switch (ch) {
case '{':
braceStack++;
token.append(ch);
break;
case '}':
braceStack--;
if (braceStack == 0) { // End of value reached.
parsedValues.put(currentKeyword, token.toString());
token.delete(0, token.length());
state = 0;
} else if (braceStack < 0) {
parsingFailure("Malformed formatting expression. "
+ "Braces do not match.");
} else { // braceStack > 0
token.append(ch);
}
break;
default:
token.append(ch);
}
break;
} // switch state
} // for loop.
if (braceStack != 0) {
parsingFailure(
"Malformed formatting expression. Braces do not match.");
public void applyPattern(String pattern) {
// Remember the raw pattern string before it is parsed.
this.pattern = pattern;
// Create the MessagePattern on first use; later calls reuse the same instance.
if (msgPattern == null) {
msgPattern = new MessagePattern();
}
try {
msgPattern.parsePluralStyle(pattern);
// Cache the value that getPluralOffset(0) reports for the freshly parsed pattern.
offset = msgPattern.getPluralOffset(0);
} catch(RuntimeException e) {
// Parsing failed: call resetPattern() and propagate the same exception unchanged.
resetPattern();
throw e;
}
// NOTE(review): checkSufficientDefinition() reads parsedValues, which this method
// never assigns -- confirm that this call is still intended after the switch to msgPattern.
checkSufficientDefinition();
}
/**
@ -418,6 +344,129 @@ public class PluralFormat extends UFormat {
return pattern;
}
/**
* Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
* @param pattern A MessagePattern.
* @param partIndex the index of the first PluralFormat argument style part.
* @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
* @param number a number to be matched to one of the PluralFormat argument's explicit values,
* or mapped via the PluralSelector.
* @return the sub-message start part index.
*/
/*package*/ static int findSubMessage(
        MessagePattern pattern, int partIndex,
        PluralSelector selector, double number) {
    // Result priority:
    //   1. the first sub-message whose explicit value ("=n") equals number (returned immediately),
    //   2. otherwise the first sub-message whose keyword equals selector.select(number-offset),
    //   3. otherwise the first "other" sub-message,
    //   4. otherwise 0 (msgStart is never set).
    int count=pattern.countParts();
    double offset;
    MessagePattern.Part part=pattern.getPart(partIndex);
    if(part.getType().hasNumericValue()) {
        // The style starts with a numeric part: use it as the offset and step past it.
        offset=pattern.getNumericValue(part);
        ++partIndex;
    } else {
        offset=0;
    }
    // The keyword is null until we need to match against a non-explicit, not-"other" value.
    // Then we get the keyword from the selector.
    // (In other words, we never call the selector if we match against an explicit value,
    // or if the only non-explicit keyword is "other".)
    String keyword=null;
    // When we find a match, we set msgStart>0 and also set this boolean to true
    // to avoid matching the keyword again (duplicates are allowed)
    // while we continue to look for an explicit-value match.
    boolean haveKeywordMatch=false;
    // msgStart is 0 until we find any appropriate sub-message.
    // We remember the first "other" sub-message if we have not seen any
    // appropriate sub-message before.
    // We remember the first matching-keyword sub-message if we have not seen
    // one of those before.
    // (The parser allows [does not check for] duplicate keywords.
    // We just have to make sure to take the first one.)
    // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    // at the first keyword match.
    // We keep going until we find an explicit-value match or reach the end of the plural style.
    int msgStart=0;
    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    // until ARG_LIMIT or end of plural-only pattern.
    do {
        part=pattern.getPart(partIndex++);
        MessagePattern.Part.Type type=part.getType();
        if(type==MessagePattern.Part.Type.ARG_LIMIT) {
            break;
        }
        assert type==MessagePattern.Part.Type.ARG_SELECTOR;
        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
        if(pattern.getPartType(partIndex).hasNumericValue()) {
            // explicit value like "=2"
            part=pattern.getPart(partIndex++);
            if(number==pattern.getNumericValue(part)) {
                // matches explicit value
                return partIndex;
            }
        } else if(!haveKeywordMatch) {
            // plural keyword like "few" or "other"
            // Compare "other" first and call the selector if this is not "other".
            if(pattern.partSubstringMatches(part, "other")) {
                if(msgStart==0) {
                    msgStart=partIndex;
                    if(keyword!=null && keyword.equals("other")) {
                        // This is the first "other" sub-message,
                        // and the selected keyword is also "other".
                        // Do not match "other" again.
                        haveKeywordMatch=true;
                    }
                }
            } else {
                if(keyword==null) {
                    keyword=selector.select(number-offset);
                    if(msgStart!=0 && keyword.equals("other")) {
                        // We have already seen an "other" sub-message.
                        // Do not match "other" again.
                        haveKeywordMatch=true;
                        // Do NOT "continue" here: inside a do-while, continue jumps
                        // directly to the loop condition and would skip the
                        // getLimitPartIndex() call below, leaving partIndex inside
                        // the current sub-message instead of at the next selector.
                    }
                }
                if(!haveKeywordMatch && pattern.partSubstringMatches(part, keyword)) {
                    // keyword matches
                    msgStart=partIndex;
                    // Do not match this keyword again.
                    haveKeywordMatch=true;
                }
            }
        }
        // Skip over the whole sub-message; the ++partIndex in the loop condition
        // then moves to the part after its limit.
        partIndex=pattern.getLimitPartIndex(partIndex);
    } while(++partIndex<count);
    return msgStart;
}
/**
* Interface for selecting PluralFormat keywords for numbers.
* The PluralRules class was intended to implement this interface,
* but there is no public API that uses a PluralSelector,
* only MessageFormat and PluralFormat have PluralSelector implementations.
* Therefore, PluralRules is not marked to implement this non-public interface,
* to avoid confusing users.
* @internal
*/
/*package*/ interface PluralSelector {
    /**
     * Maps a number to the PluralFormat keyword that should be used for it.
     *
     * @param number the number that is being plural-formatted.
     * @return the PluralFormat keyword selected for that number.
     */
    String select(double number);
}
// See PluralSelector:
// We could avoid this adapter class if we made PluralSelector public
// (or at least publicly visible) and had PluralRules implement PluralSelector.
private final class PluralSelectorAdapter implements PluralSelector {
    /**
     * Forwards the keyword selection to the pluralRules of the enclosing instance.
     *
     * @param number the number to select a keyword for.
     * @return whatever pluralRules.select(number) returns.
     */
    public String select(double number) {
        final String selectedKeyword = pluralRules.select(number);
        return selectedKeyword;
    }
}
transient private PluralSelectorAdapter pluralRulesWrapper = new PluralSelectorAdapter();
/**
* Formats a plural message for a given number.
*
@ -430,20 +479,50 @@ public class PluralFormat extends UFormat {
*/
public final String format(double number) {
// If no pattern was applied, return the formatted number.
if (parsedValues == null) {
if (msgPattern == null || msgPattern.countParts() == 0) {
return numberFormat.format(number);
}
// Get appropriate format pattern.
String selectedRule = pluralRules.select(number);
String selectedPattern = parsedValues.get(selectedRule);
if (selectedPattern == null) { // Fallback to others.
selectedPattern = parsedValues.get(PluralRules.KEYWORD_OTHER);
// Get the appropriate sub-message.
int partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number);
// Replace syntactic # signs in the top level of this sub-message
// (not in nested arguments) with the formatted number-offset.
number -= offset;
StringBuilder result = null;
int prevIndex = msgPattern.getPart(partIndex).getLimit();
for (;;) {
MessagePattern.Part part = msgPattern.getPart(++partIndex);
MessagePattern.Part.Type type = part.getType();
int index = part.getIndex();
if (type == MessagePattern.Part.Type.MSG_LIMIT) {
if (result == null) {
return pattern.substring(prevIndex, index);
} else {
return result.append(pattern, prevIndex, index).toString();
}
} else if (type == MessagePattern.Part.Type.REPLACE_NUMBER ||
// JDK compatibility mode: Remove SKIP_SYNTAX.
(type == MessagePattern.Part.Type.SKIP_SYNTAX && msgPattern.jdkAposMode())) {
if (result == null) {
result = new StringBuilder();
}
result.append(pattern, prevIndex, index);
if (type == MessagePattern.Part.Type.REPLACE_NUMBER) {
result.append(numberFormat.format(number));
}
prevIndex = part.getLimit();
} else if (type == MessagePattern.Part.Type.ARG_START) {
if (result == null) {
result = new StringBuilder();
}
result.append(pattern, prevIndex, index);
prevIndex = index;
partIndex = msgPattern.getLimitPartIndex(partIndex);
index = msgPattern.getPart(partIndex).getLimit();
MessagePattern.appendReducedApostrophes(pattern, prevIndex, index, result);
prevIndex = index;
}
}
// Get formatted number and insert it into String.
// Will replace all '#' which are not inside curly braces by the
// formatted number.
return insertFormattedNumber(number, selectedPattern);
}
/**
@ -469,8 +548,7 @@ public class PluralFormat extends UFormat {
toAppendTo.append(format(((Number) number).doubleValue()));
return toAppendTo;
}
throw new IllegalArgumentException("'" + number +
"' is not a Number");
throw new IllegalArgumentException("'" + number + "' is not a Number");
}
/**
@ -531,83 +609,24 @@ public class PluralFormat extends UFormat {
numberFormat = format;
}
/*
* Checks if the applied pattern provided enough information,
* i.e., if the attribute <code>parsedValues</code> stores enough
* information for plural formatting.
* Will be called at the end of pattern parsing.
* @throws IllegalArgumentException if there's not sufficient information
* provided.
*/
private void checkSufficientDefinition() {
    // The default ("other") case is the only one that must always be present.
    final String otherText = parsedValues.get(PluralRules.KEYWORD_OTHER);
    if (otherText != null) {
        return;
    }
    final String message = "Malformed formatting expression.\n"
            + "Value for case \"" + PluralRules.KEYWORD_OTHER
            + "\" was not defined.";
    parsingFailure(message);
}
/*
* Helper method that resets the <code>PluralFormat</code> object and throws
* an <code>IllegalArgumentException</code> with a given error text.
* @param errorText the error text of the exception message.
* @throws IllegalArgumentException will always be thrown by this method.
*/
private void parsingFailure(String errorText) {
// Set PluralFormat to a valid state.
// init() is invoked with a null first argument and the default ULocale
// before the exception is raised.
init(null, ULocale.getDefault());
// This method never returns normally: it always ends by throwing.
throw new IllegalArgumentException(errorText);
}
/*
* Helper method that is called during formatting.
* It replaces the character '#' by the number used for plural selection in
* a message text. Only '#' are replaced, that are not written inside curly
* braces. This allows the use of nested number formats.
* The number will be formatted using the attribute
* <code>numberformat</code>.
* @param number the number used for plural selection.
* @param message is the text in which '#' will be replaced.
* @return the text with inserted numbers.
*/
private String insertFormattedNumber(double number, String message) {
    if (message == null) {
        return "";
    }
    // Format the number once up front; the same text replaces every top-level '#'.
    final String replacement = numberFormat.format(number);
    final int length = message.length();
    final StringBuilder out = new StringBuilder();
    // depth counts unmatched '{'; a '#' is only replaced while depth is 0.
    int depth = 0;
    // Everything before copiedUpTo has already been written to out.
    int copiedUpTo = 0;
    for (int pos = 0; pos < length; ++pos) {
        final char c = message.charAt(pos);
        if (c == '{') {
            ++depth;
        } else if (c == '}') {
            --depth;
        } else if (c == '#' && depth == 0) {
            out.append(message, copiedUpTo, pos).append(replacement);
            copiedUpTo = pos + 1;
        }
    }
    // Copy whatever follows the last replaced '#'.
    if (copiedUpTo < length) {
        out.append(message, copiedUpTo, length);
    }
    return out.toString();
}
/**
* {@inheritDoc}
* @stable ICU 3.8
*/
@Override
public boolean equals(Object rhs) {
return rhs instanceof PluralFormat && equals((PluralFormat) rhs);
if(this == rhs) {
return true;
}
if(rhs == null || getClass() != rhs.getClass()) {
return false;
}
PluralFormat pf = (PluralFormat)rhs;
return
Utility.objectEquals(ulocale, pf.ulocale) &&
Utility.objectEquals(pluralRules, pf.pluralRules) &&
Utility.objectEquals(msgPattern, pf.msgPattern) &&
Utility.objectEquals(numberFormat, pf.numberFormat);
}
/**
@ -617,31 +636,40 @@ public class PluralFormat extends UFormat {
* @stable ICU 3.8
*/
public boolean equals(PluralFormat rhs) {
return pluralRules.equals(rhs.pluralRules) &&
parsedValues.equals(rhs.parsedValues) &&
numberFormat.equals(rhs.numberFormat);
return equals((Object)rhs);
}
/**
* {@inheritDoc}
* @stable ICU 3.8
*/
@Override
public int hashCode() {
    // parsedValues must not be used here: readObject() sets it to null and
    // applyPattern() no longer fills it, so dereferencing it throws a
    // NullPointerException. Hash the fields that equals(Object) compares instead,
    // tolerating null just as equals does via Utility.objectEquals().
    int hash = pluralRules == null ? 0 : pluralRules.hashCode();
    if (msgPattern != null) {
        hash ^= msgPattern.hashCode();
    }
    return hash;
}
/**
* For debugging purposes only
* @return a text representation of the format data.
* {@inheritDoc}
* @stable ICU 3.8
*/
@Override
public String toString() {
    // One "name=value" entry per field, separated by ", "; every value except
    // the locale is wrapped in single quotes.
    return "locale=" + ulocale
            + ", rules='" + pluralRules + "'"
            + ", pattern='" + pattern + "'"
            + ", parsedValues='" + parsedValues + "'"
            + ", format='" + numberFormat + "'";
}
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
// Read the serialized fields first; everything below repairs transient state.
in.defaultReadObject();
// pluralRulesWrapper is declared transient, so it has to be recreated here.
pluralRulesWrapper = new PluralSelectorAdapter();
// Ignore the parsedValues from an earlier class version (before ICU 4.8)
// and rebuild the msgPattern.
parsedValues = null;
// Re-apply the stored pattern string, if there is one, through applyPattern().
if (pattern != null) {
applyPattern(pattern);
}
}
}

View File

@ -20,6 +20,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.PluralRulesLoader;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.ULocale;
@ -61,7 +62,7 @@ import com.ibm.icu.util.ULocale;
* Syntax:<pre>
* rules = rule (';' rule)*
* rule = keyword ':' condition
* keyword = <identifier>
* keyword = &lt;identifier&gt;
* condition = and_condition ('or' and_condition)*
* and_condition = relation ('and' relation)*
* relation = is_relation | in_relation | within_relation | 'n' &lt;EOL&gt;
@ -75,6 +76,9 @@ import com.ibm.icu.util.ULocale;
* range = value'..'value
* </pre></p>
* <p>
* An "identifier" is a sequence of characters that do not have the
* Unicode Pattern_Syntax or Pattern_White_Space properties.
* <p>
* The difference between 'in' and 'within' is that 'in' only includes
* integers in the specified range, while 'within' includes all values.
* Using 'within' with a range_list consisting entirely of values
@ -139,19 +143,6 @@ public class PluralRules implements Serializable {
*/
public static final double NO_UNIQUE_VALUE = -0.00123456777;
/*
* The set of all characters a valid keyword can start with.
*/
private static final UnicodeSet START_CHARS =
new UnicodeSet("[[:ID_Start:][_]]");
/*
* The set of all characters a valid keyword can contain after
* the first character.
*/
private static final UnicodeSet CONT_CHARS =
new UnicodeSet("[:ID_Continue:]");
/*
* The default constraint that is always satisfied.
*/
@ -827,17 +818,9 @@ public class PluralRules implements Serializable {
* @param token the token to be checked
* @return true if the token is a valid keyword.
*/
private static boolean isValidKeyword(String token) {
    final int length = token.length();
    // The token must be non-empty and begin with a character from START_CHARS.
    if (length == 0 || !START_CHARS.contains(token.charAt(0))) {
        return false;
    }
    // Every following character must be in CONT_CHARS.
    int pos = 1;
    while (pos < length) {
        if (!CONT_CHARS.contains(token.charAt(pos))) {
            return false;
        }
        pos++;
    }
    return true;
}
private static boolean isValidKeyword(String token) {
// The whole check is delegated to PatternProps.isIdentifier().
return PatternProps.isIdentifier(token);
}
/*
* Creates a new <code>PluralRules</code> object. Immutable.

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2004-2010, International Business Machines Corporation and *
* Copyright (C) 2004-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2009 , Yahoo! Inc. *
*******************************************************************************
@ -12,8 +12,8 @@ import java.io.ObjectInputStream;
import java.text.FieldPosition;
import java.text.Format;
import java.text.ParsePosition;
import java.util.HashMap;
import java.util.Map;
import com.ibm.icu.impl.PatternProps;
/**
* <p><code>SelectFormat</code> supports the creation of internationalized
@ -25,6 +25,10 @@ import java.util.Map;
*
* <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
*
* <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
* with a <code>select</code> argument type,
* rather than using a stand-alone <code>SelectFormat</code>.</p>
*
* <p>The main use case for the select format is gender based inflection.
* When names or nouns are inserted into sentences, their gender can affect pronouns,
* verb forms, articles, and adjectives. Special care needs to be
@ -58,6 +62,9 @@ import java.util.Map;
* but similar in grammatical use.
* Some African languages have around 20 noun classes.</p>
*
* <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
* we usually need to distinguish only between female, male and other/unknown.</p>
*
* <p>To enable localizers to create sentence patterns that take their
* language's gender dependencies into consideration, software has to provide
* information about the gender associated with a noun or name to
@ -66,8 +73,8 @@ import java.util.Map;
*
* <ul>
* <li>For people, natural gender information should be maintained for each person.
* The keywords "male", "female", "mixed" (for groups of people)
* and "unknown" are used.
* Keywords like "male", "female", "mixed" (for groups of people)
* and "unknown" could be used.
*
* <li>For nouns, grammatical gender information should be maintained for
* each noun and per language, e.g., in resource bundles.
@ -85,6 +92,11 @@ import java.util.Map;
*
* <pre>{0} went to {2}.</pre>
*
* <p><b>Note:</b> The entire sentence should be included (and partially repeated)
* inside each phrase. Otherwise translators would have to be trained on how to
* move bits of the sentence in and out of the select argument of a message.
* (The examples below do not follow this recommendation!)</p>
*
* <p>The sentence pattern for French, where the gender of the person affects
* the form of the participle, uses a select format based on argument 1:</p>
*
@ -104,39 +116,24 @@ import java.util.Map;
*
* <h4>Patterns and Their Interpretation</h4>
*
* <p>The <code>SelectFormat</code> pattern text defines the phrase output
* <p>The <code>SelectFormat</code> pattern string defines the phrase output
* for each user-defined keyword.
* The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
* clauses, separated by white space characters.
* Each clause assigns the phrase <code><i>phrase</i></code>
* to the user-defined <code><i>keyword</i></code>.</p>
* The pattern is a sequence of (keyword, message) pairs.
* A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
*
* <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
* that don't match this pattern result in the error code
* <code>U_ILLEGAL_CHARACTER</code>.
* You always have to define a phrase for the default keyword
* <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
*
* <p>You always have to define a phrase for the default keyword
* <code>other</code>; this phrase is returned when the keyword
* provided to
* the <code>format</code> method matches no other keyword.
* If a pattern does not provide a phrase for <code>other</code>, the method
* it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
* If a pattern provides more than one phrase for the same keyword, the
* error <code>U_DUPLICATE_KEYWORD</code> is returned.
* <br/>
* Spaces between <code><i>keyword</i></code> and
* <code>{<i>phrase</i>}</code> will be ignored; spaces within
* <code>{<i>phrase</i>}</code> will be preserved.</p>
* Pattern_White_Space between keywords and messages is ignored.
* Pattern_White_Space within a message is preserved and output.</p>
*
* <p>The phrase for a particular select case may contain other message
* format patterns. <code>SelectFormat</code> preserves these so that you
* can use the strings produced by <code>SelectFormat</code> with other
* formatters. If you are using <code>SelectFormat</code> inside a
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
* automatically evaluate the resulting format pattern.
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
* in phrases to define a nested format pattern.</p>
*
* <pre>Example:
* <p><pre>Example:
* MessageFormat msgFmt = new MessageFormat("{0} est " +
* "{1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris.",
* new ULocale("fr"));
@ -160,106 +157,27 @@ public class SelectFormat extends Format{
*/
private String pattern = null;
/*
* The format messages for each select case. It is a mapping:
* <code>String</code>(select case keyword) --&gt; <code>String</code>
* (message for this select case).
*/
transient private Map<String, String> parsedValues = null;
/**
* Common name for the default select form. This name is returned
* for values to which no other form in the rule applies. It
* can additionally be assigned rules of its own.
* @stable ICU 4.4
* The MessagePattern which contains the parsed structure of the pattern string.
*/
private static final String KEYWORD_OTHER = "other";
/*
* The types of character classifications
*/
private enum CharacterClass {
// As assigned by classifyCharacter():
//   T_START_KEYWORD    - ASCII letters 'A'-'Z' and 'a'-'z'
//   T_CONTINUE_KEYWORD - ASCII digits '0'-'9', '-' and '_'
//   T_LEFT_BRACE       - '{'
//   T_RIGHT_BRACE      - '}'
//   T_SPACE            - ' ' and '\t'
//   T_OTHER            - every other character
T_START_KEYWORD, T_CONTINUE_KEYWORD, T_LEFT_BRACE,
T_RIGHT_BRACE, T_SPACE, T_OTHER
};
/*
* The different states needed in state machine
* in applyPattern method.
*/
private enum State {
// As used by the state machine in applyPattern():
//   START_STATE        - before a keyword; spaces are skipped, a keyword-start
//                        character switches to KEYWORD_STATE
//   KEYWORD_STATE      - collecting keyword characters; a space switches to
//                        PAST_KEYWORD_STATE, '{' switches to PHRASE_STATE
//   PAST_KEYWORD_STATE - after the keyword; spaces are skipped, '{' switches
//                        to PHRASE_STATE
//   PHRASE_STATE       - collecting the phrase until its closing '}' is found,
//                        then back to START_STATE
START_STATE, KEYWORD_STATE,
PAST_KEYWORD_STATE, PHRASE_STATE
};
transient private MessagePattern msgPattern;
/**
* Creates a new <code>SelectFormat</code> for a given pattern string.
* @param pattern the pattern for this <code>SelectFormat</code>.
* @stable ICU 4.4
*/
public SelectFormat(String pattern) {
init();
applyPattern(pattern);
}
/*
* Initializes the <code>SelectFormat</code> object.
* Postcondition:<br/>
* <code>parsedValues</code>: is <code>null</code><br/>
* <code>pattern</code>: is <code>null</code><br/>
* Resets the <code>SelectFormat</code> object.
*/
private void init() {
parsedValues = null;
private void reset() {
pattern = null;
}
/**
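* Checks whether the given string is a valid keyword: a keyword-start character
* followed only by keyword-start or keyword-continue characters.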
*/
private boolean checkValidKeyword(String argKeyword) {
    final int length = argKeyword.length();
    // Reject the empty string and anything that does not begin with a keyword-start character.
    if (length < 1
            || classifyCharacter(argKeyword.charAt(0)) != CharacterClass.T_START_KEYWORD) {
        return false;
    }
    // All remaining characters must be keyword-start or keyword-continue characters.
    int pos = 1;
    while (pos < length) {
        final CharacterClass charClass = classifyCharacter(argKeyword.charAt(pos));
        final boolean keywordChar = charClass == CharacterClass.T_START_KEYWORD
                || charClass == CharacterClass.T_CONTINUE_KEYWORD;
        if (!keywordChar) {
            return false;
        }
        pos++;
    }
    return true;
}
/**
* Classifies the characters.
*/
private CharacterClass classifyCharacter(char ch) {
if ((ch >= 'A') && (ch <= 'Z')) {
return CharacterClass.T_START_KEYWORD;
}
if ((ch >= 'a') && (ch <= 'z')) {
return CharacterClass.T_START_KEYWORD;
}
if ((ch >= '0') && (ch <= '9')) {
return CharacterClass.T_CONTINUE_KEYWORD;
}
switch (ch) {
case '{':
return CharacterClass.T_LEFT_BRACE;
case '}':
return CharacterClass.T_RIGHT_BRACE;
case ' ':
case '\t':
return CharacterClass.T_SPACE;
case '-':
case '_':
return CharacterClass.T_CONTINUE_KEYWORD;
default :
return CharacterClass.T_OTHER;
if(msgPattern != null) {
msgPattern.clear();
}
}
@ -272,129 +190,16 @@ public class SelectFormat extends Format{
* @stable ICU 4.4
*/
public void applyPattern(String pattern) {
parsedValues = null;
this.pattern = pattern;
//Initialization
StringBuilder keyword = new StringBuilder();
StringBuilder phrase = new StringBuilder();
int braceCount = 0;
parsedValues = new HashMap<String, String>();
//Process the state machine
State state = State.START_STATE;
for (int i = 0; i < pattern.length(); i++ ){
//Get the character and check its type
char ch = pattern.charAt(i);
CharacterClass type = classifyCharacter(ch);
//Process the state machine
switch (state) {
//At the start of pattern
case START_STATE:
switch (type) {
case T_SPACE:
break ;
case T_START_KEYWORD:
state = State.KEYWORD_STATE;
keyword.append(ch);
break ;
//If anything else is encountered, it's a syntax error
default :
parsingFailure("Pattern syntax error.");
}//end of switch(type)
break ;
//Handle the keyword state
case KEYWORD_STATE:
switch (type) {
case T_SPACE:
state = State.PAST_KEYWORD_STATE;
break ;
case T_START_KEYWORD:
case T_CONTINUE_KEYWORD:
keyword.append(ch);
break ;
case T_LEFT_BRACE:
state = State.PHRASE_STATE;
break ;
//If anything else is encountered, it's a syntax error
default :
parsingFailure("Pattern syntax error.");
}//end of switch(type)
break ;
//Handle the pastkeyword state
case PAST_KEYWORD_STATE:
switch (type) {
case T_SPACE:
break ;
case T_LEFT_BRACE:
state = State.PHRASE_STATE;
break ;
//If anything else is encountered, it's a syntax error
default :
parsingFailure("Pattern syntax error.");
}//end of switch(type)
break ;
//Handle the phrase state
case PHRASE_STATE:
switch (type) {
case T_LEFT_BRACE:
braceCount++;
phrase.append(ch);
break ;
case T_RIGHT_BRACE:
//Matching keyword, phrase pair found
if (braceCount == 0){
//Check validity of keyword
if (parsedValues.get(keyword.toString()) != null) {
parsingFailure("Duplicate keyword error.");
}
if (keyword.length() == 0) {
parsingFailure("Pattern syntax error.");
}
//Store the keyword, phrase pair in hashTable
parsedValues.put( keyword.toString(), phrase.toString());
//Reinitialize
keyword.setLength(0);
phrase.setLength(0);
state = State.START_STATE;
}
if (braceCount > 0){
braceCount-- ;
phrase.append(ch);
}
break ;
default :
phrase.append(ch);
}//end of switch(type)
break ;
//Handle the default case of switch(state)
default :
parsingFailure("Pattern syntax error.");
}//end of switch(state)
if (msgPattern == null) {
msgPattern = new MessagePattern();
}
//Check if the state machine is back to START_STATE
if ( state != State.START_STATE){
parsingFailure("Pattern syntax error.");
try {
msgPattern.parseSelectStyle(pattern);
} catch(RuntimeException e) {
reset();
throw e;
}
//Check if "other" keyword is present
if ( !checkSufficientDefinition() ) {
parsingFailure("Pattern syntax error. "
+ "Value for case \"" + KEYWORD_OTHER
+ "\" was not defined. ");
}
return ;
}
/**
@ -407,41 +212,103 @@ public class SelectFormat extends Format{
return pattern;
}
/**
* Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
* @param pattern A MessagePattern.
* @param partIndex the index of the first SelectFormat argument style part.
* @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
* @return the sub-message start part index.
*/
/*package*/ static int findSubMessage(MessagePattern pattern, int partIndex, String keyword) {
    final int partCount = pattern.countParts();
    // Start index of the first "other" sub-message seen so far; 0 while none was seen.
    int otherStart = 0;
    // Walk the (ARG_SELECTOR, message) pairs until ARG_LIMIT or the end of a select-only pattern.
    for (;;) {
        final MessagePattern.Part selectorPart = pattern.getPart(partIndex++);
        final MessagePattern.Part.Type selectorType = selectorPart.getType();
        if (selectorType == MessagePattern.Part.Type.ARG_LIMIT) {
            break;
        }
        assert selectorType == MessagePattern.Part.Type.ARG_SELECTOR;
        // partIndex now refers to the message that follows the selector.
        if (pattern.partSubstringMatches(selectorPart, keyword)) {
            // Exact keyword match: done.
            return partIndex;
        }
        if (otherStart == 0 && pattern.partSubstringMatches(selectorPart, "other")) {
            otherStart = partIndex;
        }
        // Jump past this message to the next selector, if there is one.
        partIndex = pattern.getLimitPartIndex(partIndex) + 1;
        if (partIndex >= partCount) {
            break;
        }
    }
    return otherStart;
}
/**
* Selects the phrase for the given keyword.
*
* @param keyword a keyword for which the select message should be formatted.
* @param keyword a phrase selection keyword.
* @return the string containing the formatted select message.
* @throws IllegalArgumentException when the given keyword is not available in the select format pattern
* @throws IllegalArgumentException when the given keyword is not a "pattern identifier"
* @stable ICU 4.4
*/
public final String format(String keyword) {
//Check for the validity of the keyword
if( !checkValidKeyword(keyword) ){
if (!PatternProps.isIdentifier(keyword)) {
throw new IllegalArgumentException("Invalid formatting argument.");
}
// If no pattern was applied, throw an exception
if (parsedValues == null) {
if (msgPattern == null || msgPattern.countParts() == 0) {
throw new IllegalStateException("Invalid format error.");
}
// Get appropriate format pattern.
String selectedPattern = parsedValues.get(keyword);
if (selectedPattern == null) { // Fallback to others.
selectedPattern = parsedValues.get(KEYWORD_OTHER);
// Get the appropriate sub-message.
int msgStart = findSubMessage(msgPattern, 0, keyword);
if (!msgPattern.jdkAposMode()) {
int msgLimit = msgPattern.getLimitPartIndex(msgStart);
return msgPattern.getPatternString().substring(msgPattern.getPart(msgStart).getLimit(),
msgPattern.getPatternIndex(msgLimit));
}
// JDK compatibility mode: Remove SKIP_SYNTAX.
StringBuilder result = null;
int prevIndex = msgPattern.getPart(msgStart).getLimit();
for (int i = msgStart;;) {
MessagePattern.Part part = msgPattern.getPart(++i);
MessagePattern.Part.Type type = part.getType();
int index = part.getIndex();
if (type == MessagePattern.Part.Type.MSG_LIMIT) {
if (result == null) {
return pattern.substring(prevIndex, index);
} else {
return result.append(pattern, prevIndex, index).toString();
}
} else if (type == MessagePattern.Part.Type.SKIP_SYNTAX) {
if (result == null) {
result = new StringBuilder();
}
result.append(pattern, prevIndex, index);
prevIndex = part.getLimit();
} else if (type == MessagePattern.Part.Type.ARG_START) {
if (result == null) {
result = new StringBuilder();
}
result.append(pattern, prevIndex, index);
prevIndex = index;
i = msgPattern.getLimitPartIndex(i);
index = msgPattern.getPart(i).getLimit();
MessagePattern.appendReducedApostrophes(pattern, prevIndex, index, result);
prevIndex = index;
}
}
return selectedPattern;
}
/**
* Selects the phrase for the given keyword.
* and appends the formatted message to the given <code>StringBuffer</code>.
* @param keyword a keyword for which the select message should be formatted.
* @param toAppendTo the formatted message will be appended to this
* @param keyword a phrase selection keyword.
* @param toAppendTo the selected phrase will be appended to this
* <code>StringBuffer</code>.
* @param pos will be ignored by this method.
* @throws IllegalArgumentException when the given keyword is not available in the select format pattern
* @throws IllegalArgumentException when the given keyword is not a String
* or not a "pattern identifier"
* @return the string buffer passed in as toAppendTo, with formatted text
* appended.
* @stable ICU 4.4
@ -470,45 +337,27 @@ public class SelectFormat extends Format{
throw new UnsupportedOperationException();
}
/*
* Checks if the applied pattern provided enough information,
* i.e., if the attribute <code>parsedValues</code> stores enough
* information for select formatting.
* Will be called at the end of pattern parsing.
*/
private boolean checkSufficientDefinition() {
// Check that at least the default rule is defined.
return parsedValues.get(KEYWORD_OTHER) != null;
}
/*
* Helper method that resets the <code>SelectFormat</code> object and throws
* an <code>IllegalArgumentException</code> with a given error text.
* @param errorText the error text of the exception message.
* @throws IllegalArgumentException will always be thrown by this method.
*/
private void parsingFailure(String errorText) {
// Set SelectFormat to a valid state.
init();
throw new IllegalArgumentException(errorText);
}
/**
* {@inheritDoc}
* @stable ICU 4.4
*/
@Override
public boolean equals(Object obj) {
if (!(obj instanceof SelectFormat)) {
if(this == obj) {
return true;
}
if(obj == null || getClass() != obj.getClass()) {
return false;
}
SelectFormat sf = (SelectFormat) obj;
return pattern == null ? sf.pattern == null : pattern.equals(sf.pattern);
return msgPattern == null ? sf.msgPattern == null : msgPattern.equals(sf.msgPattern);
}
/**
* {@inheritDoc}
* @stable ICU 4.4
*/
@Override
public int hashCode() {
if (pattern != null) {
return pattern.hashCode();
@ -517,16 +366,12 @@ public class SelectFormat extends Format{
}
/**
* Returns a string representation of the object
* @return a text representation of the format object.
* The result string includes the class name and
* the pattern string returned by <code>toPattern()</code>.
* {@inheritDoc}
* @stable ICU 4.4
*/
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("pattern='" + pattern + "'");
return buf.toString();
return "pattern='" + pattern + "'";
}
private void readObject(ObjectInputStream in)

View File

@ -15,6 +15,7 @@ import java.util.TreeSet;
import com.ibm.icu.impl.BMPSet;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.RuleCharacterIterator;
import com.ibm.icu.impl.SortedSetRelation;
import com.ibm.icu.impl.UBiDiProps;
@ -115,7 +116,7 @@ import com.ibm.icu.util.VersionInfo;
* </blockquote>
*
* Any character may be preceded by a backslash in order to remove any special
* meaning. White space characters, as defined by UCharacterProperty.isRuleWhiteSpace(), are
* meaning. White space characters, as defined by the Unicode Pattern_White_Space property, are
* ignored, unless they are escaped.
*
* <p>Property patterns specify a set of characters having a certain
@ -424,8 +425,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* Constructs a set from the given pattern. See the class description
* for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param ignoreWhitespace if true, ignore characters for which
* UCharacterProperty.isRuleWhiteSpace() returns true
* @param ignoreWhitespace if true, ignore Unicode Pattern_White_Space characters
* @exception java.lang.IllegalArgumentException if the pattern contains
* a syntax error.
* @stable ICU 2.0
@ -548,8 +548,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* optionally ignoring whitespace.
* See the class description for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param ignoreWhitespace if true then characters for which
* UCharacterProperty.isRuleWhiteSpace() returns true are ignored
* @param ignoreWhitespace if true then Unicode Pattern_White_Space characters are ignored
* @exception java.lang.IllegalArgumentException if the pattern
* contains a syntax error.
* @stable ICU 2.0
@ -628,7 +627,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
break;
default:
// Escape whitespace
if (UCharacterProperty.isRuleWhiteSpace(c)) {
if (PatternProps.isWhiteSpace(c)) {
buf.append('\\');
}
break;
@ -3189,30 +3188,27 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Remove leading and trailing rule white space and compress
* internal rule white space to a single space character.
*
* @see UCharacterProperty#isRuleWhiteSpace
* Remove leading and trailing Pattern_White_Space and compress
* internal Pattern_White_Space to a single space character.
*/
private static String mungeCharName(String source) {
StringBuffer buf = new StringBuffer();
for (int i=0; i<source.length(); ) {
int ch = UTF16.charAt(source, i);
i += UTF16.getCharCount(ch);
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
if (buf.length() == 0 ||
buf.charAt(buf.length() - 1) == ' ') {
source = PatternProps.trimWhiteSpace(source);
StringBuilder buf = null;
for (int i=0; i<source.length(); ++i) {
char ch = source.charAt(i);
if (PatternProps.isWhiteSpace(ch)) {
if (buf == null) {
buf = new StringBuilder().append(source, 0, i);
} else if (buf.charAt(buf.length() - 1) == ' ') {
continue;
}
ch = ' '; // convert to ' '
}
UTF16.append(buf, ch);
if (buf != null) {
buf.append(ch);
}
}
if (buf.length() != 0 &&
buf.charAt(buf.length() - 1) == ' ') {
buf.setLength(buf.length() - 1);
}
return buf.toString();
return buf == null ? source : buf.toString();
}
//----------------------------------------------------------------
@ -3603,8 +3599,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Bitmask for constructor and applyPattern() indicating that
* white space should be ignored. If set, ignore characters for
* which UCharacterProperty.isRuleWhiteSpace() returns true,
* white space should be ignored. If set, ignore Unicode Pattern_White_Space characters,
* unless they are quoted or escaped. This may be ORed together
* with other selectors.
* @stable ICU 3.8

View File

@ -44,6 +44,8 @@ import java.util.Locale;
import java.util.Map;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.ULocale;
public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
@ -101,15 +103,19 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
/* @bug 4058973
* MessageFormat.toPattern has weird rounding behavior.
*
* ICU 4.8: This test is commented out because toPattern() has been changed to return
* the original pattern string, rather than reconstituting a new (equivalent) one.
* This trivially eliminates issues with rounding or any other pattern string differences.
*/
public void Test4058973() {
/*public void Test4058973() {
MessageFormat fmt = new MessageFormat("{0,choice,0#no files|1#one file|1< {0,number,integer} files}");
String pat = fmt.toPattern();
if (!pat.equals("{0,choice,0.0#no files|1.0#one file|1.0< {0,number,integer} files}")) {
errln("MessageFormat.toPattern failed");
}
}
}*/
/* @bug 4031438
* More robust message formats.
*/
@ -143,11 +149,11 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
logln("Apply with pattern : " + pattern2);
messageFormatter.applyPattern(pattern2);
tempBuffer = messageFormatter.format(paramArray);
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {1} test plus other {2} stuff."))
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {1} test plus 'other {2} stuff'."))
errln("quote format test (w/ params) failed.");
logln("Formatted with params : " + tempBuffer);
tempBuffer = messageFormatter.format(null);
if (!tempBuffer.equals("Double ' Quotes {0} test and quoted {1} test plus other {2} stuff."))
if (!tempBuffer.equals("Double ' Quotes {0} test and quoted {1} test plus 'other {2} stuff'."))
errln("quote format test (w/ null) failed.");
logln("Formatted with null : " + tempBuffer);
logln("toPattern : " + messageFormatter.toPattern());
@ -285,12 +291,12 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
{
String originalPattern = "initial pattern";
MessageFormat mf = new MessageFormat(originalPattern);
String illegalPattern = "ab { '}' de";
try {
String illegalPattern = "ab { '}' de";
mf.applyPattern(illegalPattern);
errln("illegal pattern: \"" + illegalPattern + "\"");
} catch (IllegalArgumentException foo) {
if (!originalPattern.equals(mf.toPattern()))
if (illegalPattern.equals(mf.toPattern()))
errln("pattern after: \"" + mf.toPattern() + "\"");
}
}
@ -368,7 +374,7 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
mf.applyPattern(illegalPattern);
errln("Should have thrown IllegalArgumentException for pattern : " + illegalPattern);
} catch (IllegalArgumentException e) {
if (!originalPattern.equals(mf.toPattern()))
if (illegalPattern.equals(mf.toPattern()))
errln("pattern after: \"" + mf.toPattern() + "\"");
}
}
@ -596,7 +602,7 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
*/
public void Test4169959() {
// This works
logln(MessageFormat.format("This will {0}", "work"));
logln(MessageFormat.format("This will {0}", new Object[]{"work"}));
// This fails
logln(MessageFormat.format("This will {0}", new Object[]{ null }));
@ -670,11 +676,11 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
paramsMap.clear();
paramsMap.put("ARG_ZERO", new Integer(7));
tempBuffer = messageFormatter.format(paramsMap);
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {ARG_ONE} test plus other {ARG_TWO} stuff."))
if (!tempBuffer.equals("Double ' Quotes 7 test and quoted {ARG_ONE} test plus 'other {ARG_TWO} stuff'."))
errln("quote format test (w/ params) failed.");
logln("Formatted with params : " + tempBuffer);
tempBuffer = messageFormatter.format(null);
if (!tempBuffer.equals("Double ' Quotes {ARG_ZERO} test and quoted {ARG_ONE} test plus other {ARG_TWO} stuff."))
if (!tempBuffer.equals("Double ' Quotes {ARG_ZERO} test and quoted {ARG_ONE} test plus 'other {ARG_TWO} stuff'."))
errln("quote format test (w/ null) failed.");
logln("Formatted with null : " + tempBuffer);
logln("toPattern : " + messageFormatter.toPattern());
@ -833,5 +839,42 @@ public class MessageRegression extends com.ibm.icu.dev.test.TestFmwk {
}
}
}
}
/**
 * Round-trips a MessageFormat through Java object serialization using
 * in-memory byte streams.
 *
 * @param original the format to write out
 * @return the object read back from the serialized bytes
 * @throws RuntimeException wrapping any IOException or ClassNotFoundException
 *         raised while writing or reading
 */
private MessageFormat serializeAndDeserialize(MessageFormat original) {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try {
        ObjectOutputStream writer = new ObjectOutputStream(sink);
        writer.writeObject(original);
        // Push everything into the byte array before taking the snapshot.
        writer.flush();
        ObjectInputStream reader =
                new ObjectInputStream(new ByteArrayInputStream(sink.toByteArray()));
        return (MessageFormat) reader.readObject();
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Checks that a MessageFormat compares equal to its serialized-and-restored
 * copy, both for an empty pattern and after custom formats have been set,
 * and that the restored copy still formats as expected.
 */
public void TestSerialization() {
    // Case 1: empty pattern.
    MessageFormat before = new MessageFormat("", ULocale.GERMAN);
    MessageFormat after = serializeAndDeserialize(before);
    assertEquals("MessageFormats (empty pattern) before and after serialization are not equal", before, after);

    // Case 2: a real pattern whose formats are then overridden via the setters.
    before.applyPattern("ab{1}cd{0,number}ef{3,date}gh");
    before.setFormat(2, null);
    before.setFormatByArgumentIndex(1, NumberFormat.getInstance(ULocale.ENGLISH));
    after = serializeAndDeserialize(before);
    assertEquals("MessageFormats (with custom formats) before and after serialization are not equal", before, after);

    // The restored copy must honor the overridden formats when formatting.
    Object[] arguments = new Object[] { 4.4, 3.3, "+++", "***" };
    String formatted = after.format(arguments);
    assertEquals(
            "MessageFormat (with custom formats) does not "+
            "format correctly after serialization",
            "ab3.3cd4,4ef***gh",
            formatted);
}
}

View File

@ -92,19 +92,15 @@ public class PluralFormatUnitTest extends TestFmwk {
plfOddAndEven.format(i));
}
// Check that double definition results in an exception.
try {
PluralFormat plFmt = new PluralFormat(oddAndEven);
plFmt.applyPattern("odd{foo} odd{bar} other{foobar}");
errln("Double definition of a plural case message should " +
"provoke an exception but did not.");
}catch (IllegalArgumentException e){}
try {
PluralFormat plFmt = new PluralFormat(oddAndEven);
plFmt.applyPattern("odd{foo} other{bar} other{foobar}");
errln("Double definition of a plural case message should " +
"provoke an exception but did not.");
}catch (IllegalArgumentException e){}
// ICU 4.8 does not check for duplicate keywords any more.
PluralFormat pf = new PluralFormat(ULocale.ENGLISH, oddAndEven,
"odd{foo} odd{bar} other{foobar}");
assertEquals("should use first occurrence of the 'odd' keyword", "foo", pf.format(1));
pf.applyPattern("odd{foo} other{bar} other{foobar}");
assertEquals("should use first occurrence of the 'other' keyword", "bar", pf.format(2));
// This sees the first "other" before calling the PluralSelector which then selects "other".
pf.applyPattern("other{foo} odd{bar} other{foobar}");
assertEquals("should use first occurrence of the 'other' keyword", "foo", pf.format(2));
}
// omit other keyword.
try {
@ -114,20 +110,18 @@ public class PluralFormatUnitTest extends TestFmwk {
"exception but did not.");
}catch (IllegalArgumentException e){}
// Test unknown keyword.
try {
PluralFormat plFmt = new PluralFormat(oddAndEven);
plFmt.applyPattern("otto{foo} other{bar}");
errln("Defining a message for an unknown keyword should result in" +
"an exception but did not.");
}catch (IllegalArgumentException e){}
// ICU 4.8 does not check for unknown keywords any more.
{
PluralFormat pf = new PluralFormat(ULocale.ENGLISH, oddAndEven, "otto{foo} other{bar}");
assertEquals("should ignore unknown keywords", "bar", pf.format(1));
}
// Test invalid keyword.
try {
PluralFormat plFmt = new PluralFormat(oddAndEven);
plFmt.applyPattern("1odd{foo} other{bar}");
errln("Defining a message for an invalid keyword should result in" +
"an exception but did not.");
plFmt.applyPattern("*odd{foo} other{bar}");
errln("Defining a message for an invalid keyword should result in " +
"an exception but did not.");
}catch (IllegalArgumentException e){}
// Test invalid syntax
@ -170,12 +164,12 @@ public class PluralFormatUnitTest extends TestFmwk {
// Check that a pound sign in curly braces is preserved.
{
PluralFormat plFmt = new PluralFormat(oddAndEven);
plFmt.applyPattern("odd{The number {#} is odd.}" +
"other{The number {#} is even.}");
plFmt.applyPattern("odd{The number {1,number,#} is odd.}" +
"other{The number {2,number,#} is even.}");
for (int i = 1; i < 3; ++i) {
assertEquals("format did not preserve # inside curly braces.",
((i % 2 == 1) ? "The number {#} is odd."
: "The number {#} is even."),
((i % 2 == 1) ? "The number {1,number,#} is odd."
: "The number {2,number,#} is even."),
plFmt.format(i));
}
@ -223,14 +217,9 @@ public class PluralFormatUnitTest extends TestFmwk {
plFmt.format(5));
// Check that rules got updated.
try {
plFmt.applyPattern("odd__{odd} other{even}");
errln("SetLocale should reset rules but did not.");
} catch (IllegalArgumentException e) {
if (e.getMessage().indexOf("Unknown keyword") < 0){
errln("Wrong exception thrown");
}
}
plFmt.applyPattern("odd__{odd} other{even}");
assertEquals("SetLocale should reset rules but did not.", "even", plFmt.format(1));
plFmt.applyPattern("one{one} other{not one}");
for (int i = 0; i < 20; ++i) {
assertEquals("Wrong ruleset loaded by setLocale()",
@ -261,12 +250,11 @@ public class PluralFormatUnitTest extends TestFmwk {
Object[] args = { "acme", null };
{
PluralFormat pf = new PluralFormat(" one {one ''widget} other {# widgets} ");
String pat = pf.toPattern();
logln("pf pattern: '" + pat + "'");
assertEquals("no leading spaces", "o", pat.substring(0, 1));
assertEquals("no trailing spaces", "}", pat.substring(pat.length() - 1));
// ICU 4.8 PluralFormat does not trim() its pattern any more.
// None of the other *Format classes do.
String pat = " one {one ''widget} other {# widgets} ";
PluralFormat pf = new PluralFormat(pat);
assertEquals("should not trim() the pattern", pat, pf.toPattern());
}
MessageFormat pfmt = new MessageFormat("The disk ''{0}'' contains {1, plural, one {one ''''{1, number, #.0}'''' widget} other {# widgets}}.");
@ -275,10 +263,60 @@ public class PluralFormatUnitTest extends TestFmwk {
args[1] = new Integer(i);
logln(pfmt.format(args));
}
/* ICU 4.8 returns null instead of a choice/plural/select Format object
* (because it does not create an object for any "complex" argument).
PluralFormat pf = (PluralFormat)pfmt.getFormatsByArgumentIndex()[1];
logln(pf.toPattern());
*/
logln(pfmt.toPattern());
MessageFormat pfmt2 = new MessageFormat(pfmt.toPattern());
assertEquals("message formats are equal", pfmt, pfmt2);
}
/**
 * Exercises a PluralFormat pattern that combines an "offset:" prefix,
 * explicit-value selectors ("=0", "=1.0", "=5") and plural keywords.
 * Each integer from 0 up to targets.length - 1 is formatted and compared
 * with the expected string at the same index. Afterwards a pattern with
 * duplicate keywords and duplicate explicit values is applied to check
 * which message wins.
 */
public void TestExtendedPluralFormat() {
    String[] targets = {
        "There are no widgets.",
        "There is one widget.",
        "There is a bling widget and one other widget.",
        "There is a bling widget and 2 other widgets.",
        "There is a bling widget and 3 other widgets.",
        "Widgets, five (5-1=4) there be.",
        "There is a bling widget and 5 other widgets.",
        "There is a bling widget and 6 other widgets.",
    };
    PluralFormat pf = new PluralFormat(
            ULocale.ENGLISH,
            "offset:1.0 "
            + "=0 {There are no widgets.} "
            + "=1.0 {There is one widget.} "
            + "=5 {Widgets, five (5-1=#) there be.} "
            + "one {There is a bling widget and one other widget.} "
            + "other {There is a bling widget and # other widgets.}");
    // Walk the whole expectation table. The previous hard-coded bound of 7
    // left the last entry (value 7 -> "6 other widgets") unverified.
    for (int i = 0; i < targets.length; ++i) {
        String result = pf.format(i);
        assertEquals("value = " + i, targets[i], result);
    }
    // Try explicit values after keywords.
    pf.applyPattern("other{zz}other{yy}one{xx}one{ww}=1{vv}=1{uu}");
    assertEquals("should find first matching *explicit* value", "vv", pf.format(1));
}
/**
 * Feeds a list of malformed plural patterns to the PluralFormat constructor
 * and reports a failure for every one that does not raise an
 * IllegalArgumentException.
 */
public void TestExtendedPluralFormatParsing() {
    final String[] malformed = {
        "offset:1..0 =0 {Foo}",
        "offset:1.0 {Foo}",
        "=0= {Foo}",
        "=0 {Foo} =0.0 {Bar}",
        " = {Foo}",
    };
    for (int i = 0; i < malformed.length; ++i) {
        String candidate = malformed[i];
        try {
            new PluralFormat(candidate);
            fail("expected exception when parsing '" + candidate + "'");
        } catch (IllegalArgumentException expected) {
            // ok
        }
    }
}
}

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (c) 2004-2010, International Business Machines
* Copyright (c) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
*******************************************************************************
@ -27,10 +27,8 @@ public class SelectFormatUnitTest extends TestFmwk {
*/
public void TestPatternSyntax() {
String checkSyntaxData[] = {
"odd{foo} odd{bar} other{foobar}",
"odd{foo} other{bar} other{foobar}",
"odd{foo}",
"1odd{foo} other{bar}",
"*odd{foo} other{bar}",
"odd{foo},other{bar}",
"od d{foo} other{bar}",
"odd{foo}{foobar}other{foo}",
@ -39,19 +37,6 @@ public class SelectFormatUnitTest extends TestFmwk {
"odd{fo{o1}other{foo2}}"
};
String expectedErrorMsgs[] = {
"Duplicate keyword error.",
"Duplicate keyword error.",
"Pattern syntax error. Value for case \"other\" was not defined. ",
"Pattern syntax error.",
"Pattern syntax error.",
"Pattern syntax error.",
"Pattern syntax error.",
"Pattern syntax error.",
"Pattern syntax error.",
"Pattern syntax error. Value for case \"other\" was not defined. ",
};
//Test SelectFormat pattern syntax
SelectFormat selFmt = new SelectFormat(SIMPLE_PATTERN);
for (int i=0; i<checkSyntaxData.length; ++i) {
@ -60,26 +45,34 @@ public class SelectFormatUnitTest extends TestFmwk {
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed "
+ "to detect syntax error with pattern: "+checkSyntaxData[i]);
} catch (IllegalArgumentException e){
assertEquals("Error:TestPatternSyntax failed with unexpected"
+ " error message for pattern: " + checkSyntaxData[i] ,
expectedErrorMsgs[i], e.getMessage() );
// ok
continue;
}
}
// ICU 4.8 does not check for duplicate keywords any more.
selFmt.applyPattern("odd{foo} odd{bar} other{foobar}");
assertEquals("should use first occurrence of the 'odd' keyword", "foo", selFmt.format("odd"));
selFmt.applyPattern("odd{foo} other{bar} other{foobar}");
assertEquals("should use first occurrence of the 'other' keyword", "bar", selFmt.format("other"));
}
/**
* Unit tests for invalid keywords
*/
public void TestInvalidKeyword() {
//Test formatting with invalid keyword
// Test formatting with invalid keyword:
// one which contains Pattern_Syntax or Pattern_White_Space.
String keywords[] = {
"9Keyword-_", //Starts with a digit
"-Keyword-_", //Starts with a hyphen
"_Keyword-_", //Starts with an underscore
"\\u00E9Keyword-_", //Starts with non-ASCII character
"Key*word-_", //Contains a sepial character not allowed
"*Keyword-_" //Starts with a sepial character not allowed
"9Keyword-_",
"-Keyword-_",
"_Keyword-_",
"\\u00E9Keyword-_",
"Key word",
" Keyword",
"Keyword ",
"Key*word-_",
"*Keyword-_"
};
String expected = "Invalid formatting argument.";

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2004-2010, International Business Machines
* Copyright (c) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -28,6 +28,7 @@ import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.text.MessagePattern;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.UFormat;
@ -158,7 +159,9 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
"'{1,number,#,##}' {1,number,#,##}",
};
String testResultPatterns[] = {
// ICU 4.8 returns the original pattern (testCases)
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
/*String testResultPatterns[] = {
"Quotes '', '{', a {0} '{'0}",
"Quotes '', '{', a {0,number} '{'0}",
"'{'1,number,#,##} {1,number,'#'#,##}",
@ -168,12 +171,12 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
"'{'1,date,full}, {1,date,full},",
"'{'3,date,full}, {3,date,full},",
"'{'1,number,#,##} {1,number,#,##}"
};
};*/
String testResultStrings[] = {
"Quotes ', {, a 1 {0}",
"Quotes ', {, a 1 {0}",
"{1,number,#,##} #34,56",
"Quotes ', {, 'a' 1 {0}",
"Quotes ', {, 'a' 1 {0}",
"{1,number,'#',##} #34,56",
"There are 3,456 files on Disk at 1/12/70 5:46 AM.",
"On Disk, there are 3,456 files, with $1.00.",
"{1,number,percent}, 345,600%,",
@ -193,7 +196,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
errln("MessageFormat for " + testCases[i] + " creation failed.");
continue;
}
assertEquals("\"" + testCases[i] + "\".toPattern()", testResultPatterns[i], form.toPattern());
// ICU 4.8 returns the original pattern (testCases)
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
// assertEquals("\"" + testCases[i] + "\".toPattern()", testResultPatterns[i], form.toPattern());
assertEquals("\"" + testCases[i] + "\".toPattern()", testCases[i], form.toPattern());
// Note: An alternative test would be to build MessagePattern objects for
// both the input and output patterns and compare them, taking SKIP_SYNTAX etc.
// into account.
// (Too much trouble...)
//it_out << "Pat out: " << form.toPattern(buffer));
StringBuffer result = new StringBuffer();
@ -644,7 +654,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
}
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
try {
msg.toPattern();
errln("msg.setFormat().toPattern() does not throw an IllegalStateException");
} catch(IllegalStateException e) {
// ok
}
for (i = 0; i < formatsAct.length; i++) {
a = formatsAct[i];
@ -685,7 +702,8 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
msg.setFormats( formatsToAdopt ); // function to test
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
formatsAct = msg.getFormats();
if (formatsAct==null || (formatsAct.length <=0) || (formatsAct.length != formatsCmp.length)) {
@ -735,7 +753,8 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
}
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern());
assertEquals("msg.toPattern()", formatStr, msg.toPattern());
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern());
formatsAct = msg.getFormats();
if (formatsAct==null || (formatsAct.length <=0) || (formatsAct.length != formatsCmp.length)) {
@ -1124,22 +1143,14 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
}
public void testNamedArguments() {
// Ensure that mixed argument types are not allowed.
// Either all arguments have to be numeric or valid identifiers.
try {
new MessageFormat("Number of files in folder {0}: {numfiles}");
errln("Creating a MessageFormat with mixed argument types " +
"(named and numeric) should throw an " +
"IllegalArgumentException but did not!");
} catch (IllegalArgumentException e) {}
try {
new MessageFormat("Number of files in folder {folder}: {1}");
errln("Creating a MessageFormat with mixed argument types " +
"(named and numeric) should throw an " +
"IllegalArgumentException but did not!");
} catch (IllegalArgumentException e) {}
// ICU 4.8 allows mixing named and numbered arguments.
assertTrue(
"has some named arguments",
new MessageFormat("Number of files in folder {0}: {numfiles}").usesNamedArguments());
assertTrue(
"has some named arguments",
new MessageFormat("Number of files in folder {folder}: {1}").usesNamedArguments());
// Test named arguments.
MessageFormat mf = new MessageFormat("Number of files in folder {folder}: {numfiles}");
if (!mf.usesNamedArguments()) {
@ -1151,19 +1162,21 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
}
// Test argument names with invalid start characters.
// Modified: ICU 4.8 allows all characters except for Pattern_White_Space and Pattern_Syntax.
try {
new MessageFormat("Wavelength: {_\u028EValue\uFF14}");
new MessageFormat("Wavelength: {^\u028EValue\uFF14}");
errln("Creating a MessageFormat with invalid argument names " +
"should throw an IllegalArgumentException but did not!");
} catch (IllegalArgumentException e) {}
try {
new MessageFormat("Wavelength: {\uFF14\u028EValue}");
new MessageFormat("Wavelength: {\uFE45\u028EValue}");
errln("Creating a MessageFormat with invalid argument names " +
"should throw an IllegalArgumentException but did not!");
} catch (IllegalArgumentException e) {}
// Test argument names with invalid continue characters.
// Modified: ICU 4.8 allows all characters except for Pattern_White_Space and Pattern_Syntax.
try {
new MessageFormat("Wavelength: {Value@\uFF14}");
errln("Creating a MessageFormat with invalid argument names " +
@ -1240,7 +1253,7 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
public void testNestedFormatsInPluralFormat() {
try {
MessageFormat msgFmt = new MessageFormat(
"{0, plural, one {{0, number,C''''est #,##0.0# fichier}} " +
"{0, plural, one {{0, number,C''est #,##0.0# fichier}} " +
"other {Ce sont # fichiers}} dans la liste.",
new ULocale("fr"));
Object objArray[] = {new Long(0)};
@ -1304,6 +1317,19 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
}
}
/**
 * Formats a message whose plural and select sub-messages contain quoted
 * syntax characters and doubled apostrophes, and reports an error if the
 * output differs from the expected literal text.
 */
public void testApostropheInPluralAndSelect() {
    String pattern =
            "abc_{0,plural,other{#'#'#'{'#''}}_def_{1,select,other{sel'}'ect''}}_xyz";
    MessageFormat fmt = new MessageFormat(pattern, Locale.ENGLISH);
    String expected = "abc_3#3{3'_def_sel}ect'_xyz";
    String actual = fmt.format(new Object[] { 3, "x" });
    if (actual.equals(expected)) {
        return;
    }
    errln("MessageFormat with apostrophes in plural/select arguments failed:\n" +
          "Expected "+expected+"\n" +
          "Got "+actual);
}
// Test toPattern when there is a PluralFormat
public void testPluralFormatToPattern() {
String[] patterns = {
@ -1681,4 +1707,129 @@ public class TestMessageFormat extends com.ibm.icu.dev.test.TestFmwk {
+ "to return an null if argumentName was not found.");
}
}
/**
 * Returns the pattern string of the given MessagePattern with the text
 * ranges of all SKIP_SYNTAX parts removed.
 *
 * @param pattern a MessagePattern whose parts are inspected
 * @return the pattern string minus every SKIP_SYNTAX range
 */
public String getPatternAndSkipSyntax(MessagePattern pattern) {
    StringBuilder text = new StringBuilder(pattern.getPatternString());
    // Go from the last part to the first so that deleting a range never
    // shifts the indexes of parts that are still to be visited.
    for (int i = pattern.countParts() - 1; i >= 0; --i) {
        MessagePattern.Part part = pattern.getPart(i);
        if (part.getType() != MessagePattern.Part.Type.SKIP_SYNTAX) {
            continue;
        }
        text.delete(part.getIndex(), part.getLimit());
    }
    return text.toString();
}
/**
 * Checks the two MessagePattern apostrophe modes: the mode getter, inequality
 * of patterns that differ only in mode, and - for a table of sample
 * patterns - the text left after removing SKIP_SYNTAX parts in each mode.
 */
public void TestApostropheMode() {
    MessagePattern optionalMode =
            new MessagePattern(MessagePattern.ApostropheMode.DOUBLE_OPTIONAL);
    MessagePattern requiredMode =
            new MessagePattern(MessagePattern.ApostropheMode.DOUBLE_REQUIRED);
    assertEquals("wrong value",
            MessagePattern.ApostropheMode.DOUBLE_OPTIONAL,
            optionalMode.getApostropheMode());
    assertEquals("wrong value",
            MessagePattern.ApostropheMode.DOUBLE_REQUIRED,
            requiredMode.getApostropheMode());
    assertNotEquals("MessagePatterns with different ApostropheMode (no pattern)",
            optionalMode, requiredMode);
    assertNotEquals("MessagePatterns with different ApostropheMode (a)",
            optionalMode.parse("a"), requiredMode.parse("a"));

    // Rows of three: desired output, DOUBLE_OPTIONAL pattern,
    // DOUBLE_REQUIRED pattern (null = reuse the DOUBLE_OPTIONAL pattern).
    String[] tuples = new String[] {
        "I see {many}", "I see '{many}'", null,
        "I said {'Wow!'}", "I said '{''Wow!''}'", null,
        "I dont know", "I dont know", "I don't know",
        "I don't know", "I don't know", "I don''t know",
        "I don't know", "I don''t know", "I don''t know",
    };
    int row = 0;
    while (row < tuples.length) {
        String desired = tuples[row];
        String optionalPattern = tuples[row + 1];
        String requiredPattern = tuples[row + 2];
        row += 3;

        assertEquals("DOUBLE_OPTIONAL failure", desired,
                getPatternAndSkipSyntax(optionalMode.parse(optionalPattern)));
        if (requiredPattern == null) {
            requiredPattern = optionalPattern;
        }
        assertEquals("DOUBLE_REQUIRED failure", desired,
                getPatternAndSkipSyntax(requiredMode.parse(requiredPattern)));
    }
}
// Compare behavior of JDK and ICU's DOUBLE_REQUIRED compatibility mode.
// Three formatters are built from the same pattern: the JDK java.text.MessageFormat,
// an ICU MessageFormat in DOUBLE_REQUIRED mode (compMsg) and an ICU MessageFormat
// in DOUBLE_OPTIONAL mode (icuMsg). The assertions expect compMsg to produce the
// same output as the JDK, and icuMsg to produce different output.
public void TestCompatibleApostrophe() {
// Message with choice argument which does not contain another argument.
// The JDK performs only one apostrophe-quoting pass on this pattern.
String pattern = "ab{0,choice,0#1'2''3'''4''''.}yz";
java.text.MessageFormat jdkMsg =
new java.text.MessageFormat(pattern, Locale.ENGLISH);
// ICU formatter in JDK-compatible mode; the getter check confirms the mode was stored.
MessageFormat compMsg = new MessageFormat("", Locale.ENGLISH);
compMsg.applyPattern(pattern, MessagePattern.ApostropheMode.DOUBLE_REQUIRED);
assertEquals("wrong value",
MessagePattern.ApostropheMode.DOUBLE_REQUIRED,
compMsg.getApostropheMode());
// ICU formatter in DOUBLE_OPTIONAL mode, used as the contrasting case.
MessageFormat icuMsg = new MessageFormat("", Locale.ENGLISH);
icuMsg.applyPattern(pattern, MessagePattern.ApostropheMode.DOUBLE_OPTIONAL);
assertEquals("wrong value",
MessagePattern.ApostropheMode.DOUBLE_OPTIONAL,
icuMsg.getApostropheMode());
// Argument 0 is the number 0, which selects the "0#" choice branch.
Object[] zero0 = new Object[] { 0 };
assertEquals("unexpected JDK MessageFormat apostrophe behavior",
"ab12'3'4''.yz",
jdkMsg.format(zero0));
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
"ab12'3'4''.yz",
compMsg.format(zero0));
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
"ab1'2'3''4''.yz",
icuMsg.format(zero0));
// Message with choice argument which contains a nested simple argument.
// The JDK performs two apostrophe-quoting passes.
pattern = "ab{0,choice,0#1'2''3'''4''''.{0,number,'#x'}}yz";
jdkMsg.applyPattern(pattern);
// No apostrophe mode is passed here; the differing expectations below rely on
// compMsg and icuMsg each keeping the mode set by their earlier applyPattern call.
compMsg.applyPattern(pattern);
icuMsg.applyPattern(pattern);
assertEquals("unexpected JDK MessageFormat apostrophe behavior",
"ab1234'.0xyz",
jdkMsg.format(zero0));
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
"ab1234'.0xyz",
compMsg.format(zero0));
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
"ab1'2'3''4''.#x0yz",
icuMsg.format(zero0));
// Message with choice argument which contains a nested choice argument.
// The JDK fails to parse this pattern.
// jdkMsg.applyPattern("cd{0,choice,0#ef{0,choice,0#1'2''3'''4''''.}uv}wx");
// For lack of comparison, we do not test ICU with this pattern.
// The JDK ChoiceFormat itself always performs one apostrophe-quoting pass.
// NOTE(review): ChoiceFormat is unqualified here; per the comment above it is
// presumably java.text.ChoiceFormat - confirm against the file's imports.
ChoiceFormat choice = new ChoiceFormat("0#1'2''3'''4''''.");
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
"12'3'4''.",
choice.format(0));
// Used on its own, the ChoiceFormat is expected to return the nested argument
// text literally (minus the quotes), as the expected string below shows.
choice.applyPattern("0#1'2''3'''4''''.{0,number,'#x'}");
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
"12'3'4''.{0,number,#x}",
choice.format(0));
}
/**
 * Checks that white space around argument numbers and argument names inside
 * a MessageFormat pattern is ignored (ICU 4.8 behavior), once with a numbered
 * argument and once with a named argument supplied through a Map.
 */
public void TestTrimArgumentName() {
    // Numbered argument surrounded by spaces.
    MessageFormat m = new MessageFormat("a { 0 , number , '#,#'#.0 } z", Locale.ENGLISH);
    String numbered = m.format(new Object[] { 2 });
    assertEquals("trim-numbered-arg format() failed", "a #,#2.0 z", numbered);

    // Named argument surrounded by spaces, formatted from a map.
    m.applyPattern("x { _oOo_ , number , integer } y");
    Map<String, Object> namedArgs = new HashMap<String, Object>();
    namedArgs.put("_oOo_", new Integer(3));
    StringBuffer out = new StringBuffer();
    String named = m.format(namedArgs, out, new FieldPosition(0)).toString();
    assertEquals("trim-named-arg format() failed", "x 3 y", named);
}
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -15,6 +15,7 @@ import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.UCharacterName;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.impl.Utility;
@ -247,6 +248,46 @@ public final class UCharacterTest extends TestFmwk
}
}
/**
 * Test various implementations of Pattern_Syntax and Pattern_White_Space.
 * For every code point from 0 to 0xffff, the PatternProps predicates
 * (isSyntax, isWhiteSpace, isSyntaxOrWhiteSpace) are collected into sets,
 * which are then compared against the corresponding UnicodeSet property
 * patterns and against hard-coded range lists.
 */
public void TestPatternProperties() {
// Naming: *_pp = built from PatternProps, *_prop = built from the Unicode
// property pattern, *_list = hard-coded ranges.
UnicodeSet syn_pp = new UnicodeSet();
UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]");
UnicodeSet syn_list = new UnicodeSet(
"[!-/\\:-@\\[-\\^`\\{-~"+
"\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+
"\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+
"\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]");
UnicodeSet ws_pp = new UnicodeSet();
UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]");
UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]");
UnicodeSet syn_ws_pp = new UnicodeSet();
// Union of the two property sets, for checking the combined predicate.
UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop);
// Only code points up to 0xffff are probed; the hard-coded lists above
// likewise contain no entries beyond that range.
for(int c=0; c<=0xffff; ++c) {
if(PatternProps.isSyntax(c)) {
syn_pp.add(c);
}
if(PatternProps.isWhiteSpace(c)) {
ws_pp.add(c);
}
if(PatternProps.isSyntaxOrWhiteSpace(c)) {
syn_ws_pp.add(c);
}
}
// Each PatternProps-derived set is checked against both reference sets.
compareUSets(syn_pp, syn_prop,
"PatternProps.isSyntax()", "[:Pattern_Syntax:]", true);
compareUSets(syn_pp, syn_list,
"PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true);
compareUSets(ws_pp, ws_prop,
"PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true);
compareUSets(ws_pp, ws_list,
"PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true);
compareUSets(syn_ws_pp, syn_ws_prop,
"PatternProps.isSyntaxOrWhiteSpace()",
"[[:Pattern_Syntax:][:Pattern_White_Space:]]", true);
}
/**
* Tests for defined and undefined characters
*/

View File

@ -130,6 +130,16 @@ public class CompatibilityTest extends TestFmwk
{"ICU_3.8.1", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
{"ICU_4.0", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
{"ICU_4.2.1", "com.ibm.icu.text.CurrencyPluralInfo.dat"},
// ICU 4.8 MessageFormat is not serialization-compatible with previous versions.
{"ICU_3.6", "com.ibm.icu.text.MessageFormat.dat"},
{"ICU_3.8.1", "com.ibm.icu.text.MessageFormat.dat"},
{"ICU_4.0", "com.ibm.icu.text.MessageFormat.dat"},
{"ICU_4.2.1", "com.ibm.icu.text.MessageFormat.dat"},
{"ICU_4.4", "com.ibm.icu.text.MessageFormat.dat"},
// RelativeDateFormat apparently uses and serializes a MessageFormat.
{"ICU_4.0", "com.ibm.icu.impl.RelativeDateFormat.dat"},
{"ICU_4.2.1", "com.ibm.icu.impl.RelativeDateFormat.dat"},
{"ICU_4.4", "com.ibm.icu.impl.RelativeDateFormat.dat"},
};
private Target getFileTargets(URL fileURL)

View File

@ -135,6 +135,10 @@ new features in this release. The list of API changes since the previous ICU4J
is available
<a href="http://source.icu-project.org/repos/icu/icu4j/tags/milestone-4-7-1/APIChangeReport.html">here</a>.
</p>
<h5>MessageFormat Changes</h5>
<p>MessageFormat and related classes (choice/plural/select) have been reimplemented,
with several improvements and some incompatible changes.
See the <a href="http://site.icu-project.org/download/48">ICU 4.8 download</a> page for details.</p>
<h3 class="doc"><a name="license"></a>License Information</h3>
<p>
The ICU projects (ICU4C and ICU4J) use the X license. The X