ICU-10128 update ICU to Unicode 6.3 beta (merge from branches/markus/uni63 at r33585)
X-SVN-Rev: 33663
This commit is contained in:
parent
2982958b06
commit
54eb776527
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -16,7 +16,6 @@ import com.ibm.icu.impl.ICUBinary;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.IntTrie;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
|
||||
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
|
||||
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
|
||||
@ -138,13 +137,14 @@ final class CollatorReader {
|
||||
*/
|
||||
private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
|
||||
if (readICUHeader) {
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// binary files
|
||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||
if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
|
||||
// Note: In ICU 51 and earlier,
|
||||
// we used to check that the UCA data version (readHeader() return value)
|
||||
// matches the UCD version (UCharacter.getUnicodeVersion())
|
||||
// but that complicated version updates, and
|
||||
// a mismatch is "only" a problem for handling canonical equivalence.
|
||||
// It need not be a fatal error.
|
||||
// throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
m_dataInputStream_ = new DataInputStream(inputStream);
|
||||
}
|
||||
@ -512,15 +512,11 @@ final class CollatorReader {
|
||||
* thrown when error occurs while reading the inverse uca
|
||||
*/
|
||||
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
|
||||
INVERSE_UCA_AUTHENTICATE_);
|
||||
ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);
|
||||
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// binary files
|
||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||
if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
// TODO: Check that the invuca data version (readHeader() return value)
|
||||
// matches the ucadata version.
|
||||
// throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
|
||||
CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
|
||||
DataInputStream input = new DataInputStream(inputStream);
|
||||
@ -616,7 +612,7 @@ final class CollatorReader {
|
||||
/**
|
||||
* Wrong Unicode version error string (currently unused: the declaration below is commented out)
|
||||
*/
|
||||
private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
|
||||
// private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
|
||||
|
||||
/**
|
||||
* Size of expansion table in bytes
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2011, International Business Machines
|
||||
* Copyright (C) 2004-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -152,6 +152,8 @@ public final class UBiDiProps {
|
||||
return (max&MAX_JG_MASK)>>MAX_JG_SHIFT;
|
||||
case UProperty.JOINING_TYPE:
|
||||
return (max&JT_MASK)>>JT_SHIFT;
|
||||
case UProperty.BIDI_PAIRED_BRACKET_TYPE:
|
||||
return (max&BPT_MASK)>>BPT_SHIFT;
|
||||
default:
|
||||
return -1; /* undefined */
|
||||
}
|
||||
@ -165,12 +167,8 @@ public final class UBiDiProps {
|
||||
return getFlagFromProps(trie.get(c), IS_MIRRORED_SHIFT);
|
||||
}
|
||||
|
||||
public final int getMirror(int c) {
|
||||
int props;
|
||||
int delta;
|
||||
|
||||
props=trie.get(c);
|
||||
delta=((short)props)>>MIRROR_DELTA_SHIFT;
|
||||
private final int getMirror(int c, int props) {
|
||||
int delta=getMirrorDeltaFromProps(props);
|
||||
if(delta!=ESC_MIRROR_DELTA) {
|
||||
return c+delta;
|
||||
} else {
|
||||
@ -198,6 +196,11 @@ public final class UBiDiProps {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: fetches the properties word for c from the trie
 * and delegates to getMirror(int c, int props).
 *
 * @param c the code point to map
 * @return the value returned by getMirror(c, props) for c's trie entry
 */
public final int getMirror(int c) {
|
||||
int props=trie.get(c);
|
||||
return getMirror(c, props);
|
||||
}
|
||||
|
||||
public final boolean isBidiControl(int c) {
|
||||
return getFlagFromProps(trie.get(c), BIDI_CONTROL_SHIFT);
|
||||
}
|
||||
@ -222,6 +225,19 @@ public final class UBiDiProps {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Extracts the Bidi_Paired_Bracket_Type field from the properties word of c:
 * the bits selected by BPT_MASK, shifted down by BPT_SHIFT.
 *
 * @param c the code point to look up
 * @return the extracted field value
 */
public final int getPairedBracketType(int c) {
|
||||
return (trie.get(c)&BPT_MASK)>>BPT_SHIFT;
|
||||
}
|
||||
|
||||
/**
 * Maps c to its paired bracket.
 * If the Bidi_Paired_Bracket_Type bits (BPT_MASK) of c's properties word are
 * all zero, c itself is returned; otherwise the result is the same as
 * getMirror(c, props), reusing the already fetched properties word.
 *
 * @param c the code point to map
 * @return c, or the result of getMirror(c, props) when the BPT bits are non-zero
 */
public final int getPairedBracket(int c) {
|
||||
int props=trie.get(c);
|
||||
if((props&BPT_MASK)==0) {
|
||||
return c;
|
||||
} else {
|
||||
return getMirror(c, props);
|
||||
}
|
||||
}
|
||||
|
||||
// data members -------------------------------------------------------- ***
|
||||
private int indexes[];
|
||||
private int mirrors[];
|
||||
@ -254,7 +270,7 @@ public final class UBiDiProps {
|
||||
/* CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */
|
||||
private static final int JT_SHIFT=5; /* joining type: 3 bits (7..5) */
|
||||
|
||||
/* private static final int _SHIFT=8, reserved: 2 bits (9..8) */
|
||||
private static final int BPT_SHIFT=8; /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */
|
||||
|
||||
private static final int JOIN_CONTROL_SHIFT=10;
|
||||
private static final int BIDI_CONTROL_SHIFT=11;
|
||||
@ -266,6 +282,7 @@ public final class UBiDiProps {
|
||||
|
||||
private static final int CLASS_MASK= 0x0000001f;
|
||||
private static final int JT_MASK= 0x000000e0;
|
||||
private static final int BPT_MASK= 0x00000300;
|
||||
|
||||
private static final int MAX_JG_MASK= 0x00ff0000;
|
||||
|
||||
@ -275,6 +292,9 @@ public final class UBiDiProps {
|
||||
/**
 * Tests a single bit of a properties word.
 *
 * @param props the properties word
 * @param shift the bit position to test
 * @return true if the bit at position shift is set in props
 */
private static final boolean getFlagFromProps(int props, int shift) {
|
||||
return ((props>>shift)&1)!=0;
|
||||
}
|
||||
/**
 * Extracts the mirror delta from a properties word.
 * The cast binds tighter than the shift, so props is first narrowed to a
 * short (keeping the low 16 bits, sign-extended) and then shifted right
 * arithmetically by MIRROR_DELTA_SHIFT; the result can therefore be negative
 * (compare ESC_MIRROR_DELTA).
 *
 * @param props the properties word
 * @return the signed mirror delta
 */
private static final int getMirrorDeltaFromProps(int props) {
|
||||
return (short)props>>MIRROR_DELTA_SHIFT;
|
||||
}
|
||||
|
||||
private static final int ESC_MIRROR_DELTA=-4;
|
||||
//private static final int MIN_MIRROR_DELTA=-3;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -560,6 +560,11 @@ public final class UCharacterProperty
|
||||
new IntProperty(2, GCB_MASK, GCB_SHIFT), // GRAPHEME_CLUSTER_BREAK
|
||||
new IntProperty(2, SB_MASK, SB_SHIFT), // SENTENCE_BREAK
|
||||
new IntProperty(2, WB_MASK, WB_SHIFT), // WORD_BREAK
|
||||
new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE
|
||||
int getValue(int c) {
|
||||
return UBiDiProps.INSTANCE.getPairedBracketType(c);
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
public int getIntPropertyValue(int c, int which) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -2823,10 +2823,16 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
public static final int NEWLINE = 12;
|
||||
/** @stable ICU 50 */
|
||||
public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
|
||||
/** @stable ICU 52 */
|
||||
public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
|
||||
/** @stable ICU 52 */
|
||||
public static final int SINGLE_QUOTE = 15; /*[SQ]*/
|
||||
/** @stable ICU 52 */
|
||||
public static final int DOUBLE_QUOTE = 16; /*[DQ]*/
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 14;
|
||||
public static final int COUNT = 17;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3139,6 +3145,34 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
public static final int COUNT = 6;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bidi Paired Bracket Type constants.
|
||||
*
|
||||
* @see UProperty#BIDI_PAIRED_BRACKET_TYPE
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static interface BidiPairedBracketType {
|
||||
/**
|
||||
* Not a paired bracket.
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int NONE = 0;
|
||||
/**
|
||||
* Open paired bracket.
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int OPEN = 1;
|
||||
/**
|
||||
* Close paired bracket.
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int CLOSE = 2;
|
||||
/**
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int COUNT = 3;
|
||||
}
|
||||
|
||||
// public data members -----------------------------------------------
|
||||
|
||||
/**
|
||||
@ -3937,6 +3971,26 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
return UBiDiProps.INSTANCE.getMirror(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@icu} Maps the specified character to its paired bracket character.
|
||||
* For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
|
||||
* Otherwise c itself is returned.
|
||||
* See http://www.unicode.org/reports/tr9/
|
||||
*
|
||||
* @param c the code point to be mapped
|
||||
* @return the paired bracket code point,
|
||||
* or c itself if there is no such mapping
|
||||
* (Bidi_Paired_Bracket_Type=None)
|
||||
*
|
||||
* @see UProperty#BIDI_PAIRED_BRACKET
|
||||
* @see UProperty#BIDI_PAIRED_BRACKET_TYPE
|
||||
* @see #getMirror(int)
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static int getBidiPairedBracket(int c) {
|
||||
return UBiDiProps.INSTANCE.getPairedBracket(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@icu} Returns the combining class of the argument codepoint
|
||||
* @param ch code point whose combining is to be retrieved
|
||||
|
@ -1,7 +1,7 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
@ -78,6 +78,14 @@ public final class UCharacterDirection implements ECharacterDirection {
|
||||
return "Non-Spacing Mark";
|
||||
case BOUNDARY_NEUTRAL :
|
||||
return "Boundary Neutral";
|
||||
case FIRST_STRONG_ISOLATE:
|
||||
return "First Strong Isolate";
|
||||
case LEFT_TO_RIGHT_ISOLATE:
|
||||
return "Left-to-Right Isolate";
|
||||
case RIGHT_TO_LEFT_ISOLATE:
|
||||
return "Right-to-Left Isolate";
|
||||
case POP_DIRECTIONAL_ISOLATE:
|
||||
return "Pop Directional Isolate";
|
||||
}
|
||||
return "Unassigned";
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2007, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 2004-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
@ -475,11 +475,35 @@ public class UCharacterEnums {
|
||||
*/
|
||||
public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = (byte)BOUNDARY_NEUTRAL;
|
||||
|
||||
/**
|
||||
* Directional type FSI
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final byte FIRST_STRONG_ISOLATE = 19;
|
||||
|
||||
/**
|
||||
* Directional type LRI
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final byte LEFT_TO_RIGHT_ISOLATE = 20;
|
||||
|
||||
/**
|
||||
* Directional type RLI
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final byte RIGHT_TO_LEFT_ISOLATE = 21;
|
||||
|
||||
/**
|
||||
* Directional type PDI
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final byte POP_DIRECTIONAL_ISOLATE = 22;
|
||||
|
||||
/**
|
||||
* Number of directional types
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
public static final int CHAR_DIRECTION_COUNT = 19;
|
||||
public static final int CHAR_DIRECTION_COUNT = 23;
|
||||
|
||||
/**
|
||||
* Undefined bidirectional character type. Undefined <code>char</code>
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -605,7 +605,7 @@ public interface UProperty
|
||||
|
||||
/**
|
||||
* Enumerated property Hangul_Syllable_Type, new in Unicode 4.
|
||||
* Returns HangulSyllableType values.
|
||||
* Returns UCharacter.HangulSyllableType values.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
public static final int HANGUL_SYLLABLE_TYPE = 0x100B;
|
||||
@ -664,7 +664,7 @@ public interface UProperty
|
||||
* Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
|
||||
* Used in UAX #29: Text Boundaries
|
||||
* (http://www.unicode.org/reports/tr29/)
|
||||
* Returns UGraphemeClusterBreak values.
|
||||
* Returns UCharacter.GraphemeClusterBreak values.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
public static final int GRAPHEME_CLUSTER_BREAK = 0x1012;
|
||||
@ -673,7 +673,7 @@ public interface UProperty
|
||||
* Enumerated property Sentence_Break (new in Unicode 4.1).
|
||||
* Used in UAX #29: Text Boundaries
|
||||
* (http://www.unicode.org/reports/tr29/)
|
||||
* Returns USentenceBreak values.
|
||||
* Returns UCharacter.SentenceBreak values.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
public static final int SENTENCE_BREAK = 0x1013;
|
||||
@ -682,17 +682,26 @@ public interface UProperty
|
||||
* Enumerated property Word_Break (new in Unicode 4.1).
|
||||
* Used in UAX #29: Text Boundaries
|
||||
* (http://www.unicode.org/reports/tr29/)
|
||||
* Returns UWordBreakValues values.
|
||||
* Returns UCharacter.WordBreak values.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
public static final int WORD_BREAK = 0x1014;
|
||||
|
||||
/**
|
||||
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
|
||||
* Used in UAX #9: Unicode Bidirectional Algorithm
|
||||
* (http://www.unicode.org/reports/tr9/)
|
||||
* Returns UCharacter.BidiPairedBracketType values.
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
|
||||
|
||||
/**
|
||||
* One more than the last constant for enumerated/integer Unicode
|
||||
* properties.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int INT_LIMIT = 0x1015;
|
||||
public static final int INT_LIMIT = 0x1016;
|
||||
|
||||
/**
|
||||
* Bitmask property General_Category_Mask.
|
||||
@ -835,16 +844,21 @@ public interface UProperty
|
||||
*/
|
||||
public static final int UPPERCASE_MAPPING = 0x400C;
|
||||
|
||||
/**
|
||||
* String property Bidi_Paired_Bracket (new in Unicode 6.3).
|
||||
* Corresponds to UCharacter.getBidiPairedBracket.
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int BIDI_PAIRED_BRACKET = 0x400D;
|
||||
|
||||
/**
|
||||
* One more than the last constant for string Unicode properties.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int STRING_LIMIT = 0x400D;
|
||||
public static final int STRING_LIMIT = 0x400E;
|
||||
|
||||
/**
|
||||
* Provisional property Script_Extensions (new in Unicode 6.0).
|
||||
* As a provisional property, it may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* Miscellaneous property Script_Extensions (new in Unicode 6.0).
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
* Corresponds to UScript.hasScript and UScript.getScriptExtensions.
|
||||
|
@ -523,11 +523,16 @@ public final class UScript {
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int PHOENICIAN = 91; /* Phnx */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int MIAO = 92; /* Plrd */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int PHONETIC_POLLARD = 92; /* Plrd */
|
||||
public static final int PHONETIC_POLLARD = MIAO;
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
@ -584,7 +589,6 @@ public final class UScript {
|
||||
*/
|
||||
public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
|
||||
|
||||
/* Private use codes from Qaaa - Qabx are not supported*/
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.8
|
||||
@ -878,6 +882,18 @@ public final class UScript {
|
||||
* @stable ICU 49
|
||||
*/
|
||||
public static final int TIRHUTA = 158;/* Tirh */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final int MAHAJANI = 160; /* Mahj */
|
||||
|
||||
/* Private use codes from Qaaa - Qabx are not supported */
|
||||
|
||||
/**
|
||||
* One higher than the last ISO 15924 script code integer.
|
||||
@ -885,7 +901,7 @@ public final class UScript {
|
||||
* for which integer constants are added above.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int CODE_LIMIT = 159;
|
||||
public static final int CODE_LIMIT = 161;
|
||||
|
||||
private static final String kLocaleScript = "LocaleScript";
|
||||
|
||||
@ -1324,6 +1340,8 @@ public final class UScript {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
// End copy-paste from parsescriptmetadata.py
|
||||
};
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2012, International Business Machines
|
||||
* Copyright (C) 2001-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -850,6 +850,10 @@ public class Bidi {
|
||||
static final byte PDF = UCharacterDirection.POP_DIRECTIONAL_FORMAT;
|
||||
static final byte NSM = UCharacterDirection.DIR_NON_SPACING_MARK;
|
||||
static final byte BN = UCharacterDirection.BOUNDARY_NEUTRAL;
|
||||
static final byte FSI = UCharacterDirection.FIRST_STRONG_ISOLATE;
|
||||
static final byte LRI = UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;
|
||||
static final byte RLI = UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;
|
||||
static final byte PDI = UCharacterDirection.POP_DIRECTIONAL_ISOLATE;
|
||||
|
||||
static final int MASK_R_AL = (1 << R | 1 << AL);
|
||||
|
||||
@ -3954,11 +3958,14 @@ public class Bidi {
|
||||
int dir;
|
||||
|
||||
if (customClassifier == null ||
|
||||
(dir = customClassifier.classify(c)) == Bidi.CLASS_DEFAULT) {
|
||||
return bdp.getClass(c);
|
||||
} else {
|
||||
return dir;
|
||||
(dir = customClassifier.classify(c)) == Bidi.CLASS_DEFAULT) {
|
||||
dir = bdp.getClass(c);
|
||||
}
|
||||
if (dir > 18) {
|
||||
// TODO: Implement Unicode 6.3 BiDi isolates in the ICU BiDi code.
|
||||
dir = ON;
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
@ -143,6 +143,12 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
||||
*/
|
||||
public static final VersionInfo UNICODE_6_2;
|
||||
|
||||
/**
|
||||
* Unicode 6.3 version
|
||||
* @stable ICU 52
|
||||
*/
|
||||
public static final VersionInfo UNICODE_6_3;
|
||||
|
||||
/**
|
||||
* ICU4J current release version
|
||||
* @stable ICU 2.8
|
||||
@ -505,10 +511,11 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
||||
UNICODE_6_0 = getInstance(6, 0, 0, 0);
|
||||
UNICODE_6_1 = getInstance(6, 1, 0, 0);
|
||||
UNICODE_6_2 = getInstance(6, 2, 0, 0);
|
||||
UNICODE_6_3 = getInstance(6, 3, 0, 0);
|
||||
|
||||
ICU_VERSION = getInstance(52, 0, 1, 0);
|
||||
ICU_DATA_VERSION = getInstance(52, 0, 1, 0);
|
||||
UNICODE_VERSION = UNICODE_6_2;
|
||||
UNICODE_VERSION = UNICODE_6_3;
|
||||
|
||||
UCOL_RUNTIME_VERSION = getInstance(7);
|
||||
UCOL_BUILDER_VERSION = getInstance(8);
|
||||
@ -529,9 +536,9 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
||||
/**
|
||||
* Gets the int from the version numbers
|
||||
* @param major non-negative version number
|
||||
* @param minor non-negativeversion number
|
||||
* @param milli non-negativeversion number
|
||||
* @param micro non-negativeversion number
|
||||
* @param minor non-negative version number
|
||||
* @param milli non-negative version number
|
||||
* @param micro non-negative version number
|
||||
*/
|
||||
private static int getInt(int major, int minor, int milli, int micro)
|
||||
{
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b246383d534609ffb6a14c49236f9c325b445cb60e97c0ddf5948ac47406d82f
|
||||
size 10576546
|
||||
oid sha256:dd2c9faa74a7029d57014b097e4b380f038c54d837ca90b197321e183eca432d
|
||||
size 10578576
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:43cc25f91794de5c1ba2aa48032a9e69653da5e00d0f0fc5cc72db5d2fb0a170
|
||||
oid sha256:ee9e2b3884dcfd3d4905d0005280aa429b8b1846667f9abb475157cd6fc3f3d0
|
||||
size 98429
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bbedc52b17fb2c3437c1d6498655b6223654395d08edf79d661401f3dab224d0
|
||||
size 724510
|
||||
oid sha256:2dd62bbd4ef0fe4b89295bffdb22c67e718effb1ada3a5e28e516b7e3576adec
|
||||
size 724514
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 2002-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -436,7 +436,11 @@ public class CollationAPITest extends TestFmwk {
|
||||
// Assume that the UCD and UCA versions are the same,
|
||||
// rather than hardcoding (and updating each time) a particular UCA version.
|
||||
VersionInfo ucdVersion = UCharacter.getUnicodeVersion();
|
||||
doAssert(col.getUCAVersion().equals(ucdVersion), "Expected UCA version "+ucdVersion.toString()+" got "+col.getUCAVersion().toString());
|
||||
VersionInfo ucaVersion = col.getUCAVersion();
|
||||
doAssert(logKnownIssue("9101", "update to collv2 & UCA 6.3") ?
|
||||
ucdVersion.getMajor() == 6 && ucdVersion.getMinor() == 3 :
|
||||
ucaVersion.equals(ucdVersion),
|
||||
"Expected UCA version "+ucdVersion.toString()+" got "+col.getUCAVersion().toString());
|
||||
|
||||
doAssert((col.compare("ab", "abc") < 0), "ab < abc comparison failed");
|
||||
doAssert((col.compare("ab", "AB") < 0), "ab < AB comparison failed");
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
# NormalizationCorrections-6.2.0.txt
|
||||
# Date: 2012-05-15, 22:25:00 GMT [KW, LI]
|
||||
# NormalizationCorrections-6.3.0.txt
|
||||
# Date: 2013-01-02, 08:39:00 GMT [KW, LI]
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2012 Unicode, Inc.
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# The normalization stability policy of the Unicode Consortium
|
||||
|
@ -1,8 +1,8 @@
|
||||
# NormalizationTest-6.2.0.txt
|
||||
# Date: 2012-08-14, 17:54:58 GMT [MD]
|
||||
# NormalizationTest-6.3.0.txt
|
||||
# Date: 2012-12-20, 22:18:30 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2012 Unicode, Inc.
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -1,8 +1,8 @@
|
||||
# SpecialCasing-6.2.0.txt
|
||||
# Date: 2012-05-23, 20:35:15 GMT [MD]
|
||||
# SpecialCasing-6.3.0.txt
|
||||
# Date: 2013-03-12, 22:36:00 GMT [LI temp]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2012 Unicode, Inc.
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
@ -39,7 +39,7 @@
|
||||
# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
|
||||
#
|
||||
# A context for a character C is defined by Section 3.13 Default Case
|
||||
# Operations, of The Unicode Standard, Version 5.0.
|
||||
# Algorithms, of The Unicode Standard, Version 6.3.
|
||||
# (This is identical to the context defined by Unicode 4.1.0,
|
||||
# as specified in http://www.unicode.org/versions/Unicode4.1.0/)
|
||||
#
|
||||
@ -273,4 +273,3 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
|
||||
|
||||
# EOF
|
||||
|
||||
|
@ -1509,6 +1509,7 @@
|
||||
0619;ARABIC SMALL DAMMA;Mn;31;NSM;;;;;N;;;;;
|
||||
061A;ARABIC SMALL KASRA;Mn;32;NSM;;;;;N;;;;;
|
||||
061B;ARABIC SEMICOLON;Po;0;AL;;;;;N;;;;;
|
||||
061C;ARABIC LETTER MARK;Cf;0;AL;;;;;N;;;;;
|
||||
061E;ARABIC TRIPLE DOT PUNCTUATION MARK;Po;0;AL;;;;;N;;;;;
|
||||
061F;ARABIC QUESTION MARK;Po;0;AL;;;;;N;;;;;
|
||||
0620;ARABIC LETTER KASHMIRI YEH;Lo;0;AL;;;;;N;;;;;
|
||||
@ -5296,7 +5297,7 @@
|
||||
180B;MONGOLIAN FREE VARIATION SELECTOR ONE;Mn;0;NSM;;;;;N;;;;;
|
||||
180C;MONGOLIAN FREE VARIATION SELECTOR TWO;Mn;0;NSM;;;;;N;;;;;
|
||||
180D;MONGOLIAN FREE VARIATION SELECTOR THREE;Mn;0;NSM;;;;;N;;;;;
|
||||
180E;MONGOLIAN VOWEL SEPARATOR;Zs;0;WS;;;;;N;;;;;
|
||||
180E;MONGOLIAN VOWEL SEPARATOR;Cf;0;BN;;;;;N;;;;;
|
||||
1810;MONGOLIAN DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
|
||||
1811;MONGOLIAN DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
|
||||
1812;MONGOLIAN DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
|
||||
@ -5751,7 +5752,7 @@
|
||||
1A18;BUGINESE VOWEL SIGN U;Mn;220;NSM;;;;;N;;;;;
|
||||
1A19;BUGINESE VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
|
||||
1A1A;BUGINESE VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
|
||||
1A1B;BUGINESE VOWEL SIGN AE;Mc;0;L;;;;;N;;;;;
|
||||
1A1B;BUGINESE VOWEL SIGN AE;Mn;0;NSM;;;;;N;;;;;
|
||||
1A1E;BUGINESE PALLAWA;Po;0;L;;;;;N;;;;;
|
||||
1A1F;BUGINESE END OF SECTION;Po;0;L;;;;;N;;;;;
|
||||
1A20;TAI THAM LETTER HIGH KA;Lo;0;L;;;;;N;;;;;
|
||||
@ -7116,6 +7117,10 @@
|
||||
2062;INVISIBLE TIMES;Cf;0;BN;;;;;N;;;;;
|
||||
2063;INVISIBLE SEPARATOR;Cf;0;BN;;;;;N;;;;;
|
||||
2064;INVISIBLE PLUS;Cf;0;BN;;;;;N;;;;;
|
||||
2066;LEFT-TO-RIGHT ISOLATE;Cf;0;LRI;;;;;N;;;;;
|
||||
2067;RIGHT-TO-LEFT ISOLATE;Cf;0;RLI;;;;;N;;;;;
|
||||
2068;FIRST STRONG ISOLATE;Cf;0;FSI;;;;;N;;;;;
|
||||
2069;POP DIRECTIONAL ISOLATE;Cf;0;PDI;;;;;N;;;;;
|
||||
206A;INHIBIT SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
|
||||
206B;ACTIVATE SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
|
||||
206C;INHIBIT ARABIC FORM SHAPING;Cf;0;BN;;;;;N;;;;;
|
||||
@ -18740,8 +18745,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
|
||||
12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;;
|
||||
12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;;
|
||||
12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;;
|
||||
12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;;
|
||||
12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;;
|
||||
12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;2;N;;;;;
|
||||
12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;3;N;;;;;
|
||||
12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;;
|
||||
12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;;
|
||||
1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines Corporation and
|
||||
* Copyright (C) 2010-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -27,6 +27,11 @@ public class BiDiConformanceTest extends TestFmwk {
|
||||
public BiDiConformanceTest() {}
|
||||
|
||||
public void TestBidiTest() throws IOException {
|
||||
if(logKnownIssue("10142",
|
||||
"Update the ICU BiDi code to implement the additions in the " +
|
||||
"Unicode 6.3 BiDi Algorithm, and reenable the BiDi conformance test.")) {
|
||||
return;
|
||||
}
|
||||
BufferedReader bidiTestFile=TestUtil.getDataReader("unicode/BidiTest.txt");
|
||||
Bidi ubidi=new Bidi();
|
||||
ubidi.setCustomClassifier(new ConfTestBidiClassifier());
|
||||
@ -140,7 +145,12 @@ outerLoop:
|
||||
0x4f, // 'O' for RLO
|
||||
0x2a, // '*' for PDF
|
||||
0x60, // '`' for NSM
|
||||
0x7c // '|' for BN
|
||||
0x7c, // '|' for BN
|
||||
// new in Unicode 6.3/ICU 52
|
||||
0x53, // 'S' for FSI
|
||||
0x69, // 'i' for LRI
|
||||
0x49, // 'I' for RLI
|
||||
0x2e // '.' for PDI
|
||||
};
|
||||
private class ConfTestBidiClassifier extends BidiClassifier {
|
||||
public ConfTestBidiClassifier() {
|
||||
@ -159,7 +169,7 @@ outerLoop:
|
||||
}
|
||||
}
|
||||
private static final int biDiClassNameLengths[]={
|
||||
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
|
||||
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
|
||||
};
|
||||
private void parseInputStringFromBiDiClasses() {
|
||||
inputStringBuilder.delete(0, 0x7fffffff);
|
||||
@ -178,6 +188,8 @@ outerLoop:
|
||||
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='R') {
|
||||
if((c2=line.charAt(lineIndex+2))=='E') {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;
|
||||
} else if(line.charAt(lineIndex+2)=='I') {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;
|
||||
} else if(c2=='O') {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;
|
||||
}
|
||||
@ -188,6 +200,8 @@ outerLoop:
|
||||
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='L') {
|
||||
if((c2=line.charAt(lineIndex+2))=='E') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;
|
||||
} else if(line.charAt(lineIndex+2)=='I') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;
|
||||
} else if(c2=='O') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;
|
||||
}
|
||||
@ -226,12 +240,18 @@ outerLoop:
|
||||
biDiClass=UCharacterDirection.WHITE_SPACE_NEUTRAL;
|
||||
} else if(c0=='O' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='N') {
|
||||
biDiClass=UCharacterDirection.OTHER_NEUTRAL;
|
||||
} else if(c0=='P' && (lineIndex+2)<line.length() &&
|
||||
line.charAt(lineIndex+1)=='D' && line.charAt(lineIndex+2)=='F') {
|
||||
biDiClass=UCharacterDirection.POP_DIRECTIONAL_FORMAT;
|
||||
} else if(c0=='P' && (lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='D') {
|
||||
if(line.charAt(lineIndex+2)=='F') {
|
||||
biDiClass=UCharacterDirection.POP_DIRECTIONAL_FORMAT;
|
||||
} else if(line.charAt(lineIndex+2)=='I') {
|
||||
biDiClass=UCharacterDirection.POP_DIRECTIONAL_ISOLATE;
|
||||
}
|
||||
} else if(c0=='N' && (lineIndex+2)<line.length() &&
|
||||
line.charAt(lineIndex+1)=='S' && line.charAt(lineIndex+2)=='M') {
|
||||
biDiClass=UCharacterDirection.DIR_NON_SPACING_MARK;
|
||||
} else if(c0=='F' && (lineIndex+2)<line.length() &&
|
||||
line.charAt(lineIndex+1)=='S' && line.charAt(lineIndex+2)=='I') {
|
||||
biDiClass=UCharacterDirection.FIRST_STRONG_ISOLATE;
|
||||
}
|
||||
// Now we verify that the class name is terminated properly,
|
||||
// and not just the start of a longer word.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Copyright (C) 2001-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -28,7 +28,9 @@ public class BidiTest extends TestFmwk {
|
||||
/* L R EN ES ET AN CS B S WS ON */
|
||||
0x61, 0x5d0, 0x30, 0x2f, 0x25, 0x660, 0x2c, 0xa, 0x9, 0x20, 0x26,
|
||||
/* LRE LRO AL RLE RLO PDF NSM BN */
|
||||
0x202a, 0x202d, 0x627, 0x202b, 0x202e, 0x202c, 0x308, 0x200c
|
||||
0x202a, 0x202d, 0x627, 0x202b, 0x202e, 0x202c, 0x308, 0x200c,
|
||||
/* FSI LRI RLI PDI */
|
||||
0x2068, 0x2066, 0x2067, 0x2069 /* new in Unicode 6.3/ICU 52 */
|
||||
};
|
||||
|
||||
static {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2007, International Business Machines
|
||||
* Copyright (C) 2001-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -37,11 +37,16 @@ public class TestData {
|
||||
protected static final int PDF = UCharacterDirection.POP_DIRECTIONAL_FORMAT;
|
||||
protected static final int NSM = UCharacterDirection.DIR_NON_SPACING_MARK;
|
||||
protected static final int BN = UCharacterDirection.BOUNDARY_NEUTRAL;
|
||||
protected static final int FSI = UCharacterDirection.FIRST_STRONG_ISOLATE;
|
||||
protected static final int LRI = UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;
|
||||
protected static final int RLI = UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;
|
||||
protected static final int PDI = UCharacterDirection.POP_DIRECTIONAL_ISOLATE;
|
||||
protected static final int DEF = Bidi.CLASS_DEFAULT;
|
||||
|
||||
protected static final String[] dirPropNames = {
|
||||
"L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S", "WS", "ON",
|
||||
"LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
|
||||
"LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN",
|
||||
"FSI", "LRI", "RLI", "PDI" /* new in Unicode 6.3/ICU 52 */
|
||||
};
|
||||
protected static final short[][] testDirProps = {
|
||||
{ L, L, WS, L, WS, EN, L, B }, // 0
|
||||
|
@ -579,6 +579,8 @@ public class TestUScript extends TestFmwk {
|
||||
"Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
|
||||
/* new in ICU 49 */
|
||||
"Hluw", "Khoj", "Tirh",
|
||||
/* new in ICU 52 */
|
||||
"Aghb", "Mahj"
|
||||
};
|
||||
String[] expectedShort = new String[]{
|
||||
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
|
||||
@ -601,6 +603,8 @@ public class TestUScript extends TestFmwk {
|
||||
"Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
|
||||
/* new in ICU 49 */
|
||||
"Hluw", "Khoj", "Tirh",
|
||||
/* new in ICU 52 */
|
||||
"Aghb", "Mahj"
|
||||
};
|
||||
if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
|
||||
errln("need to add new script codes in lang.TestUScript.java!");
|
||||
|
@ -1,7 +1,7 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 2001-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.lang;
|
||||
@ -65,6 +65,10 @@ public class UCharacterDirectionTest extends TestFmwk
|
||||
"Pop Directional Format",
|
||||
"Non-Spacing Mark",
|
||||
"Boundary Neutral",
|
||||
"First Strong Isolate",
|
||||
"Left-to-Right Isolate",
|
||||
"Right-to-Left Isolate",
|
||||
"Pop Directional Isolate",
|
||||
"Unassigned"};
|
||||
|
||||
for (int i = UCharacterDirection.LEFT_TO_RIGHT;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -46,7 +46,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
/**
|
||||
* ICU4J data version number
|
||||
*/
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("6.2.0.0");
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("6.3.0.0");
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
@ -534,6 +534,16 @@ public final class UCharacterTest extends TestFmwk
|
||||
if(c3!=start) {
|
||||
errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3));
|
||||
}
|
||||
c3=UCharacter.getBidiPairedBracket(start);
|
||||
if(UCharacter.getIntPropertyValue(start, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE) {
|
||||
if(c3!=start) {
|
||||
errln("u_getBidiPairedBracket(U+"+hex(start)+") != self for bpt(c)==None");
|
||||
}
|
||||
} else {
|
||||
if(c3!=c2) {
|
||||
errln("u_getBidiPairedBracket(U+"+hex(start)+") != U+"+hex(c2)+" = bmg(c)'");
|
||||
}
|
||||
}
|
||||
} while(++start<=end);
|
||||
}
|
||||
|
||||
@ -673,10 +683,10 @@ public final class UCharacterTest extends TestFmwk
|
||||
final String TYPE =
|
||||
"LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
|
||||
|
||||
// directory types used in the UnicodeData file
|
||||
// directionality types used in the UnicodeData file
|
||||
// padded by spaces to make each type size 4
|
||||
final String DIR =
|
||||
"L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
|
||||
"L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ";
|
||||
|
||||
Normalizer2 nfc = Normalizer2.getNFCInstance();
|
||||
Normalizer2 nfkc = Normalizer2.getNFKCInstance();
|
||||
@ -802,7 +812,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
}
|
||||
int i=UCharacter.getIntPropertyValue(ch, UProperty.DECOMPOSITION_TYPE);
|
||||
assertEquals(
|
||||
String.format("error: u_getIntPropertyValue(U+%04x, UCHAR_DECOMPOSITION_TYPE) is wrong", ch),
|
||||
String.format("error: UCharacter.getIntPropertyValue(U+%04x, UProperty.DECOMPOSITION_TYPE) is wrong", ch),
|
||||
dt, i);
|
||||
/* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
|
||||
String mapping=nfkc.getRawDecomposition(ch);
|
||||
@ -1492,6 +1502,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
{ 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x08A0, UCharacterDirection.RIGHT_TO_LEFT },
|
||||
{ 0x0900, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
|
||||
{ 0x20A0, UCharacterDirection.LEFT_TO_RIGHT },
|
||||
{ 0x20D0, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
|
||||
{ 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT },
|
||||
{ 0xFB50, UCharacterDirection.RIGHT_TO_LEFT },
|
||||
{ 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
@ -2067,6 +2079,20 @@ public final class UCharacterTest extends TestFmwk
|
||||
{ 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x1eee4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
|
||||
{ -1, 0x630, 0 }, /* version break for Unicode 6.3 */
|
||||
|
||||
/* unassigned code points in the currency symbols block now default to ET */
|
||||
{ 0x20C0, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR },
|
||||
{ 0x20CF, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR },
|
||||
|
||||
/* new property in Unicode 6.3 */
|
||||
{ 0x0027, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE },
|
||||
{ 0x0028, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN },
|
||||
{ 0x0029, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE },
|
||||
{ 0xFF5C, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE },
|
||||
{ 0xFF5B, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN },
|
||||
{ 0xFF5D, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE },
|
||||
|
||||
/* undefined UProperty values */
|
||||
{ 0x61, 0x4a7, 0 },
|
||||
{ 0x234bc, 0x15ed, 0 }
|
||||
@ -2121,6 +2147,9 @@ public final class UCharacterTest extends TestFmwk
|
||||
if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) {
|
||||
errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n");
|
||||
}
|
||||
if(UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)!=UCharacter.BidiPairedBracketType.COUNT-1) {
|
||||
errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE) wrong\n");
|
||||
}
|
||||
/*JB#2410*/
|
||||
if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) {
|
||||
errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n");
|
||||
@ -2227,8 +2256,6 @@ public final class UCharacterTest extends TestFmwk
|
||||
// where UCharacter.NO_NUMERIC_VALUE is turned into -1.
|
||||
// getNumericValue() returns -2 if the code point has a value
|
||||
// which is not a non-negative integer. (This is mostly auto-converted to -2.)
|
||||
{ 0x12456, UCharacter.NumericType.NUMERIC, -1. },
|
||||
{ 0x12457, UCharacter.NumericType.NUMERIC, -1. },
|
||||
{ 0x0F33, UCharacter.NumericType.NUMERIC, -1./2. },
|
||||
{ 0x0C66, UCharacter.NumericType.DECIMAL, 0 },
|
||||
{ 0x96f6, UCharacter.NumericType.NUMERIC, 0 },
|
||||
@ -2389,6 +2416,32 @@ public final class UCharacterTest extends TestFmwk
|
||||
}
|
||||
}
|
||||
|
||||
public void TestBidiPairedBracketType() {
|
||||
// BidiBrackets-6.3.0.txt says:
|
||||
//
|
||||
// The set of code points listed in this file was originally derived
|
||||
// using the character properties General_Category (gc), Bidi_Class (bc),
|
||||
// Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows:
|
||||
// two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe,
|
||||
// both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket
|
||||
// maps A to B and vice versa, and their Bidi_Paired_Bracket_Type
|
||||
// property values are Open and Close, respectively.
|
||||
UnicodeSet bpt = new UnicodeSet("[:^bpt=n:]");
|
||||
assertTrue("bpt!=None is not empty", !bpt.isEmpty());
|
||||
// The following should always be true.
|
||||
UnicodeSet mirrored = new UnicodeSet("[:Bidi_M:]");
|
||||
UnicodeSet other_neutral = new UnicodeSet("[:bc=ON:]");
|
||||
assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt));
|
||||
assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt));
|
||||
// The following are true at least initially in Unicode 6.3.
|
||||
UnicodeSet bpt_open = new UnicodeSet("[:bpt=o:]");
|
||||
UnicodeSet bpt_close = new UnicodeSet("[:bpt=c:]");
|
||||
UnicodeSet ps = new UnicodeSet("[:Ps:]");
|
||||
UnicodeSet pe = new UnicodeSet("[:Pe:]");
|
||||
assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open));
|
||||
assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close));
|
||||
}
|
||||
|
||||
public void TestIsBMP()
|
||||
{
|
||||
int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff};
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -588,6 +588,8 @@ public class RBBITest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO: Move these test cases to rbbitst.txt if they aren't there already, then remove this test. It is redundant.
|
||||
public void TestTailoredBreaks() {
|
||||
class TBItem {
|
||||
private int type;
|
||||
@ -661,7 +663,7 @@ public class RBBITest extends TestFmwk {
|
||||
// KIND_WORD "en_US_POSIX"
|
||||
final String posxWordText = "Can't have breaks in xx:yy or struct.field for CS-types.";
|
||||
final int[] posxWordTOffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };
|
||||
final int[] posxWordROffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };
|
||||
final int[] posxWordROffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };
|
||||
// KIND_SENTENCE "el"
|
||||
final String elSentText = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
|
||||
"\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2012 International Business Machines Corporation and
|
||||
* Copyright (C) 2003-2013 International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -262,8 +262,12 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
UnicodeSet fCRSet;
|
||||
UnicodeSet fLFSet;
|
||||
UnicodeSet fNewlineSet;
|
||||
UnicodeSet fRegionalIndicatorSet;
|
||||
UnicodeSet fKatakanaSet;
|
||||
UnicodeSet fHebrew_LetterSet;
|
||||
UnicodeSet fALetterSet;
|
||||
UnicodeSet fSingle_QuoteSet;
|
||||
UnicodeSet fDouble_QuoteSet;
|
||||
UnicodeSet fMidNumLetSet;
|
||||
UnicodeSet fMidLetterSet;
|
||||
UnicodeSet fMidNumSet;
|
||||
@ -271,9 +275,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
UnicodeSet fFormatSet;
|
||||
UnicodeSet fExtendSet;
|
||||
UnicodeSet fExtendNumLetSet;
|
||||
UnicodeSet fRegionalIndicatorSet;
|
||||
UnicodeSet fOtherSet;
|
||||
|
||||
UnicodeSet fOtherSet;
|
||||
UnicodeSet fDictionaryCjkSet;
|
||||
|
||||
|
||||
@ -284,9 +286,13 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]");
|
||||
fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]");
|
||||
fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]");
|
||||
fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
|
||||
fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]");
|
||||
fALetterSet.removeAll(fDictionaryCjkSet);
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
|
||||
fSingle_QuoteSet = new UnicodeSet("[\\p{Word_Break = Single_Quote}]");
|
||||
fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");
|
||||
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
|
||||
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
|
||||
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
|
||||
@ -294,7 +300,6 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
|
||||
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
|
||||
fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
|
||||
|
||||
fOtherSet = new UnicodeSet();
|
||||
fOtherSet.complement();
|
||||
@ -302,7 +307,10 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fOtherSet.removeAll(fLFSet);
|
||||
fOtherSet.removeAll(fNewlineSet);
|
||||
fOtherSet.removeAll(fALetterSet);
|
||||
fOtherSet.removeAll(fSingle_QuoteSet);
|
||||
fOtherSet.removeAll(fDouble_QuoteSet);
|
||||
fOtherSet.removeAll(fKatakanaSet);
|
||||
fOtherSet.removeAll(fHebrew_LetterSet);
|
||||
fOtherSet.removeAll(fMidLetterSet);
|
||||
fOtherSet.removeAll(fMidNumSet);
|
||||
fOtherSet.removeAll(fNumericSet);
|
||||
@ -319,8 +327,12 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fSets.add(fCRSet);
|
||||
fSets.add(fLFSet);
|
||||
fSets.add(fNewlineSet);
|
||||
fSets.add(fRegionalIndicatorSet);
|
||||
fSets.add(fHebrew_LetterSet);
|
||||
fSets.add(fALetterSet);
|
||||
//fSets.add(fKatakanaSet); // TODO: work out how to test katakana
|
||||
fSets.add(fSingle_QuoteSet);
|
||||
fSets.add(fDouble_QuoteSet);
|
||||
fSets.add(fMidLetterSet);
|
||||
fSets.add(fMidNumLetSet);
|
||||
fSets.add(fMidNumSet);
|
||||
@ -328,7 +340,6 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fSets.add(fFormatSet);
|
||||
fSets.add(fExtendSet);
|
||||
fSets.add(fExtendNumLetSet);
|
||||
fSets.add(fRegionalIndicatorSet);
|
||||
fSets.add(fOtherSet);
|
||||
}
|
||||
|
||||
@ -407,25 +418,39 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
break;
|
||||
}
|
||||
|
||||
// Rule (5). ALetter x ALetter
|
||||
if (fALetterSet.contains(c1) &&
|
||||
fALetterSet.contains(c2)) {
|
||||
// Rule (5). (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)
|
||||
if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
|
||||
(fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (6) ALetter x (MidLetter | MidNumLet) ALetter
|
||||
|
||||
// Rule (6) (ALetter | Hebrew_Letter) x (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
|
||||
//
|
||||
if ( fALetterSet.contains(c1) &&
|
||||
(fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
|
||||
setContains(fALetterSet, c3)) {
|
||||
if ( (fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
|
||||
(fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2)) &&
|
||||
(setContains(fALetterSet, c3) || setContains(fHebrew_LetterSet, c3))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Rule (7) ALetter (MidLetter | MidNumLet) x ALetter
|
||||
if (fALetterSet.contains(c0) &&
|
||||
(fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
|
||||
fALetterSet.contains(c2)) {
|
||||
|
||||
// Rule (7) (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) x (ALetter | Hebrew_Letter)
|
||||
if ((fALetterSet.contains(c0) || fHebrew_LetterSet.contains(c0)) &&
|
||||
(fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1)) &&
|
||||
(fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (7a) Hebrew_Letter x Single_Quote
|
||||
if (fHebrew_LetterSet.contains(c1) && fSingle_QuoteSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (7b) Hebrew_Letter x Double_Quote Hebrew_Letter
|
||||
if (fHebrew_LetterSet.contains(c1) && fDouble_QuoteSet.contains(c2) && setContains(fHebrew_LetterSet,c3)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (7c) Hebrew_Letter Double_Quote x Hebrew_Letter
|
||||
if (fHebrew_LetterSet.contains(c0) && fDouble_QuoteSet.contains(c1) && fHebrew_LetterSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -435,29 +460,29 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (9) ALetter x Numeric
|
||||
if (fALetterSet.contains(c1) &&
|
||||
fNumericSet.contains(c2)) {
|
||||
// Rule (9) (ALetter | Hebrew_Letter) x Numeric
|
||||
if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
|
||||
fNumericSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (10) Numeric x ALetter
|
||||
// Rule (10) Numeric x (ALetter | Hebrew_Letter)
|
||||
if (fNumericSet.contains(c1) &&
|
||||
fALetterSet.contains(c2)) {
|
||||
(fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (11) Numeric (MidNum | MidNumLet) x Numeric
|
||||
if ( fNumericSet.contains(c0) &&
|
||||
(fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
|
||||
|
||||
// Rule (11) Numeric (MidNum | MidNumLet | Single_Quote) x Numeric
|
||||
if (fNumericSet.contains(c0) &&
|
||||
(fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1)) &&
|
||||
fNumericSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (12) Numeric x (MidNum | MidNumLet) Numeric
|
||||
// Rule (12) Numeric x (MidNum | MidNumLet | Single_Quote) Numeric
|
||||
if (fNumericSet.contains(c1) &&
|
||||
(fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
|
||||
setContains(fNumericSet, c3)) {
|
||||
(fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2)) &&
|
||||
setContains(fNumericSet, c3)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -466,19 +491,21 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fKatakanaSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule 13a (ALetter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet
|
||||
if ((fALetterSet.contains(c1) || fNumericSet.contains(c1) ||
|
||||
|
||||
// Rule 13a (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet
|
||||
if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1) ||fNumericSet.contains(c1) ||
|
||||
fKatakanaSet.contains(c1) || fExtendNumLetSet.contains(c1)) &&
|
||||
fExtendNumLetSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
// Rule 13b ExtendNumLet x (ALetter | Numeric | Katakana | ExtendNumLet)
|
||||
|
||||
// Rule 13b ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||
if (fExtendNumLetSet.contains(c1) &&
|
||||
(fALetterSet.contains(c2) || fNumericSet.contains(c2) ||
|
||||
fKatakanaSet.contains(c2) || fExtendNumLetSet.contains(c2))) {
|
||||
(fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2) ||
|
||||
fNumericSet.contains(c2) || fKatakanaSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Rule 13c Do not break between Regional Indicators.
|
||||
// Regional_Indicator × Regional_Indicator
|
||||
@ -1976,7 +2003,6 @@ public void TestRTWordMonkey() {
|
||||
if (params.inclusion >= 9) {
|
||||
loopCount = 2000;
|
||||
}
|
||||
|
||||
logln("Word Break Monkey Test");
|
||||
RBBIWordMonkey m = new RBBIWordMonkey();
|
||||
BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
|
||||
|
@ -711,10 +711,11 @@ Bangkok)•</data>
|
||||
|
||||
# UBreakIteratorType UBRK_WORD, Locale "en_US_POSIX"
|
||||
# Words don't include colon or period (cldrbug #1969).
|
||||
# Unicode 6.3 change: colon now breaks words.
|
||||
|
||||
<locale en_US>
|
||||
<word>
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
|
||||
<locale en_US_POSIX>
|
||||
|
@ -529,8 +529,8 @@ public class SpoofCheckerTest extends TestFmwk {
|
||||
{"アaー〆", "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
|
||||
{"a1١", "UNRESTRICTIVE", "[0٠]", "Latn", "Arab Thaa", "Arab Thaa"},
|
||||
{"a1١۱", "UNRESTRICTIVE", "[0٠۰]", "Latn Arab", "", ""},
|
||||
{"١ー〆aア1१۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab Deva", "", ""},
|
||||
{"aアー〆1१١۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab Deva", "", ""},
|
||||
{"١ー〆aア1१۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi", "Deva Kthi"},
|
||||
{"aアー〆1१١۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi", "Deva Kthi"},
|
||||
};
|
||||
for (String[] test : tests) {
|
||||
String testString = test[0];
|
||||
|
Loading…
Reference in New Issue
Block a user