ICU-7237 port BidiTest.txt BiDi conformance test to Java; fix Bidi.java bug parallel to ICU4C source/common/ubidi.c change 26898 (missing NoContextRTL())
X-SVN-Rev: 28287
This commit is contained in:
parent
d84415e717
commit
13e8abd068
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -137,6 +137,8 @@ icu4j/main/tests/core/manifest.stub -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/data/rbbi/english.dict -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/data/resources/testmessages.properties -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/BidiTest.txt -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/BiDiConformanceTest.java -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text
|
||||
icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -2584,7 +2584,7 @@ public class Bidi {
|
||||
levState.impAct = impTabPair.impact[levState.runLevel & 1];
|
||||
processPropertySeq(levState, sor, start, start);
|
||||
/* initialize for property state table */
|
||||
if (dirProps[start] == NSM) {
|
||||
if (NoContextRTL(dirProps[start]) == NSM) {
|
||||
stateImp = (short)(1 + sor);
|
||||
} else {
|
||||
stateImp = 0;
|
||||
|
219558
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/BidiTest.txt
Normal file
219558
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/BidiTest.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,391 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.bidi;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
import com.ibm.icu.text.Bidi;
|
||||
import com.ibm.icu.text.BidiClassifier;
|
||||
|
||||
/**
|
||||
* @author Markus W. Scherer
|
||||
* BiDi conformance test, using the Unicode BidiTest.txt file.
|
||||
* Ported from ICU4C intltest/bidiconf.cpp .
|
||||
*/
|
||||
public class BiDiConformanceTest extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new BiDiConformanceTest().run(args);
|
||||
}
|
||||
/** Creates a test instance; no setup is done here beyond the field initializers. */
public BiDiConformanceTest() {
    super();
}
|
||||
|
||||
public void TestBidiTest() throws IOException {
|
||||
BufferedReader bidiTestFile=TestUtil.getDataReader("unicode/BidiTest.txt");
|
||||
Bidi ubidi=new Bidi();
|
||||
ubidi.setCustomClassifier(new ConfTestBidiClassifier());
|
||||
lineNumber=0;
|
||||
levelsCount=0;
|
||||
orderingCount=0;
|
||||
errorCount=0;
|
||||
outerLoop:
|
||||
while(errorCount<10 && (line=bidiTestFile.readLine())!=null) {
|
||||
++lineNumber;
|
||||
lineIndex=0;
|
||||
// Remove trailing comments and whitespace.
|
||||
int commentStart=line.indexOf('#');
|
||||
if(commentStart>=0) {
|
||||
line=line.substring(0, commentStart);
|
||||
}
|
||||
if(!skipWhitespace()) {
|
||||
continue; // Skip empty and comment-only lines.
|
||||
}
|
||||
if(line.charAt(lineIndex)=='@') {
|
||||
++lineIndex;
|
||||
if(line.startsWith("Levels:", lineIndex)) {
|
||||
lineIndex+=7;
|
||||
parseLevels();
|
||||
} else if(line.startsWith("Reorder:", lineIndex)) {
|
||||
lineIndex+=8;
|
||||
parseOrdering();
|
||||
}
|
||||
// Skip unknown @Xyz: ...
|
||||
} else {
|
||||
parseInputStringFromBiDiClasses();
|
||||
if(!skipWhitespace() || line.charAt(lineIndex++)!=';') {
|
||||
errln("missing ; separator on input line "+line);
|
||||
return;
|
||||
}
|
||||
int bitset=Integer.parseInt(line.substring(lineIndex).trim(), 16);
|
||||
// Loop over the bitset.
|
||||
for(int i=0; i<=3; ++i) {
|
||||
if((bitset&(1<<i))!=0) {
|
||||
ubidi.setPara(inputString, paraLevels[i], null);
|
||||
byte actualLevels[]=ubidi.getLevels();
|
||||
if(!checkLevels(actualLevels, paraLevelNames[i])) {
|
||||
continue outerLoop;
|
||||
}
|
||||
if(!checkOrdering(ubidi, paraLevelNames[i])) {
|
||||
continue outerLoop;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte paraLevels[]={
|
||||
Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT,
|
||||
0,
|
||||
1,
|
||||
Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT
|
||||
};
|
||||
private static final String paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
|
||||
|
||||
private void parseLevels() {
|
||||
directionBits=0;
|
||||
levelsCount=0;
|
||||
if(skipWhitespace()) {
|
||||
String[] levelStrings=line.substring(lineIndex).split("[ \t]+");
|
||||
for(String levelString: levelStrings) {
|
||||
if(levelString.equals("x")) {
|
||||
levels[levelsCount++]=Bidi.LEVEL_DEFAULT_LTR;
|
||||
} else {
|
||||
int value=Integer.parseInt(levelString);
|
||||
if(value<0 || value>(Bidi.MAX_EXPLICIT_LEVEL+1)) {
|
||||
throw new IllegalArgumentException(
|
||||
"@Levels: parse error at "+levelString+" in "+line);
|
||||
}
|
||||
levels[levelsCount++]=(byte)value;
|
||||
directionBits|=(1<<(value&1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private void parseOrdering() {
|
||||
orderingCount=0;
|
||||
if(skipWhitespace()) {
|
||||
String[] orderingStrings=line.substring(lineIndex).split("[ \t]+");
|
||||
for(String orderingString: orderingStrings) {
|
||||
int value=Integer.parseInt(orderingString);
|
||||
if(value>=1000) {
|
||||
throw new IllegalArgumentException(
|
||||
"@Reorder: parse error at "+orderingString+" in "+line);
|
||||
}
|
||||
ordering[orderingCount++]=value;
|
||||
}
|
||||
}
|
||||
}
|
||||
private static char charFromBiDiClass[]={
|
||||
0x6c, // 'l' for L
|
||||
0x52, // 'R' for R
|
||||
0x33, // '3' for EN
|
||||
0x2d, // '-' for ES
|
||||
0x25, // '%' for ET
|
||||
0x39, // '9' for AN
|
||||
0x2c, // ',' for CS
|
||||
0x2f, // '/' for B
|
||||
0x5f, // '_' for S
|
||||
0x20, // ' ' for WS
|
||||
0x3d, // '=' for ON
|
||||
0x65, // 'e' for LRE
|
||||
0x6f, // 'o' for LRO
|
||||
0x41, // 'A' for AL
|
||||
0x45, // 'E' for RLE
|
||||
0x4f, // 'O' for RLO
|
||||
0x2a, // '*' for PDF
|
||||
0x60, // '`' for NSM
|
||||
0x7c // '|' for BN
|
||||
};
|
||||
private class ConfTestBidiClassifier extends BidiClassifier {
|
||||
public ConfTestBidiClassifier() {
|
||||
super(null);
|
||||
}
|
||||
@Override
|
||||
public int classify(int c) {
|
||||
for(int i=0; i<charFromBiDiClass.length; ++i) {
|
||||
if(c==charFromBiDiClass[i]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
// Character not in our hardcoded table.
|
||||
// Should not occur during testing.
|
||||
return Bidi.CLASS_DEFAULT;
|
||||
}
|
||||
}
|
||||
private static final int biDiClassNameLengths[]={
|
||||
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
|
||||
};
|
||||
private void parseInputStringFromBiDiClasses() {
|
||||
inputStringBuilder.delete(0, 0x7fffffff);
|
||||
/*
|
||||
* Lengthy but fast BiDi class parser.
|
||||
* A simple parser could terminate or extract the name string and use
|
||||
* int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
|
||||
* but that makes this test take significantly more time.
|
||||
*/
|
||||
char c0, c1, c2;
|
||||
while(skipWhitespace() && (c0=line.charAt(lineIndex))!=';') {
|
||||
int biDiClass=UCharacterDirection.CHAR_DIRECTION_COUNT;
|
||||
// Compare each character once until we have a match on
|
||||
// a complete, short BiDi class name.
|
||||
if(c0=='L') {
|
||||
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='R') {
|
||||
if((c2=line.charAt(lineIndex+2))=='E') {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;
|
||||
} else if(c2=='O') {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;
|
||||
}
|
||||
} else {
|
||||
biDiClass=UCharacterDirection.LEFT_TO_RIGHT;
|
||||
}
|
||||
} else if(c0=='R') {
|
||||
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='L') {
|
||||
if((c2=line.charAt(lineIndex+2))=='E') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;
|
||||
} else if(c2=='O') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;
|
||||
}
|
||||
} else {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT;
|
||||
}
|
||||
} else if(c0=='E') {
|
||||
if((lineIndex+1)>=line.length()) {
|
||||
// too short
|
||||
} else if((c1=line.charAt(lineIndex+1))=='N') {
|
||||
biDiClass=UCharacterDirection.EUROPEAN_NUMBER;
|
||||
} else if(c1=='S') {
|
||||
biDiClass=UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR;
|
||||
} else if(c1=='T') {
|
||||
biDiClass=UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR;
|
||||
}
|
||||
} else if(c0=='A') {
|
||||
if((lineIndex+1)>=line.length()) {
|
||||
// too short
|
||||
} else if((c1=line.charAt(lineIndex+1))=='L') {
|
||||
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
|
||||
} else if(c1=='N') {
|
||||
biDiClass=UCharacterDirection.ARABIC_NUMBER;
|
||||
}
|
||||
} else if(c0=='C' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='S') {
|
||||
biDiClass=UCharacterDirection.COMMON_NUMBER_SEPARATOR;
|
||||
} else if(c0=='B') {
|
||||
if((lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='N') {
|
||||
biDiClass=UCharacterDirection.BOUNDARY_NEUTRAL;
|
||||
} else {
|
||||
biDiClass=UCharacterDirection.BLOCK_SEPARATOR;
|
||||
}
|
||||
} else if(c0=='S') {
|
||||
biDiClass=UCharacterDirection.SEGMENT_SEPARATOR;
|
||||
} else if(c0=='W' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='S') {
|
||||
biDiClass=UCharacterDirection.WHITE_SPACE_NEUTRAL;
|
||||
} else if(c0=='O' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='N') {
|
||||
biDiClass=UCharacterDirection.OTHER_NEUTRAL;
|
||||
} else if(c0=='P' && (lineIndex+2)<line.length() &&
|
||||
line.charAt(lineIndex+1)=='D' && line.charAt(lineIndex+2)=='F') {
|
||||
biDiClass=UCharacterDirection.POP_DIRECTIONAL_FORMAT;
|
||||
} else if(c0=='N' && (lineIndex+2)<line.length() &&
|
||||
line.charAt(lineIndex+1)=='S' && line.charAt(lineIndex+2)=='M') {
|
||||
biDiClass=UCharacterDirection.DIR_NON_SPACING_MARK;
|
||||
}
|
||||
// Now we verify that the class name is terminated properly,
|
||||
// and not just the start of a longer word.
|
||||
int biDiClassNameLength=biDiClassNameLengths[biDiClass];
|
||||
char c;
|
||||
if( biDiClass==UCharacterDirection.CHAR_DIRECTION_COUNT ||
|
||||
((lineIndex+biDiClassNameLength)<line.length() &&
|
||||
!isInvWhitespace(c=line.charAt(lineIndex+biDiClassNameLength)) &&
|
||||
c!=';')
|
||||
) {
|
||||
throw new IllegalArgumentException(
|
||||
"BiDi class string not recognized at "+line.substring(lineIndex)+" in "+line);
|
||||
}
|
||||
inputStringBuilder.append(charFromBiDiClass[biDiClass]);
|
||||
lineIndex+=biDiClassNameLength;
|
||||
}
|
||||
inputString=inputStringBuilder.toString();
|
||||
}
|
||||
|
||||
private static char printLevel(byte level) {
|
||||
if(level<Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT) {
|
||||
return (char)('0'+level);
|
||||
} else {
|
||||
return 'x';
|
||||
}
|
||||
}
|
||||
|
||||
private static int getDirectionBits(byte actualLevels[]) {
|
||||
int actualDirectionBits=0;
|
||||
for(int i=0; i<actualLevels.length; ++i) {
|
||||
actualDirectionBits|=(1<<(actualLevels[i]&1));
|
||||
}
|
||||
return actualDirectionBits;
|
||||
}
|
||||
private boolean checkLevels(byte actualLevels[], String paraLevelName) {
|
||||
boolean isOk=true;
|
||||
if(levelsCount!=actualLevels.length) {
|
||||
errln("Wrong number of level values; expected "+levelsCount+" actual "+actualLevels.length);
|
||||
isOk=false;
|
||||
} else {
|
||||
for(int i=0; i<actualLevels.length; ++i) {
|
||||
if(levels[i]!=actualLevels[i] && levels[i]<Bidi.LEVEL_DEFAULT_LTR) {
|
||||
if(directionBits!=3 && directionBits==getDirectionBits(actualLevels)) {
|
||||
// ICU used a shortcut:
|
||||
// Since the text is unidirectional, it did not store the resolved
|
||||
// levels but just returns all levels as the paragraph level 0 or 1.
|
||||
// The reordering result is the same, so this is fine.
|
||||
break;
|
||||
} else {
|
||||
errln("Wrong level value at index "+i+"; expected levels[i] actual "+actualLevels[i]);
|
||||
isOk=false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if(!isOk) {
|
||||
printErrorLine(paraLevelName);
|
||||
StringBuilder els=new StringBuilder("Expected levels: ");
|
||||
int i;
|
||||
for(i=0; i<levelsCount; ++i) {
|
||||
els.append(' ').append(printLevel(levels[i]));
|
||||
}
|
||||
StringBuilder als=new StringBuilder("Actual levels: ");
|
||||
for(i=0; i<actualLevels.length; ++i) {
|
||||
als.append(' ').append(printLevel(actualLevels[i]));
|
||||
}
|
||||
errln(els.toString());
|
||||
errln(als.toString());
|
||||
}
|
||||
return isOk;
|
||||
}
|
||||
|
||||
// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
|
||||
// does not work for custom BiDi class assignments
|
||||
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
|
||||
// Therefore we just skip the indexes for BiDi controls while comparing
|
||||
// with the expected ordering that has them omitted.
|
||||
private boolean checkOrdering(Bidi ubidi, String paraLevelName) {
|
||||
boolean isOk=true;
|
||||
int resultLength=ubidi.getResultLength(); // visual length including BiDi controls
|
||||
int i, visualIndex;
|
||||
// Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
|
||||
// and loop over each run's indexes, but that seems unnecessary for this test code.
|
||||
for(i=visualIndex=0; i<resultLength; ++i) {
|
||||
int logicalIndex=ubidi.getLogicalIndex(i);
|
||||
if(levels[logicalIndex]>=Bidi.LEVEL_DEFAULT_LTR) {
|
||||
continue; // BiDi control, omitted from expected ordering.
|
||||
}
|
||||
if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
|
||||
errln("Wrong ordering value at visual index "+visualIndex+"; expected "+
|
||||
ordering[visualIndex]+" actual "+logicalIndex);
|
||||
isOk=false;
|
||||
break;
|
||||
}
|
||||
++visualIndex;
|
||||
}
|
||||
// visualIndex is now the visual length minus the BiDi controls,
|
||||
// which should match the length of the BidiTest.txt ordering.
|
||||
if(isOk && orderingCount!=visualIndex) {
|
||||
errln("Wrong number of ordering values; expected "+orderingCount+" actual "+visualIndex);
|
||||
isOk=false;
|
||||
}
|
||||
if(!isOk) {
|
||||
printErrorLine(paraLevelName);
|
||||
StringBuilder eord=new StringBuilder("Expected ordering: ");
|
||||
for(i=0; i<orderingCount; ++i) {
|
||||
eord.append(' ').append((char)('0'+ordering[i]));
|
||||
}
|
||||
StringBuilder aord=new StringBuilder("Actual ordering: ");
|
||||
for(i=0; i<resultLength; ++i) {
|
||||
int logicalIndex=ubidi.getLogicalIndex(i);
|
||||
if(levels[logicalIndex]<Bidi.LEVEL_DEFAULT_LTR) {
|
||||
aord.append(' ').append((char)('0'+logicalIndex));
|
||||
}
|
||||
}
|
||||
errln(eord.toString());
|
||||
errln(aord.toString());
|
||||
}
|
||||
return isOk;
|
||||
}
|
||||
|
||||
private void printErrorLine(String paraLevelName) {
|
||||
++errorCount;
|
||||
errln(String.format("Input line %5d: %s", lineNumber, line));
|
||||
errln("Input string: "+inputString);
|
||||
errln("Para level: "+paraLevelName);
|
||||
}
|
||||
|
||||
private static boolean isInvWhitespace(char c) {
|
||||
return ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n');
|
||||
}
|
||||
/**
|
||||
* Skip isInvWhitespace() characters.
|
||||
* @return true if line.charAt[lineIndex] is a non-whitespace, false if lineIndex>=line.length()
|
||||
*/
|
||||
private boolean skipWhitespace() {
|
||||
while(lineIndex<line.length()) {
|
||||
if(!isInvWhitespace(line.charAt(lineIndex))) {
|
||||
return true;
|
||||
}
|
||||
++lineIndex;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private String line;
|
||||
private int lineIndex;
|
||||
private byte levels[]=new byte[1000]; // UBiDiLevel
|
||||
private int directionBits;
|
||||
private int ordering[]=new int[1000];
|
||||
private int lineNumber;
|
||||
private int levelsCount;
|
||||
private int orderingCount;
|
||||
private int errorCount;
|
||||
private String inputString;
|
||||
private StringBuilder inputStringBuilder=new StringBuilder();
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -32,6 +32,7 @@ public class TestAll extends TestGroup {
|
||||
"com.ibm.icu.dev.test.bidi.TestStreaming",
|
||||
"com.ibm.icu.dev.test.bidi.TestClassOverride",
|
||||
"com.ibm.icu.dev.test.bidi.TestCompatibility",
|
||||
"com.ibm.icu.dev.test.bidi.BiDiConformanceTest"
|
||||
},
|
||||
"Bidi tests");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user