scuffed-code/icu4c/source/test/intltest/bidiconf.cpp

803 lines
29 KiB
C++

/*
*******************************************************************************
*
* Copyright (C) 2009-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: bidiconf.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009oct16
* created by: Markus W. Scherer
*
* BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "unicode/utypes.h"
#include "unicode/ubidi.h"
#include "unicode/errorcode.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/unistr.h"
#include "intltest.h"
#include "uparse.h"
/**
 * BiDi conformance test for the ICU UBiDi implementation.
 *
 * The two test methods read BidiTest.txt and BidiCharacterTest.txt line by line,
 * parse the expected levels/ordering into the member arrays below,
 * run ubidi_setPara() on the parsed input and compare the results.
 * The parse and check helpers communicate through the member variables,
 * so an instance holds the state of the line that is currently being processed.
 */
class BiDiConformanceTest : public IntlTest {
public:
    BiDiConformanceTest() :
            directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
            errorCount(0), paraLevelName("N/A") {
        // printErrorLine() prints line, inputString and paraLevelName unconditionally,
        // and it can be reached (for example via a parse error) before a test method
        // has assigned paraLevelName. Start with well-defined, printable values.
        line[0]=0;
        levelNameString[0]=0;
    }

    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);

    void TestBidiTest();
    void TestBidiCharacterTest();
private:
    char *getUnidataPath(char path[]);

    // Parsers for the fields of a test data line; they fill the members below.
    UBool parseLevels(const char *&start);
    UBool parseOrdering(const char *start);
    UBool parseInputStringFromBiDiClasses(const char *&start, UBool parseChars);

    // Comparison of the UBiDi results with the parsed expectations.
    UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
    UBool checkOrdering(UBiDi *ubidi);

    // Reports the current line, input string and paragraph level; increments errorCount.
    void printErrorLine();

    char line[10000];           // current line of the test data file (comment stripped)
    UBiDiLevel levels[1000];    // expected levels; UBIDI_DEFAULT_LTR stands for 'x'
    uint32_t directionBits;     // bit 0: an even expected level was seen, bit 1: an odd one
    int32_t ordering[1000];     // expected visual ordering (logical indexes)
    int32_t lineNumber;         // 1-based number of the current line
    int32_t levelsCount;        // number of entries used in levels[]
    int32_t orderingCount;      // number of entries used in ordering[]; -1 if not to be checked
    int32_t errorCount;         // number of errors reported so far in the current test
    UnicodeString inputString;  // input text built from the current line
    const char *paraLevelName;  // printable name of the paragraph level being tested
    char levelNameString[12];   // storage for paraLevelName when it is a formatted number
};
/**
 * Factory function: allocates a new BiDiConformanceTest with new
 * and returns it as an IntlTest pointer.
 */
extern IntlTest *createBiDiConformanceTest() {
    IntlTest *conformanceTest=new BiDiConformanceTest();
    return conformanceTest;
}
// Test dispatcher for this suite.
// The par argument is not used (its name is commented out in the signature).
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file
// (presumably in intltest.h); they appear to use index, exec and name to select
// and optionally run one of the listed test methods -- confirm against their definition.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
// Log the suite name only when exec is true.
logln("TestSuite BiDiConformanceTest: ");
}
TESTCASE_AUTO_BEGIN;
// One entry per test method of this class.
TESTCASE_AUTO(TestBidiTest);
TESTCASE_AUTO(TestBidiCharacterTest);
TESTCASE_AUTO_END;
}
// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
char *BiDiConformanceTest::getUnidataPath(char path[]) {
IcuTestErrorCode errorCode(*this, "getUnidataPath");
const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt")
// Look inside ICU_DATA first.
strcpy(path, pathToDataDirectory());
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
FILE *f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
// As a fallback, try to guess where the source data was located
// at the time ICU was built, and look there.
# ifdef U_TOPSRCDIR
strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data");
# else
strcpy(path, loadTestData(errorCode));
strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING "data");
# endif
strcat(path, U_FILE_SEP_STRING);
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
return NULL;
}
// Defines the LocalStdioFilePointer class which the test methods below use to hold
// the FILE * returned by fopen() (they call isNull() and getAlias() on it).
// NOTE(review): the macro comes from unicode/localpointer.h; presumably the generated
// class calls fclose() on the FILE when it goes out of scope -- confirm there.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
/**
 * Parses a whitespace-separated list of expected levels into levels[]/levelsCount
 * and recomputes directionBits.
 *
 * Each token is either a decimal level (at most UBIDI_MAX_EXPLICIT_LEVEL+1)
 * or 'x' for a character without a level; 'x' is stored as UBIDI_DEFAULT_LTR.
 * Parsing stops at the end of the string or at a ';'.
 *
 * @param start In: where to start parsing. Out: where parsing stopped.
 * @return TRUE if the list was parsed; FALSE after reporting a parse error
 *         or a list with more entries than levels[] can hold
 */
UBool BiDiConformanceTest::parseLevels(const char *&start) {
    const int32_t capacity=(int32_t)(sizeof(levels)/sizeof(levels[0]));
    directionBits=0;
    levelsCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
        // A line can contain more tokens than levels[] has room for:
        // Report that instead of writing past the end of the array.
        if(levelsCount>=capacity) {
            errln("\nError on line %d: Too many level values (maximum %d)",
                  (int)lineNumber, (int)capacity);
            printErrorLine();
            return FALSE;
        }
        if(*start=='x') {
            levels[levelsCount++]=UBIDI_DEFAULT_LTR;
            ++start;
        } else {
            char *end;
            uint32_t value=(uint32_t)strtoul(start, &end, 10);
            // The number must be non-empty, properly terminated and in range.
            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
                          || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
                errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
                printErrorLine();
                return FALSE;
            }
            levels[levelsCount++]=(UBiDiLevel)value;
            // Bit 0 for an even level, bit 1 for an odd level.
            directionBits|=(1<<(value&1));
            start=end;
        }
    }
    return TRUE;
}
/**
 * Parses a whitespace-separated list of decimal indexes (each less than 1000)
 * into ordering[]/orderingCount.
 * Parsing stops at the end of the string or at a ';'.
 *
 * @param start Where to start parsing.
 * @return TRUE if the list was parsed; FALSE after reporting a parse error
 *         or a list with more entries than ordering[] can hold
 */
UBool BiDiConformanceTest::parseOrdering(const char *start) {
    const int32_t capacity=(int32_t)(sizeof(ordering)/sizeof(ordering[0]));
    orderingCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
        // The range check below limits each value, not the number of values:
        // Guard the array explicitly.
        if(orderingCount>=capacity) {
            errln("\nError on line %d: Too many ordering values (maximum %d)",
                  (int)lineNumber, (int)capacity);
            printErrorLine();
            return FALSE;
        }
        char *end;
        uint32_t value=(uint32_t)strtoul(start, &end, 10);
        // The number must be non-empty, properly terminated and in range.
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
            errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
            printErrorLine();
            return FALSE;
        }
        ordering[orderingCount++]=(int32_t)value;
        start=end;
    }
    return TRUE;
}
// Maps each BiDi class (UCharDirection value, used as the array index)
// to a code point in the ASCII range which stands in for that class.
// parseInputStringFromBiDiClasses() appends these characters when parseChars is FALSE,
// and biDiConfUBiDiClassCallback() maps them back to the class by searching this table.
// The values are written as hex numbers rather than as character literals.
static const UChar pseudoCharFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x6c, // 'l' for L
0x52, // 'R' for R
0x33, // '3' for EN
0x2d, // '-' for ES
0x25, // '%' for ET
0x39, // '9' for AN
0x2c, // ',' for CS
0x2f, // '/' for B
0x5f, // '_' for S
0x20, // ' ' for WS
0x3d, // '=' for ON
0x65, // 'e' for LRE
0x6f, // 'o' for LRO
0x41, // 'A' for AL
0x45, // 'E' for RLE
0x4f, // 'O' for RLO
0x2a, // '*' for PDF
0x60, // '`' for NSM
0x7c, // '|' for BN
// new in Unicode 6.3/ICU 52
0x53, // 'S' for FSI
0x69, // 'i' for LRI
0x49, // 'I' for RLI
0x2e // '.' for PDI
};
// Maps each BiDi class (UCharDirection value, used as the array index)
// to a real Unicode character chosen to represent that class.
// parseInputStringFromBiDiClasses() appends these characters when parseChars is TRUE.
// The entries are in the same class order as pseudoCharFromBiDiClass[].
static const UChar realCharFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x006c, // 'l' for L
0x05d0, // Hebrew Letter Alef for R
0x0033, // '3' for EN
0x002d, // '-' for ES
0x0025, // '%' for ET
0x0669, // Arabic-Indic '9' for AN
0x002c, // ',' for CS
0x000d, // CR for B
0x0009, // Tab for S
0x0020, // ' ' for WS
0x003d, // '=' for ON
0x202a, // LRE
0x202d, // LRO
0x0630, // Arabic Letter Thal for AL
0x202b, // RLE
0x202e, // RLO
0x202c, // PDF
0x05b9, // Hebrew Point Holam for NSM
0x00ad, // Soft Hyphen for BN
0x2068, // FSI
0x2066, // LRI
0x2067, // RLI
0x2069 // PDI
};
U_CDECL_BEGIN

/**
 * BiDi class callback which TestBidiTest() installs with ubidi_setClassCallback().
 * Returns the class whose pseudo character (see pseudoCharFromBiDiClass[]) equals c,
 * or U_BIDI_CLASS_DEFAULT if c is not one of the pseudo characters.
 * The context pointer is not used.
 */
static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
    // Linear search: stop at the first table entry that equals c.
    int classIndex=0;
    while(classIndex<U_CHAR_DIRECTION_COUNT && c!=pseudoCharFromBiDiClass[classIndex]) {
        ++classIndex;
    }
    if(classIndex<U_CHAR_DIRECTION_COUNT) {
        return (UCharDirection)classIndex;
    }
    // The character is not in the hardcoded table.
    // This should not occur during testing.
    return U_BIDI_CLASS_DEFAULT;
}

U_CDECL_END
/**
 * Returns the value (0..15) of the hexadecimal digit c,
 * accepting both upper case and lower case letters,
 * or -1 if c is not a hexadecimal digit.
 */
static int32_t hexdigit(char c) {
    if('0'<=c && c<='9') {
        return c-'0';
    }
    if('A'<=c && c<='F') {
        return 10+(c-'A');
    }
    if('a'<=c && c<='f') {
        return 10+(c-'a');
    }
    return -1;
}
// Length of the short name of each BiDi class, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() uses it to find the character after a class name.
// The extra last entry (index U_CHAR_DIRECTION_COUNT) is 0;
// the parser uses that index when it did not recognize any class name.
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
};
/**
 * Parses a whitespace-separated list of input tokens and builds inputString from them.
 * Parsing stops at the end of the string or at a ';'.
 *
 * Each token is a short BiDi class name (L, R, EN, ..., PDI; case-sensitive).
 * If parseChars is TRUE, then a token may also be a hexadecimal number
 * of at least 2 digits with a value of 0001..FFFF, which is appended as is;
 * hexadecimal numbers are tried first.
 * A class name is translated via realCharFromBiDiClass[] if parseChars is TRUE,
 * otherwise via pseudoCharFromBiDiClass[].
 *
 * @param start In: where to start parsing. Out: where parsing stopped.
 * @param parseChars TRUE to accept hexadecimal code points and to generate real characters
 * @return TRUE if all tokens were parsed; FALSE after reporting an invalid token
 */
UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start, UBool parseChars) {
    inputString.remove();
    /*
     * Lengthy but fast BiDi class parser.
     * A simple parser could terminate or extract the name string and use
     *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
     * but that makes this test take significantly more time.
     */
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
        int32_t d1, d2, hexnum;
        // First look for a hexadecimal value of at least 2 digits.
        if(parseChars && (d1=hexdigit(start[0]))>=0 && (d2=hexdigit(start[1]))>=0) {
            const char *saveStart=start;
            hexnum=(d1<<4) + d2;
            start+=2;
            while((d1=hexdigit(start[0]))>=0) {
                // Consume all of the digits, but stop accumulating once the value
                // is out of range: Otherwise an overlong number would overflow
                // the int32_t and could wrap around to an accepted value.
                if(hexnum<=0xffff) {
                    hexnum=(hexnum<<4) + d1;
                }
                start++;
            }
            if(hexnum<=0 || hexnum>0xffff ||
                    (!U_IS_INV_WHITESPACE(start[0]) && start[0]!=';' && start[0]!=0)) {
                errln("\nError on line %d: Invalid hexa number at %s", (int)lineNumber, saveStart);
                // Like the other parse errors: show the line and count the error.
                printErrorLine();
                return FALSE;
            }
            inputString.append(hexnum);
            continue;
        }
        // U_CHAR_DIRECTION_COUNT means "no class recognized".
        UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
        // Compare each character once until we have a match on
        // a complete, short BiDi class name.
        if(start[0]=='L') {
            if(start[1]=='R') {
                if(start[2]=='E') {
                    biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
                } else if(start[2]=='I') {
                    biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
                } else if(start[2]=='O') {
                    biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
                }
            } else {
                biDiClass=U_LEFT_TO_RIGHT;
            }
        } else if(start[0]=='R') {
            if(start[1]=='L') {
                if(start[2]=='E') {
                    biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
                } else if(start[2]=='I') {
                    biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
                } else if(start[2]=='O') {
                    biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
                }
            } else {
                biDiClass=U_RIGHT_TO_LEFT;
            }
        } else if(start[0]=='E') {
            if(start[1]=='N') {
                biDiClass=U_EUROPEAN_NUMBER;
            } else if(start[1]=='S') {
                biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
            } else if(start[1]=='T') {
                biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
            }
        } else if(start[0]=='A') {
            if(start[1]=='L') {
                biDiClass=U_RIGHT_TO_LEFT_ARABIC;
            } else if(start[1]=='N') {
                biDiClass=U_ARABIC_NUMBER;
            }
        } else if(start[0]=='C' && start[1]=='S') {
            biDiClass=U_COMMON_NUMBER_SEPARATOR;
        } else if(start[0]=='B') {
            if(start[1]=='N') {
                biDiClass=U_BOUNDARY_NEUTRAL;
            } else {
                biDiClass=U_BLOCK_SEPARATOR;
            }
        } else if(start[0]=='S') {
            biDiClass=U_SEGMENT_SEPARATOR;
        } else if(start[0]=='W' && start[1]=='S') {
            biDiClass=U_WHITE_SPACE_NEUTRAL;
        } else if(start[0]=='O' && start[1]=='N') {
            biDiClass=U_OTHER_NEUTRAL;
        } else if(start[0]=='P' && start[1]=='D') {
            if(start[2]=='F') {
                biDiClass=U_POP_DIRECTIONAL_FORMAT;
            } else if(start[2]=='I') {
                biDiClass=U_POP_DIRECTIONAL_ISOLATE;
            }
        } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
            biDiClass=U_DIR_NON_SPACING_MARK;
        } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
            biDiClass=U_FIRST_STRONG_ISOLATE;
        }
        // Now we verify that the class name is terminated properly,
        // and not just the start of a longer word.
        // (The table has an extra entry with length 0 for "no class recognized".)
        int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
        char c=start[biDiClassNameLength];
        if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
            if(parseChars) {
                inputString.append(realCharFromBiDiClass[biDiClass]);
            } else {
                inputString.append(pseudoCharFromBiDiClass[biDiClass]);
            }
            start+=biDiClassNameLength;
            continue;
        }
#if 0
        // Accept any single character
        // Not currently supported:
        // This parser reads the .txt file as is, with the default charset.
        // We could at most support "invariant" characters,
        // and would have to convert them to Unicode using invariant-character functions.
        // If we need to support Unicode characters, then we would have to
        // rewrite the code for reading and parsing to read UTF-8.
        if(parseChars && (U_IS_INV_WHITESPACE(start[1]) || start[1]==';' || start[1]==0)) {
            inputString.append(start[0]);
            start++;
            continue;
        }
#endif
        errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
        printErrorLine();
        return FALSE;
    }
    return TRUE;
}
/**
 * Runs the test cases from BidiTest.txt in the source test data folder.
 *
 * "@Levels:" and "@Reorder:" lines set the expected levels and ordering
 * for the data lines that follow them; other "@" lines are skipped.
 * A data line consists of a list of BiDi class names, a ';' and a hexadecimal bitset.
 * Bits 0..3 of the bitset select the paragraph levels to test with:
 * auto/LTR, LTR, RTL and auto/RTL.
 * The input is built from pseudo characters; a class callback maps them to their classes.
 * Reading stops after 10 reported errors; a parse error ends the test right away.
 */
void BiDiConformanceTest::TestBidiTest() {
    IcuTestErrorCode errorCode(*this, "TestBidiTest");
    const char *sourceTestDataPath=getSourceTestData(errorCode);
    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
                                      "folder (getSourceTestData())")) {
        return;
    }
    static const char bidiTestFileName[]="BidiTest.txt";
    char bidiTestPath[400];
    // sizeof(bidiTestFileName) includes the terminating NUL.
    // Refuse to overflow the fixed-size buffer when the folder path is very long.
    if(strlen(sourceTestDataPath)+sizeof(bidiTestFileName)>sizeof(bidiTestPath)) {
        errln("path to %s is too long (buffer size %d)",
              bidiTestFileName, (int)sizeof(bidiTestPath));
        return;
    }
    strcpy(bidiTestPath, sourceTestDataPath);
    strcat(bidiTestPath, bidiTestFileName);
    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    if(bidiTestFile.isNull()) {
        errln("unable to open %s", bidiTestPath);
        return;
    }
    LocalUBiDiPointer ubidi(ubidi_open());
    // The input consists of pseudo characters: Map them to their BiDi classes.
    ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
                           NULL, NULL, errorCode);
    if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
        return;
    }
    lineNumber=0;
    levelsCount=0;
    orderingCount=0;
    errorCount=0;
    while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
        ++lineNumber;
        // Remove trailing comments and whitespace.
        char *commentStart=strchr(line, '#');
        if(commentStart!=NULL) {
            *commentStart=0;
        }
        u_rtrim(line);
        const char *start=u_skipWhitespace(line);
        if(*start==0) {
            continue;  // Skip empty and comment-only lines.
        }
        if(*start=='@') {
            ++start;
            if(0==strncmp(start, "Levels:", 7)) {
                start+=7;
                if(!parseLevels(start)) {
                    return;
                }
            } else if(0==strncmp(start, "Reorder:", 8)) {
                if(!parseOrdering(start+8)) {
                    return;
                }
            }
            // Skip unknown @Xyz: ...
        } else {
            if(!parseInputStringFromBiDiClasses(start, FALSE)) {
                return;
            }
            start=u_skipWhitespace(start);
            if(*start!=';') {
                errln("missing ; separator on input line %s", line);
                return;
            }
            start=u_skipWhitespace(start+1);
            char *end;
            uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
                errln("input bitset parse error at %s", start);
                return;
            }
            // Loop over the bitset.
            static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
            static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
            for(int i=0; i<=3; ++i) {
                if(bitset&(1<<i)) {
                    ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
                                  paraLevels[i], NULL, errorCode);
                    const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
                    if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
                        errln("Input line %d: %s", (int)lineNumber, line);
                        return;
                    }
                    paraLevelName=paraLevelNames[i];
                    if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
                        // continue outerLoop; does not exist in C++
                        // so just break out of the inner loop.
                        break;
                    }
                    if(!checkOrdering(ubidi.getAlias())) {
                        // continue outerLoop; does not exist in C++
                        // so just break out of the inner loop.
                        break;
                    }
                }
            }
        }
    }
}
/*
*******************************************************************************
*
* created on: 2013jul01
* created by: Matitiahu Allouche
This function performs a conformance test for implementations of the
Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
Each test case is represented in a single line which is read from a file
named BidiCharacterTest.txt. Empty, blank and comment lines may also appear
in this file.
The format of the test data is specified below. Note that each test
case constitutes a single line of text; reordering is applied within a
single line and independently of a rendering engine, and rules L3 and L4
are out of scope.
The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line.
Empty lines and lines containing only spaces and/or comments are ignored.
Lines which represent test cases consist of 4 or 5 fields separated by a
semicolon. Each field consists of tokens separated by whitespace (space
or Tab). Whitespace before and after semicolons is optional.
Field 0: A sequence of tokens where each token may be one of the following:
- a hexadecimal number of at least 2 digits representing a code point
- a bidi property value, which must be one of (case sensitive)
L (translated to 'l'),
R (translated to Hebrew Letter Alef),
EN (translated to '3'),
ES (translated to '-'),
ET (translated to '%'),
AN (translated to Arabic-Indic '9'),
CS (translated to ','),
B (translated to CR),
S (translated to Tab),
WS (translated to space),
ON (translated to '='),
LRE, LRO,
AL (translated to Arabic Letter Thal),
RLE, RLO, PDF,
NSM (translated to Hebrew Point Holam),
BN (translated to Soft Hyphen),
FSI, LRI, RLI, PDI
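- a single character which represents itself
(note: this form is currently disabled in the parser, see the "#if 0"
section of parseInputStringFromBiDiClasses())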
Field 1: A value representing the paragraph direction, as follows:
- 0 represents left-to-right
- 1 represents right-to-left
- 2 represents auto-LTR according to rules P2 and P3 of the algorithm
- 3 represents auto-RTL according to rules P2 and P3 of the algorithm
- a negative number whose absolute value is taken as paragraph level;
this may be useful to test cases where the embedding level approaches
or exceeds the maximum embedding level.
Field 2: The resolved paragraph embedding level. If the input (field 0)
includes more than one paragraph, this field represents the
resolved level of the first paragraph.
Field 3: An ordered list of resulting levels for each token in field 0
(each token represents one source character).
The UBA does not assign levels to certain characters (e.g. LRO);
characters removed in rule X9 are indicated with an 'x'.
Field 4: An ordered list of indices showing the resulting visual ordering
from left to right; characters with a resolved level of 'x' are
skipped. The numbers are zero-based. Each index corresponds to
a character in the reordered (visual) string. It represents the
index of the source character in the input (field 0).
This field is optional. When it is absent, the visual ordering
is not verified.
Examples:
# This is a comment line.
L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
L L ON R;0;0;0 0 0 1;0 1 2 3
# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
# Note: in the next line, 'b' represents the letter 'b', not a block separator.
a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
a R R x ; 1 ; 1 ; 2 1 1 2
L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
*
*******************************************************************************
*/
/**
 * Runs the test cases from BidiCharacterTest.txt in the source test data folder.
 *
 * Each data line has the fields
 *   input ; paragraph direction ; resolved paragraph level ; levels [; ordering]
 * The input is turned into real characters, ubidi_setPara() is called with the
 * requested paragraph direction, and the resolved paragraph level, the levels and
 * (if the last field is present) the visual ordering are compared with the expectations.
 * A line with an error is reported and skipped;
 * reading stops once 20 errors have been counted.
 */
void BiDiConformanceTest::TestBidiCharacterTest() {
    IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
    const char *sourceTestDataPath=getSourceTestData(errorCode);
    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
                                      "folder (getSourceTestData())")) {
        return;
    }
    static const char bidiTestFileName[]="BidiCharacterTest.txt";
    char bidiTestPath[400];
    // sizeof(bidiTestFileName) includes the terminating NUL.
    // Refuse to overflow the fixed-size buffer when the folder path is very long.
    if(strlen(sourceTestDataPath)+sizeof(bidiTestFileName)>sizeof(bidiTestPath)) {
        errln("path to %s is too long (buffer size %d)",
              bidiTestFileName, (int)sizeof(bidiTestPath));
        return;
    }
    strcpy(bidiTestPath, sourceTestDataPath);
    strcat(bidiTestPath, bidiTestFileName);
    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    if(bidiTestFile.isNull()) {
        errln("unable to open %s", bidiTestPath);
        return;
    }
    LocalUBiDiPointer ubidi(ubidi_open());
    lineNumber=0;
    levelsCount=0;
    orderingCount=0;
    errorCount=0;
    while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
        ++lineNumber;
        // Placeholders for error messages until the fields have been parsed.
        paraLevelName="N/A";
        inputString="N/A";
        // Remove trailing comments and whitespace.
        char *commentStart=strchr(line, '#');
        if(commentStart!=NULL) {
            *commentStart=0;
        }
        u_rtrim(line);
        const char *start=u_skipWhitespace(line);
        if(*start==0) {
            continue;  // Skip empty and comment-only lines.
        }

        // Field 0: input tokens.
        if(!parseInputStringFromBiDiClasses(start, TRUE)) {
            continue;
        }
        start=u_skipWhitespace(start);
        if(*start!=';') {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            continue;
        }

        // Field 1: paragraph direction.
        start=u_skipWhitespace(start+1);
        char *end;
        int32_t paraDirection=(int32_t)strtol(start, &end, 10);
        // UBIDI_MAX_EXPLICIT_LEVEL+2 marks "no valid paragraph direction".
        UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
        if(paraDirection==0) {
            paraLevel=0;
            paraLevelName="LTR";
        }
        else if(paraDirection==1) {
            paraLevel=1;
            paraLevelName="RTL";
        }
        else if(paraDirection==2) {
            paraLevel=UBIDI_DEFAULT_LTR;
            paraLevelName="Auto/LTR";
        }
        else if(paraDirection==3) {
            paraLevel=UBIDI_DEFAULT_RTL;
            paraLevelName="Auto/RTL";
        }
        // A negative number is an explicit paragraph level.
        // Compare without negating first so that the most negative value cannot overflow.
        else if(paraDirection<0 && paraDirection>=-(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
            paraLevel=(UBiDiLevel)(-paraDirection);
            sprintf(levelNameString, "%d", (int)paraLevel);
            paraLevelName=levelNameString;
        }
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
                         paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
            errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
            printErrorLine();
            continue;
        }
        start=u_skipWhitespace(end);
        if(*start!=';') {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            continue;
        }

        // Field 2: resolved paragraph level, 0 or 1.
        start++;
        uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
           resolvedParaLevel>1) {
            errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
            printErrorLine();
            continue;
        }
        start=u_skipWhitespace(end);
        if(*start!=';') {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            // Skip only this line, like for the other missing separators,
            // rather than abandoning the rest of the file.
            continue;
        }

        // Field 3: expected levels.
        start++;
        if(!parseLevels(start)) {
            continue;
        }

        // Field 4 (optional): expected visual ordering.
        start=u_skipWhitespace(start);
        if(*start==';') {
            if(!parseOrdering(start+1)) {
                continue;
            }
        }
        else
            orderingCount=-1;  // Do not verify the ordering.

        ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
                      paraLevel, NULL, errorCode);
        const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
        if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
            errln("Input line %d: %s", (int)lineNumber, line);
            continue;
        }
        UBiDiLevel actualLevel;
        if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
            printErrorLine();
            errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
                   (int)lineNumber, (int)resolvedParaLevel, (int)actualLevel);
            continue;
        }
        if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
            continue;
        }
        if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
            continue;
        }
    }
}
/**
 * Returns a single character for showing a level in an error message:
 * 'x' (0x78) for values from UBIDI_DEFAULT_LTR upwards, otherwise 0x30 plus the level.
 * Note that only the levels 0..9 yield a decimal digit this way.
 */
static UChar printLevel(UBiDiLevel level) {
    const UBool hasNoLevel= level>=UBIDI_DEFAULT_LTR;
    return hasNoLevel ? (UChar)0x78 /* 'x' */ : (UChar)(0x30+level);
}
/**
 * Summarizes which kinds of levels occur in actualLevels[0..actualCount[:
 * bit 0 is set if there is an even level, bit 1 if there is an odd level.
 * Returns 0 if actualCount is not positive.
 */
static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    uint32_t bits=0;
    // The result is a union of bits, so the order of visiting the levels does not matter.
    int32_t remaining=actualCount;
    while(remaining>0) {
        --remaining;
        const int parity=actualLevels[remaining]&1;
        bits|=(1<<parity);
    }
    return bits;
}
/**
 * Compares the levels from UBiDi with the expected levels[] of the current line.
 * Expected entries from UBIDI_DEFAULT_LTR upwards (parsed from 'x') match any actual level.
 * On a mismatch, the error line and both lists of levels are reported.
 *
 * @param actualLevels The levels from ubidi_getLevels().
 * @param actualCount The number of actualLevels.
 * @return TRUE if the levels are acceptable
 */
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
    UBool matches=TRUE;
    if(levelsCount==actualCount) {
        // Find the first index where a real expected level differs from the actual one.
        int32_t index=0;
        while(index<actualCount &&
                (levels[index]==actualLevels[index] || levels[index]>=UBIDI_DEFAULT_LTR)) {
            ++index;
        }
        if(index<actualCount) {
            // ICU may have used a shortcut:
            // If the text is unidirectional, then it does not store the resolved
            // levels but just returns all levels as the paragraph level 0 or 1.
            // The reordering result is the same, so this is fine.
            const UBool isUnidirectionalShortcut=
                directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount);
            if(!isUnidirectionalShortcut) {
                errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
                      (int)lineNumber, (int)index, levels[index], actualLevels[index]);
                matches=FALSE;
            }
        }
    } else {
        errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
              (int)lineNumber, (int)levelsCount, (int)actualCount);
        matches=FALSE;
    }
    if(matches) {
        return TRUE;
    }

    // Show the context and the complete lists of expected and actual levels.
    printErrorLine();
    UnicodeString expectedText("Expected levels: ");
    for(int32_t k=0; k<levelsCount; ++k) {
        expectedText.append((UChar)0x20).append(printLevel(levels[k]));
    }
    UnicodeString actualText("Actual levels: ");
    for(int32_t k=0; k<actualCount; ++k) {
        actualText.append((UChar)0x20).append(printLevel(actualLevels[k]));
    }
    errln(expectedText);
    errln(actualText);
    return FALSE;
}
// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
// does not work for custom BiDi class assignments
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
// Therefore we just skip the indexes for BiDi controls while comparing
// with the expected ordering that has them omitted.
UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
UBool isOk=TRUE;
IcuTestErrorCode errorCode(*this, "checkOrdering()");
int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
int32_t i, visualIndex;
// Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
// and loop over each run's indexes, but that seems unnecessary for this test code.
for(i=visualIndex=0; i<resultLength; ++i) {
int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
errln("Input line %d: %s", (int)lineNumber, line);
return FALSE;
}
if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
continue; // BiDi control, omitted from expected ordering.
}
if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
(int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
isOk=FALSE;
break;
}
++visualIndex;
}
// visualIndex is now the visual length minus the BiDi controls,
// which should match the length of the BidiTest.txt ordering.
if(isOk && orderingCount!=visualIndex) {
errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
(int)lineNumber, (int)orderingCount, (int)visualIndex);
isOk=FALSE;
}
if(!isOk) {
printErrorLine();
UnicodeString eord("Expected ordering: ");
for(i=0; i<orderingCount; ++i) {
eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
}
UnicodeString aord("Actual ordering: ");
for(i=0; i<resultLength; ++i) {
int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
}
}
errln(eord);
errln(aord);
}
return isOk;
}
void BiDiConformanceTest::printErrorLine() {
++errorCount;
errln("Input line %5d: %s", (int)lineNumber, line);
errln(UnicodeString("Input string: ")+inputString);
errln("Para level: %s", paraLevelName);
}