ICU-9077 RBBI test enhancements.

X-SVN-Rev: 33232
This commit is contained in:
Andy Heninger 2013-02-15 02:13:58 +00:00
parent ded4f854b0
commit c437359b7f
2 changed files with 60 additions and 7 deletions

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1999-2012, International Business Machines Corporation and
* Copyright (c) 1999-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/************************************************************************
@ -956,6 +956,57 @@ void RBBITest::executeTest(TestParams *t) {
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
}
}
// Check isBoundary()
for (i=0; i<t->expectedBreaks->size(); i++) {
UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0);
UBool boundaryFound = t->bi->isBoundary(i);
if (boundaryExpected != boundaryFound) {
errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"
" Expected, Actual= %s, %s",
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i),
boundaryExpected ? "true":"false", boundaryFound? "true" : "false");
}
}
// Check following()
for (i=0; i<t->expectedBreaks->size(); i++) {
int32_t actualBreak = t->bi->following(i);
int32_t expectedBreak = BreakIterator::DONE;
for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) {
if (t->expectedBreaks->elementAti(j) != 0) {
expectedBreak = j;
break;
}
}
if (expectedBreak != actualBreak) {
errln("following(%d) incorrect. File line,col= %4d,%4d\n"
" Expected, Actual= %d, %d",
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
}
}
// Check preceding()
for (i=t->expectedBreaks->size(); i>=0; i--) {
int32_t actualBreak = t->bi->preceding(i);
int32_t expectedBreak = BreakIterator::DONE;
// For supplementaries, back up to the start of the character.
int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
for (int32_t j=currentCharStart-1; j >= 0; j--) {
// for (int32_t j=i-1; j >= 0; j--) {
if (t->expectedBreaks->elementAti(j) != 0) {
expectedBreak = j;
break;
}
}
if (expectedBreak != actualBreak) {
errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"
" Expected, Actual= %d, %d",
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
}
}
}
@ -1100,7 +1151,7 @@ void RBBITest::TestExtended() {
char localeName8[100];
localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0);
locale = Locale::createFromName(localeName8);
charIdx += localeMatcher.group(0, status).length();
charIdx += localeMatcher.group(0, status).length() - 1;
TEST_ASSERT_SUCCESS(status);
break;
}

View File

@ -1,4 +1,4 @@
# Copyright (c) 2001-2012 International Business Machines
# Copyright (c) 2001-2013 International Business Machines
# Corporation and others. All Rights Reserved.
#
# RBBI Test Data
@ -167,7 +167,8 @@
<data>•abc\U00010300<200> •abc\N{DESERET SMALL LETTER ENG}<200> •abc\N{MATHEMATICAL BOLD SMALL Z}<200> •abc\N{MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL}<200> •</data>
# Unassigned code points
<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
# TODO: This case should pass.
#<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
# Hiragana & Katakana stay together, but separate from each other and from Latin.
# *** what to do about theoretical combos of chars? i.e. hiragana + accent
@ -538,7 +539,8 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
# Surrogate line break tests.
#
<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
#<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data> #TODO: should be same as the next line.
<data>•\u4e01•\U20001•\u4e02•abc •\ue000 •\Uf0001•</data>
# Regression for bug 836
# Note: Unicode 5.1 changed this behavior
@ -817,6 +819,6 @@ Bangkok)•</data>
<locale fi>
<line>
<data>•abc •- •def •abc •-def •abc- •def •abc-•def•</data> # With ASCII hyphen
<data>•abc •‐ •def •abc •def •abc •def •abc•def•</data> # With Unicode u2010 hyphen
#<data>•abc •- •def •abc •-def •abc- •def •abc-•def•</data> # With ASCII hyphen
#<data>•abc •‐ •def •abc •def •abc •def •abc•def•</data> # With Unicode u2010 hyphen