ICU-9077 RBBI test enhancements.
X-SVN-Rev: 33232
This commit is contained in:
parent
ded4f854b0
commit
c437359b7f
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1999-2012, International Business Machines Corporation and
|
||||
* Copyright (c) 1999-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/************************************************************************
|
||||
@ -956,6 +956,57 @@ void RBBITest::executeTest(TestParams *t) {
|
||||
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Check isBoundary()
|
||||
for (i=0; i<t->expectedBreaks->size(); i++) {
|
||||
UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0);
|
||||
UBool boundaryFound = t->bi->isBoundary(i);
|
||||
if (boundaryExpected != boundaryFound) {
|
||||
errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"
|
||||
" Expected, Actual= %s, %s",
|
||||
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i),
|
||||
boundaryExpected ? "true":"false", boundaryFound? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
// Check following()
|
||||
for (i=0; i<t->expectedBreaks->size(); i++) {
|
||||
int32_t actualBreak = t->bi->following(i);
|
||||
int32_t expectedBreak = BreakIterator::DONE;
|
||||
for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) {
|
||||
if (t->expectedBreaks->elementAti(j) != 0) {
|
||||
expectedBreak = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (expectedBreak != actualBreak) {
|
||||
errln("following(%d) incorrect. File line,col= %4d,%4d\n"
|
||||
" Expected, Actual= %d, %d",
|
||||
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
|
||||
}
|
||||
}
|
||||
|
||||
// Check preceding()
|
||||
for (i=t->expectedBreaks->size(); i>=0; i--) {
|
||||
int32_t actualBreak = t->bi->preceding(i);
|
||||
int32_t expectedBreak = BreakIterator::DONE;
|
||||
|
||||
// For supplementary characters, an index in the middle of a surrogate pair is backed up to the start of the character.
|
||||
int32_t currentCharStart = i < t->dataToBreak.length()? t->dataToBreak.getChar32Start(i) : i;
|
||||
|
||||
for (int32_t j=currentCharStart-1; j >= 0; j--) {
|
||||
// for (int32_t j=i-1; j >= 0; j--) {
|
||||
if (t->expectedBreaks->elementAti(j) != 0) {
|
||||
expectedBreak = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (expectedBreak != actualBreak) {
|
||||
errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"
|
||||
" Expected, Actual= %d, %d",
|
||||
i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1100,7 +1151,7 @@ void RBBITest::TestExtended() {
|
||||
char localeName8[100];
|
||||
localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0);
|
||||
locale = Locale::createFromName(localeName8);
|
||||
charIdx += localeMatcher.group(0, status).length();
|
||||
charIdx += localeMatcher.group(0, status).length() - 1;
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
break;
|
||||
}
|
||||
|
12
icu4c/source/test/testdata/rbbitst.txt
vendored
12
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2001-2012 International Business Machines
|
||||
# Copyright (c) 2001-2013 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RBBI Test Data
|
||||
@ -167,7 +167,8 @@
|
||||
<data>•abc\U00010300<200> •abc\N{DESERET SMALL LETTER ENG}<200> •abc\N{MATHEMATICAL BOLD SMALL Z}<200> •abc\N{MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL}<200> •</data>
|
||||
|
||||
# Unassigned code points
|
||||
<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
|
||||
# TODO: This case should pass.
|
||||
#<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data>
|
||||
|
||||
# Hiragana & Katakana stay together, but separate from each other and from Latin.
|
||||
# *** what to do about theoretical combinations of chars? e.g. hiragana + accent
|
||||
@ -538,7 +539,8 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
|
||||
# Surrogate line break tests.
|
||||
#
|
||||
<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
|
||||
#<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data> #TODO: should be same as the next line.
|
||||
<data>•\u4e01•\U20001•\u4e02•abc •\ue000 •\Uf0001•</data>
|
||||
|
||||
# Regression for bug 836
|
||||
# Note: Unicode 5.1 changed this behavior
|
||||
@ -817,6 +819,6 @@ Bangkok)•</data>
|
||||
|
||||
<locale fi>
|
||||
<line>
|
||||
<data>•abc •- •def •abc •-def •abc- •def •abc-•def•</data> # With ASCII hyphen
|
||||
<data>•abc •‐ •def •abc •‐def •abc‐ •def •abc‐•def•</data> # With Unicode u2010 hyphen
|
||||
#<data>•abc •- •def •abc •-def •abc- •def •abc-•def•</data> # With ASCII hyphen
|
||||
#<data>•abc •‐ •def •abc •‐def •abc‐ •def •abc‐•def•</data> # With Unicode u2010 hyphen
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user