ICU-21178 Add check for corrupt rbbitst.txt data.

In the test data from rbbitst.txt, two or more adjacent boundary markers with
no intervening test data were accepted, with no indication of a problem.

This situation occurred, as described in bug ICU-21178, with a bad import of
some test cases from CLDR. PR #1194 corrected the problem with the test data
in ICU4C. This PR adds code to flag this situation in the test data, and
also propagates the data fix to ICU4J's copy of rbbitst.txt.
This commit is contained in:
Andy Heninger 2020-07-07 17:12:09 -07:00
parent 0d4b1c1cb9
commit 895aff3bff
3 changed files with 20 additions and 2 deletions

View File

@@ -905,6 +905,10 @@ void RBBITest::TestExtended() {
case PARSE_DATA:
if (c == u'•') {
int32_t breakIdx = tp.dataToBreak.length();
if (tp.expectedBreaks->size() > breakIdx) {
errln("rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
lineNum, column);
}
tp.expectedBreaks->setSize(breakIdx+1);
tp.expectedBreaks->setElementAt(-1, breakIdx);
tp.srcLine->setSize(breakIdx+1);
@@ -1069,6 +1073,10 @@ void RBBITest::TestExtended() {
tagValue = -1;
}
int32_t breakIdx = tp.dataToBreak.length();
if (tp.expectedBreaks->size() > breakIdx) {
errln("rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
lineNum, column);
}
tp.expectedBreaks->setSize(breakIdx+1);
tp.expectedBreaks->setElementAt(tagValue, breakIdx);
tp.srcLine->setSize(breakIdx+1);

View File

@@ -245,6 +245,11 @@ public void TestExtended() {
case PARSE_DATA:
if (c == '•') {
int breakIdx = tp.dataToBreak.length();
if (tp.expectedBreaks[breakIdx] != 0) {
errln(String.format(
"rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
lineNum, column));
}
tp.expectedBreaks[breakIdx] = -1;
tp.srcLine[breakIdx] = lineNum;
tp.srcCol[breakIdx] = column;
@@ -388,6 +393,11 @@ public void TestExtended() {
tagValue = -1;
}
int breakIdx = tp.dataToBreak.length();
if (tp.expectedBreaks[breakIdx] != 0) {
errln(String.format(
"rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
lineNum, column));
}
tp.expectedBreaks[breakIdx] = tagValue;
tp.srcLine[breakIdx] = lineNum;
tp.srcCol[breakIdx] = column;

View File

@@ -289,7 +289,7 @@
# वृद्धिसँग ;
<data>•वृ•द्धि•सँ•ग•</data>
# अंतःज्ञानी ;
<data>•अं•तः•ज्ञा•नी••</data>
<data>•अं•तः•ज्ञा•नी• •</data>
# गन्नदी॑धिम ;
<data>•ग•न्न•दी॑•धि•म•</data>
# प्प्रप॑द्ये॒ ;
@@ -319,7 +319,7 @@
# भर्तुर्भोगः ;
<data>•भ•र्तु•र्भो•गः•</data>
# शॆत्युल ;
<data>•शॆ•त्यु•ल••</data>
<data>•शॆ•त्यु•ल• •</data>
# महारॆन्य ;
<data>•म•हा•रॆ•न्य•</data>
# सॆक्युल ;