ICU-21178 Add check for corrupt rbbitst.txt data.
When parsing the test data in rbbitst.txt, two or more adjacent boundary markers (•) with no intervening test text were silently accepted, with no indication of a problem. This situation arose, as described in bug ICU-21178, from a bad import of some test cases from CLDR. PR #1194 corrected the affected test data in ICU4C. This PR adds code to both the ICU4C and ICU4J tests to report this situation in the test data as an error, and also propagates the data fix to ICU4J's copy of rbbitst.txt.
This commit is contained in:
parent
0d4b1c1cb9
commit
895aff3bff
@ -905,6 +905,10 @@ void RBBITest::TestExtended() {
|
||||
case PARSE_DATA:
|
||||
if (c == u'•') {
|
||||
int32_t breakIdx = tp.dataToBreak.length();
|
||||
if (tp.expectedBreaks->size() > breakIdx) {
|
||||
errln("rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
|
||||
lineNum, column);
|
||||
}
|
||||
tp.expectedBreaks->setSize(breakIdx+1);
|
||||
tp.expectedBreaks->setElementAt(-1, breakIdx);
|
||||
tp.srcLine->setSize(breakIdx+1);
|
||||
@ -1069,6 +1073,10 @@ void RBBITest::TestExtended() {
|
||||
tagValue = -1;
|
||||
}
|
||||
int32_t breakIdx = tp.dataToBreak.length();
|
||||
if (tp.expectedBreaks->size() > breakIdx) {
|
||||
errln("rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
|
||||
lineNum, column);
|
||||
}
|
||||
tp.expectedBreaks->setSize(breakIdx+1);
|
||||
tp.expectedBreaks->setElementAt(tagValue, breakIdx);
|
||||
tp.srcLine->setSize(breakIdx+1);
|
||||
|
@ -245,6 +245,11 @@ public void TestExtended() {
|
||||
case PARSE_DATA:
|
||||
if (c == '•') {
|
||||
int breakIdx = tp.dataToBreak.length();
|
||||
if (tp.expectedBreaks[breakIdx] != 0) {
|
||||
errln(String.format(
|
||||
"rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
|
||||
lineNum, column));
|
||||
}
|
||||
tp.expectedBreaks[breakIdx] = -1;
|
||||
tp.srcLine[breakIdx] = lineNum;
|
||||
tp.srcCol[breakIdx] = column;
|
||||
@ -388,6 +393,11 @@ public void TestExtended() {
|
||||
tagValue = -1;
|
||||
}
|
||||
int breakIdx = tp.dataToBreak.length();
|
||||
if (tp.expectedBreaks[breakIdx] != 0) {
|
||||
errln(String.format(
|
||||
"rbbitst.txt:%d:%d adjacent expected breaks with no intervening test text",
|
||||
lineNum, column));
|
||||
}
|
||||
tp.expectedBreaks[breakIdx] = tagValue;
|
||||
tp.srcLine[breakIdx] = lineNum;
|
||||
tp.srcCol[breakIdx] = column;
|
||||
|
@ -289,7 +289,7 @@
|
||||
# वृद्धिसँग ;
|
||||
<data>•वृ•द्धि•सँ•ग•</data>
|
||||
# अंतःज्ञानी ;
|
||||
<data>•अं•तः•ज्ञा•नी••</data>
|
||||
<data>•अं•तः•ज्ञा•नी• •</data>
|
||||
# गन्नदी॑धिम ;
|
||||
<data>•ग•न्न•दी॑•धि•म•</data>
|
||||
# प्प्रप॑द्ये॒ ;
|
||||
@ -319,7 +319,7 @@
|
||||
# भर्तुर्भोगः ;
|
||||
<data>•भ•र्तु•र्भो•गः•</data>
|
||||
# शॆत्युल ;
|
||||
<data>•शॆ•त्यु•ल••</data>
|
||||
<data>•शॆ•त्यु•ल• •</data>
|
||||
# महारॆन्य ;
|
||||
<data>•म•हा•रॆ•न्य•</data>
|
||||
# सॆक्युल ;
|
||||
|
Loading…
Reference in New Issue
Block a user