ICU-7130 infinite loop in dictionary break iterators

X-SVN-Rev: 26618
This commit is contained in:
Andy Heninger 2009-09-10 23:17:38 +00:00
parent 85f0ecb659
commit 4d90dc692d
3 changed files with 44 additions and 3 deletions

View File

@ -1,6 +1,6 @@
/*
***************************************************************************
* Copyright (C) 1999-2008 International Business Machines Corporation *
* Copyright (C) 1999-2009 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
@ -247,7 +247,10 @@ void RuleBasedBreakIterator::init() {
fLastRuleStatusIndex = 0;
fLastStatusIndexValid = TRUE;
fDictionaryCharCount = 0;
fBreakType = -1;
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
// dictionary behavior for Break Iterators that are
// built from rules. Even better would be the ability to
// declare the type in the rules.
fCachedBreakPositions = NULL;
fLanguageBreakEngines = NULL;
@ -1019,7 +1022,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
#ifdef RBBI_DEBUG
if (fTrace) {
RBBIDebugPrintf(" %4d ", utext_getNativeIndex(fText));
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {

View File

@ -140,6 +140,8 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
case 21: case 22: case 23: name = "skip";
break;
#endif
case 24: name = "TestDictRules";
if (exec) TestDictRules(); break;
default: name = ""; break; //needed to end loop
}
@ -1991,6 +1993,41 @@ void RBBITest::TestTailoredBreaks() {
}
//-------------------------------------------------------------------------------
//
// TestDictRules create a break iterator from source rules that include a
// dictionary range. Regression for bug #7130. Source rules
// do not declare a break iterator type (word, line, sentence, etc.),
// but the dictionary code, without a type, would loop.
//
//-------------------------------------------------------------------------------
void RBBITest::TestDictRules() {
const char *rules = "$dictionary = [a-z]; \n"
"!!forward; \n"
"$dictionary $dictionary; \n"
"!!reverse; \n"
"$dictionary $dictionary; \n";
const char *text = "aa";
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator bi(rules, parseError, status);
TEST_ASSERT_SUCCESS(status);
UnicodeString utext = text;
bi.setText(utext);
int32_t position;
int32_t loops;
for (loops = 0; loops<10; loops++) {
position = bi.next();
if (position == RuleBasedBreakIterator::DONE) {
break;
}
}
TEST_ASSERT(loops == 1);
}
//-------------------------------------------------------------------------------
//
// ReadAndConvertFile Read a text data file, convert it to UChars, and

View File

@ -70,6 +70,7 @@ public:
void TestBug5775();
void TestThaiBreaks();
void TestTailoredBreaks();
void TestDictRules();
void TestDebug();