From 4d90dc692d684cba36bc30404d80f6f6c6dfbe25 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Thu, 10 Sep 2009 23:17:38 +0000 Subject: [PATCH] ICU-7130 infinite loop in dictionary break iterators X-SVN-Rev: 26618 --- icu4c/source/common/rbbi.cpp | 9 ++++--- icu4c/source/test/intltest/rbbitst.cpp | 37 ++++++++++++++++++++++++++ icu4c/source/test/intltest/rbbitst.h | 1 + 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index b06a01c093..5308e26d09 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1,6 +1,6 @@ /* *************************************************************************** -* Copyright (C) 1999-2008 International Business Machines Corporation * +* Copyright (C) 1999-2009 International Business Machines Corporation * * and others. All rights reserved. * *************************************************************************** */ @@ -247,7 +247,10 @@ void RuleBasedBreakIterator::init() { fLastRuleStatusIndex = 0; fLastStatusIndexValid = TRUE; fDictionaryCharCount = 0; - fBreakType = -1; + fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable + // dictionary behavior for Break Iterators that are + // built from rules. Even better would be the ability to + // declare the type in the rules. 
fCachedBreakPositions = NULL; fLanguageBreakEngines = NULL; @@ -1019,7 +1022,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) { #ifdef RBBI_DEBUG if (fTrace) { - RBBIDebugPrintf(" %4d ", utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index 4f0acd68ca..efbb7c1c51 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -140,6 +140,8 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha case 21: case 22: case 23: name = "skip"; break; #endif + case 24: name = "TestDictRules"; + if (exec) TestDictRules(); break; default: name = ""; break; //needed to end loop } @@ -1991,6 +1993,41 @@ void RBBITest::TestTailoredBreaks() { } +//------------------------------------------------------------------------------- +// +// TestDictRules creates a break iterator from source rules that include a +// dictionary range. Regression for bug #7130. Source rules +// do not declare a break iterator type (word, line, sentence, etc.) +// but the dictionary code, without a type, would loop. 
+// +//------------------------------------------------------------------------------- +void RBBITest::TestDictRules() { + const char *rules = "$dictionary = [a-z]; \n" + "!!forward; \n" + "$dictionary $dictionary; \n" + "!!reverse; \n" + "$dictionary $dictionary; \n"; + const char *text = "aa"; + UErrorCode status = U_ZERO_ERROR; + UParseError parseError; + + RuleBasedBreakIterator bi(rules, parseError, status); + TEST_ASSERT_SUCCESS(status); + UnicodeString utext = text; + bi.setText(utext); + int32_t position; + int32_t loops; + for (loops = 0; loops<10; loops++) { + position = bi.next(); + if (position == RuleBasedBreakIterator::DONE) { + break; + } + } + TEST_ASSERT(loops == 1); +} + + + //------------------------------------------------------------------------------- // // ReadAndConvertFile Read a text data file, convert it to UChars, and diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h index 69511bd85d..fbdbbf880e 100644 --- a/icu4c/source/test/intltest/rbbitst.h +++ b/icu4c/source/test/intltest/rbbitst.h @@ -70,6 +70,7 @@ public: void TestBug5775(); void TestThaiBreaks(); void TestTailoredBreaks(); + void TestDictRules(); void TestDebug();