From 2b714f2bcef3753d5ff18e0804bde7ca252af9cc Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Thu, 31 Mar 2005 01:45:27 +0000 Subject: [PATCH] ICU-4157 Word Break, fix problem with CR sequences X-SVN-Rev: 17427 --- icu4c/source/data/brkitr/word.txt | 4 ++-- icu4c/source/test/intltest/rbbitst.cpp | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/icu4c/source/data/brkitr/word.txt b/icu4c/source/data/brkitr/word.txt index aaf98bfc84..07e7862cb2 100644 --- a/icu4c/source/data/brkitr/word.txt +++ b/icu4c/source/data/brkitr/word.txt @@ -52,7 +52,7 @@ $ExtendNumLetEx = $ExtendNumLet $Extend*; # see character breaks. $CR $LF; -[^$Control] $Extend+; +[^$Control $CR $LF] $Extend+; $NumericEx $Extend* {100}; $ALetterEx $Extend* {200}; @@ -114,7 +114,7 @@ $LF $CR; # see character breaks -$Extend* [^$Control]; +$Extend* [^$Control $CR $LF]; # rule 5 diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index 071a7c18e4..693622a784 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -2139,6 +2139,8 @@ RBBIWordMonkey::RBBIWordMonkey() : fGCFMatcher(0), fOtherSet->removeAll(*fMidNumSet); fOtherSet->removeAll(*fNumericSet); fOtherSet->removeAll(*fExtendNumLetSet); + fOtherSet->removeAll(*fFormatSet); + fOtherSet->removeAll(*fExtendSet); fSets->addElement(fALetterSet, status); fSets->addElement(fKatakanaSet, status); @@ -2146,6 +2148,7 @@ RBBIWordMonkey::RBBIWordMonkey() : fGCFMatcher(0), fSets->addElement(fMidNumSet, status); fSets->addElement(fNumericSet, status); fSets->addElement(fFormatSet, status); + fSets->addElement(fExtendSet, status); fSets->addElement(fOtherSet, status); fSets->addElement(fExtendNumLetSet, status);