diff --git a/icu4c/source/common/brkiter.cpp b/icu4c/source/common/brkiter.cpp index adbfd6501a..5931ebf7fb 100644 --- a/icu4c/source/common/brkiter.cpp +++ b/icu4c/source/common/brkiter.cpp @@ -1,10 +1,10 @@ /* ******************************************************************************* -* Copyright (C) 1997-2013, International Business Machines Corporation and +* Copyright (C) 1997-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * -* File TXTBDRY.CPP +* File brkiter.cpp * * Modification History: * @@ -461,6 +461,11 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE return 1; } +BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { + U_LOCALE_BASED(locBased, (*this)); + locBased.setLocaleIDs(valid, actual); +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/icu4c/source/common/locbased.cpp b/icu4c/source/common/locbased.cpp index e96b9f79f3..b3d911d0ed 100644 --- a/icu4c/source/common/locbased.cpp +++ b/icu4c/source/common/locbased.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004, International Business Machines +* Copyright (c) 2004-2014, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -43,4 +43,9 @@ void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) { } } +void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) { + uprv_strcpy(valid, validID.getName()); + uprv_strcpy(actual, actualID.getName()); +} + U_NAMESPACE_END diff --git a/icu4c/source/common/locbased.h b/icu4c/source/common/locbased.h index 366b15109e..d9f8942071 100644 --- a/icu4c/source/common/locbased.h +++ b/icu4c/source/common/locbased.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004, International Business Machines +* Copyright (c) 2004-2014, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -75,6 +75,14 @@ class U_COMMON_API LocaleBased : public UMemory { */ void setLocaleIDs(const char* valid, const char* actual); + /** + * Set the locale meta-data for the service object wrapped by this + * object. + * @param valid the ID of the valid locale + * @param actual the ID of the actual locale + */ + void setLocaleIDs(const Locale& valid, const Locale& actual); + private: char* valid; diff --git a/icu4c/source/common/unicode/brkiter.h b/icu4c/source/common/unicode/brkiter.h index 00a0f9bbec..6fc9fefa5b 100644 --- a/icu4c/source/common/unicode/brkiter.h +++ b/icu4c/source/common/unicode/brkiter.h @@ -623,7 +623,8 @@ protected: BreakIterator(); /** @internal */ BreakIterator (const BreakIterator &other) : UObject(other) {} - + /** @internal */ + BreakIterator (const Locale& valid, const Locale& actual); private: /** @internal */ diff --git a/icu4c/source/i18n/filteredbrk.cpp b/icu4c/source/i18n/filteredbrk.cpp index fcfa75c5bd..357c9c1791 100644 --- a/icu4c/source/i18n/filteredbrk.cpp +++ b/icu4c/source/i18n/filteredbrk.cpp @@ -7,8 +7,356 @@ #include "unicode/filteredbrk.h" +#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING + +#include + +#include +#include +#include +#include +#include + U_NAMESPACE_BEGIN +using namespace std; + +static const UBool debug = FALSE; +static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie +static const int32_t kMATCH = (1<<1); //< exact match - skip this one. +static const int32_t kSuppressInReverse = (1<<0); +static const int32_t kAddToForward = (1<<1); +static const UChar kFULLSTOP = 0x002E; // '.' + +class ULISentenceBreakIterator : public BreakIterator { +public: + ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); + virtual ~ULISentenceBreakIterator() {} + ULISentenceBreakIterator(const ULISentenceBreakIterator& other); +private: + LocalPointer fDelegate; + LocalUTextPointer fText; + LocalPointer fBackwardsTrie; // i.e. ".srM" for Mrs. + LocalPointer fForwardsPartialTrie; // Has ".a" for "a.M." + + /* -- subclass interface -- */ +public: + /* -- cloning and other subclass stuff -- */ + virtual BreakIterator * createBufferClone(void */*stackBuffer*/, + int32_t &/*BufferSize*/, + UErrorCode &status) { + // for now - always deep clone + status = U_SAFECLONE_ALLOCATED_WARNING; + return clone(); + } + virtual BreakIterator* clone(void) const { return new ULISentenceBreakIterator(*this); } + virtual UClassID getDynamicClassID(void) const { return NULL; } + virtual UBool operator==(const BreakIterator& o) const { if(*this==o) return true; return false; } + + /* -- text modifying -- */ + virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); } + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; } + virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } + virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } + + /* -- other functions that are just delegated -- */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); } + virtual CharacterIterator& getText(void) const { return fDelegate->getText(); } + + /* -- ITERATION -- */ + virtual int32_t first(void) { return fDelegate->first(); } + virtual int32_t preceding(int32_t offset) { return fDelegate->preceding(offset); } + virtual int32_t previous(void) { return fDelegate->previous(); } + virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset); } + virtual int32_t current(void) const { return fDelegate->current(); } + + virtual int32_t next(void); + + virtual int32_t next(int32_t n) { return fDelegate->next(n); } + virtual int32_t following(int32_t offset) { return fDelegate->following(offset); } + virtual int32_t last(void) { return fDelegate->last(); } + +}; + +ULISentenceBreakIterator::ULISentenceBreakIterator(const ULISentenceBreakIterator& other) + : BreakIterator(other), fDelegate(other.fDelegate->clone()) +{ + /* + TODO: not able to clone Tries. Should be a refcounted hidden master instead. + if(other.fBackwardsTrie.isValid()) { + fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone()); + } + if(other.fForwardsPartialTrie.isValid()) { + fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone()); + } + */ +} + + +ULISentenceBreakIterator::ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : + BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)), + fDelegate(adopt), + fBackwardsTrie(backwards), + fForwardsPartialTrie(forwards) +{ + // all set.. +} + +int32_t ULISentenceBreakIterator::next() { + int32_t n = fDelegate->next(); + if(n == UBRK_DONE || // at end or + fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions + return n; + } + // OK, do we need to break here? + UErrorCode status = U_ZERO_ERROR; + // refresh text + fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); + //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); + do { // outer loop runs once per underlying break (from fDelegate). + // loops while 'n' points to an exception. + utext_setNativeIndex(fText.getAlias(), n); // from n.. + fBackwardsTrie->reset(); + UChar32 uch; + //if(debug2) u_printf(" n@ %d\n", n); + // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") + if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here?? + // TODO only do this the 1st time? + //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); + } else { + //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); + uch = utext_next32(fText.getAlias()); + //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); + } + UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; + + int32_t bestPosn = -1; + int32_t bestValue = -1; + + while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. + USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie + if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far + bestPosn = utext_getNativeIndex(fText.getAlias()); + bestValue = fBackwardsTrie->getValue(); + } + //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); + } + + if(USTRINGTRIE_MATCHES(r)) { // exact match? + //if(debug2) u_printf("revgetValue(); + bestPosn = utext_getNativeIndex(fText.getAlias()); + //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + } + + if(bestPosn>=0) { + //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + + //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? + //int32_t bestValue = fBackwardsTrie->getValue(); + ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue); + + if(bestValue == kMATCH) { // exact match! + //if(debug2) u_printf(" exact backward match\n"); + n = fDelegate->next(); // skip this one. Find the next lowerlevel break. + if(n==UBRK_DONE) return n; + continue; // See if the next is another exception. + } else if(bestValue == kPARTIAL + && fForwardsPartialTrie.isValid()) { // make sure there's a forward trie + //if(debug2) u_printf(" partial backward match\n"); + // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie + // to see if it matches something going forward. + fForwardsPartialTrie->reset(); + UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; + utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .. + //if(debug2) u_printf("Retrying at %d\n", bestPosn); + while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && + USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(uch))) { + //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias())); + } + if(USTRINGTRIE_MATCHES(rfwd)) { + //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); + // only full matches here, nothing to check + // skip the next: + n = fDelegate->next(); + if(n==UBRK_DONE) return n; + continue; + } else { + //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); + // no match (no exception) -return the 'underlying' break + return n; + } + } else { + return n; // internal error and/or no forwards trie + } + } else { + //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match + return n; // No match - so exit. Not an exception. + } + } while(n != UBRK_DONE); + return n; +} + +U_NAMESPACE_END + +// for the 'set' +namespace std { + template <> struct hash { + size_t operator()( const UnicodeString& str ) const { + return (size_t)str.hashCode(); + } + }; +} + +U_NAMESPACE_BEGIN + +class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder { +public: + virtual ~SimpleFilteredBreakIteratorBuilder(); + SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status); + SimpleFilteredBreakIteratorBuilder(); + virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status); + virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); + virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status); +private: + set fSet; +}; + +SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() +{ +} + +SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status) + : fSet() +{ + // TODO: load, set + status = U_UNSUPPORTED_ERROR; +} + +SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder() + : fSet() +{ +} + +UBool +SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) +{ + return fSet.insert(exception).second; +} + +UBool +SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) +{ + return ((fSet.erase(exception)) != 0); +} +BreakIterator * +SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) { + LocalPointer adopt(adoptBreakIterator); + + if(U_FAILURE(status)) { + return NULL; + } + + LocalPointer builder(new UCharsTrieBuilder(status)); + LocalPointer builder2(new UCharsTrieBuilder(status)); + + int32_t revCount = 0; + int32_t fwdCount = 0; + + int32_t subCount = fSet.size(); + LocalArray ustrs(new UnicodeString[subCount]); + LocalArray partials(new int[subCount]); + + LocalPointer backwardsTrie; // i.e. ".srM" for Mrs. + LocalPointer forwardsPartialTrie; // Has ".a" for "a.M." + + int n=0; + for ( set::iterator i = fSet.begin(); + i != fSet.end(); + i++) { + const UnicodeString &abbr = *i; + ustrs[n] = abbr; + partials[n] = 0; // default: not partial + n++; + } + // first pass - find partials. + for(int i=0;i-1 && (nn+1)!=ustrs[i].length()) { + //if(true) u_printf("Is a partial: /%S/\n", ustrs[i].getTerminatedBuffer()); + // is partial. + // is it unique? + int sameAs = -1; + for(int j=0;jadd(prefix, kPARTIAL, status); + revCount++; + //if(debug2) u_printf("Added Partial: /%S/ from /%S/ status=%s\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer(), u_errorName(status)); + partials[i] = kSuppressInReverse | kAddToForward; + } else { + //if(debug2) u_printf(" // not adding partial for /%S/ from /%S/\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer()); + } + } + } + for(int i=0;iadd(ustrs[i], kMATCH, status); + revCount++; + //if(debug2) u_printf("Added: /%S/ status=%s\n", ustrs[i].getTerminatedBuffer(), u_errorName(status)); + } else { + //if(debug2) u_printf(" Adding fwd: /%S/\n", ustrs[i].getTerminatedBuffer()); + + // an optimization would be to only add the portion after the '.' + // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, + // instead of "Ph.D." since we already know the "Ph." part is a match. + // would need the trie to be able to hold 0-length strings, though. + builder2->add(ustrs[i], kMATCH, status); // forward + fwdCount++; + //ustrs[i].reverse(); + ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); + } + } + //if(debug) u_printf(" %s has %d abbrs.\n", fJSONSource.c_str(), subCount); + + if(revCount>0) { + backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); + if(U_FAILURE(status)) { + //if(debug) u_printf("Error %s building backwards\n", u_errorName(status)); + return NULL; + } + } + + if(fwdCount>0) { + forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); + if(U_FAILURE(status)) { + //if(debug) u_printf("Error %s building forwards\n", u_errorName(status)); + return NULL; + } + } + + return new ULISentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); +} + + +// ----------- + FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { } @@ -16,18 +364,23 @@ FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { } FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createInstance(const Locale& /*where*/, UErrorCode& status) { - if (U_FAILURE(status)) return NULL; +FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) { + if(U_FAILURE(status)) return NULL; - status = U_UNSUPPORTED_ERROR; - return NULL; + LocalPointer ret(new SimpleFilteredBreakIteratorBuilder(where, status)); + if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; + return ret.orphan(); } - FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { - status = U_UNSUPPORTED_ERROR; - return NULL; + if(U_FAILURE(status)) return NULL; + + LocalPointer ret(new SimpleFilteredBreakIteratorBuilder()); + if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; + return ret.orphan(); } U_NAMESPACE_END + +#endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING diff --git a/icu4c/source/i18n/unicode/filteredbrk.h b/icu4c/source/i18n/unicode/filteredbrk.h index cb2768ca9a..7e14f2d9ef 100644 --- a/icu4c/source/i18n/unicode/filteredbrk.h +++ b/icu4c/source/i18n/unicode/filteredbrk.h @@ -10,7 +10,7 @@ #include "unicode/brkiter.h" -#if !UCONFIG_NO_BREAK_ITERATION +#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING U_NAMESPACE_BEGIN diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index 4930a645a2..f26fcf36c0 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -1,5 +1,5 @@ /******************************************************************** - * Copyright (c) 1999-2013, International Business Machines + * Copyright (c) 1999-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * Date Name Description @@ -23,7 +23,9 @@ #include "unicode/ustring.h" #include "unicode/utext.h" #include "cmemory.h" - +#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING +#include "unicode/filteredbrk.h" +#endif /** * API Test the RuleBasedBreakIterator class */ @@ -643,8 +645,8 @@ void RBBIAPITest::TestRuleStatus() { //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing // changed UBRK_WORD_KANA to UBRK_WORD_IDEO u_unescape("plain word 123.45 \\u30a1\\u30a2 ", - // 012345678901234567 8 9 0 - // Katakana + // 012345678901234567 8 9 0 + // Katakana str, 30); UnicodeString testString1(str); int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; @@ -878,7 +880,7 @@ void RBBIAPITest::TestRegistration() { BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); BreakIterator* root_word = BreakIterator::createWordInstance("", status); BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); - + if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); @@ -886,7 +888,7 @@ void RBBIAPITest::TestRegistration() { delete ja_char; delete root_word; delete root_char; - + return; } @@ -1057,7 +1059,7 @@ void RBBIAPITest::TestRoundtripRules() { // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* // (these are protected so we access them via a local class RBBIWithProtectedFunctions). -// This is just a sanity check, not a thorough test (e.g. we don't check that the +// This is just a sanity check, not a thorough test (e.g. we don't check that the // first delete actually frees rulesCopy). void RBBIAPITest::TestCreateFromRBBIData() { // Get some handy RBBIData @@ -1083,7 +1085,7 @@ void RBBIAPITest::TestCreateFromRBBIData() { uprv_free( rulesCopy ); } } - + // Now try the non-adopting constructor brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); if ( U_SUCCESS(status) ) { @@ -1168,7 +1170,7 @@ void RBBIAPITest::TestRefreshInputText() { TEST_ASSERT(7 == bi->next()); TEST_ASSERT(8 == bi->next()); TEST_ASSERT(UBRK_DONE == bi->next()); - + utext_close(&ut1); utext_close(&ut2); } @@ -1176,6 +1178,142 @@ void RBBIAPITest::TestRefreshInputText() { } +static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) { + static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets + it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); + + int32_t *pos = new int32_t[ustr.length()]; + int32_t posCount = 0; + + // calculate breaks up front, so we can print out + // sans any debugging + for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { + pos[posCount++] = n; + if(posCount>=ustr.length()) { + it.errln("brk count exceeds string length!"); + return; + } + } + UnicodeString out; + out.append((UChar)CHSTR); + int32_t prev = 0; + for(int32_t i=0;i builder; + LocalPointer baseBI; + LocalPointer filteredBI; + + const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. + const UnicodeString ABBR_MR("Mr."); + const UnicodeString ABBR_CAPT("Capt."); + + { + logln("Constructing empty builder\n"); + builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); + TEST_ASSERT_SUCCESS(status); + + logln("Constructing base BI\n"); + baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Building new BI\n"); + filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Testing:"); + filteredBI->setText(text); + TEST_ASSERT(20 == filteredBI->next()); // Mr. + TEST_ASSERT(84 == filteredBI->next()); // recovered. + TEST_ASSERT(90 == filteredBI->next()); // Capt. + TEST_ASSERT(181 == filteredBI->next()); // Mr. + TEST_ASSERT(278 == filteredBI->next()); // charge. + filteredBI->first(); + prtbrks(filteredBI.getAlias(), text, *this); + } + + { + logln("Constructing empty builder\n"); + builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); + TEST_ASSERT_SUCCESS(status); + + logln("Adding Mr. as an exception\n"); + TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); + TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it + TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); + TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it + TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); + TEST_ASSERT_SUCCESS(status); + + logln("Constructing base BI\n"); + baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Building new BI\n"); + filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Testing:"); + filteredBI->setText(text); + TEST_ASSERT(84 == filteredBI->next()); + TEST_ASSERT(90 == filteredBI->next());// Capt. + TEST_ASSERT(278 == filteredBI->next()); + filteredBI->first(); + prtbrks(filteredBI.getAlias(), text, *this); + } + + + { + logln("Constructing empty builder\n"); + builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); + TEST_ASSERT_SUCCESS(status); + + logln("Adding Mr. and Capt as an exception\n"); + TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); + TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); + TEST_ASSERT_SUCCESS(status); + + logln("Constructing base BI\n"); + baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Building new BI\n"); + filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); + TEST_ASSERT_SUCCESS(status); + + logln("Testing:"); + filteredBI->setText(text); + TEST_ASSERT(84 == filteredBI->next()); + TEST_ASSERT(278 == filteredBI->next()); + filteredBI->first(); + prtbrks(filteredBI.getAlias(), text, *this); + } + +#else + logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING"); +#endif +} //--------------------------------------------- // runIndexedTest @@ -1210,6 +1348,11 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, #endif case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; +#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING + case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break; +#else + case 15: name="skip"; break; +#endif default: name = ""; break; // needed to end loop } } diff --git a/icu4c/source/test/intltest/rbbiapts.h b/icu4c/source/test/intltest/rbbiapts.h index 2805c887a6..0a672a6d49 100644 --- a/icu4c/source/test/intltest/rbbiapts.h +++ b/icu4c/source/test/intltest/rbbiapts.h @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1999-2013 International Business Machines Corporation and + * Copyright (c) 1999-2014 International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /************************************************************************ @@ -53,6 +53,7 @@ public: **/ void TestIteration(void); + void TestFilteredBreakIteratorBuilder(void); /** * Tests creating RuleBasedBreakIterator from rules strings.