ICU-10286 Check in filtered break behavior. Does not load bundles yet. Also fixes locbased and brkiter (previously there was no way for subclasses to set the locale ID).

X-SVN-Rev: 35357
This commit is contained in:
Steven R. Loomis 2014-03-06 11:21:15 +00:00
parent bf0d5601c2
commit bbe5a9e0d8
8 changed files with 539 additions and 23 deletions

View File

@ -1,10 +1,10 @@
/*
*******************************************************************************
* Copyright (C) 1997-2013, International Business Machines Corporation and
* Copyright (C) 1997-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File TXTBDRY.CPP
* File brkiter.cpp
*
* Modification History:
*
@ -461,6 +461,11 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
return 1;
}
/**
 * Construct a BreakIterator whose valid and actual locales are recorded at
 * construction time, so that subclasses can supply their locale IDs.
 *
 * @param valid  locale to record as the valid locale
 * @param actual locale to record as the actual locale
 */
BreakIterator::BreakIterator(const Locale& valid, const Locale& actual)
{
    // U_LOCALE_BASED declares a LocaleBased helper bound to *this
    // (presumably wrapping this object's locale buffers -- see locbased.h).
    U_LOCALE_BASED(localeRecorder, (*this));
    localeRecorder.setLocaleIDs(valid, actual);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Copyright (c) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -43,4 +43,9 @@ void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
}
}
/**
 * Record the valid and actual locale for the wrapped service object,
 * taking the IDs from Locale objects.
 *
 * @param validID  locale whose getName() becomes the valid locale ID
 * @param actualID locale whose getName() becomes the actual locale ID
 */
void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
    // Delegate to the C-string overload instead of copying with a raw,
    // unbounded uprv_strcpy: the destination buffers are owned elsewhere and
    // a Locale name is not guaranteed to fit them. Going through the sibling
    // overload keeps a single copy routine (and its length handling) for
    // both entry points.
    setLocaleIDs(validID.getName(), actualID.getName());
}
U_NAMESPACE_END

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Copyright (c) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -75,6 +75,14 @@ class U_COMMON_API LocaleBased : public UMemory {
*/
void setLocaleIDs(const char* valid, const char* actual);
/**
* Set the locale meta-data for the service object wrapped by this
* object, taking the IDs from Locale objects rather than C strings.
* The name of each locale (Locale::getName()) is what gets stored.
* @param valid the valid locale
* @param actual the actual locale
*/
void setLocaleIDs(const Locale& valid, const Locale& actual);
private:
char* valid;

View File

@ -623,7 +623,8 @@ protected:
BreakIterator();
/** @internal */
BreakIterator (const BreakIterator &other) : UObject(other) {}
/**
* Constructor for subclasses that need to set the valid and actual
* locale of the new iterator at construction time.
* @param valid the locale to record as the valid locale
* @param actual the locale to record as the actual locale
* @internal
*/
BreakIterator (const Locale& valid, const Locale& actual);
private:
/** @internal */

View File

@ -7,8 +7,356 @@
#include "unicode/filteredbrk.h"
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
#include <unicode/ucharstriebuilder.h>
#include <string.h>
#include <strings.h>
#include <set>
#include <string>
#include <functional>
U_NAMESPACE_BEGIN
using namespace std;
// Debug switch. In the code visible here it is only mentioned by commented-out trace lines.
static const UBool debug = FALSE;
// Values stored in the tries by SimpleFilteredBreakIteratorBuilder::build()
// and tested against the trie result in ULISentenceBreakIterator::next().
static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
// Per-exception bookkeeping flags used only in build()'s 'partials' array.
// They share bit values with kPARTIAL/kMATCH but are a separate set of flags.
static const int32_t kSuppressInReverse = (1<<0);
static const int32_t kAddToForward = (1<<1);
// Character that build() searches for to find where an abbreviation's prefix ends.
static const UChar kFULLSTOP = 0x002E; // '.'
/**
 * BreakIterator wrapper that filters the breaks of an adopted delegate
 * iterator: next() skips delegate breaks that fall after a known exception
 * (for example an abbreviation such as "Mr."). Exceptions are looked up in
 * two tries: one holding reversed strings that is walked backwards from the
 * candidate break, and one holding forward strings used to confirm partial
 * matches.
 *
 * Only next() applies the filter. Every other iteration function forwards
 * straight to the delegate.
 */
class ULISentenceBreakIterator : public BreakIterator {
public:
    /**
     * @param adopt     delegate iterator; ownership is taken
     * @param forwards  forward (partial-match) trie, may be NULL; ownership is taken
     * @param backwards reversed-exception trie, may be NULL; ownership is taken
     * @param status    passed to the delegate's getLocale() calls
     */
    ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
    virtual ~ULISentenceBreakIterator() {}
    ULISentenceBreakIterator(const ULISentenceBreakIterator& other);

private:
    LocalPointer<BreakIterator> fDelegate;          // underlying iterator that proposes breaks
    LocalUTextPointer fText;                        // text handle refreshed from the delegate inside next()
    LocalPointer<UCharsTrie> fBackwardsTrie;        // i.e. ".srM" for Mrs.
    LocalPointer<UCharsTrie> fForwardsPartialTrie;  // Has ".a" for "a.M."

    /* -- subclass interface -- */
public:
    /* -- cloning and other subclass stuff -- */
    virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
                                              int32_t & /*BufferSize*/,
                                              UErrorCode &status) {
        // for now - always deep clone
        status = U_SAFECLONE_ALLOCATED_WARNING;
        return clone();
    }
    virtual BreakIterator* clone(void) const { return new ULISentenceBreakIterator(*this); }
    virtual UClassID getDynamicClassID(void) const { return NULL; }
    /**
     * Identity comparison: an iterator is equal only to itself.
     * (Evaluating "*this == o" here would call this very operator again and
     * recurse until the stack is exhausted.)
     */
    virtual UBool operator==(const BreakIterator& o) const {
        return (UBool)(this == &o);
    }

    /* -- text modifying -- */
    virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
    virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
    virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
    virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }

    /* -- other functions that are just delegated -- */
    virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
    virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }

    /* -- ITERATION -- */
    virtual int32_t first(void) { return fDelegate->first(); }
    virtual int32_t preceding(int32_t offset) { return fDelegate->preceding(offset); }
    virtual int32_t previous(void) { return fDelegate->previous(); }
    virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset); }
    virtual int32_t current(void) const { return fDelegate->current(); }

    /** Filtered advance; defined out of line. */
    virtual int32_t next(void);

    virtual int32_t next(int32_t n) { return fDelegate->next(n); }
    virtual int32_t following(int32_t offset) { return fDelegate->following(offset); }
    virtual int32_t last(void) { return fDelegate->last(); }
};
/**
 * Copy constructor: copies the BreakIterator base and clones the delegate
 * iterator of 'other'.
 *
 * The two tries are NOT copied (see the TODO below), so they stay NULL in
 * the copy. Because next() treats a NULL backwards trie as "no exceptions",
 * a copy returns the delegate's breaks unfiltered.
 */
ULISentenceBreakIterator::ULISentenceBreakIterator(const ULISentenceBreakIterator& other)
: BreakIterator(other), fDelegate(other.fDelegate->clone())
{
/*
TODO: not able to clone Tries. Should be a refcounted hidden master instead.
if(other.fBackwardsTrie.isValid()) {
fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone());
}
if(other.fForwardsPartialTrie.isValid()) {
fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone());
}
*/
}
/**
 * Adopting constructor. Takes ownership of the delegate iterator and of both
 * tries (each is stored in a LocalPointer member), and initializes the
 * BreakIterator base with the delegate's valid and actual locales.
 *
 * @param adopt     delegate iterator; dereferenced here, so it must not be NULL
 * @param forwards  forward trie used to confirm partial matches; may be NULL
 * @param backwards trie of reversed exceptions; may be NULL
 * @param status    error code handed to the delegate's getLocale() calls
 */
ULISentenceBreakIterator::ULISentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
fDelegate(adopt),
fBackwardsTrie(backwards),
fForwardsPartialTrie(forwards)
{
// all set..
}
/**
 * Advance to the next boundary of the delegate that is not suppressed by an
 * exception.
 *
 * For each break proposed by the delegate, the text before the break is
 * walked backwards through the reversed-exception trie:
 *  - a kMATCH value means the break follows a complete exception, so the
 *    break is skipped and the delegate's next break is examined;
 *  - a kPARTIAL value means only a prefix (e.g. "Ph." of "Ph.D.") matched;
 *    the text is then run forwards from the start of that prefix through the
 *    forward trie, and the break is skipped only on a full forward match;
 *  - anything else returns the delegate's break unchanged.
 *
 * @return the next unsuppressed boundary, or UBRK_DONE at the end.
 */
int32_t ULISentenceBreakIterator::next() {
    int32_t n = fDelegate->next();
    if(n == UBRK_DONE ||            // at end, or
       fBackwardsTrie.isNull()) {   // .. no backwards table loaded == no exceptions
        return n;
    }
    // OK, do we need to break here?
    UErrorCode status = U_ZERO_ERROR;
    // refresh text
    fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
    if(U_FAILURE(status) || fText.isNull()) {
        // Cannot inspect the text, so exceptions cannot be evaluated:
        // fall back to the delegate's break instead of using a bad UText.
        return n;
    }

    do {  // outer loop runs once per underlying break (from fDelegate);
          // it keeps looping while 'n' points to an exception.
        utext_setNativeIndex(fText.getAlias(), n); // from n..
        fBackwardsTrie->reset();

        UChar32 uch;
        // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
        if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
            // TODO only do this the 1st time?
            // The space stays consumed; matching starts at the character before it.
        } else {
            // Not a space: step forward again so that character is matched below.
            uch = utext_next32(fText.getAlias());
        }

        // Start from NO_MATCH: if the scan below reaches the start of the text
        // before feeding a single code point to the trie, there is no result to
        // read, and getValue() must not be called on the freshly reset trie.
        UStringTrieResult r = USTRINGTRIE_NO_MATCH;
        int32_t bestPosn = -1;
        int32_t bestValue = -1;

        while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&      // more to consume backwards and..
              USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) { // more in the trie
            if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
                bestPosn = (int32_t)utext_getNativeIndex(fText.getAlias());
                bestValue = fBackwardsTrie->getValue();
            }
        }

        if(USTRINGTRIE_MATCHES(r)) { // the last code point fed to the trie ended on a value
            bestValue = fBackwardsTrie->getValue();
            bestPosn = (int32_t)utext_getNativeIndex(fText.getAlias());
        }

        if(bestPosn>=0) {
            if(bestValue == kMATCH) { // exact match!
                n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
                if(n==UBRK_DONE) return n;
                continue; // See if the next is another exception.
            } else if(bestValue == kPARTIAL
                      && fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
                // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
                // to see if it matches something going forward.
                fForwardsPartialTrie->reset();
                // Same reasoning as above: no result until a code point has been fed.
                UStringTrieResult rfwd = USTRINGTRIE_NO_MATCH;
                utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
                while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
                      USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(uch))) {
                    // keep consuming while the forward trie can continue
                }
                if(USTRINGTRIE_MATCHES(rfwd)) {
                    // only full matches here, nothing to check
                    // skip the next:
                    n = fDelegate->next();
                    if(n==UBRK_DONE) return n;
                    continue;
                } else {
                    // no match (no exception) - return the 'underlying' break
                    return n;
                }
            } else {
                return n; // internal error and/or no forwards trie
            }
        } else {
            return n; // No match - so exit. Not an exception.
        }
    } while(n != UBRK_DONE);
    return n;
}
U_NAMESPACE_END
// for the 'set'
namespace std {
    /**
     * std::hash specialization for icu::UnicodeString: the hash is the
     * string's own hashCode(), converted to size_t.
     */
    template <>
    struct hash<icu::UnicodeString> {
        size_t operator()(const icu::UnicodeString& key) const {
            return static_cast<size_t>(key.hashCode());
        }
    };
}
U_NAMESPACE_BEGIN
/**
 * FilteredBreakIteratorBuilder implementation that keeps its exception
 * strings in a std::set and turns them into tries when build() is called.
 */
class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
public:
virtual ~SimpleFilteredBreakIteratorBuilder();
// Locale-based construction: loading is not implemented yet, so this sets status to U_UNSUPPORTED_ERROR.
SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
// Creates a builder with no exceptions.
SimpleFilteredBreakIteratorBuilder();
// Adds an exception; returns whether the set actually gained a new element.
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
// Removes an exception; returns whether an element was actually removed.
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
// Builds a filtering iterator around adoptBreakIterator (ownership is taken).
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
private:
// The current exception strings.
set<UnicodeString> fSet;
};
// Destructor: nothing to release explicitly; fSet cleans up after itself.
SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
{
}
/**
 * Locale-based constructor. Loading the exceptions for a locale is not
 * implemented yet, so the builder starts empty and reports
 * U_UNSUPPORTED_ERROR.
 *
 * @param fromLocale locale whose exceptions should be loaded (currently unused)
 * @param status     set to U_UNSUPPORTED_ERROR, unless it already holds a
 *                   failure on entry, in which case it is left untouched so
 *                   the original error is not masked
 */
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale & /*fromLocale*/, UErrorCode &status)
    : fSet()
{
    // TODO: load, set
    if(U_SUCCESS(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
}
// Default constructor: creates a builder with an empty exception set.
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder()
: fSet()
{
}
/**
 * Add an exception string after which breaks should be suppressed.
 *
 * @param exception the string to add
 * @param status    if it already indicates a failure, nothing is changed
 * @return TRUE if the string was newly added; FALSE if it was already
 *         present or if status was a failure on entry
 */
UBool
SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
    if(U_FAILURE(status)) {
        return FALSE; // do not modify the builder on behalf of a failed caller
    }
    return fSet.insert(exception).second;
}
/**
 * Remove an exception string so breaks after it are no longer suppressed.
 *
 * @param exception the string to remove
 * @param status    if it already indicates a failure, nothing is changed
 * @return TRUE if the string was present and has been removed; FALSE if it
 *         was not present or if status was a failure on entry
 */
UBool
SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
    if(U_FAILURE(status)) {
        return FALSE; // do not modify the builder on behalf of a failed caller
    }
    return ((fSet.erase(exception)) != 0);
}
/**
 * Build a filtering break iterator around an adopted delegate.
 *
 * The exception strings are split into two tries:
 *  - the backwards trie holds reversed strings. A complete exception is
 *    stored with kMATCH; for exceptions with a '.' before their end
 *    ("partials", e.g. "Ph.D."), only the prefix up to and including the
 *    first '.' is stored, with kPARTIAL;
 *  - the forwards trie holds, unreversed, every string that was marked
 *    during the partial pass (partials and strings sharing their prefix).
 *
 * @param adoptBreakIterator delegate iterator; ownership is always taken,
 *                           and it is deleted if this function fails
 * @param status             error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL
 *                           delegate, U_MEMORY_ALLOCATION_ERROR if an
 *                           allocation fails
 * @return the new iterator (owned by the caller), or NULL on failure
 */
BreakIterator *
SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
    LocalPointer<BreakIterator> adopt(adoptBreakIterator);

    if(U_FAILURE(status)) {
        return NULL;
    }
    if(adopt.isNull()) {
        // The wrapper's constructor dereferences the delegate.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status));
    LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status));

    int32_t revCount = 0;
    int32_t fwdCount = 0;

    int32_t subCount = (int32_t)fSet.size();
    LocalArray<UnicodeString> ustrs(new UnicodeString[subCount]);
    LocalArray<int> partials(new int[subCount]);

    if(builder.isNull() || builder2.isNull() || ustrs.isNull() || partials.isNull()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    if(U_FAILURE(status)) {
        return NULL;
    }

    LocalPointer<UCharsTrie> backwardsTrie;       // i.e. ".srM" for Mrs.
    LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."

    // Copy the set into an indexable array; nothing is a partial yet.
    int n=0;
    for ( set<UnicodeString>::iterator it = fSet.begin();
          it != fSet.end();
          ++it) {
        ustrs[n] = *it;
        partials[n] = 0; // default: not partial
        n++;
    }

    // first pass - find partials.
    for(int i=0;i<subCount;i++) {
        int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
        if(nn>-1 && (nn+1)!=ustrs[i].length()) {
            // is partial: there is text after the first '.'.
            // is it unique?
            int sameAs = -1;
            for(int j=0;j<subCount;j++) {
                if(j==i) continue;
                if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
                    // ustrs[j] starts with the same prefix (through the '.').
                    if(partials[j]==0) { // hasn't been processed yet
                        partials[j] = kSuppressInReverse | kAddToForward;
                    } else if(partials[j] & kSuppressInReverse) {
                        sameAs = j; // the other entry is already in the reverse table.
                    }
                }
            }
            UnicodeString prefix(ustrs[i], 0, nn+1);
            if(sameAs == -1 && partials[i] == 0) {
                // first one - add the prefix to the reverse table.
                prefix.reverse();
                builder->add(prefix, kPARTIAL, status);
                revCount++;
                partials[i] = kSuppressInReverse | kAddToForward;
            }
        }
    }

    // second pass - distribute the strings between the two tries.
    for(int i=0;i<subCount;i++) {
        if(partials[i]==0) {
            ustrs[i].reverse();
            builder->add(ustrs[i], kMATCH, status);
            revCount++;
        } else {
            // an optimization would be to only add the portion after the '.'
            // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
            // instead of "Ph.D." since we already know the "Ph." part is a match.
            // would need the trie to be able to hold 0-length strings, though.
            builder2->add(ustrs[i], kMATCH, status); // forward
            fwdCount++;
        }
    }

    if(revCount>0) {
        backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
        if(U_FAILURE(status)) {
            return NULL;
        }
    }

    if(fwdCount>0) {
        forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
        if(U_FAILURE(status)) {
            return NULL;
        }
    }

    // Hand over aliases first and release ownership only once the wrapper
    // exists, so nothing leaks if its allocation fails.
    LocalPointer<BreakIterator> result(
        new ULISentenceBreakIterator(adopt.getAlias(),
                                     forwardsPartialTrie.getAlias(),
                                     backwardsTrie.getAlias(),
                                     status));
    if(result.isNull()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL; // the locals still own the delegate and the tries
    }
    adopt.orphan();
    forwardsPartialTrie.orphan();
    backwardsTrie.orphan();
    if(U_FAILURE(status)) {
        return NULL; // 'result' deletes the wrapper together with what it adopted
    }
    return result.orphan();
}
// -----------
// Base-class constructor: nothing to initialize.
FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
}
@ -16,18 +364,23 @@ FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(const Locale& /*where*/, UErrorCode& status) {
if (U_FAILURE(status)) return NULL;
FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
if(U_FAILURE(status)) return NULL;
status = U_UNSUPPORTED_ERROR;
return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status));
if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR;
return ret.orphan();
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
status = U_UNSUPPORTED_ERROR;
return NULL;
if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder());
if(!ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR;
return ret.orphan();
}
U_NAMESPACE_END
#endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING

View File

@ -10,7 +10,7 @@
#include "unicode/brkiter.h"
#if !UCONFIG_NO_BREAK_ITERATION
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
U_NAMESPACE_BEGIN

View File

@ -1,5 +1,5 @@
/********************************************************************
* Copyright (c) 1999-2013, International Business Machines
* Copyright (c) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************
* Date Name Description
@ -23,7 +23,9 @@
#include "unicode/ustring.h"
#include "unicode/utext.h"
#include "cmemory.h"
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
#include "unicode/filteredbrk.h"
#endif
/**
* API Test the RuleBasedBreakIterator class
*/
@ -643,8 +645,8 @@ void RBBIAPITest::TestRuleStatus() {
//no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
// changed UBRK_WORD_KANA to UBRK_WORD_IDEO
u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
// 012345678901234567 8 9 0
// Katakana
// 012345678901234567 8 9 0
// Katakana
str, 30);
UnicodeString testString1(str);
int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
@ -878,7 +880,7 @@ void RBBIAPITest::TestRegistration() {
BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
BreakIterator* root_word = BreakIterator::createWordInstance("", status);
BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
@ -886,7 +888,7 @@ void RBBIAPITest::TestRegistration() {
delete ja_char;
delete root_word;
delete root_char;
return;
}
@ -1057,7 +1059,7 @@ void RBBIAPITest::TestRoundtripRules() {
// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
// This is just a sanity check, not a thorough test (e.g. we don't check that the
// This is just a sanity check, not a thorough test (e.g. we don't check that the
// first delete actually frees rulesCopy).
void RBBIAPITest::TestCreateFromRBBIData() {
// Get some handy RBBIData
@ -1083,7 +1085,7 @@ void RBBIAPITest::TestCreateFromRBBIData() {
uprv_free( rulesCopy );
}
}
// Now try the non-adopting constructor
brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
if ( U_SUCCESS(status) ) {
@ -1168,7 +1170,7 @@ void RBBIAPITest::TestRefreshInputText() {
TEST_ASSERT(7 == bi->next());
TEST_ASSERT(8 == bi->next());
TEST_ASSERT(UBRK_DONE == bi->next());
utext_close(&ut1);
utext_close(&ut2);
}
@ -1176,6 +1178,142 @@ void RBBIAPITest::TestRefreshInputText() {
}
/**
 * Test helper: iterate 'brk' forward from its current position with next()
 * and log (a) the string with a pilcrow inserted at every break, enclosed in
 * lenticular brackets, and (b) the list of break offsets.
 *
 * @param brk  iterator to run; its text is expected to be 'ustr'
 * @param ustr the text being iterated, used for display
 * @param it   test object used for logging and for reporting errors
 */
static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
    static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
    it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));

    // Owned by a LocalArray so that the early error return does not leak it.
    const int32_t capacity = ustr.length();
    LocalArray<int32_t> pos(new int32_t[capacity]);
    int32_t posCount = 0;

    // calculate breaks up front, so we can print out
    // sans any debugging
    for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
        // Check before storing: a string may legitimately have as many
        // breaks as it has code units, which exactly fills the array.
        if(posCount>=capacity) {
            it.errln("brk count exceeds string length!");
            return;
        }
        pos[posCount++] = n;
    }

    // Rebuild the string with a marker at each break.
    UnicodeString out;
    out.append((UChar)CHSTR);
    int32_t prev = 0;
    for(int32_t i=0;i<posCount;i++) {
        int32_t n=pos[i];
        out.append(ustr.tempSubString(prev,n-prev));
        out.append((UChar)PILCROW);
        prev=n;
    }
    out.append(ustr.tempSubString(prev,ustr.length()-prev));
    out.append((UChar)CHEND);
    it.logln(out);

    // Then the raw offsets.
    out.remove();
    for(int32_t i=0;i<posCount;i++) {
        char tmp[100];
        sprintf(tmp,"%d ",pos[i]);
        out.append(UnicodeString(tmp));
    }
    it.logln(out);
}
// Exercises FilteredBreakIteratorBuilder on one English sample text in three
// configurations: no exceptions, "Mr." suppressed, and both "Mr." and "Capt."
// suppressed. Each section builds a fresh builder and a fresh English
// sentence iterator, checks the offsets returned by next(), then rewinds and
// logs all breaks through prtbrks().
void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
UErrorCode status = U_ZERO_ERROR;
LocalPointer<FilteredBreakIteratorBuilder> builder;
LocalPointer<BreakIterator> baseBI;
LocalPointer<BreakIterator> filteredBI;
const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
const UnicodeString ABBR_MR("Mr.");
const UnicodeString ABBR_CAPT("Capt.");
// Section 1: empty builder - the expected offsets include the breaks after "Mr." and "Capt.".
{
logln("Constructing empty builder\n");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Building new BI\n");
// build() is handed the orphaned base iterator; the result is owned by filteredBI.
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
logln("Testing:");
filteredBI->setText(text);
TEST_ASSERT(20 == filteredBI->next()); // Mr.
TEST_ASSERT(84 == filteredBI->next()); // recovered.
TEST_ASSERT(90 == filteredBI->next()); // Capt.
TEST_ASSERT(181 == filteredBI->next()); // Mr.
TEST_ASSERT(278 == filteredBI->next()); // charge.
// Rewind so that prtbrks() walks the breaks from the beginning.
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
// Section 2: "Mr." suppressed - also checks the return values of
// suppressBreakAfter()/unsuppressBreakAfter() for repeated adds and removes.
{
logln("Constructing empty builder\n");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
TEST_ASSERT_SUCCESS(status);
logln("Adding Mr. as an exception\n");
TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
logln("Testing:");
filteredBI->setText(text);
// The breaks after "Mr." (20 and 181 in section 1) are no longer expected.
TEST_ASSERT(84 == filteredBI->next());
TEST_ASSERT(90 == filteredBI->next());// Capt.
TEST_ASSERT(278 == filteredBI->next());
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
// Section 3: "Mr." and "Capt." suppressed - only 84 and 278 are expected.
{
logln("Constructing empty builder\n");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
TEST_ASSERT_SUCCESS(status);
logln("Adding Mr. and Capt as an exception\n");
TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
logln("Testing:");
filteredBI->setText(text);
TEST_ASSERT(84 == filteredBI->next());
TEST_ASSERT(278 == filteredBI->next());
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
#else
// Feature compiled out: only log that the test was skipped.
logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING");
#endif
}
//---------------------------------------------
// runIndexedTest
@ -1210,6 +1348,11 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
#endif
case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
#else
case 15: name="skip"; break;
#endif
default: name = ""; break; // needed to end loop
}
}

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1999-2013 International Business Machines Corporation and
* Copyright (c) 1999-2014 International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/************************************************************************
@ -53,6 +53,7 @@ public:
**/
void TestIteration(void);
/**
* Tests FilteredBreakIteratorBuilder: building sentence break iterators
* with and without suppressed abbreviations.
**/
void TestFilteredBreakIteratorBuilder(void);
/**
* Tests creating RuleBasedBreakIterator from rules strings.