ICU-10286 load suppression data from locales. For now, only test English because it is already in.

X-SVN-Rev: 35359
This commit is contained in:
Steven R. Loomis 2014-03-06 12:51:31 +00:00
parent 7bc88a1a04
commit f4fe9b638e
2 changed files with 101 additions and 4 deletions

View File

@ -14,6 +14,8 @@
#include <set>
#include <string>
#include <functional>
#include "uresimp.h"
#include "ubrkimpl.h"
U_NAMESPACE_BEGIN
@ -230,8 +232,25 @@ SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
/**
 * Constructs a builder pre-loaded with the sentence-break suppression
 * strings stored in the break-iterator resource data for a locale.
 *
 * Opens the U_ICUDATA_BRKITR bundle for fromLocale.getBaseName(), looks up
 * the "exceptions" table and, inside it, "SentenceBreak" (both with
 * fallback), then hands every string found there to suppressBreakAfter().
 *
 * @param fromLocale locale whose suppression data should be loaded.
 * @param status     in/out error code. If it already indicates failure on
 *                   entry, nothing is loaded. If the bundle or either table
 *                   cannot be obtained, the failure is left in status and
 *                   the builder is left empty. An iteration error other
 *                   than U_INDEX_OUTOFBOUNDS_ERROR is copied into status.
 */
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
  : fSet()
{
  // Respect an error the caller already has; do not overwrite it and do not
  // touch the resource bundles.
  if(U_FAILURE(status)) {
    return;
  }

  LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status));
  LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status));
  LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status));
  if(U_FAILURE(status)) {
    return; // leaves the builder empty, if you try to use it.
  }

  // Iteration errors are tracked in subStatus so that running off the end of
  // the table (U_INDEX_OUTOFBOUNDS_ERROR) is not reported to the caller.
  LocalUResourceBundlePointer strs;
  UErrorCode subStatus = status;
  do {
    // The previously returned bundle is handed back as the fill-in argument.
    strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
    if(strs.isValid() && U_SUCCESS(subStatus)) {
      UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
      suppressBreakAfter(str, status); // load the string
    }
    // Also stop as soon as reading or storing a string has failed.
  } while (strs.isValid() && U_SUCCESS(subStatus) && U_SUCCESS(status));

  if(U_FAILURE(subStatus) && subStatus!=U_INDEX_OUTOFBOUNDS_ERROR && U_SUCCESS(status)) {
    status = subStatus;
  }
}
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder()
@ -339,7 +358,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
if(revCount>0) {
backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
//if(debug) u_printf("Error %s building backwards\n", u_errorName(status));
printf("Error %s building backwards\n", u_errorName(status));
return NULL;
}
}
@ -347,7 +366,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
if(fwdCount>0) {
forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
//if(debug) u_printf("Error %s building forwards\n", u_errorName(status));
printf("Error %s building forwards\n", u_errorName(status));
return NULL;
}
}

View File

@ -1310,6 +1310,84 @@ void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
prtbrks(filteredBI.getAlias(), text, *this);
}
{
logln("Constructing English builder\n");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("unsuppressing 'Capt'");
TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
if(filteredBI.isValid()) {
logln("Testing:");
filteredBI->setText(text);
TEST_ASSERT(84 == filteredBI->next());
TEST_ASSERT(90 == filteredBI->next());
TEST_ASSERT(278 == filteredBI->next());
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
}
{
logln("Constructing English builder\n");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
if(filteredBI.isValid()) {
logln("Testing:");
filteredBI->setText(text);
TEST_ASSERT(84 == filteredBI->next());
TEST_ASSERT(278 == filteredBI->next());
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
}
#if 0
// reenable once french is in
{
logln("Constructing French builder");
builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
TEST_ASSERT_SUCCESS(status);
logln("Constructing base BI\n");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
TEST_ASSERT_SUCCESS(status);
logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
TEST_ASSERT_SUCCESS(status);
if(filteredBI.isValid()) {
logln("Testing:");
filteredBI->setText(text);
TEST_ASSERT(20 == filteredBI->next());
TEST_ASSERT(84 == filteredBI->next());
filteredBI->first();
prtbrks(filteredBI.getAlias(), text, *this);
}
}
#endif
#else
logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING");
#endif