ICU-9379 C: Update BreakIterator createInstance to handle linebreak variant files; update tests

X-SVN-Rev: 37059
This commit is contained in:
Peter Edberg 2015-02-24 22:37:10 +00:00
parent ce09d8a4bc
commit 43f62124cd
3 changed files with 52 additions and 4 deletions

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2014, International Business Machines Corporation and
* Copyright (C) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
@ -383,6 +383,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
}
// -------------------------------------
enum { kLBTypeLenMax = 32 };
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
@ -391,6 +392,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
if (U_FAILURE(status)) {
return NULL;
}
char lbType[kLBTypeLenMax];
BreakIterator *result = NULL;
switch (kind) {
@ -401,7 +403,17 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
result = BreakIterator::buildInstance(loc, "word", kind, status);
break;
case UBRK_LINE:
result = BreakIterator::buildInstance(loc, "line", kind, status);
uprv_strcpy(lbType, "line");
{
char lbKeyValue[kLBTypeLenMax];
UErrorCode kvStatus = U_ZERO_ERROR;
loc.getKeywordValue("lb", lbKeyValue, kLBTypeLenMax, kvStatus);
if (U_SUCCESS(kvStatus) && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
}
}
result = BreakIterator::buildInstance(loc, lbType, kind, status);
break;
case UBRK_SENTENCE:
result = BreakIterator::buildInstance(loc, "sentence", kind, status);

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1999-2014, International Business Machines Corporation and
* Copyright (c) 1999-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/************************************************************************
@ -1175,7 +1175,7 @@ void RBBITest::TestExtended() {
UnicodeString rules;
TestParams tp(status);
RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);
RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@=-]*) *>"), 0, status);
if (U_FAILURE(status)) {
dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));
}

View File

@ -899,3 +899,39 @@ Bangkok)•</data>
<data>•abc/\u05D9 •def•</data>
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
####################################################################################
#
# Test CSS line break variants: strict, normal, loose
#
####################################################################################
<locale ja@lb=strict>
<line>
# •no brk before 3063 •no brk before 301C•no brk btw 2026 •no brk before FF01•
<data>•\u3084\u3063•\u3071•\u308A\u0020•\u0031\u301C\u0020•\u2026\u2026\u0020•\u30A2\uFF01\u0020•</data>
<locale ja@lb=normal>
<line>
# •brk OK before 3063 •brk OK before 301C •no brk btw 2026 •no brk before FF01•
<data>•\u3084•\u3063•\u3071•\u308A\u0020•\u0031•\u301C\u0020•\u2026\u2026\u0020•\u30A2\uFF01\u0020•</data>
<locale ja@lb=loose>
<line>
# •brk OK before 3063 •brk OK before 301C •brk OK btw 2026 •brk OK before FF01•
<data>•\u3084•\u3063•\u3071•\u308A\u0020•\u0031•\u301C\u0020•\u2026•\u2026\u0020•\u30A2•\uFF01\u0020•</data>
<locale en@lb=strict>
<line>
# •no brk before 3063 •no brk before 301C•no brk btw 2026 •no brk before FF01•
<data>•\u3084\u3063•\u3071•\u308A\u0020•\u0031\u301C\u0020•\u2026\u2026\u0020•\u30A2\uFF01\u0020•</data>
<locale en@lb=normal>
<line>
# •brk OK before 3063 •no brk before 301C •no brk btw 2026 •no brk before FF01•
<data>•\u3084•\u3063•\u3071•\u308A\u0020•\u0031\u301C\u0020•\u2026\u2026\u0020•\u30A2\uFF01\u0020•</data>
<locale en@lb=loose>
<line>
# •brk OK before 3063 •no brk before 301C •brk OK btw 2026 •no brk before FF01•
<data>•\u3084•\u3063•\u3071•\u308A\u0020•\u0031\u301C\u0020•\u2026•\u2026\u0020•\u30A2\uFF01\u0020•</data>