ICU-21016 Special handling of Spanish and Hebrew list format until CLDR get the data
See #1043
This commit is contained in:
parent
fcb9b5579a
commit
f6622ab2f1
@ -21,6 +21,7 @@
|
||||
#include "unicode/listformatter.h"
|
||||
#include "unicode/simpleformatter.h"
|
||||
#include "unicode/ulistformatter.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "fphdlimp.h"
|
||||
#include "mutex.h"
|
||||
#include "hash.h"
|
||||
@ -35,34 +36,203 @@
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct ListFormatInternal : public UMemory {
|
||||
namespace {
|
||||
|
||||
class PatternHandler : public UObject {
|
||||
public:
|
||||
PatternHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
|
||||
twoPattern(two, 2, 2, errorCode),
|
||||
endPattern(end, 2, 2, errorCode) { }
|
||||
|
||||
PatternHandler(const SimpleFormatter& two, const SimpleFormatter& end) :
|
||||
twoPattern(two),
|
||||
endPattern(end) { }
|
||||
|
||||
virtual ~PatternHandler();
|
||||
|
||||
virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); }
|
||||
|
||||
virtual const SimpleFormatter& getTwoPattern(const UnicodeString&) const {
|
||||
return twoPattern;
|
||||
}
|
||||
|
||||
virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const {
|
||||
return endPattern;
|
||||
}
|
||||
|
||||
protected:
|
||||
SimpleFormatter twoPattern;
|
||||
SimpleFormatter endPattern;
|
||||
};
|
||||
|
||||
PatternHandler::~PatternHandler() {
|
||||
}
|
||||
|
||||
class ContextualHandler : public PatternHandler {
|
||||
public:
|
||||
ContextualHandler(bool (*testFunc)(const UnicodeString& text),
|
||||
const UnicodeString& thenTwo,
|
||||
const UnicodeString& elseTwo,
|
||||
const UnicodeString& thenEnd,
|
||||
const UnicodeString& elseEnd,
|
||||
UErrorCode& errorCode) :
|
||||
PatternHandler(elseTwo, elseEnd, errorCode),
|
||||
test(testFunc),
|
||||
thenTwoPattern(thenTwo, 2, 2, errorCode),
|
||||
thenEndPattern(thenEnd, 2, 2, errorCode) { }
|
||||
|
||||
ContextualHandler(bool (*testFunc)(const UnicodeString& text),
|
||||
const SimpleFormatter& thenTwo, SimpleFormatter elseTwo,
|
||||
const SimpleFormatter& thenEnd, SimpleFormatter elseEnd) :
|
||||
PatternHandler(elseTwo, elseEnd),
|
||||
test(testFunc),
|
||||
thenTwoPattern(thenTwo),
|
||||
thenEndPattern(thenEnd) { }
|
||||
|
||||
~ContextualHandler() override;
|
||||
|
||||
PatternHandler* clone() const override {
|
||||
return new ContextualHandler(
|
||||
test, thenTwoPattern, twoPattern, thenEndPattern, endPattern);
|
||||
}
|
||||
|
||||
const SimpleFormatter& getTwoPattern(
|
||||
const UnicodeString& text) const override {
|
||||
return (test)(text) ? thenTwoPattern : twoPattern;
|
||||
}
|
||||
|
||||
const SimpleFormatter& getEndPattern(
|
||||
const UnicodeString& text) const override {
|
||||
return (test)(text) ? thenEndPattern : endPattern;
|
||||
}
|
||||
|
||||
private:
|
||||
bool (*test)(const UnicodeString&);
|
||||
SimpleFormatter thenTwoPattern;
|
||||
SimpleFormatter thenEndPattern;
|
||||
};
|
||||
|
||||
ContextualHandler::~ContextualHandler() {
|
||||
}
|
||||
|
||||
// Uncompiled list patterns that receive special handling.
// The pointers themselves are const so these file-local globals cannot be
// reseated by accident.

// Spanish "and" pattern and the variant that replaces "y" with "e".
static const char16_t * const spanishY = u"{0} y {1}";
static const char16_t * const spanishE = u"{0} e {1}";
// Spanish "or" pattern and the variant that replaces "o" with "u".
static const char16_t * const spanishO = u"{0} o {1}";
static const char16_t * const spanishU = u"{0} u {1}";
// Hebrew pattern with VAV (U+05D5) attached to the next item, and the variant
// that inserts a dash after the VAV.
static const char16_t * const hebrewVav = u"{0} \u05D5{1}";
static const char16_t * const hebrewVavDash = u"{0} \u05D5-{1}";
|
||||
|
||||
// Condiction to change to e.
|
||||
// Starts with "hi" or "i" but not with "hie" nor "hia"
|
||||
static bool shouldChangeToE(const UnicodeString& text) {
|
||||
int32_t len = text.length();
|
||||
if (len == 0) { return false; }
|
||||
// Case insensitive match hi but not hie nor hia.
|
||||
if ((text[0] == u'h' || text[0] == u'H') &&
|
||||
((len > 1) && (text[1] == u'i' || text[1] == u'I')) &&
|
||||
((len == 2) || !(text[2] == u'a' || text[2] == u'A' || text[2] == u'e' || text[2] == u'E'))) {
|
||||
return true;
|
||||
}
|
||||
// Case insensitive for "start with i"
|
||||
if (text[0] == u'i' || text[0] == u'I') { return true; }
|
||||
return false;
|
||||
}
|
||||
|
||||
// Condiction to change to u.
|
||||
// Starts with "o", "ho", and "8". Also "11" by itself.
|
||||
// re: ^((o|ho|8).*|11)$
|
||||
static bool shouldChangeToU(const UnicodeString& text) {
|
||||
int32_t len = text.length();
|
||||
if (len == 0) { return false; }
|
||||
// Case insensitive match o.* and 8.*
|
||||
if (text[0] == u'o' || text[0] == u'O' || text[0] == u'8') { return true; }
|
||||
// Case insensitive match ho.*
|
||||
if ((text[0] == u'h' || text[0] == u'H') &&
|
||||
((len > 1) && (text[1] == 'o' || text[1] == u'O'))) {
|
||||
return true;
|
||||
}
|
||||
// match "^11$" and "^11 .*"
|
||||
if ((len >= 2) && text[0] == u'1' && text[1] == u'1' && (len == 2 || text[2] == u' ')) { return true; }
|
||||
return false;
|
||||
}
|
||||
|
||||
// Condiction to change to VAV follow by a dash.
|
||||
// Starts with non Hebrew letter.
|
||||
static bool shouldChangeToVavDash(const UnicodeString& text) {
|
||||
if (text.isEmpty()) { return false; }
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
return uscript_getScript(text.char32At(0), &status) != USCRIPT_HEBREW;
|
||||
}
|
||||
|
||||
// Creates the pattern handler for the given language code.
//
// For Spanish ("es") and Hebrew ("he" / "iw"), when the "two" and/or "end"
// pattern equals one of the known base patterns, a ContextualHandler is
// returned that swaps in the alternate pattern for items accepted by the
// matching predicate. In every other case a plain PatternHandler is returned.
// The result is allocated with new; pattern compilation errors are reported
// through status.
PatternHandler* createPatternHandler(
        const char* lang, const UnicodeString& two, const UnicodeString& end,
        UErrorCode& status) {
    // One contextual substitution: where a locale pattern equals `base`, use
    // `alternate` instead for items for which `test` returns true.
    struct Rule {
        const char16_t* base;
        const char16_t* alternate;
        bool (*test)(const UnicodeString&);
    };
    // Rules are tried in order; the first one whose base pattern matches wins.
    const Rule spanishRules[] = {
        { spanishY, spanishE, shouldChangeToE },
        { spanishO, spanishU, shouldChangeToU },
    };
    const Rule hebrewRules[] = {
        { hebrewVav, hebrewVavDash, shouldChangeToVavDash },
    };

    const Rule* rules = nullptr;
    int32_t ruleCount = 0;
    if (uprv_strcmp(lang, "es") == 0) {
        // Spanish
        rules = spanishRules;
        ruleCount = 2;
    } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) {
        // Hebrew
        rules = hebrewRules;
        ruleCount = 1;
    }

    for (int32_t i = 0; i < ruleCount; ++i) {
        const Rule& rule = rules[i];
        UnicodeString basePattern(TRUE, rule.base, -1);
        const bool twoMatches = (two == basePattern);
        const bool endMatches = (end == basePattern);
        if (!twoMatches && !endMatches) {
            continue;
        }
        UnicodeString replacement(TRUE, rule.alternate, -1);
        // Only the pattern(s) equal to the base pattern get the replacement;
        // the other one is used unchanged for both the "then" and "else" case.
        return new ContextualHandler(
            rule.test,
            twoMatches ? replacement : two, two,
            endMatches ? replacement : end, end, status);
    }
    return new PatternHandler(two, end, status);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
struct ListFormatInternal : public UMemory {
|
||||
SimpleFormatter startPattern;
|
||||
SimpleFormatter middlePattern;
|
||||
SimpleFormatter endPattern;
|
||||
LocalPointer<PatternHandler> patternHandler;
|
||||
|
||||
ListFormatInternal(
|
||||
const UnicodeString& two,
|
||||
const UnicodeString& start,
|
||||
const UnicodeString& middle,
|
||||
const UnicodeString& end,
|
||||
const Locale& locale,
|
||||
UErrorCode &errorCode) :
|
||||
twoPattern(two, 2, 2, errorCode),
|
||||
startPattern(start, 2, 2, errorCode),
|
||||
middlePattern(middle, 2, 2, errorCode),
|
||||
endPattern(end, 2, 2, errorCode) {}
|
||||
patternHandler(createPatternHandler(locale.getLanguage(), two, end, errorCode), errorCode) { }
|
||||
|
||||
ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
|
||||
twoPattern(data.twoPattern, errorCode),
|
||||
startPattern(data.startPattern, errorCode),
|
||||
middlePattern(data.middlePattern, errorCode),
|
||||
endPattern(data.endPattern, errorCode) { }
|
||||
patternHandler(createPatternHandler(
|
||||
data.locale.getLanguage(), data.twoPattern, data.endPattern, errorCode), errorCode) { }
|
||||
|
||||
ListFormatInternal(const ListFormatInternal &other) :
|
||||
twoPattern(other.twoPattern),
|
||||
startPattern(other.startPattern),
|
||||
middlePattern(other.middlePattern),
|
||||
endPattern(other.endPattern) { }
|
||||
patternHandler(other.patternHandler->clone()) { }
|
||||
};
|
||||
|
||||
|
||||
@ -322,7 +492,8 @@ ListFormatInternal* ListFormatter::loadListFormatInternal(
|
||||
errorCode = U_MISSING_RESOURCE_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode);
|
||||
|
||||
ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, locale, errorCode);
|
||||
if (result == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
@ -524,16 +695,29 @@ UnicodeString& ListFormatter::format_(
|
||||
// for n items, there are 2 * (n + 1) boundary including 0 and the upper
|
||||
// edge.
|
||||
MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1): 0);
|
||||
joinStringsAndReplace(
|
||||
nItems == 2 ? data->twoPattern : data->startPattern,
|
||||
result,
|
||||
items[1],
|
||||
result,
|
||||
index == 1,
|
||||
offset,
|
||||
&offsetFirst,
|
||||
&offsetSecond,
|
||||
errorCode);
|
||||
if (nItems == 2) {
|
||||
joinStringsAndReplace(
|
||||
data->patternHandler->getTwoPattern(items[1]),
|
||||
result,
|
||||
items[1],
|
||||
result,
|
||||
index == 1,
|
||||
offset,
|
||||
&offsetFirst,
|
||||
&offsetSecond,
|
||||
errorCode);
|
||||
} else {
|
||||
joinStringsAndReplace(
|
||||
data->startPattern,
|
||||
result,
|
||||
items[1],
|
||||
result,
|
||||
index == 1,
|
||||
offset,
|
||||
&offsetFirst,
|
||||
&offsetSecond,
|
||||
errorCode);
|
||||
}
|
||||
if (handler != nullptr) {
|
||||
offsets[0] = 0;
|
||||
prefixLength += offsetFirst;
|
||||
@ -557,7 +741,7 @@ UnicodeString& ListFormatter::format_(
|
||||
}
|
||||
}
|
||||
joinStringsAndReplace(
|
||||
data->endPattern,
|
||||
data->patternHandler->getEndPattern(items[nItems - 1]),
|
||||
result,
|
||||
items[nItems - 1],
|
||||
result,
|
||||
@ -612,5 +796,5 @@ UnicodeString& ListFormatter::format_(
|
||||
#endif
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -50,9 +50,11 @@ struct ListFormatData : public UMemory {
|
||||
UnicodeString startPattern;
|
||||
UnicodeString middlePattern;
|
||||
UnicodeString endPattern;
|
||||
Locale locale;
|
||||
|
||||
ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end) :
|
||||
twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end) {}
|
||||
ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end,
|
||||
const Locale& loc) :
|
||||
twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end), locale(loc) {}
|
||||
};
|
||||
/** \endcond */
|
||||
|
||||
|
@ -945,7 +945,7 @@ group: dayperiodrules
|
||||
group: listformatter
|
||||
listformatter.o ulistformatter.o
|
||||
deps
|
||||
resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl
|
||||
uchar resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl
|
||||
|
||||
group: double_conversion
|
||||
double-conversion-bignum.o double-conversion-double-to-string.o
|
||||
|
@ -47,6 +47,7 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
|
||||
TESTCASE_AUTO(TestDifferentStyles);
|
||||
TESTCASE_AUTO(TestBadStylesFail);
|
||||
TESTCASE_AUTO(TestCreateStyled);
|
||||
TESTCASE_AUTO(TestContextual);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -473,8 +474,9 @@ void ListFormatterTest::TestOutOfOrderPatterns() {
|
||||
};
|
||||
|
||||
IcuTestErrorCode errorCode(*this, "TestOutOfOrderPatterns()");
|
||||
Locale locale("en");
|
||||
ListFormatData data("{1} after {0}", "{1} after the first {0}",
|
||||
"{1} after {0}", "{1} in the last after {0}");
|
||||
"{1} after {0}", "{1} in the last after {0}", locale);
|
||||
ListFormatter formatter(data, errorCode);
|
||||
|
||||
UnicodeString input1[] = {one};
|
||||
@ -622,4 +624,80 @@ void ListFormatterTest::TestCreateStyled() {
|
||||
}
|
||||
}
|
||||
|
||||
void ListFormatterTest::TestContextual() {
|
||||
IcuTestErrorCode status(*this, "TestContextual");
|
||||
std::vector<std::string> es = { "es", "es_419" , "es_PY", "es_DO" };
|
||||
std::vector<std::string> he = { "he", "he_IL", "iw", "iw_IL" };
|
||||
UListFormatterWidth widths [] = {
|
||||
ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW
|
||||
};
|
||||
struct TestCase {
|
||||
std::vector<std::string> locales;
|
||||
UListFormatterType type;
|
||||
const char16_t* expected;
|
||||
const char16_t* data1;
|
||||
const char16_t* data2;
|
||||
const char16_t* data3;
|
||||
} cases[] = {
|
||||
{ es, ULISTFMT_TYPE_AND, u"fascinante e increíblemente",
|
||||
u"fascinante", u"increíblemente", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"Comunicaciones Industriales e IIoT",
|
||||
u"Comunicaciones Industriales", u"IIoT", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"España e Italia", u"España", u"Italia", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"hijas intrépidas e hijos solidarios",
|
||||
u"hijas intrépidas", u"hijos solidarios", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"a un hombre e hirieron a otro",
|
||||
u"a un hombre", u"hirieron a otro", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"hija e hijo", u"hija", u"hijo", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"esposa, hija e hijo", u"esposa", u"hija", u"hijo" },
|
||||
// For 'y' exception
|
||||
{ es, ULISTFMT_TYPE_AND, u"oro y hierro", u"oro", u"hierro", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"agua y hielo", u"agua", u"hielo", nullptr },
|
||||
{ es, ULISTFMT_TYPE_AND, u"colágeno y hialurónico", u"colágeno", u"hialurónico", nullptr },
|
||||
|
||||
{ es, ULISTFMT_TYPE_OR, u"desierto u oasis", u"desierto", u"oasis", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"oasis, desierto u océano", u"oasis", u"desierto", u"océano" },
|
||||
{ es, ULISTFMT_TYPE_OR, u"7 u 8", u"7", u"8", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"7 u 80", u"7", u"80", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"7 u 800", u"7", u"800", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"6, 7 u 8", u"6", u"7", u"8" },
|
||||
{ es, ULISTFMT_TYPE_OR, u"10 u 11", u"10", u"11", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"10 o 111", u"10", u"111", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"10 o 11.2", u"10", u"11.2", nullptr },
|
||||
{ es, ULISTFMT_TYPE_OR, u"9, 10 u 11", u"9", u"10", u"11" },
|
||||
|
||||
{ he, ULISTFMT_TYPE_AND, u"a, b ו-c", u"a", u"b", u"c" },
|
||||
{ he, ULISTFMT_TYPE_AND, u"a ו-b", u"a", u"b", nullptr },
|
||||
{ he, ULISTFMT_TYPE_AND, u"1, 2 ו-3", u"1", u"2", u"3" },
|
||||
{ he, ULISTFMT_TYPE_AND, u"1 ו-2", u"1", u"2", nullptr },
|
||||
{ he, ULISTFMT_TYPE_AND, u"אהבה ומקווה", u"אהבה", u"מקווה", nullptr },
|
||||
{ he, ULISTFMT_TYPE_AND, u"אהבה, מקווה ואמונה", u"אהבה", u"מקווה", u"אמונה" },
|
||||
};
|
||||
for (auto width : widths) {
|
||||
for (auto cas : cases) {
|
||||
for (auto locale : cas.locales) {
|
||||
LocalPointer<ListFormatter> fmt(
|
||||
ListFormatter::createInstance(locale.c_str(), cas.type, width, status),
|
||||
status);
|
||||
if (status.errIfFailureAndReset()) {
|
||||
continue;
|
||||
}
|
||||
UnicodeString message = UnicodeString(u"TestContextual loc=")
|
||||
+ locale.c_str() + u" type="
|
||||
+ Int64ToUnicodeString(cas.type) + u" width="
|
||||
+ Int64ToUnicodeString(width);
|
||||
if (cas.data3 == nullptr) {
|
||||
const UnicodeString inputs2[] = { cas.data1, cas.data2 };
|
||||
FormattedList result = fmt->formatStringsToValue(inputs2, UPRV_LENGTHOF(inputs2), status);
|
||||
assertEquals(message, cas.expected, result.toTempString(status));
|
||||
} else {
|
||||
const UnicodeString inputs3[] = { cas.data1, cas.data2, cas.data3 };
|
||||
FormattedList result = fmt->formatStringsToValue(inputs3, UPRV_LENGTHOF(inputs3), status);
|
||||
assertEquals(message, cas.expected, result.toTempString(status));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
@ -54,6 +54,7 @@ class ListFormatterTest : public IntlTestWithFieldPosition {
|
||||
void TestDifferentStyles();
|
||||
void TestBadStylesFail();
|
||||
void TestCreateStyled();
|
||||
void TestContextual();
|
||||
|
||||
private:
|
||||
void CheckFormatting(
|
||||
|
@ -16,6 +16,7 @@ import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
import com.ibm.icu.impl.FormattedValueStringBuilderImpl;
|
||||
@ -39,12 +40,16 @@ import com.ibm.icu.util.UResourceBundle;
|
||||
*/
|
||||
final public class ListFormatter {
|
||||
// Compiled SimpleFormatter patterns.
|
||||
private final String two;
|
||||
private final String start;
|
||||
private final String middle;
|
||||
private final String end;
|
||||
private final ULocale locale;
|
||||
|
||||
private interface PatternHandler {
|
||||
public String getTwoPattern(String text);
|
||||
public String getEndPattern(String text);
|
||||
}
|
||||
private final PatternHandler patternHandler;
|
||||
|
||||
/**
|
||||
* Indicates the style of Listformatter
|
||||
* TODO(ICU-20888): Remove this in ICU 68.
|
||||
@ -371,11 +376,10 @@ final public class ListFormatter {
|
||||
}
|
||||
|
||||
private ListFormatter(String two, String start, String middle, String end, ULocale locale) {
|
||||
this.two = two;
|
||||
this.start = start;
|
||||
this.middle = middle;
|
||||
this.end = end;
|
||||
this.locale = locale;
|
||||
this.patternHandler = createPatternHandler(two, end);
|
||||
}
|
||||
|
||||
private static String compilePattern(String pattern, StringBuilder sb) {
|
||||
@ -526,14 +530,131 @@ final public class ListFormatter {
|
||||
case 1:
|
||||
return new FormattedListBuilder(it.next(), needsFields);
|
||||
case 2:
|
||||
return new FormattedListBuilder(it.next(), needsFields).append(two, it.next(), 1);
|
||||
Object first = it.next();
|
||||
Object second = it.next();
|
||||
return new FormattedListBuilder(first, needsFields)
|
||||
.append(patternHandler.getTwoPattern(String.valueOf(second)), second, 1);
|
||||
}
|
||||
FormattedListBuilder builder = new FormattedListBuilder(it.next(), needsFields);
|
||||
builder.append(start, it.next(), 1);
|
||||
for (int idx = 2; idx < count - 1; ++idx) {
|
||||
builder.append(middle, it.next(), idx);
|
||||
}
|
||||
return builder.append(end, it.next(), count - 1);
|
||||
Object last = it.next();
|
||||
return builder.append(patternHandler.getEndPattern(String.valueOf(last)), last, count - 1);
|
||||
}
|
||||
|
||||
// A static handler just returns the pattern without considering the input text.
|
||||
private class StaticHandler implements PatternHandler {
|
||||
StaticHandler(String two, String end) {
|
||||
twoPattern = two;
|
||||
endPattern = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTwoPattern(String text) { return twoPattern; }
|
||||
|
||||
@Override
|
||||
public String getEndPattern(String text) { return endPattern; }
|
||||
|
||||
private final String twoPattern;
|
||||
private final String endPattern;
|
||||
}
|
||||
|
||||
// A contextual handler returns one of the two patterns depending on whether the text matched the regexp.
|
||||
private class ContextualHandler implements PatternHandler {
|
||||
ContextualHandler(Pattern regexp, String thenTwo, String elseTwo, String thenEnd, String elseEnd) {
|
||||
this.regexp = regexp;
|
||||
thenTwoPattern = thenTwo;
|
||||
elseTwoPattern = elseTwo;
|
||||
thenEndPattern = thenEnd;
|
||||
elseEndPattern = elseEnd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTwoPattern(String text) {
|
||||
if(regexp.matcher(text).matches()) {
|
||||
return thenTwoPattern;
|
||||
} else {
|
||||
return elseTwoPattern;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getEndPattern(String text) {
|
||||
if(regexp.matcher(text).matches()) {
|
||||
return thenEndPattern;
|
||||
} else {
|
||||
return elseEndPattern;
|
||||
}
|
||||
}
|
||||
|
||||
private final Pattern regexp;
|
||||
private final String thenTwoPattern;
|
||||
private final String elseTwoPattern;
|
||||
private final String thenEndPattern;
|
||||
private final String elseEndPattern;
|
||||
|
||||
}
|
||||
|
||||
// Pattern in the ICU data in which "y" might need to be replaced by "e".
|
||||
private static final String compiledY = compilePattern("{0} y {1}", new StringBuilder());
|
||||
|
||||
// The new pattern, with "y" replaced by "e".
|
||||
private static final String compiledE = compilePattern("{0} e {1}", new StringBuilder());
|
||||
|
||||
// Pattern in the ICU data in which "o" might need to be replaced by "u".
|
||||
private static final String compiledO = compilePattern("{0} o {1}", new StringBuilder());
|
||||
|
||||
// The new pattern, with "o" replaced by "u".
|
||||
private static final String compiledU = compilePattern("{0} u {1}", new StringBuilder());
|
||||
|
||||
// Condition to change to e.
|
||||
// Starts with "hi" or "i" but not with "hie" nor "hia".
|
||||
private static final Pattern changeToE = Pattern.compile("(i.*|hi|hi[^ae].*)", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
// Condition to change to u.
|
||||
// Starts with "o", "ho", and "8". Also "11" by itself.
|
||||
private static final Pattern changeToU = Pattern.compile("((o|ho|8).*|11)", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
// Pattern in the ICU Data which might need to add a DASH after VAV.
|
||||
private static final String compiledVav = compilePattern("{0} \u05D5{1}", new StringBuilder());
|
||||
|
||||
// Pattern to add a DASH after VAV.
|
||||
private static final String compiledVavDash = compilePattern("{0} \u05D5-{1}", new StringBuilder());
|
||||
|
||||
// Condition to change to VAV followed by a dash.
|
||||
// Starts with non Hebrew letter.
|
||||
private static final Pattern changeToVavDash = Pattern.compile("^[\\P{InHebrew}].*$");
|
||||
|
||||
// A factory function to create function based on locale
|
||||
// Handle specal case of Spanish and Hebrew
|
||||
private PatternHandler createPatternHandler(String two, String end) {
|
||||
if (this.locale != null) {
|
||||
String language = this.locale.getLanguage();
|
||||
if (language.equals("es")) {
|
||||
boolean twoIsY = two.equals(compiledY);
|
||||
boolean endIsY = end.equals(compiledY);
|
||||
if (twoIsY || endIsY) {
|
||||
return new ContextualHandler(
|
||||
changeToE, twoIsY ? compiledE : two, two, endIsY ? compiledE : end, end);
|
||||
}
|
||||
boolean twoIsO = two.equals(compiledO);
|
||||
boolean endIsO = end.equals(compiledO);
|
||||
if (twoIsO || endIsO) {
|
||||
return new ContextualHandler(
|
||||
changeToU, twoIsO ? compiledU : two, two, endIsO ? compiledU : end, end);
|
||||
}
|
||||
} else if (language.equals("he") || language.equals("iw")) {
|
||||
boolean twoIsVav = two.equals(compiledVav);
|
||||
boolean endIsVav = end.equals(compiledVav);
|
||||
if (twoIsVav || endIsVav) {
|
||||
return new ContextualHandler(changeToVavDash,
|
||||
twoIsVav ? compiledVavDash : two, two, endIsVav ? compiledVavDash : end, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new StaticHandler(two, end);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -10,6 +10,7 @@ package com.ibm.icu.dev.test.format;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.Test;
|
||||
@ -292,4 +293,65 @@ public class ListFormatterTest extends TestFmwk {
|
||||
assertEquals(message, expected, result);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestContextual() {
|
||||
String [] es = { "es", "es_419", "es_PY", "es_DO" };
|
||||
String [] he = { "he", "he_IL", "iw", "iw_IL" };
|
||||
Width[] widths = {Width.WIDE, Width.SHORT, Width.NARROW};
|
||||
Object[][] cases = {
|
||||
{ es, Type.AND, "fascinante e incre\u00EDblemente", "fascinante", "incre\u00EDblemente"},
|
||||
{ es, Type.AND, "Comunicaciones Industriales e IIoT", "Comunicaciones Industriales", "IIoT"},
|
||||
{ es, Type.AND, "Espa\u00F1a e Italia", "Espa\u00F1a", "Italia"},
|
||||
{ es, Type.AND, "hijas intr\u00E9pidas e hijos solidarios", "hijas intr\u00E9pidas", "hijos solidarios"},
|
||||
{ es, Type.AND, "a un hombre e hirieron a otro", "a un hombre", "hirieron a otro"},
|
||||
{ es, Type.AND, "hija e hijo", "hija", "hijo"},
|
||||
{ es, Type.AND, "esposa, hija e hijo", "esposa", "hija", "hijo"},
|
||||
// For 'y' exception
|
||||
{ es, Type.AND, "oro y hierro", "oro", "hierro"},
|
||||
{ es, Type.AND, "agua y hielo", "agua", "hielo"},
|
||||
{ es, Type.AND, "col\u00E1geno y hialur\u00F3nico", "col\u00E1geno", "hialur\u00F3nico"},
|
||||
|
||||
{ es, Type.OR, "desierto u oasis", "desierto", "oasis"},
|
||||
{ es, Type.OR, "oasis, desierto u océano", "oasis", "desierto", "océano"},
|
||||
{ es, Type.OR, "7 u 8", "7", "8"},
|
||||
{ es, Type.OR, "7 u 80", "7", "80"},
|
||||
{ es, Type.OR, "7 u 800", "7", "800"},
|
||||
{ es, Type.OR, "6, 7 u 8", "6", "7", "8"},
|
||||
{ es, Type.OR, "10 u 11", "10", "11"},
|
||||
{ es, Type.OR, "10 o 111", "10", "111"},
|
||||
{ es, Type.OR, "10 o 11.2", "10", "11.2"},
|
||||
{ es, Type.OR, "9, 10 u 11", "9", "10", "11"},
|
||||
|
||||
{ he, Type.AND, "a, b \u05D5-c", "a", "b", "c" },
|
||||
{ he, Type.AND, "a \u05D5-b", "a", "b" },
|
||||
{ he, Type.AND, "1, 2 \u05D5-3", "1", "2", "3" },
|
||||
{ he, Type.AND, "1 \u05D5-2", "1", "2" },
|
||||
{ he, Type.AND, "\u05D0\u05D4\u05D1\u05D4 \u05D5\u05DE\u05E7\u05D5\u05D5\u05D4",
|
||||
"\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4" },
|
||||
{ he, Type.AND, "\u05D0\u05D4\u05D1\u05D4, \u05DE\u05E7\u05D5\u05D5\u05D4 \u05D5\u05D0\u05DE\u05D5\u05E0\u05D4",
|
||||
"\u05D0\u05D4\u05D1\u05D4", "\u05DE\u05E7\u05D5\u05D5\u05D4", "\u05D0\u05DE\u05D5\u05E0\u05D4" },
|
||||
};
|
||||
for (Width width : widths) {
|
||||
for (Object[] cas : cases) {
|
||||
String [] locales = (String[]) cas[0];
|
||||
Type type = (Type) cas[1];
|
||||
String expected = (String) cas[2];
|
||||
for (String locale : locales) {
|
||||
ULocale uloc = new ULocale(locale);
|
||||
List inputs = Arrays.asList(cas).subList(3, cas.length);
|
||||
ListFormatter fmt = ListFormatter.getInstance(uloc, type, width);
|
||||
String message = "TestContextual uloc="
|
||||
+ uloc + " type="
|
||||
+ type + " width="
|
||||
+ width + "data=";
|
||||
for (Object i : inputs) {
|
||||
message += i + ",";
|
||||
}
|
||||
String result = fmt.format(inputs);
|
||||
assertEquals(message, expected, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user