ICU-20673 Allow built-in translit ID w/o data.

See #958
This commit is contained in:
Frank Yung-Fong Tang 2020-01-17 01:57:49 +00:00
parent c98af924a1
commit 21df05234d
5 changed files with 107 additions and 17 deletions

View File

@ -57,7 +57,11 @@ jobs:
vmImage: 'Ubuntu 16.04'
steps:
- script: |
cd icu4c/source && ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && make -j2
cd icu4c/source && \
ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && \
make -j2 tests && \
\[ ! -d data/out/build/icudt66l/translit \] && \
(cd test/intltest && LD_LIBRARY_PATH=../../lib:../../tools/ctestfw ./intltest translit/TransliteratorTest/TestBasicTransliteratorEvenWithoutData)
displayName: 'Build with Data Filter'
env:
CC: clang

View File

@ -8,10 +8,12 @@
]
},
// Test mixed feature filter and resource filter
// Exclude translit data so we can run the test for ICU-20673
"featureFilters": {
"misc": {
"whitelist": ["supplementalData"]
}
},
"translit": "exclude"
},
"resourceFilters": [
{
@ -27,8 +29,8 @@
"directory": "$SRC",
"replacements": [
{
"src": "translit/Zawgyi_my.txt",
"dest": "translit/Zawgyi_my.txt"
"src": "brkitr/rules/line.txt",
"dest": "brkitr/rules/line_normal.txt"
},
"misc/dayPeriods.txt"
]

View File

@ -1508,28 +1508,35 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) {
*/
//static const char translit_index[] = "translit_index";
UErrorCode lstatus = U_ZERO_ERROR;
UResourceBundle *bundle, *transIDs, *colBund;
bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status);
transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &lstatus);
transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &lstatus);
const UnicodeString T_PART = UNICODE_STRING_SIMPLE("-t-");
int32_t row, maxRows;
if (U_SUCCESS(status)) {
if (lstatus == U_MEMORY_ALLOCATION_ERROR) {
delete registry;
registry = nullptr;
status = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
if (U_SUCCESS(lstatus)) {
maxRows = ures_getSize(transIDs);
for (row = 0; row < maxRows; row++) {
colBund = ures_getByIndex(transIDs, row, 0, &status);
if (U_SUCCESS(status)) {
colBund = ures_getByIndex(transIDs, row, 0, &lstatus);
if (U_SUCCESS(lstatus)) {
UnicodeString id(ures_getKey(colBund), -1, US_INV);
if(id.indexOf(T_PART) != -1) {
ures_close(colBund);
continue;
}
UResourceBundle* res = ures_getNextResource(colBund, NULL, &status);
UResourceBundle* res = ures_getNextResource(colBund, NULL, &lstatus);
const char* typeStr = ures_getKey(res);
UChar type;
u_charsToUChars(typeStr, &type, 1);
if (U_SUCCESS(status)) {
if (U_SUCCESS(lstatus)) {
int32_t len = 0;
const UChar *resString;
switch (type) {
@ -1539,19 +1546,19 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) {
// row[2]=resource, row[3]=direction
{
resString = ures_getStringByKey(res, "resource", &len, &status);
resString = ures_getStringByKey(res, "resource", &len, &lstatus);
UBool visible = (type == 0x0066 /*f*/);
UTransDirection dir =
(ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) ==
(ures_getUnicodeStringByKey(res, "direction", &lstatus).charAt(0) ==
0x0046 /*F*/) ?
UTRANS_FORWARD : UTRANS_REVERSE;
registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status);
registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, lstatus);
}
break;
case 0x61: // 'a'
// 'alias'; row[2]=createInstance argument
resString = ures_getString(res, &len, &status);
registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status);
resString = ures_getString(res, &len, &lstatus);
registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, lstatus);
break;
}
}

View File

@ -196,6 +196,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(82,TestHalfwidthFullwidth);
TESTCASE(83,TestThai);
TESTCASE(84,TestAny);
TESTCASE(85,TestBasicTransliteratorEvenWithoutData);
default: name = ""; break;
}
}
@ -1508,6 +1509,81 @@ void TransliteratorTest::TestNormalizationTransliterator() {
delete t;
}
/**
 * Test we can create basic transliterator even without data.
 *
 * Two things are checked:
 *  1. Each ID in BASIC_TRANSLITERATOR_ID can be instantiated via
 *     Transliterator::createInstance() and, when applied to TEST_DATA,
 *     yields the entry at the same index in EXPECTED_RESULTS.
 *  2. Each rule string in BASIC_TRANSLITERATOR_RULES can be compiled via
 *     Transliterator::createFromRules().
 *
 * Failures are reported through dataerrln(). A transliterator that could
 * not be created is reported and then skipped, so the test never
 * dereferences a null pointer.
 */
void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() {
    // Input shared by every ID below.
    const char16_t* TEST_DATA = u"\u0124e\u0301 \uFB01nd x";
    // Expected output per ID; kept index-parallel with BASIC_TRANSLITERATOR_ID.
    const char16_t* EXPECTED_RESULTS[] = {
        u"H\u0302e\u0301 \uFB01nd x",  // NFD
        u"\u0124\u00E9 \uFB01nd x",  // NFC
        u"H\u0302e\u0301 find x",  // NFKD
        u"\u0124\u00E9 find x",  // NFKC
        u"\u0124e\u0301 \uFB01nd x",  // Hex-Any
        u"\u0125e\u0301 \uFB01nd x",  // Lower
        u"\u0124e\uFB01ndx",  // [:^L:]Remove
        u"H\u0302e\u0301 \uFB01nd ",  // NFD; [x]Remove
        u"h\u0302e\u0301 find x",  // Lower; NFKD;
        u"hefindx",  // Lower; NFKD; [:^L:]Remove; NFC;
        u"\u0124e \uFB01nd x",  // [:Nonspacing Mark:] Remove;
        u"He \uFB01nd x",  // NFD; [:Nonspacing Mark:] Remove; NFC;
        // end
        0
    };
    // Zero-terminated list of transliterator IDs to instantiate.
    const char* BASIC_TRANSLITERATOR_ID[] = {
        "NFD",
        "NFC",
        "NFKD",
        "NFKC",
        "Hex-Any",
        "Lower",
        "[:^L:]Remove",
        "NFD; [x]Remove",
        "Lower; NFKD;",
        "Lower; NFKD; [:^L:]Remove; NFC;",
        "[:Nonspacing Mark:] Remove;",
        "NFD; [:Nonspacing Mark:] Remove; NFC;",
        // end
        0
    };
    // Zero-terminated list of rule strings to compile with createFromRules().
    const char* BASIC_TRANSLITERATOR_RULES[] = {
        "::Lower; ::NFKD;",
        "::Lower; ::NFKD; ::[:^L:]Remove; ::NFC;",
        "::[:Nonspacing Mark:] Remove;",
        "::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;",
        // end
        0
    };
    for (int32_t i=0; BASIC_TRANSLITERATOR_ID[i]; i++) {
        UErrorCode status = U_ZERO_ERROR;
        UParseError parseError;
        std::unique_ptr<Transliterator> translit(Transliterator::createInstance(
            BASIC_TRANSLITERATOR_ID[i], UTRANS_FORWARD, parseError, status));
        if (translit.get() == nullptr || !U_SUCCESS(status)) {
            dataerrln("FAIL: createInstance %s failed", BASIC_TRANSLITERATOR_ID[i]);
            // There is no usable transliterator for this ID; move on to the
            // next one instead of dereferencing a null pointer below.
            continue;
        }
        UnicodeString data(TEST_DATA);
        UnicodeString expected(EXPECTED_RESULTS[i]);
        translit->transliterate(data);
        if (data != expected) {
            dataerrln(UnicodeString("FAIL: expected translit(") +
                      BASIC_TRANSLITERATOR_ID[i] + ") = '" +
                      EXPECTED_RESULTS[i] + "' but got '" + data + "'");
        }
    }
    for (int32_t i=0; BASIC_TRANSLITERATOR_RULES[i]; i++) {
        UErrorCode status = U_ZERO_ERROR;
        UParseError parseError;
        // Only successful construction is verified here; the resulting
        // transliterator is not applied to any text.
        std::unique_ptr<Transliterator> translit(Transliterator::createFromRules(
            "Test",
            BASIC_TRANSLITERATOR_RULES[i], UTRANS_FORWARD, parseError, status));
        if (translit.get() == nullptr || !U_SUCCESS(status)) {
            dataerrln("FAIL: createFromRules %s failed", BASIC_TRANSLITERATOR_RULES[i]);
        }
    }
}
/**
* Test compound RBT rules.
*/

View File

@ -369,6 +369,7 @@ private:
*/
void TestRegisterAlias(void);
void TestBasicTransliteratorEvenWithoutData(void);
//======================================================================
// Support methods
//======================================================================