ICU-1364 fix reverse ::ID block parsing and global filters in toRules()
X-SVN-Rev: 6971
This commit is contained in:
parent
e572ebdae1
commit
0d4f43a415
@ -316,6 +316,16 @@ void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransli
|
|||||||
setID(joinIDs(trans, count));
|
setID(joinIDs(trans, count));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||||
|
*/
|
||||||
|
static void _smartAppend(UnicodeString& buf, UChar c) {
|
||||||
|
if (buf.length() != 0 &&
|
||||||
|
buf.charAt(buf.length() - 1) != c) {
|
||||||
|
buf.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
||||||
UBool escapeUnprintable) const {
|
UBool escapeUnprintable) const {
|
||||||
// We do NOT call toRules() on our component transliterators, in
|
// We do NOT call toRules() on our component transliterators, in
|
||||||
@ -325,6 +335,12 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
|||||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||||
// we do call toRules() recursively.
|
// we do call toRules() recursively.
|
||||||
rulesSource.truncate(0);
|
rulesSource.truncate(0);
|
||||||
|
if (compoundRBTIndex >= 0 && getFilter() != NULL) {
|
||||||
|
// If we are a compound RBT and if we have a global
|
||||||
|
// filter, then emit it at the top.
|
||||||
|
UnicodeString pat;
|
||||||
|
rulesSource.append("::").append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM);
|
||||||
|
}
|
||||||
for (int32_t i=0; i<count; ++i) {
|
for (int32_t i=0; i<count; ++i) {
|
||||||
UnicodeString rule;
|
UnicodeString rule;
|
||||||
if (i == compoundRBTIndex) {
|
if (i == compoundRBTIndex) {
|
||||||
@ -332,15 +348,9 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
|||||||
} else {
|
} else {
|
||||||
trans[i]->Transliterator::toRules(rule, escapeUnprintable);
|
trans[i]->Transliterator::toRules(rule, escapeUnprintable);
|
||||||
}
|
}
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, NEWLINE);
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != NEWLINE) {
|
|
||||||
rulesSource.append(NEWLINE);
|
|
||||||
}
|
|
||||||
rulesSource.append(rule);
|
rulesSource.append(rule);
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, ID_DELIM);
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
|
||||||
rulesSource.append(ID_DELIM);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return rulesSource;
|
return rulesSource;
|
||||||
}
|
}
|
||||||
|
@ -829,7 +829,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
|
|||||||
int32_t lengthBefore = idBlock.length();
|
int32_t lengthBefore = idBlock.length();
|
||||||
if (mode == 1) {
|
if (mode == 1) {
|
||||||
mode = 2;
|
mode = 2;
|
||||||
idSplitPoint = lengthBefore;
|
// In the forward direction parseID adds elements at the end.
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint = (direction == UTRANS_REVERSE) ? 0 : lengthBefore;
|
||||||
}
|
}
|
||||||
int32_t p = pos;
|
int32_t p = pos;
|
||||||
UBool sawDelim;
|
UBool sawDelim;
|
||||||
@ -840,6 +842,10 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
|
|||||||
delete cpdFilter;
|
delete cpdFilter;
|
||||||
syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos);
|
syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos);
|
||||||
} else {
|
} else {
|
||||||
|
if (direction == UTRANS_REVERSE && idSplitPoint >= 0) {
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint += idBlock.length() - lengthBefore;
|
||||||
|
}
|
||||||
if (cpdFilter != NULL) {
|
if (cpdFilter != NULL) {
|
||||||
if (compoundFilter != NULL) {
|
if (compoundFilter != NULL) {
|
||||||
syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos);
|
syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos);
|
||||||
|
@ -139,6 +139,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
|||||||
TESTCASE(57,TestVariableRange);
|
TESTCASE(57,TestVariableRange);
|
||||||
TESTCASE(58,TestInvalidPostContext);
|
TESTCASE(58,TestInvalidPostContext);
|
||||||
TESTCASE(59,TestIDForms);
|
TESTCASE(59,TestIDForms);
|
||||||
|
TESTCASE(60,TestToRulesMark);
|
||||||
default: name = ""; break;
|
default: name = ""; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2755,6 +2756,84 @@ void TransliteratorTest::TestIDForms() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NUL-terminated one-character strings used by checkRules() to strip
// whitespace, plus the empty replacement text.
static const UChar SPACE[]   = {0x0020, 0}; // ' '
static const UChar NEWLINE[] = {0x000A, 0}; // line feed
static const UChar RETURN[]  = {0x000D, 0}; // carriage return
static const UChar EMPTY[]   = {0};         // ""
|
||||||
|
|
||||||
|
void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
|
||||||
|
const UnicodeString& testRulesForward) {
|
||||||
|
UnicodeString rules2; t2.toRules(rules2, TRUE);
|
||||||
|
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||||
|
rules2.findAndReplace(SPACE, EMPTY);
|
||||||
|
rules2.findAndReplace(NEWLINE, EMPTY);
|
||||||
|
rules2.findAndReplace(RETURN, EMPTY);
|
||||||
|
|
||||||
|
UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
|
||||||
|
|
||||||
|
if (rules2 != testRules) {
|
||||||
|
errln(label);
|
||||||
|
logln((UnicodeString)"GENERATED RULES: " + rules2);
|
||||||
|
logln((UnicodeString)"SHOULD BE: " + testRulesForward);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark's toRules test.
|
||||||
|
*/
|
||||||
|
void TransliteratorTest::TestToRulesMark() {
|
||||||
|
const char* testRules =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
"::NFKD (NFC);"
|
||||||
|
"::Lower (Lower);"
|
||||||
|
"a <> \\u03B1;" // alpha
|
||||||
|
"::NFKC (NFD);"
|
||||||
|
"::Upper (Lower);"
|
||||||
|
"::Lower ();"
|
||||||
|
"::([[:Greek:][:Mark:]]);"
|
||||||
|
;
|
||||||
|
const char* testRulesForward =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
"::NFKD(NFC);"
|
||||||
|
"::Lower(Lower);"
|
||||||
|
"a > \\u03B1;"
|
||||||
|
"::NFKC(NFD);"
|
||||||
|
"::Upper (Lower);"
|
||||||
|
"::Lower ();"
|
||||||
|
;
|
||||||
|
const char* testRulesBackward =
|
||||||
|
"::[[:Greek:][:Mark:]];"
|
||||||
|
"::Lower (Upper);"
|
||||||
|
"::NFD(NFKC);"
|
||||||
|
"\\u03B1 > a;"
|
||||||
|
"::Lower(Lower);"
|
||||||
|
"::NFC(NFKD);"
|
||||||
|
;
|
||||||
|
UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
|
||||||
|
UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
|
||||||
|
|
||||||
|
UParseError pe;
|
||||||
|
UErrorCode ec = U_ZERO_ERROR;
|
||||||
|
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
|
||||||
|
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
|
||||||
|
|
||||||
|
if (U_FAILURE(ec)) {
|
||||||
|
delete t2;
|
||||||
|
delete t3;
|
||||||
|
errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(*t2, source, target);
|
||||||
|
expect(*t3, target, source);
|
||||||
|
|
||||||
|
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
|
||||||
|
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
|
||||||
|
|
||||||
|
delete t2;
|
||||||
|
delete t3;
|
||||||
|
}
|
||||||
|
|
||||||
//======================================================================
|
//======================================================================
|
||||||
// icu4c ONLY
|
// icu4c ONLY
|
||||||
// These tests are not mirrored (yet) in icu4j at
|
// These tests are not mirrored (yet) in icu4j at
|
||||||
|
@ -282,6 +282,11 @@ class TransliteratorTest : public IntlTest {
|
|||||||
*/
|
*/
|
||||||
void TestIDForms();
|
void TestIDForms();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark's toRules test.
|
||||||
|
*/
|
||||||
|
void TestToRulesMark();
|
||||||
|
|
||||||
//======================================================================
|
//======================================================================
|
||||||
// Support methods
|
// Support methods
|
||||||
//======================================================================
|
//======================================================================
|
||||||
@ -313,6 +318,9 @@ class TransliteratorTest : public IntlTest {
|
|||||||
static UnicodeString& formatInput(UnicodeString &appendTo,
|
static UnicodeString& formatInput(UnicodeString &appendTo,
|
||||||
const UnicodeString& input,
|
const UnicodeString& input,
|
||||||
const UTransPosition& pos);
|
const UTransPosition& pos);
|
||||||
|
|
||||||
|
void checkRules(const UnicodeString& label, Transliterator& t2,
|
||||||
|
const UnicodeString& testRulesForward);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public void TestToRules() throws IOException, ParseException {
|
|
||||||
new Test2Rules().test(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void TestHiragana() throws IOException, ParseException {
|
public void TestHiragana() throws IOException, ParseException {
|
||||||
new Test("Latin-Hiragana")
|
new Test("Latin-Hiragana")
|
||||||
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
||||||
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
|
|||||||
// return super.isSource(c);
|
// return super.isSource(c);
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
static class Test2Rules {
|
|
||||||
TestLog log;
|
|
||||||
|
|
||||||
void checkDiff(String label, Transliterator t2, String source, String target) {
|
|
||||||
String st = t2.transliterate(source);
|
|
||||||
if (!target.equals(st)) {
|
|
||||||
log.errln(label
|
|
||||||
+ ": " + TestUtility.hex(source)
|
|
||||||
+ " => " + TestUtility.hex(st)
|
|
||||||
+ ", instead of " + TestUtility.hex(target));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
|
||||||
String rules2 = t2.toRules(true);
|
|
||||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
|
||||||
rules2 = TestUtility.replace(rules2, " ", "");
|
|
||||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
|
||||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
|
||||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
|
||||||
|
|
||||||
if (!rules2.equals(testRulesForward)) {
|
|
||||||
log.errln(label);
|
|
||||||
System.out.println();
|
|
||||||
System.out.println("GENERATED RULES:\t" + rules2);
|
|
||||||
System.out.println("SHOULD BE:\t" + testRulesForward);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void test(TestLog log) {
|
|
||||||
this.log = log;
|
|
||||||
|
|
||||||
String testRules =
|
|
||||||
"::[[:Latin:][:Mark:]];"
|
|
||||||
+ "::NFKD (NFC);"
|
|
||||||
+ "a <> \\u03B1;" // alpha
|
|
||||||
+ "::NFKC (NFD);"
|
|
||||||
+ "::([[:Greek:][:Mark:]]);"
|
|
||||||
;
|
|
||||||
String testRulesForward =
|
|
||||||
"::[[:Latin:][:Mark:]];"
|
|
||||||
+ "::NFKD();"
|
|
||||||
+ "a > \\u03B1;"
|
|
||||||
+ "::NFKC();"
|
|
||||||
;
|
|
||||||
String testRulesBackward =
|
|
||||||
"::[[:Greek:][:Mark:]];"
|
|
||||||
+ "::NFD();"
|
|
||||||
+ "\\u03B1 > a;"
|
|
||||||
+ "::NFC();"
|
|
||||||
;
|
|
||||||
String source = "\u00E1"; // a-acute
|
|
||||||
String target = "\u03AC"; // alpha-acute
|
|
||||||
|
|
||||||
Transliterator t2 = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
|
|
||||||
Transliterator t3 = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);
|
|
||||||
|
|
||||||
checkDiff("Failed source-target", t2, source, target);
|
|
||||||
checkDiff("Failed target-source", t3, target, source);
|
|
||||||
|
|
||||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
|
||||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||||
* $Date: 2001/11/16 21:50:40 $
|
* $Date: 2001/11/17 06:44:50 $
|
||||||
* $Revision: 1.72 $
|
* $Revision: 1.73 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||||
|
String rules2 = t2.toRules(true);
|
||||||
|
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||||
|
rules2 = TestUtility.replace(rules2, " ", "");
|
||||||
|
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||||
|
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||||
|
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||||
|
|
||||||
|
if (!rules2.equals(testRulesForward)) {
|
||||||
|
errln(label);
|
||||||
|
logln("GENERATED RULES: " + rules2);
|
||||||
|
logln("SHOULD BE: " + testRulesForward);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark's toRules test.
|
||||||
|
*/
|
||||||
|
public void TestToRulesMark() {
|
||||||
|
|
||||||
|
String testRules =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
+ "::NFKD (NFC);"
|
||||||
|
+ "::Lower (Lower);"
|
||||||
|
+ "a <> \\u03B1;" // alpha
|
||||||
|
+ "::NFKC (NFD);"
|
||||||
|
+ "::Upper (Lower);"
|
||||||
|
+ "::Lower ();"
|
||||||
|
+ "::([[:Greek:][:Mark:]]);"
|
||||||
|
;
|
||||||
|
String testRulesForward =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
+ "::NFKD(NFC);"
|
||||||
|
+ "::Lower(Lower);"
|
||||||
|
+ "a > \\u03B1;"
|
||||||
|
+ "::NFKC(NFD);"
|
||||||
|
+ "::Upper (Lower);"
|
||||||
|
+ "::Lower ();"
|
||||||
|
;
|
||||||
|
String testRulesBackward =
|
||||||
|
"::[[:Greek:][:Mark:]];"
|
||||||
|
+ "::Lower (Upper);"
|
||||||
|
+ "::NFD(NFKC);"
|
||||||
|
+ "\\u03B1 > a;"
|
||||||
|
+ "::Lower(Lower);"
|
||||||
|
+ "::NFC(NFKD);"
|
||||||
|
;
|
||||||
|
String source = "\u00E1"; // a-acute
|
||||||
|
String target = "\u03AC"; // alpha-acute
|
||||||
|
|
||||||
|
Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
|
||||||
|
Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
|
||||||
|
|
||||||
|
expect(t2, source, target);
|
||||||
|
expect(t3, target, source);
|
||||||
|
|
||||||
|
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||||
|
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||||
|
}
|
||||||
|
|
||||||
//======================================================================
|
//======================================================================
|
||||||
// icu4j ONLY
|
// icu4j ONLY
|
||||||
// These tests are not mirrored (yet) in icu4c at
|
// These tests are not mirrored (yet) in icu4c at
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
||||||
* $Date: 2001/10/26 22:46:35 $
|
* $Date: 2001/11/17 06:43:17 $
|
||||||
* $Revision: 1.18 $
|
* $Revision: 1.19 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
|||||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||||
*
|
*
|
||||||
* @author Alan Liu
|
* @author Alan Liu
|
||||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $
|
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
|
||||||
*/
|
*/
|
||||||
public class CompoundTransliterator extends Transliterator {
|
public class CompoundTransliterator extends Transliterator {
|
||||||
|
|
||||||
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
return trans[index];
|
return trans[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||||
|
*/
|
||||||
|
private static void _smartAppend(StringBuffer buf, char c) {
|
||||||
|
if (buf.length() != 0 &&
|
||||||
|
buf.charAt(buf.length() - 1) != c) {
|
||||||
|
buf.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String toRules(boolean escapeUnprintable) {
|
public String toRules(boolean escapeUnprintable) {
|
||||||
// We do NOT call toRules() on our component transliterators, in
|
// We do NOT call toRules() on our component transliterators, in
|
||||||
// general. If we have several rule-based transliterators, this
|
// general. If we have several rule-based transliterators, this
|
||||||
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||||
// we do call toRules() recursively.
|
// we do call toRules() recursively.
|
||||||
StringBuffer rulesSource = new StringBuffer();
|
StringBuffer rulesSource = new StringBuffer();
|
||||||
|
if (compoundRBTIndex >= 0 && getFilter() != null) {
|
||||||
|
// If we are a compound RBT and if we have a global
|
||||||
|
// filter, then emit it at the top.
|
||||||
|
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
|
||||||
|
}
|
||||||
for (int i=0; i<trans.length; ++i) {
|
for (int i=0; i<trans.length; ++i) {
|
||||||
String rule;
|
String rule;
|
||||||
if (i == compoundRBTIndex) {
|
if (i == compoundRBTIndex) {
|
||||||
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
} else {
|
} else {
|
||||||
rule = trans[i].baseToRules(escapeUnprintable);
|
rule = trans[i].baseToRules(escapeUnprintable);
|
||||||
}
|
}
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, '\n');
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
|
||||||
rulesSource.append('\n');
|
|
||||||
}
|
|
||||||
rulesSource.append(rule);
|
rulesSource.append(rule);
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, ID_DELIM);
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
|
||||||
rulesSource.append(ID_DELIM);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return rulesSource.toString();
|
return rulesSource.toString();
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
|
||||||
* $Date: 2001/11/14 19:16:41 $
|
* $Date: 2001/11/17 06:43:17 $
|
||||||
* $Revision: 1.11 $
|
* $Revision: 1.12 $
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
*/
|
*/
|
||||||
package com.ibm.text;
|
package com.ibm.text;
|
||||||
@ -866,7 +866,9 @@ class TransliteratorParser {
|
|||||||
int lengthBefore = idBlockResult.length();
|
int lengthBefore = idBlockResult.length();
|
||||||
if (mode == 1) {
|
if (mode == 1) {
|
||||||
mode = 2;
|
mode = 2;
|
||||||
idSplitPoint = lengthBefore;
|
// In the forward direction parseID adds elements at the end.
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
|
||||||
}
|
}
|
||||||
int[] p = new int[] { pos };
|
int[] p = new int[] { pos };
|
||||||
boolean[] sawDelim = new boolean[1];
|
boolean[] sawDelim = new boolean[1];
|
||||||
@ -881,6 +883,10 @@ class TransliteratorParser {
|
|||||||
throw new IllegalArgumentException("Invalid ::ID " +
|
throw new IllegalArgumentException("Invalid ::ID " +
|
||||||
rule.substring(pos, i1));
|
rule.substring(pos, i1));
|
||||||
}
|
}
|
||||||
|
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint += idBlockResult.length() - lengthBefore;
|
||||||
|
}
|
||||||
if (cpdFilter[0] != null) {
|
if (cpdFilter[0] != null) {
|
||||||
if (compoundFilter != null) {
|
if (compoundFilter != null) {
|
||||||
// Multiple compound filters
|
// Multiple compound filters
|
||||||
|
@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public void TestToRules() throws IOException, ParseException {
|
|
||||||
new Test2Rules().test(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void TestHiragana() throws IOException, ParseException {
|
public void TestHiragana() throws IOException, ParseException {
|
||||||
new Test("Latin-Hiragana")
|
new Test("Latin-Hiragana")
|
||||||
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
||||||
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
|
|||||||
// return super.isSource(c);
|
// return super.isSource(c);
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
static class Test2Rules {
|
|
||||||
TestLog log;
|
|
||||||
|
|
||||||
void checkDiff(String label, Transliterator t2, String source, String target) {
|
|
||||||
String st = t2.transliterate(source);
|
|
||||||
if (!target.equals(st)) {
|
|
||||||
log.errln(label
|
|
||||||
+ ": " + TestUtility.hex(source)
|
|
||||||
+ " => " + TestUtility.hex(st)
|
|
||||||
+ ", instead of " + TestUtility.hex(target));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
|
||||||
String rules2 = t2.toRules(true);
|
|
||||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
|
||||||
rules2 = TestUtility.replace(rules2, " ", "");
|
|
||||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
|
||||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
|
||||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
|
||||||
|
|
||||||
if (!rules2.equals(testRulesForward)) {
|
|
||||||
log.errln(label);
|
|
||||||
System.out.println();
|
|
||||||
System.out.println("GENERATED RULES:\t" + rules2);
|
|
||||||
System.out.println("SHOULD BE:\t" + testRulesForward);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void test(TestLog log) {
|
|
||||||
this.log = log;
|
|
||||||
|
|
||||||
String testRules =
|
|
||||||
"::[[:Latin:][:Mark:]];"
|
|
||||||
+ "::NFKD (NFC);"
|
|
||||||
+ "a <> \\u03B1;" // alpha
|
|
||||||
+ "::NFKC (NFD);"
|
|
||||||
+ "::([[:Greek:][:Mark:]]);"
|
|
||||||
;
|
|
||||||
String testRulesForward =
|
|
||||||
"::[[:Latin:][:Mark:]];"
|
|
||||||
+ "::NFKD();"
|
|
||||||
+ "a > \\u03B1;"
|
|
||||||
+ "::NFKC();"
|
|
||||||
;
|
|
||||||
String testRulesBackward =
|
|
||||||
"::[[:Greek:][:Mark:]];"
|
|
||||||
+ "::NFD();"
|
|
||||||
+ "\\u03B1 > a;"
|
|
||||||
+ "::NFC();"
|
|
||||||
;
|
|
||||||
String source = "\u00E1"; // a-acute
|
|
||||||
String target = "\u03AC"; // alpha-acute
|
|
||||||
|
|
||||||
Transliterator t2 = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
|
|
||||||
Transliterator t3 = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);
|
|
||||||
|
|
||||||
checkDiff("Failed source-target", t2, source, target);
|
|
||||||
checkDiff("Failed target-source", t3, target, source);
|
|
||||||
|
|
||||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
|
||||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||||
* $Date: 2001/11/16 21:50:40 $
|
* $Date: 2001/11/17 06:44:50 $
|
||||||
* $Revision: 1.72 $
|
* $Revision: 1.73 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||||
|
String rules2 = t2.toRules(true);
|
||||||
|
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||||
|
rules2 = TestUtility.replace(rules2, " ", "");
|
||||||
|
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||||
|
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||||
|
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||||
|
|
||||||
|
if (!rules2.equals(testRulesForward)) {
|
||||||
|
errln(label);
|
||||||
|
logln("GENERATED RULES: " + rules2);
|
||||||
|
logln("SHOULD BE: " + testRulesForward);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark's toRules test.
|
||||||
|
*/
|
||||||
|
public void TestToRulesMark() {
|
||||||
|
|
||||||
|
String testRules =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
+ "::NFKD (NFC);"
|
||||||
|
+ "::Lower (Lower);"
|
||||||
|
+ "a <> \\u03B1;" // alpha
|
||||||
|
+ "::NFKC (NFD);"
|
||||||
|
+ "::Upper (Lower);"
|
||||||
|
+ "::Lower ();"
|
||||||
|
+ "::([[:Greek:][:Mark:]]);"
|
||||||
|
;
|
||||||
|
String testRulesForward =
|
||||||
|
"::[[:Latin:][:Mark:]];"
|
||||||
|
+ "::NFKD(NFC);"
|
||||||
|
+ "::Lower(Lower);"
|
||||||
|
+ "a > \\u03B1;"
|
||||||
|
+ "::NFKC(NFD);"
|
||||||
|
+ "::Upper (Lower);"
|
||||||
|
+ "::Lower ();"
|
||||||
|
;
|
||||||
|
String testRulesBackward =
|
||||||
|
"::[[:Greek:][:Mark:]];"
|
||||||
|
+ "::Lower (Upper);"
|
||||||
|
+ "::NFD(NFKC);"
|
||||||
|
+ "\\u03B1 > a;"
|
||||||
|
+ "::Lower(Lower);"
|
||||||
|
+ "::NFC(NFKD);"
|
||||||
|
;
|
||||||
|
String source = "\u00E1"; // a-acute
|
||||||
|
String target = "\u03AC"; // alpha-acute
|
||||||
|
|
||||||
|
Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
|
||||||
|
Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
|
||||||
|
|
||||||
|
expect(t2, source, target);
|
||||||
|
expect(t3, target, source);
|
||||||
|
|
||||||
|
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||||
|
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||||
|
}
|
||||||
|
|
||||||
//======================================================================
|
//======================================================================
|
||||||
// icu4j ONLY
|
// icu4j ONLY
|
||||||
// These tests are not mirrored (yet) in icu4c at
|
// These tests are not mirrored (yet) in icu4c at
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
|
||||||
* $Date: 2001/10/26 22:46:35 $
|
* $Date: 2001/11/17 06:43:17 $
|
||||||
* $Revision: 1.18 $
|
* $Revision: 1.19 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
|||||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||||
*
|
*
|
||||||
* @author Alan Liu
|
* @author Alan Liu
|
||||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $
|
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
|
||||||
*/
|
*/
|
||||||
public class CompoundTransliterator extends Transliterator {
|
public class CompoundTransliterator extends Transliterator {
|
||||||
|
|
||||||
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
return trans[index];
|
return trans[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||||
|
*/
|
||||||
|
private static void _smartAppend(StringBuffer buf, char c) {
|
||||||
|
if (buf.length() != 0 &&
|
||||||
|
buf.charAt(buf.length() - 1) != c) {
|
||||||
|
buf.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String toRules(boolean escapeUnprintable) {
|
public String toRules(boolean escapeUnprintable) {
|
||||||
// We do NOT call toRules() on our component transliterators, in
|
// We do NOT call toRules() on our component transliterators, in
|
||||||
// general. If we have several rule-based transliterators, this
|
// general. If we have several rule-based transliterators, this
|
||||||
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||||
// we do call toRules() recursively.
|
// we do call toRules() recursively.
|
||||||
StringBuffer rulesSource = new StringBuffer();
|
StringBuffer rulesSource = new StringBuffer();
|
||||||
|
if (compoundRBTIndex >= 0 && getFilter() != null) {
|
||||||
|
// If we are a compound RBT and if we have a global
|
||||||
|
// filter, then emit it at the top.
|
||||||
|
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
|
||||||
|
}
|
||||||
for (int i=0; i<trans.length; ++i) {
|
for (int i=0; i<trans.length; ++i) {
|
||||||
String rule;
|
String rule;
|
||||||
if (i == compoundRBTIndex) {
|
if (i == compoundRBTIndex) {
|
||||||
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
|
|||||||
} else {
|
} else {
|
||||||
rule = trans[i].baseToRules(escapeUnprintable);
|
rule = trans[i].baseToRules(escapeUnprintable);
|
||||||
}
|
}
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, '\n');
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
|
||||||
rulesSource.append('\n');
|
|
||||||
}
|
|
||||||
rulesSource.append(rule);
|
rulesSource.append(rule);
|
||||||
if (rulesSource.length() != 0 &&
|
_smartAppend(rulesSource, ID_DELIM);
|
||||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
|
||||||
rulesSource.append(ID_DELIM);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return rulesSource.toString();
|
return rulesSource.toString();
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
|
||||||
* $Date: 2001/11/14 19:16:41 $
|
* $Date: 2001/11/17 06:43:17 $
|
||||||
* $Revision: 1.11 $
|
* $Revision: 1.12 $
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
*/
|
*/
|
||||||
package com.ibm.text;
|
package com.ibm.text;
|
||||||
@ -866,7 +866,9 @@ class TransliteratorParser {
|
|||||||
int lengthBefore = idBlockResult.length();
|
int lengthBefore = idBlockResult.length();
|
||||||
if (mode == 1) {
|
if (mode == 1) {
|
||||||
mode = 2;
|
mode = 2;
|
||||||
idSplitPoint = lengthBefore;
|
// In the forward direction parseID adds elements at the end.
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
|
||||||
}
|
}
|
||||||
int[] p = new int[] { pos };
|
int[] p = new int[] { pos };
|
||||||
boolean[] sawDelim = new boolean[1];
|
boolean[] sawDelim = new boolean[1];
|
||||||
@ -881,6 +883,10 @@ class TransliteratorParser {
|
|||||||
throw new IllegalArgumentException("Invalid ::ID " +
|
throw new IllegalArgumentException("Invalid ::ID " +
|
||||||
rule.substring(pos, i1));
|
rule.substring(pos, i1));
|
||||||
}
|
}
|
||||||
|
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
|
||||||
|
// In the reverse direction parseID adds elements at the start.
|
||||||
|
idSplitPoint += idBlockResult.length() - lengthBefore;
|
||||||
|
}
|
||||||
if (cpdFilter[0] != null) {
|
if (cpdFilter[0] != null) {
|
||||||
if (compoundFilter != null) {
|
if (compoundFilter != null) {
|
||||||
// Multiple compound filters
|
// Multiple compound filters
|
||||||
|
Loading…
Reference in New Issue
Block a user