ICU-1364 fix reverse ::ID block parsing and global filters in toRules()
X-SVN-Rev: 6971
This commit is contained in:
parent
e572ebdae1
commit
0d4f43a415
@ -316,6 +316,16 @@ void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransli
|
||||
setID(joinIDs(trans, count));
|
||||
}
|
||||
|
||||
/**
|
||||
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||
*/
|
||||
static void _smartAppend(UnicodeString& buf, UChar c) {
|
||||
if (buf.length() != 0 &&
|
||||
buf.charAt(buf.length() - 1) != c) {
|
||||
buf.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
||||
UBool escapeUnprintable) const {
|
||||
// We do NOT call toRules() on our component transliterators, in
|
||||
@ -325,6 +335,12 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||
// we do call toRules() recursively.
|
||||
rulesSource.truncate(0);
|
||||
if (compoundRBTIndex >= 0 && getFilter() != NULL) {
|
||||
// If we are a compound RBT and if we have a global
|
||||
// filter, then emit it at the top.
|
||||
UnicodeString pat;
|
||||
rulesSource.append("::").append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM);
|
||||
}
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
UnicodeString rule;
|
||||
if (i == compoundRBTIndex) {
|
||||
@ -332,15 +348,9 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
||||
} else {
|
||||
trans[i]->Transliterator::toRules(rule, escapeUnprintable);
|
||||
}
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != NEWLINE) {
|
||||
rulesSource.append(NEWLINE);
|
||||
}
|
||||
_smartAppend(rulesSource, NEWLINE);
|
||||
rulesSource.append(rule);
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
||||
rulesSource.append(ID_DELIM);
|
||||
}
|
||||
_smartAppend(rulesSource, ID_DELIM);
|
||||
}
|
||||
return rulesSource;
|
||||
}
|
||||
|
@ -829,7 +829,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
|
||||
int32_t lengthBefore = idBlock.length();
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPoint = lengthBefore;
|
||||
// In the forward direction parseID adds elements at the end.
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint = (direction == UTRANS_REVERSE) ? 0 : lengthBefore;
|
||||
}
|
||||
int32_t p = pos;
|
||||
UBool sawDelim;
|
||||
@ -840,6 +842,10 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
|
||||
delete cpdFilter;
|
||||
syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos);
|
||||
} else {
|
||||
if (direction == UTRANS_REVERSE && idSplitPoint >= 0) {
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint += idBlock.length() - lengthBefore;
|
||||
}
|
||||
if (cpdFilter != NULL) {
|
||||
if (compoundFilter != NULL) {
|
||||
syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos);
|
||||
|
@ -139,6 +139,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
||||
TESTCASE(57,TestVariableRange);
|
||||
TESTCASE(58,TestInvalidPostContext);
|
||||
TESTCASE(59,TestIDForms);
|
||||
TESTCASE(60,TestToRulesMark);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
@ -2755,6 +2756,84 @@ void TransliteratorTest::TestIDForms() {
|
||||
}
|
||||
}
|
||||
|
||||
// Zero-terminated UChar strings used by checkRules() as the search and
// replacement arguments when stripping characters from rule text.
static const UChar SPACE[]   = {0x0020, 0}; // ' '
static const UChar NEWLINE[] = {0x000A, 0}; // '\n'
static const UChar RETURN[]  = {0x000D, 0}; // '\r'
static const UChar EMPTY[]   = {0};         // "" (replacement text)
|
||||
|
||||
void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
|
||||
const UnicodeString& testRulesForward) {
|
||||
UnicodeString rules2; t2.toRules(rules2, TRUE);
|
||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||
rules2.findAndReplace(SPACE, EMPTY);
|
||||
rules2.findAndReplace(NEWLINE, EMPTY);
|
||||
rules2.findAndReplace(RETURN, EMPTY);
|
||||
|
||||
UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
|
||||
|
||||
if (rules2 != testRules) {
|
||||
errln(label);
|
||||
logln((UnicodeString)"GENERATED RULES: " + rules2);
|
||||
logln((UnicodeString)"SHOULD BE: " + testRulesForward);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark's toRules test.
|
||||
*/
|
||||
void TransliteratorTest::TestToRulesMark() {
|
||||
const char* testRules =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
"::NFKD (NFC);"
|
||||
"::Lower (Lower);"
|
||||
"a <> \\u03B1;" // alpha
|
||||
"::NFKC (NFD);"
|
||||
"::Upper (Lower);"
|
||||
"::Lower ();"
|
||||
"::([[:Greek:][:Mark:]]);"
|
||||
;
|
||||
const char* testRulesForward =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
"::NFKD(NFC);"
|
||||
"::Lower(Lower);"
|
||||
"a > \\u03B1;"
|
||||
"::NFKC(NFD);"
|
||||
"::Upper (Lower);"
|
||||
"::Lower ();"
|
||||
;
|
||||
const char* testRulesBackward =
|
||||
"::[[:Greek:][:Mark:]];"
|
||||
"::Lower (Upper);"
|
||||
"::NFD(NFKC);"
|
||||
"\\u03B1 > a;"
|
||||
"::Lower(Lower);"
|
||||
"::NFC(NFKD);"
|
||||
;
|
||||
UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
|
||||
UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
|
||||
|
||||
UParseError pe;
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
|
||||
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
|
||||
|
||||
if (U_FAILURE(ec)) {
|
||||
delete t2;
|
||||
delete t3;
|
||||
errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
|
||||
return;
|
||||
}
|
||||
|
||||
expect(*t2, source, target);
|
||||
expect(*t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
|
||||
|
||||
delete t2;
|
||||
delete t3;
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4c ONLY
|
||||
// These tests are not mirrored (yet) in icu4j at
|
||||
|
@ -282,6 +282,11 @@ class TransliteratorTest : public IntlTest {
|
||||
*/
|
||||
void TestIDForms();
|
||||
|
||||
/**
|
||||
* Mark's toRules test.
|
||||
*/
|
||||
void TestToRulesMark();
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
@ -313,6 +318,9 @@ class TransliteratorTest : public IntlTest {
|
||||
static UnicodeString& formatInput(UnicodeString &appendTo,
|
||||
const UnicodeString& input,
|
||||
const UTransPosition& pos);
|
||||
|
||||
void checkRules(const UnicodeString& label, Transliterator& t2,
|
||||
const UnicodeString& testRulesForward);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
|
||||
}
|
||||
*/
|
||||
|
||||
public void TestToRules() throws IOException, ParseException {
|
||||
new Test2Rules().test(this);
|
||||
}
|
||||
|
||||
public void TestHiragana() throws IOException, ParseException {
|
||||
new Test("Latin-Hiragana")
|
||||
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
||||
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
|
||||
// return super.isSource(c);
|
||||
// }
|
||||
// }
|
||||
static class Test2Rules {
|
||||
TestLog log;
|
||||
|
||||
void checkDiff(String label, Transliterator t2, String source, String target) {
|
||||
String st = t2.transliterate(source);
|
||||
if (!target.equals(st)) {
|
||||
log.errln(label
|
||||
+ ": " + TestUtility.hex(source)
|
||||
+ " => " + TestUtility.hex(st)
|
||||
+ ", instead of " + TestUtility.hex(target));
|
||||
}
|
||||
}
|
||||
|
||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||
String rules2 = t2.toRules(true);
|
||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||
rules2 = TestUtility.replace(rules2, " ", "");
|
||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||
|
||||
if (!rules2.equals(testRulesForward)) {
|
||||
log.errln(label);
|
||||
System.out.println();
|
||||
System.out.println("GENERATED RULES:\t" + rules2);
|
||||
System.out.println("SHOULD BE:\t" + testRulesForward);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void test(TestLog log) {
|
||||
this.log = log;
|
||||
|
||||
String testRules =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD (NFC);"
|
||||
+ "a <> \\u03B1;" // alpha
|
||||
+ "::NFKC (NFD);"
|
||||
+ "::([[:Greek:][:Mark:]]);"
|
||||
;
|
||||
String testRulesForward =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD();"
|
||||
+ "a > \\u03B1;"
|
||||
+ "::NFKC();"
|
||||
;
|
||||
String testRulesBackward =
|
||||
"::[[:Greek:][:Mark:]];"
|
||||
+ "::NFD();"
|
||||
+ "\\u03B1 > a;"
|
||||
+ "::NFC();"
|
||||
;
|
||||
String source = "\u00E1"; // a-acute
|
||||
String target = "\u03AC"; // alpha-acute
|
||||
|
||||
Transliterator t2 = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
|
||||
Transliterator t3 = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);
|
||||
|
||||
checkDiff("Failed source-target", t2, source, target);
|
||||
checkDiff("Failed target-source", t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||
* $Date: 2001/11/16 21:50:40 $
|
||||
* $Revision: 1.72 $
|
||||
* $Date: 2001/11/17 06:44:50 $
|
||||
* $Revision: 1.73 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||
String rules2 = t2.toRules(true);
|
||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||
rules2 = TestUtility.replace(rules2, " ", "");
|
||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||
|
||||
if (!rules2.equals(testRulesForward)) {
|
||||
errln(label);
|
||||
logln("GENERATED RULES: " + rules2);
|
||||
logln("SHOULD BE: " + testRulesForward);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark's toRules test.
|
||||
*/
|
||||
public void TestToRulesMark() {
|
||||
|
||||
String testRules =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD (NFC);"
|
||||
+ "::Lower (Lower);"
|
||||
+ "a <> \\u03B1;" // alpha
|
||||
+ "::NFKC (NFD);"
|
||||
+ "::Upper (Lower);"
|
||||
+ "::Lower ();"
|
||||
+ "::([[:Greek:][:Mark:]]);"
|
||||
;
|
||||
String testRulesForward =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD(NFC);"
|
||||
+ "::Lower(Lower);"
|
||||
+ "a > \\u03B1;"
|
||||
+ "::NFKC(NFD);"
|
||||
+ "::Upper (Lower);"
|
||||
+ "::Lower ();"
|
||||
;
|
||||
String testRulesBackward =
|
||||
"::[[:Greek:][:Mark:]];"
|
||||
+ "::Lower (Upper);"
|
||||
+ "::NFD(NFKC);"
|
||||
+ "\\u03B1 > a;"
|
||||
+ "::Lower(Lower);"
|
||||
+ "::NFC(NFKD);"
|
||||
;
|
||||
String source = "\u00E1"; // a-acute
|
||||
String target = "\u03AC"; // alpha-acute
|
||||
|
||||
Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
|
||||
Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
|
||||
|
||||
expect(t2, source, target);
|
||||
expect(t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j ONLY
|
||||
// These tests are not mirrored (yet) in icu4c at
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/10/26 22:46:35 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2001/11/17 06:43:17 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
|
||||
return trans[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||
*/
|
||||
private static void _smartAppend(StringBuffer buf, char c) {
|
||||
if (buf.length() != 0 &&
|
||||
buf.charAt(buf.length() - 1) != c) {
|
||||
buf.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// We do NOT call toRules() on our component transliterators, in
|
||||
// general. If we have several rule-based transliterators, this
|
||||
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
|
||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||
// we do call toRules() recursively.
|
||||
StringBuffer rulesSource = new StringBuffer();
|
||||
if (compoundRBTIndex >= 0 && getFilter() != null) {
|
||||
// If we are a compound RBT and if we have a global
|
||||
// filter, then emit it at the top.
|
||||
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
|
||||
}
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
String rule;
|
||||
if (i == compoundRBTIndex) {
|
||||
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
|
||||
} else {
|
||||
rule = trans[i].baseToRules(escapeUnprintable);
|
||||
}
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
||||
rulesSource.append('\n');
|
||||
}
|
||||
_smartAppend(rulesSource, '\n');
|
||||
rulesSource.append(rule);
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
||||
rulesSource.append(ID_DELIM);
|
||||
}
|
||||
_smartAppend(rulesSource, ID_DELIM);
|
||||
}
|
||||
return rulesSource.toString();
|
||||
}
|
||||
|
@ -4,8 +4,8 @@
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
|
||||
* $Date: 2001/11/14 19:16:41 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2001/11/17 06:43:17 $
|
||||
* $Revision: 1.12 $
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.text;
|
||||
@ -866,7 +866,9 @@ class TransliteratorParser {
|
||||
int lengthBefore = idBlockResult.length();
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPoint = lengthBefore;
|
||||
// In the forward direction parseID adds elements at the end.
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelim = new boolean[1];
|
||||
@ -881,6 +883,10 @@ class TransliteratorParser {
|
||||
throw new IllegalArgumentException("Invalid ::ID " +
|
||||
rule.substring(pos, i1));
|
||||
}
|
||||
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint += idBlockResult.length() - lengthBefore;
|
||||
}
|
||||
if (cpdFilter[0] != null) {
|
||||
if (compoundFilter != null) {
|
||||
// Multiple compound filters
|
||||
|
@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
|
||||
}
|
||||
*/
|
||||
|
||||
public void TestToRules() throws IOException, ParseException {
|
||||
new Test2Rules().test(this);
|
||||
}
|
||||
|
||||
public void TestHiragana() throws IOException, ParseException {
|
||||
new Test("Latin-Hiragana")
|
||||
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
|
||||
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
|
||||
// return super.isSource(c);
|
||||
// }
|
||||
// }
|
||||
static class Test2Rules {
|
||||
TestLog log;
|
||||
|
||||
void checkDiff(String label, Transliterator t2, String source, String target) {
|
||||
String st = t2.transliterate(source);
|
||||
if (!target.equals(st)) {
|
||||
log.errln(label
|
||||
+ ": " + TestUtility.hex(source)
|
||||
+ " => " + TestUtility.hex(st)
|
||||
+ ", instead of " + TestUtility.hex(target));
|
||||
}
|
||||
}
|
||||
|
||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||
String rules2 = t2.toRules(true);
|
||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||
rules2 = TestUtility.replace(rules2, " ", "");
|
||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||
|
||||
if (!rules2.equals(testRulesForward)) {
|
||||
log.errln(label);
|
||||
System.out.println();
|
||||
System.out.println("GENERATED RULES:\t" + rules2);
|
||||
System.out.println("SHOULD BE:\t" + testRulesForward);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void test(TestLog log) {
|
||||
this.log = log;
|
||||
|
||||
String testRules =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD (NFC);"
|
||||
+ "a <> \\u03B1;" // alpha
|
||||
+ "::NFKC (NFD);"
|
||||
+ "::([[:Greek:][:Mark:]]);"
|
||||
;
|
||||
String testRulesForward =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD();"
|
||||
+ "a > \\u03B1;"
|
||||
+ "::NFKC();"
|
||||
;
|
||||
String testRulesBackward =
|
||||
"::[[:Greek:][:Mark:]];"
|
||||
+ "::NFD();"
|
||||
+ "\\u03B1 > a;"
|
||||
+ "::NFC();"
|
||||
;
|
||||
String source = "\u00E1"; // a-acute
|
||||
String target = "\u03AC"; // alpha-acute
|
||||
|
||||
Transliterator t2 = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
|
||||
Transliterator t3 = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);
|
||||
|
||||
checkDiff("Failed source-target", t2, source, target);
|
||||
checkDiff("Failed target-source", t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||
* $Date: 2001/11/16 21:50:40 $
|
||||
* $Revision: 1.72 $
|
||||
* $Date: 2001/11/17 06:44:50 $
|
||||
* $Revision: 1.73 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
void checkRules(String label, Transliterator t2, String testRulesForward) {
|
||||
String rules2 = t2.toRules(true);
|
||||
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
|
||||
rules2 = TestUtility.replace(rules2, " ", "");
|
||||
rules2 = TestUtility.replace(rules2, "\n", "");
|
||||
rules2 = TestUtility.replace(rules2, "\r", "");
|
||||
testRulesForward = TestUtility.replace(testRulesForward, " ", "");
|
||||
|
||||
if (!rules2.equals(testRulesForward)) {
|
||||
errln(label);
|
||||
logln("GENERATED RULES: " + rules2);
|
||||
logln("SHOULD BE: " + testRulesForward);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark's toRules test.
|
||||
*/
|
||||
public void TestToRulesMark() {
|
||||
|
||||
String testRules =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD (NFC);"
|
||||
+ "::Lower (Lower);"
|
||||
+ "a <> \\u03B1;" // alpha
|
||||
+ "::NFKC (NFD);"
|
||||
+ "::Upper (Lower);"
|
||||
+ "::Lower ();"
|
||||
+ "::([[:Greek:][:Mark:]]);"
|
||||
;
|
||||
String testRulesForward =
|
||||
"::[[:Latin:][:Mark:]];"
|
||||
+ "::NFKD(NFC);"
|
||||
+ "::Lower(Lower);"
|
||||
+ "a > \\u03B1;"
|
||||
+ "::NFKC(NFD);"
|
||||
+ "::Upper (Lower);"
|
||||
+ "::Lower ();"
|
||||
;
|
||||
String testRulesBackward =
|
||||
"::[[:Greek:][:Mark:]];"
|
||||
+ "::Lower (Upper);"
|
||||
+ "::NFD(NFKC);"
|
||||
+ "\\u03B1 > a;"
|
||||
+ "::Lower(Lower);"
|
||||
+ "::NFC(NFKD);"
|
||||
;
|
||||
String source = "\u00E1"; // a-acute
|
||||
String target = "\u03AC"; // alpha-acute
|
||||
|
||||
Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
|
||||
Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
|
||||
|
||||
expect(t2, source, target);
|
||||
expect(t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j ONLY
|
||||
// These tests are not mirrored (yet) in icu4c at
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/10/26 22:46:35 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2001/11/17 06:43:17 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
|
||||
return trans[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Append c to buf, unless buf is empty or buf already ends in c.
|
||||
*/
|
||||
private static void _smartAppend(StringBuffer buf, char c) {
|
||||
if (buf.length() != 0 &&
|
||||
buf.charAt(buf.length() - 1) != c) {
|
||||
buf.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// We do NOT call toRules() on our component transliterators, in
|
||||
// general. If we have several rule-based transliterators, this
|
||||
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
|
||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||
// we do call toRules() recursively.
|
||||
StringBuffer rulesSource = new StringBuffer();
|
||||
if (compoundRBTIndex >= 0 && getFilter() != null) {
|
||||
// If we are a compound RBT and if we have a global
|
||||
// filter, then emit it at the top.
|
||||
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
|
||||
}
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
String rule;
|
||||
if (i == compoundRBTIndex) {
|
||||
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
|
||||
} else {
|
||||
rule = trans[i].baseToRules(escapeUnprintable);
|
||||
}
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
||||
rulesSource.append('\n');
|
||||
}
|
||||
_smartAppend(rulesSource, '\n');
|
||||
rulesSource.append(rule);
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
||||
rulesSource.append(ID_DELIM);
|
||||
}
|
||||
_smartAppend(rulesSource, ID_DELIM);
|
||||
}
|
||||
return rulesSource.toString();
|
||||
}
|
||||
|
@ -4,8 +4,8 @@
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
|
||||
* $Date: 2001/11/14 19:16:41 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2001/11/17 06:43:17 $
|
||||
* $Revision: 1.12 $
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.text;
|
||||
@ -866,7 +866,9 @@ class TransliteratorParser {
|
||||
int lengthBefore = idBlockResult.length();
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPoint = lengthBefore;
|
||||
// In the forward direction parseID adds elements at the end.
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelim = new boolean[1];
|
||||
@ -881,6 +883,10 @@ class TransliteratorParser {
|
||||
throw new IllegalArgumentException("Invalid ::ID " +
|
||||
rule.substring(pos, i1));
|
||||
}
|
||||
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
|
||||
// In the reverse direction parseID adds elements at the start.
|
||||
idSplitPoint += idBlockResult.length() - lengthBefore;
|
||||
}
|
||||
if (cpdFilter[0] != null) {
|
||||
if (compoundFilter != null) {
|
||||
// Multiple compound filters
|
||||
|
Loading…
Reference in New Issue
Block a user