ICU-1364 fix reverse ::ID block parsing and global filters in toRules()

X-SVN-Rev: 6971
This commit is contained in:
Alan Liu 2001-11-17 06:44:50 +00:00
parent e572ebdae1
commit 0d4f43a415
12 changed files with 294 additions and 179 deletions

View File

@ -316,6 +316,16 @@ void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransli
setID(joinIDs(trans, count)); setID(joinIDs(trans, count));
} }
/**
 * Make sure a non-empty buf ends in c.
 * Nothing is appended when buf is empty or when its last character
 * is already c.
 */
static void _smartAppend(UnicodeString& buf, UChar c) {
    if (buf.length() == 0) {
        return; // an empty buffer never receives c
    }
    if (buf.charAt(buf.length() - 1) == c) {
        return; // already ends in c
    }
    buf.append(c);
}
UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource, UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
UBool escapeUnprintable) const { UBool escapeUnprintable) const {
// We do NOT call toRules() on our component transliterators, in // We do NOT call toRules() on our component transliterators, in
@ -325,6 +335,12 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
// we do call toRules() recursively. // we do call toRules() recursively.
rulesSource.truncate(0); rulesSource.truncate(0);
if (compoundRBTIndex >= 0 && getFilter() != NULL) {
// If we are a compound RBT and if we have a global
// filter, then emit it at the top.
UnicodeString pat;
rulesSource.append("::").append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM);
}
for (int32_t i=0; i<count; ++i) { for (int32_t i=0; i<count; ++i) {
UnicodeString rule; UnicodeString rule;
if (i == compoundRBTIndex) { if (i == compoundRBTIndex) {
@ -332,15 +348,9 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
} else { } else {
trans[i]->Transliterator::toRules(rule, escapeUnprintable); trans[i]->Transliterator::toRules(rule, escapeUnprintable);
} }
if (rulesSource.length() != 0 && _smartAppend(rulesSource, NEWLINE);
rulesSource.charAt(rulesSource.length() - 1) != NEWLINE) {
rulesSource.append(NEWLINE);
}
rulesSource.append(rule); rulesSource.append(rule);
if (rulesSource.length() != 0 && _smartAppend(rulesSource, ID_DELIM);
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
rulesSource.append(ID_DELIM);
}
} }
return rulesSource; return rulesSource;
} }

View File

@ -829,7 +829,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
int32_t lengthBefore = idBlock.length(); int32_t lengthBefore = idBlock.length();
if (mode == 1) { if (mode == 1) {
mode = 2; mode = 2;
idSplitPoint = lengthBefore; // In the forward direction parseID adds elements at the end.
// In the reverse direction parseID adds elements at the start.
idSplitPoint = (direction == UTRANS_REVERSE) ? 0 : lengthBefore;
} }
int32_t p = pos; int32_t p = pos;
UBool sawDelim; UBool sawDelim;
@ -840,6 +842,10 @@ void TransliteratorParser::parseRules(const UnicodeString& rules,
delete cpdFilter; delete cpdFilter;
syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos); syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos);
} else { } else {
if (direction == UTRANS_REVERSE && idSplitPoint >= 0) {
// In the reverse direction parseID adds elements at the start.
idSplitPoint += idBlock.length() - lengthBefore;
}
if (cpdFilter != NULL) { if (cpdFilter != NULL) {
if (compoundFilter != NULL) { if (compoundFilter != NULL) {
syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos); syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos);

View File

@ -139,6 +139,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(57,TestVariableRange); TESTCASE(57,TestVariableRange);
TESTCASE(58,TestInvalidPostContext); TESTCASE(58,TestInvalidPostContext);
TESTCASE(59,TestIDForms); TESTCASE(59,TestIDForms);
TESTCASE(60,TestToRulesMark);
default: name = ""; break; default: name = ""; break;
} }
} }
@ -2755,6 +2756,84 @@ void TransliteratorTest::TestIDForms() {
} }
} }
// Zero-terminated UChar strings passed to findAndReplace() by checkRules().
static const UChar SPACE[] = {32,0}; // U+0020
static const UChar NEWLINE[] = {10,0}; // U+000A
static const UChar RETURN[] = {13,0}; // U+000D
static const UChar EMPTY[] = {0}; // zero-length string
void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
const UnicodeString& testRulesForward) {
UnicodeString rules2; t2.toRules(rules2, TRUE);
//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
rules2.findAndReplace(SPACE, EMPTY);
rules2.findAndReplace(NEWLINE, EMPTY);
rules2.findAndReplace(RETURN, EMPTY);
UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
if (rules2 != testRules) {
errln(label);
logln((UnicodeString)"GENERATED RULES: " + rules2);
logln((UnicodeString)"SHOULD BE: " + testRulesForward);
}
}
/**
* Mark's toRules test.
*/
void TransliteratorTest::TestToRulesMark() {
const char* testRules =
"::[[:Latin:][:Mark:]];"
"::NFKD (NFC);"
"::Lower (Lower);"
"a <> \\u03B1;" // alpha
"::NFKC (NFD);"
"::Upper (Lower);"
"::Lower ();"
"::([[:Greek:][:Mark:]]);"
;
const char* testRulesForward =
"::[[:Latin:][:Mark:]];"
"::NFKD(NFC);"
"::Lower(Lower);"
"a > \\u03B1;"
"::NFKC(NFD);"
"::Upper (Lower);"
"::Lower ();"
;
const char* testRulesBackward =
"::[[:Greek:][:Mark:]];"
"::Lower (Upper);"
"::NFD(NFKC);"
"\\u03B1 > a;"
"::Lower(Lower);"
"::NFC(NFKD);"
;
UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
UParseError pe;
UErrorCode ec = U_ZERO_ERROR;
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
if (U_FAILURE(ec)) {
delete t2;
delete t3;
errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
return;
}
expect(*t2, source, target);
expect(*t3, target, source);
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
delete t2;
delete t3;
}
//====================================================================== //======================================================================
// icu4c ONLY // icu4c ONLY
// These tests are not mirrored (yet) in icu4j at // These tests are not mirrored (yet) in icu4j at

View File

@ -282,6 +282,11 @@ class TransliteratorTest : public IntlTest {
*/ */
void TestIDForms(); void TestIDForms();
/**
* Mark's toRules test.
*/
void TestToRulesMark();
//====================================================================== //======================================================================
// Support methods // Support methods
//====================================================================== //======================================================================
@ -313,6 +318,9 @@ class TransliteratorTest : public IntlTest {
static UnicodeString& formatInput(UnicodeString &appendTo, static UnicodeString& formatInput(UnicodeString &appendTo,
const UnicodeString& input, const UnicodeString& input,
const UTransPosition& pos); const UTransPosition& pos);
/**
 * Compare the rules regenerated by t2.toRules() with testRulesForward.
 * Spaces are removed from both strings and line breaks from the
 * generated one before comparing; a mismatch is reported via errln(label).
 */
void checkRules(const UnicodeString& label, Transliterator& t2,
const UnicodeString& testRulesForward);
}; };
#endif #endif

View File

@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
} }
*/ */
public void TestToRules() throws IOException, ParseException {
    // Run the nested Test2Rules check, handing it this object as its log.
    Test2Rules rulesTest = new Test2Rules();
    rulesTest.test(this);
}
public void TestHiragana() throws IOException, ParseException { public void TestHiragana() throws IOException, ParseException {
new Test("Latin-Hiragana") new Test("Latin-Hiragana")
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal()); .test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
// return super.isSource(c); // return super.isSource(c);
// } // }
// } // }
/**
 * toRules() check: compiles one rule set in both directions, verifies a
 * sample conversion each way, and compares the regenerated rules with
 * the expected text.
 */
static class Test2Rules {
    TestLog log;

    /** Reports an error unless t2 transliterates source to exactly target. */
    void checkDiff(String label, Transliterator t2, String source, String target) {
        String actual = t2.transliterate(source);
        if (target.equals(actual)) {
            return;
        }
        log.errln(label
                  + ": " + TestUtility.hex(source)
                  + " => " + TestUtility.hex(actual)
                  + ", instead of " + TestUtility.hex(target));
    }

    /**
     * Reports an error unless t2.toRules(true), with spaces, LF and CR
     * removed, equals testRulesForward with spaces removed. Both
     * normalized strings are printed to System.out on mismatch.
     */
    void checkRules(String label, Transliterator t2, String testRulesForward) {
        String generated = t2.toRules(true);
        generated = TestUtility.replace(generated, " ", "");
        generated = TestUtility.replace(generated, "\n", "");
        generated = TestUtility.replace(generated, "\r", "");

        String expected = TestUtility.replace(testRulesForward, " ", "");
        if (generated.equals(expected)) {
            return;
        }
        log.errln(label);
        System.out.println();
        System.out.println("GENERATED RULES:\t" + generated);
        System.out.println("SHOULD BE:\t" + expected);
    }

    /** Runs the whole check, reporting failures through the given log. */
    public void test(TestLog log) {
        this.log = log;

        String source = "\u00E1"; // a-acute
        String target = "\u03AC"; // alpha-acute

        // Rule text compiled in both directions.
        String testRules =
            "::[[:Latin:][:Mark:]];"
            + "::NFKD (NFC);"
            + "a <> \\u03B1;" // alpha
            + "::NFKC (NFD);"
            + "::([[:Greek:][:Mark:]]);"
            ;
        // Expected toRules() text for the FORWARD transliterator.
        String testRulesForward =
            "::[[:Latin:][:Mark:]];"
            + "::NFKD();"
            + "a > \\u03B1;"
            + "::NFKC();"
            ;
        // Expected toRules() text for the REVERSE transliterator.
        String testRulesBackward =
            "::[[:Greek:][:Mark:]];"
            + "::NFD();"
            + "\\u03B1 > a;"
            + "::NFC();"
            ;

        Transliterator forward = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
        Transliterator reverse = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);

        checkDiff("Failed source-target", forward, source, target);
        checkDiff("Failed target-source", reverse, target, source);
        checkRules("Failed toRules FORWARD", forward, testRulesForward);
        checkRules("Failed toRules BACKWARD", reverse, testRulesBackward);
    }
}
} }

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
* $Date: 2001/11/16 21:50:40 $ * $Date: 2001/11/17 06:44:50 $
* $Revision: 1.72 $ * $Revision: 1.73 $
* *
***************************************************************************************** *****************************************************************************************
*/ */
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
} }
} }
/**
 * Reports an error with label unless t2.toRules(true), with spaces, LF
 * and CR removed, equals testRulesForward with spaces removed. Both
 * normalized strings are logged on mismatch.
 */
void checkRules(String label, Transliterator t2, String testRulesForward) {
    String generated = t2.toRules(true);
    generated = TestUtility.replace(generated, " ", "");
    generated = TestUtility.replace(generated, "\n", "");
    generated = TestUtility.replace(generated, "\r", "");

    String expected = TestUtility.replace(testRulesForward, " ", "");
    if (generated.equals(expected)) {
        return;
    }
    errln(label);
    logln("GENERATED RULES: " + generated);
    logln("SHOULD BE: " + expected);
}
/**
 * Mark's toRules test.
 * Compiles one rule set in both directions, checks a sample conversion
 * each way, then checks that toRules() reproduces the expected text.
 */
public void TestToRulesMark() {
    String source = "\u00E1"; // a-acute
    String target = "\u03AC"; // alpha-acute

    // Rule text compiled in both directions.
    String testRules =
        "::[[:Latin:][:Mark:]];"
        + "::NFKD (NFC);"
        + "::Lower (Lower);"
        + "a <> \\u03B1;" // alpha
        + "::NFKC (NFD);"
        + "::Upper (Lower);"
        + "::Lower ();"
        + "::([[:Greek:][:Mark:]]);"
        ;
    // Expected toRules() text for the FORWARD transliterator.
    String testRulesForward =
        "::[[:Latin:][:Mark:]];"
        + "::NFKD(NFC);"
        + "::Lower(Lower);"
        + "a > \\u03B1;"
        + "::NFKC(NFD);"
        + "::Upper (Lower);"
        + "::Lower ();"
        ;
    // Expected toRules() text for the REVERSE transliterator.
    String testRulesBackward =
        "::[[:Greek:][:Mark:]];"
        + "::Lower (Upper);"
        + "::NFD(NFKC);"
        + "\\u03B1 > a;"
        + "::Lower(Lower);"
        + "::NFC(NFKD);"
        ;

    Transliterator forward = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
    Transliterator reverse = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);

    expect(forward, source, target);
    expect(reverse, target, source);
    checkRules("Failed toRules FORWARD", forward, testRulesForward);
    checkRules("Failed toRules BACKWARD", reverse, testRulesBackward);
}
//====================================================================== //======================================================================
// icu4j ONLY // icu4j ONLY
// These tests are not mirrored (yet) in icu4c at // These tests are not mirrored (yet) in icu4c at

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
* $Date: 2001/10/26 22:46:35 $ * $Date: 2001/11/17 06:43:17 $
* $Revision: 1.18 $ * $Revision: 1.19 $
* *
***************************************************************************************** *****************************************************************************************
*/ */
@ -35,7 +35,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved. * <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
* *
* @author Alan Liu * @author Alan Liu
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
*/ */
public class CompoundTransliterator extends Transliterator { public class CompoundTransliterator extends Transliterator {
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
return trans[index]; return trans[index];
} }
/**
 * Make sure a non-empty buf ends in c.
 * Nothing is appended when buf is empty or when its last character
 * is already c.
 */
private static void _smartAppend(StringBuffer buf, char c) {
    if (buf.length() == 0) {
        return; // an empty buffer never receives c
    }
    if (buf.charAt(buf.length() - 1) == c) {
        return; // already ends in c
    }
    buf.append(c);
}
public String toRules(boolean escapeUnprintable) { public String toRules(boolean escapeUnprintable) {
// We do NOT call toRules() on our component transliterators, in // We do NOT call toRules() on our component transliterators, in
// general. If we have several rule-based transliterators, this // general. If we have several rule-based transliterators, this
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
// we do call toRules() recursively. // we do call toRules() recursively.
StringBuffer rulesSource = new StringBuffer(); StringBuffer rulesSource = new StringBuffer();
if (compoundRBTIndex >= 0 && getFilter() != null) {
// If we are a compound RBT and if we have a global
// filter, then emit it at the top.
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
}
for (int i=0; i<trans.length; ++i) { for (int i=0; i<trans.length; ++i) {
String rule; String rule;
if (i == compoundRBTIndex) { if (i == compoundRBTIndex) {
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
} else { } else {
rule = trans[i].baseToRules(escapeUnprintable); rule = trans[i].baseToRules(escapeUnprintable);
} }
if (rulesSource.length() != 0 && _smartAppend(rulesSource, '\n');
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
rulesSource.append('\n');
}
rulesSource.append(rule); rulesSource.append(rule);
if (rulesSource.length() != 0 && _smartAppend(rulesSource, ID_DELIM);
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
rulesSource.append(ID_DELIM);
}
} }
return rulesSource.toString(); return rulesSource.toString();
} }

View File

@ -4,8 +4,8 @@
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
* $Date: 2001/11/14 19:16:41 $ * $Date: 2001/11/17 06:43:17 $
* $Revision: 1.11 $ * $Revision: 1.12 $
********************************************************************** **********************************************************************
*/ */
package com.ibm.text; package com.ibm.text;
@ -866,7 +866,9 @@ class TransliteratorParser {
int lengthBefore = idBlockResult.length(); int lengthBefore = idBlockResult.length();
if (mode == 1) { if (mode == 1) {
mode = 2; mode = 2;
idSplitPoint = lengthBefore; // In the forward direction parseID adds elements at the end.
// In the reverse direction parseID adds elements at the start.
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
} }
int[] p = new int[] { pos }; int[] p = new int[] { pos };
boolean[] sawDelim = new boolean[1]; boolean[] sawDelim = new boolean[1];
@ -881,6 +883,10 @@ class TransliteratorParser {
throw new IllegalArgumentException("Invalid ::ID " + throw new IllegalArgumentException("Invalid ::ID " +
rule.substring(pos, i1)); rule.substring(pos, i1));
} }
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
// In the reverse direction parseID adds elements at the start.
idSplitPoint += idBlockResult.length() - lengthBefore;
}
if (cpdFilter[0] != null) { if (cpdFilter[0] != null) {
if (compoundFilter != null) { if (compoundFilter != null) {
// Multiple compound filters // Multiple compound filters

View File

@ -25,10 +25,6 @@ public class RoundTripTest extends TestFmwk {
} }
*/ */
public void TestToRules() throws IOException, ParseException {
    // Run the nested Test2Rules check, handing it this object as its log.
    Test2Rules rulesTest = new Test2Rules();
    rulesTest.test(this);
}
public void TestHiragana() throws IOException, ParseException { public void TestHiragana() throws IOException, ParseException {
new Test("Latin-Hiragana") new Test("Latin-Hiragana")
.test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal()); .test("[a-zA-Z]", "[[:hiragana:]\u3040-\u3094]", "[\u309D\u309E]", this, new Legal());
@ -1168,69 +1164,4 @@ public class RoundTripTest extends TestFmwk {
// return super.isSource(c); // return super.isSource(c);
// } // }
// } // }
/**
 * toRules() check: compiles one rule set in both directions, verifies a
 * sample conversion each way, and compares the regenerated rules with
 * the expected text.
 */
static class Test2Rules {
    TestLog log;

    /** Reports an error unless t2 transliterates source to exactly target. */
    void checkDiff(String label, Transliterator t2, String source, String target) {
        String actual = t2.transliterate(source);
        if (target.equals(actual)) {
            return;
        }
        log.errln(label
                  + ": " + TestUtility.hex(source)
                  + " => " + TestUtility.hex(actual)
                  + ", instead of " + TestUtility.hex(target));
    }

    /**
     * Reports an error unless t2.toRules(true), with spaces, LF and CR
     * removed, equals testRulesForward with spaces removed. Both
     * normalized strings are printed to System.out on mismatch.
     */
    void checkRules(String label, Transliterator t2, String testRulesForward) {
        String generated = t2.toRules(true);
        generated = TestUtility.replace(generated, " ", "");
        generated = TestUtility.replace(generated, "\n", "");
        generated = TestUtility.replace(generated, "\r", "");

        String expected = TestUtility.replace(testRulesForward, " ", "");
        if (generated.equals(expected)) {
            return;
        }
        log.errln(label);
        System.out.println();
        System.out.println("GENERATED RULES:\t" + generated);
        System.out.println("SHOULD BE:\t" + expected);
    }

    /** Runs the whole check, reporting failures through the given log. */
    public void test(TestLog log) {
        this.log = log;

        String source = "\u00E1"; // a-acute
        String target = "\u03AC"; // alpha-acute

        // Rule text compiled in both directions.
        String testRules =
            "::[[:Latin:][:Mark:]];"
            + "::NFKD (NFC);"
            + "a <> \\u03B1;" // alpha
            + "::NFKC (NFD);"
            + "::([[:Greek:][:Mark:]]);"
            ;
        // Expected toRules() text for the FORWARD transliterator.
        String testRulesForward =
            "::[[:Latin:][:Mark:]];"
            + "::NFKD();"
            + "a > \\u03B1;"
            + "::NFKC();"
            ;
        // Expected toRules() text for the REVERSE transliterator.
        String testRulesBackward =
            "::[[:Greek:][:Mark:]];"
            + "::NFD();"
            + "\\u03B1 > a;"
            + "::NFC();"
            ;

        Transliterator forward = Transliterator.createFromRules("temp1", testRules, Transliterator.FORWARD);
        Transliterator reverse = Transliterator.createFromRules("temp1", testRules, Transliterator.REVERSE);

        checkDiff("Failed source-target", forward, source, target);
        checkDiff("Failed target-source", reverse, target, source);
        checkRules("Failed toRules FORWARD", forward, testRulesForward);
        checkRules("Failed toRules BACKWARD", reverse, testRulesBackward);
    }
}
} }

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
* $Date: 2001/11/16 21:50:40 $ * $Date: 2001/11/17 06:44:50 $
* $Revision: 1.72 $ * $Revision: 1.73 $
* *
***************************************************************************************** *****************************************************************************************
*/ */
@ -2055,6 +2055,66 @@ public class TransliteratorTest extends TestFmwk {
} }
} }
/**
 * Reports an error with label unless t2.toRules(true), with spaces, LF
 * and CR removed, equals testRulesForward with spaces removed. Both
 * normalized strings are logged on mismatch.
 */
void checkRules(String label, Transliterator t2, String testRulesForward) {
    String generated = t2.toRules(true);
    generated = TestUtility.replace(generated, " ", "");
    generated = TestUtility.replace(generated, "\n", "");
    generated = TestUtility.replace(generated, "\r", "");

    String expected = TestUtility.replace(testRulesForward, " ", "");
    if (generated.equals(expected)) {
        return;
    }
    errln(label);
    logln("GENERATED RULES: " + generated);
    logln("SHOULD BE: " + expected);
}
/**
 * Mark's toRules test.
 * Compiles one rule set in both directions, checks a sample conversion
 * each way, then checks that toRules() reproduces the expected text.
 */
public void TestToRulesMark() {
    String source = "\u00E1"; // a-acute
    String target = "\u03AC"; // alpha-acute

    // Rule text compiled in both directions.
    String testRules =
        "::[[:Latin:][:Mark:]];"
        + "::NFKD (NFC);"
        + "::Lower (Lower);"
        + "a <> \\u03B1;" // alpha
        + "::NFKC (NFD);"
        + "::Upper (Lower);"
        + "::Lower ();"
        + "::([[:Greek:][:Mark:]]);"
        ;
    // Expected toRules() text for the FORWARD transliterator.
    String testRulesForward =
        "::[[:Latin:][:Mark:]];"
        + "::NFKD(NFC);"
        + "::Lower(Lower);"
        + "a > \\u03B1;"
        + "::NFKC(NFD);"
        + "::Upper (Lower);"
        + "::Lower ();"
        ;
    // Expected toRules() text for the REVERSE transliterator.
    String testRulesBackward =
        "::[[:Greek:][:Mark:]];"
        + "::Lower (Upper);"
        + "::NFD(NFKC);"
        + "\\u03B1 > a;"
        + "::Lower(Lower);"
        + "::NFC(NFKD);"
        ;

    Transliterator forward = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
    Transliterator reverse = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);

    expect(forward, source, target);
    expect(reverse, target, source);
    checkRules("Failed toRules FORWARD", forward, testRulesForward);
    checkRules("Failed toRules BACKWARD", reverse, testRulesBackward);
}
//====================================================================== //======================================================================
// icu4j ONLY // icu4j ONLY
// These tests are not mirrored (yet) in icu4c at // These tests are not mirrored (yet) in icu4c at

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
* $Date: 2001/10/26 22:46:35 $ * $Date: 2001/11/17 06:43:17 $
* $Revision: 1.18 $ * $Revision: 1.19 $
* *
***************************************************************************************** *****************************************************************************************
*/ */
@ -35,7 +35,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved. * <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
* *
* @author Alan Liu * @author Alan Liu
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.18 $ $Date: 2001/10/26 22:46:35 $ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.19 $ $Date: 2001/11/17 06:43:17 $
*/ */
public class CompoundTransliterator extends Transliterator { public class CompoundTransliterator extends Transliterator {
@ -256,6 +256,16 @@ public class CompoundTransliterator extends Transliterator {
return trans[index]; return trans[index];
} }
/**
 * Make sure a non-empty buf ends in c.
 * Nothing is appended when buf is empty or when its last character
 * is already c.
 */
private static void _smartAppend(StringBuffer buf, char c) {
    if (buf.length() == 0) {
        return; // an empty buffer never receives c
    }
    if (buf.charAt(buf.length() - 1) == c) {
        return; // already ends in c
    }
    buf.append(c);
}
public String toRules(boolean escapeUnprintable) { public String toRules(boolean escapeUnprintable) {
// We do NOT call toRules() on our component transliterators, in // We do NOT call toRules() on our component transliterators, in
// general. If we have several rule-based transliterators, this // general. If we have several rule-based transliterators, this
@ -264,6 +274,11 @@ public class CompoundTransliterator extends Transliterator {
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
// we do call toRules() recursively. // we do call toRules() recursively.
StringBuffer rulesSource = new StringBuffer(); StringBuffer rulesSource = new StringBuffer();
if (compoundRBTIndex >= 0 && getFilter() != null) {
// If we are a compound RBT and if we have a global
// filter, then emit it at the top.
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
}
for (int i=0; i<trans.length; ++i) { for (int i=0; i<trans.length; ++i) {
String rule; String rule;
if (i == compoundRBTIndex) { if (i == compoundRBTIndex) {
@ -271,15 +286,9 @@ public class CompoundTransliterator extends Transliterator {
} else { } else {
rule = trans[i].baseToRules(escapeUnprintable); rule = trans[i].baseToRules(escapeUnprintable);
} }
if (rulesSource.length() != 0 && _smartAppend(rulesSource, '\n');
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
rulesSource.append('\n');
}
rulesSource.append(rule); rulesSource.append(rule);
if (rulesSource.length() != 0 && _smartAppend(rulesSource, ID_DELIM);
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
rulesSource.append(ID_DELIM);
}
} }
return rulesSource.toString(); return rulesSource.toString();
} }

View File

@ -4,8 +4,8 @@
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
* $Date: 2001/11/14 19:16:41 $ * $Date: 2001/11/17 06:43:17 $
* $Revision: 1.11 $ * $Revision: 1.12 $
********************************************************************** **********************************************************************
*/ */
package com.ibm.text; package com.ibm.text;
@ -866,7 +866,9 @@ class TransliteratorParser {
int lengthBefore = idBlockResult.length(); int lengthBefore = idBlockResult.length();
if (mode == 1) { if (mode == 1) {
mode = 2; mode = 2;
idSplitPoint = lengthBefore; // In the forward direction parseID adds elements at the end.
// In the reverse direction parseID adds elements at the start.
idSplitPoint = (direction == Transliterator.REVERSE) ? 0 : lengthBefore;
} }
int[] p = new int[] { pos }; int[] p = new int[] { pos };
boolean[] sawDelim = new boolean[1]; boolean[] sawDelim = new boolean[1];
@ -881,6 +883,10 @@ class TransliteratorParser {
throw new IllegalArgumentException("Invalid ::ID " + throw new IllegalArgumentException("Invalid ::ID " +
rule.substring(pos, i1)); rule.substring(pos, i1));
} }
if (direction == Transliterator.REVERSE && idSplitPoint >= 0) {
// In the reverse direction parseID adds elements at the start.
idSplitPoint += idBlockResult.length() - lengthBefore;
}
if (cpdFilter[0] != null) { if (cpdFilter[0] != null) {
if (compoundFilter != null) { if (compoundFilter != null) {
// Multiple compound filters // Multiple compound filters