ICU-1825 allow filter in &func; fix canonicalization of &func

X-SVN-Rev: 8479
This commit is contained in:
Alan Liu 2002-04-17 16:48:28 +00:00
parent 8ed4fea84a
commit ab07165e4c
3 changed files with 92 additions and 30 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
* $Date: 2002/04/01 22:15:23 $
* $Revision: 1.103 $
* $Date: 2002/04/17 16:48:28 $
* $Revision: 1.104 $
*
*****************************************************************************************
*/
@ -15,6 +15,7 @@ import com.ibm.icu.lang.*;
import com.ibm.icu.text.*;
import com.ibm.icu.dev.test.*;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.CaseInsensitiveString;
import java.text.*;
import java.util.*;
@ -2531,6 +2532,71 @@ public class TransliteratorTest extends TestFmwk {
"x x x qtp qstp qrstp");
}
/**
 * Test that user-registered transliterators can be used under function
 * syntax (<code>&amp;ID(...)</code>) inside rule-based transliterators.
 *
 * <p>The test registers several transliterators through
 * {@link TestUserFunctionFactory}, then builds rules that call them by
 * name, including a nested call and a call preceded by a filter.
 * Everything registered here is unregistered in a <code>finally</code>
 * clause so that a failure part-way through (for example a rule that
 * fails to parse and throws) does not leave test-only IDs behind in the
 * transliterator registry for subsequent tests.
 */
public void TestUserFunction() {
    Transliterator t;
    try {
        // There's no need to register inverses if we don't use them
        TestUserFunctionFactory.add("Any-gif",
            Transliterator.createFromRules("gif",
                "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
                Transliterator.FORWARD));
        //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));

        TestUserFunctionFactory.add("Any-RemoveCurly",
            Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ;", Transliterator.FORWARD));
        //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));

        // A rule that calls a function on each character.
        logln("Trying &hex");
        t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
        logln("Registering");
        TestUserFunctionFactory.add("Any-hex2", t);
        // Fetch it back through the registry so the factory path is exercised.
        t = Transliterator.getInstance("Any-hex2");
        expect(t, "abc", "\\u0061\\u0062\\u0063");

        // Nested function calls, both referring to IDs registered above.
        // Note the IDs are written with different case than registered.
        logln("Trying &gif");
        t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
        logln("Registering");
        TestUserFunctionFactory.add("Any-gif2", t);
        t = Transliterator.getInstance("Any-gif2");
        expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
               "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");

        // Test that filters are allowed after &
        t = Transliterator.createFromRules("test",
            "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';", Transliterator.FORWARD);
        expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
    } finally {
        // Unregister our test stuff, even if something above threw
        TestUserFunctionFactory.unregister();
    }
}
/**
 * Factory used by TestUserFunction to make transliterator instances
 * available under chosen IDs. Instances are kept in a static map keyed by
 * a CaseInsensitiveString built from the ID, and the shared singleton
 * factory is registered with Transliterator for each added ID.
 */
static class TestUserFunctionFactory implements Transliterator.Factory {
    static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
    static HashMap m = new HashMap();

    /**
     * Remember <code>t</code> under <code>ID</code> and register the
     * singleton factory for that ID.
     * @param ID the ID to register
     * @param t the transliterator to hand back for that ID
     */
    static void add(String ID, Transliterator t) {
        CaseInsensitiveString key = new CaseInsensitiveString(ID);
        m.put(key, t);
        Transliterator.registerFactory(ID, singleton);
    }

    /**
     * Look up the transliterator previously stored for <code>ID</code>.
     * @param ID the ID being requested
     * @return the stored transliterator, or null if the map has no entry
     * for this ID
     */
    public Transliterator getInstance(String ID) {
        Object stored = m.get(new CaseInsensitiveString(ID));
        return (Transliterator) stored;
    }

    /**
     * Unregister every ID that was added through this factory and empty
     * the map.
     */
    static void unregister() {
        for (Iterator it = m.keySet().iterator(); it.hasNext(); ) {
            CaseInsensitiveString key = (CaseInsensitiveString) it.next();
            Transliterator.unregister(key.getString());
            // Removing through the iterator drops the key/value pair from m
            // without invalidating the iteration.
            it.remove();
        }
    }
}
//======================================================================
// Support methods
//======================================================================

View File

@ -131,34 +131,27 @@ class TransliteratorIDParser {
}
/**
* Parse a basic ID from the given string. A basic ID contains
* only a single source, target, and variant. It does not contain
* a filter or an explicit inverse.
* Parse a filter ID, that is, an ID of the general form
* "[f1] s1-t1/v1", with the filters optional, and the variants optional.
* @param id the id to be parsed
* @param pos INPUT-OUTPUT parameter. On input, the position of
* the first character to parse. On output, the position after
* the last character parsed. If the parse fails pos[0] will be
* unchanged.
* @return the parsed ID in canonical format, or null on parse
* failure. If the parsed ID did not contain a source, the return
* ID will not.
* the last character parsed.
* @return a SingleID object or null if the parse fails
*/
public static String parseBasicID(String id, int[] pos) {
Specs specs = parseFilterID(id, pos, false);
if (specs != null) {
StringBuffer buf = new StringBuffer();
if (specs.sawSource) {
buf.append(specs.source);
buf.append(TARGET_SEP);
}
buf.append(specs.target);
if (specs.variant != null) {
buf.append(VARIANT_SEP);
buf.append(specs.variant);
}
return buf.toString();
public static SingleID parseFilterID(String id, int[] pos) {
int start = pos[0];
Specs specs = parseFilterID(id, pos, true);
if (specs == null) {
pos[0] = start;
return null;
}
return null;
// Assemble return results
SingleID single = specsToID(specs, FORWARD);
single.filter = specs.filter;
return single;
}
/**

View File

@ -4,8 +4,8 @@
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
* $Date: 2002/03/06 00:37:52 $
* $Revision: 1.20 $
* $Date: 2002/04/17 16:46:11 $
* $Revision: 1.21 $
**********************************************************************
*/
package com.ibm.icu.text;
@ -573,14 +573,17 @@ class TransliteratorParser {
case FUNCTION:
{
iref[0] = pos;
String id = TransliteratorIDParser.parseBasicID(rule, iref);
TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);
// The next character MUST be a segment open
if (id == null ||
if (single == null ||
!Utility.parseChar(rule, iref, SEGMENT_OPEN)) {
syntaxError("Invalid function", rule, start);
}
Transliterator t = Transliterator.getBasicInstance(id, id);
Transliterator t = single.getInstance();
if (t == null) {
syntaxError("Invalid function ID", rule, start);
}
// bufSegStart is the offset in buf to the first
// character of the segment we are parsing.