ICU-65 allow explicit reverse ID of the form Foo-Bar(Bar-Baz)
X-SVN-Rev: 5840
This commit is contained in:
parent
d038a7a071
commit
409625bd97
@ -216,7 +216,7 @@ public class JamoTest extends TransliteratorTest {
|
||||
// "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " +
|
||||
"\ub4f1\uacfc " +
|
||||
"\uac19\uc774 \ud604\uc7ac \ub110\ub9ac \uc0ac\uc6a9\ub418\ub294 " +
|
||||
"\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 ISO/IEC " +
|
||||
"\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 " + //ISO/IEC " +
|
||||
"10646\uc744 \uad6c\ud604\ud558\ub294 \uacf5\uc2dd\uc801\uc778 " +
|
||||
"\ubc29\ubc95\uc785\ub2c8\ub2e4. \uc774\ub294 \ub9ce\uc740 \uc6b4\uc601 " +
|
||||
"\uccb4\uc81c, \uc694\uc998 \uc0ac\uc6a9\ub418\ub294 \ubaa8\ub4e0 " +
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||
* $Date: 2001/09/19 17:44:09 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/09/20 21:21:10 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -961,6 +961,26 @@ public class TransliteratorTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test inverse of Greek-Latin; Title()
|
||||
*/
|
||||
public void TestCompoundInverse() {
|
||||
Transliterator t = Transliterator.getInstance
|
||||
("Greek-Latin; Title()", Transliterator.REVERSE);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance");
|
||||
return;
|
||||
}
|
||||
String exp = "(Title);Latin-Greek";
|
||||
if (t.getID().equals(exp)) {
|
||||
logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
|
||||
t.getID());
|
||||
} else {
|
||||
errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
|
||||
t.getID() + "\", expected \"" + exp + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/03/30 23:33:06 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.12 $ $Date: 2001/03/30 23:33:06 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -48,6 +48,14 @@ public class CompoundTransliterator extends Transliterator {
|
||||
*/
|
||||
private UnicodeFilter[] filters = null;
|
||||
|
||||
/**
|
||||
* For compound RBTs (those with an ::id block before and/or after
|
||||
* the main rule block) we record the index of the RBT here.
|
||||
* Otherwise, this should have a value of -1. We need this
|
||||
* information to implement toRules().
|
||||
*/
|
||||
private int compoundRBTIndex;
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
@ -131,6 +139,72 @@ public class CompoundTransliterator extends Transliterator {
|
||||
this(ID, FORWARD, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private constructor for Transliterator from a vector of
|
||||
* transliterators. The vector order is FORWARD, so if dir is
|
||||
* REVERSE then the vector order will be reversed. The caller is
|
||||
* responsible for fixing up the ID.
|
||||
*/
|
||||
CompoundTransliterator(int dir,
|
||||
Vector list) {
|
||||
super("", null);
|
||||
trans = null;
|
||||
compoundRBTIndex = -1;
|
||||
init(list, dir, false);
|
||||
// assume caller will fixup ID
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
* @param list a vector of transliterator objects to be adopted. It
|
||||
* should NOT be empty. The list should be in declared order. That
|
||||
* is, it should be in the FORWARD order; if direction is REVERSE then
|
||||
* the list order will be reversed.
|
||||
* @param direction either FORWARD or REVERSE
|
||||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
* @param status the error code indicating success or failure
|
||||
*/
|
||||
private void init(Vector list,
|
||||
int direction,
|
||||
boolean fixReverseID) {
|
||||
// assert(trans == 0);
|
||||
|
||||
// Allocate array
|
||||
int count = list.size();
|
||||
trans = new Transliterator[count];
|
||||
|
||||
// Move the transliterators from the vector into an array.
|
||||
// Reverse the order if necessary.
|
||||
int i;
|
||||
for (i=0; i<count; ++i) {
|
||||
int j = (direction == FORWARD) ? i : count - 1 - i;
|
||||
trans[i] = (Transliterator) list.elementAt(j);
|
||||
}
|
||||
|
||||
// Fix compoundRBTIndex for REVERSE transliterators
|
||||
if (compoundRBTIndex >= 0 && direction == REVERSE) {
|
||||
compoundRBTIndex = count - 1 - compoundRBTIndex;
|
||||
}
|
||||
|
||||
// If the direction is UTRANS_REVERSE then we may need to fix the
|
||||
// ID.
|
||||
if (direction == REVERSE && fixReverseID) {
|
||||
StringBuffer newID = new StringBuffer();
|
||||
for (i=0; i<count; ++i) {
|
||||
if (i > 0) {
|
||||
newID.append(ID_DELIM);
|
||||
}
|
||||
newID.append(trans[i].getID());
|
||||
}
|
||||
setID(newID.toString());
|
||||
}
|
||||
|
||||
computeMaximumContextLength();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
|
@ -13,7 +13,7 @@ import java.util.*;
|
||||
|
||||
/*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.1 $ $Date: 2001/06/12 23:01:55 $
|
||||
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.2 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public class NormalizationTransliterator extends Transliterator {
|
||||
|
||||
@ -31,25 +31,25 @@ public class NormalizationTransliterator extends Transliterator {
|
||||
* System registration hook.
|
||||
*/
|
||||
static void register() {
|
||||
Transliterator.registerFactory("NFC", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.COMPOSE);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFD", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.DECOMP);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFKC", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.COMPOSE_COMPAT);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFKD", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.DECOMP_COMPAT);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/NullTransliterator.java,v $
|
||||
* $Date: 2000/06/28 20:49:54 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -21,9 +21,10 @@ public class NullTransliterator extends Transliterator {
|
||||
"\u00A9 IBM Corporation 2000. All rights reserved.";
|
||||
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
* Package accessible IDs for this transliterator.
|
||||
*/
|
||||
static String _ID = "Null";
|
||||
static String SHORT_ID = "Null";
|
||||
static String _ID = "Any-Null";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RemoveTransliterator.java,v $
|
||||
* $Date: 2001/04/04 18:06:53 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -22,7 +22,7 @@ public class RemoveTransliterator extends Transliterator {
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
*/
|
||||
static String _ID = "Remove";
|
||||
static String _ID = "Any-Remove";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
|
@ -4,9 +4,9 @@
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2001/09/19 17:43:38 $
|
||||
* $Revision: 1.38 $
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.38 $ $Date: 2001/09/19 17:43:38 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
@ -262,7 +262,7 @@ public abstract class Transliterator {
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
/**
|
||||
* Position structure for incremental transliteration. This data
|
||||
@ -337,7 +337,7 @@ public abstract class Transliterator {
|
||||
*/
|
||||
private String ID;
|
||||
|
||||
/**
|
||||
/**
|
||||
* This transliterator's filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
@ -380,6 +380,12 @@ public abstract class Transliterator {
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
/**
|
||||
* Prefix for resource bundle key for the display name for a
|
||||
* transliterator. The ID is appended to this to form the key.
|
||||
@ -412,6 +418,10 @@ public abstract class Transliterator {
|
||||
private static final String RB_LOCALE_ELEMENTS =
|
||||
"com.ibm.text.resources.LocaleElements";
|
||||
|
||||
protected static final char ID_DELIM = ';';
|
||||
|
||||
protected static final char ID_SEP = '-';
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
@ -509,7 +519,7 @@ public abstract class Transliterator {
|
||||
* pending transliterations, clients should call {@link
|
||||
* #finishTransliteration} after the last call to this
|
||||
* method has been made.
|
||||
*
|
||||
*
|
||||
* @param text the buffer holding transliterated and untransliterated text
|
||||
* @param index the start and limit of the text, the position
|
||||
* of the cursor, and the start and limit of transliteration.
|
||||
@ -771,74 +781,47 @@ public abstract class Transliterator {
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
public static Transliterator getInstance(String ID, int direction) {
|
||||
if (ID.indexOf(';') >= 0) {
|
||||
return new CompoundTransliterator(ID, direction, null);
|
||||
}
|
||||
|
||||
// 'id' is the ID with the filter pattern removed and with
|
||||
// whitespace deleted.
|
||||
StringBuffer id = new StringBuffer(ID);
|
||||
|
||||
// Look for embedded filter pattern
|
||||
UnicodeSet filter = null;
|
||||
int setStart = ID.indexOf('[');
|
||||
int setLimit = 0;
|
||||
if (setStart >= 0) {
|
||||
ParsePosition pos = new ParsePosition(setStart);
|
||||
filter = new UnicodeSet(ID, pos, null);
|
||||
setLimit = pos.getIndex();
|
||||
id.delete(setStart, setLimit);
|
||||
}
|
||||
|
||||
// Delete whitespace
|
||||
int i;
|
||||
for (i=0; i<id.length(); ++i) {
|
||||
if (UCharacter.isWhitespace(id.charAt(i))) {
|
||||
id.deleteCharAt(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A).
|
||||
// Record the position of the separator. Detect the special
|
||||
// case of Null, whose inverse is itself. Given an ID with no
|
||||
// separator "Foo", an abbreviation for "Any-Foo", consider
|
||||
// the inverse to be "Foo-Any".
|
||||
String str = id.toString();
|
||||
int sep = str.indexOf('-');
|
||||
if (str.equalsIgnoreCase(NullTransliterator._ID)) {
|
||||
sep = id.length();
|
||||
} else if (direction == REVERSE) {
|
||||
String left;
|
||||
if (sep >= 0) {
|
||||
left = id.substring(0, sep);
|
||||
id.delete(0, sep+1);
|
||||
} else {
|
||||
left = "Any";
|
||||
}
|
||||
sep = id.length();
|
||||
id.append('-').append(left);
|
||||
} else if (sep < 0) {
|
||||
sep = id.length();
|
||||
}
|
||||
|
||||
Transliterator t = internalGetInstance(id.toString());
|
||||
if (t != null) {
|
||||
if (filter != null) {
|
||||
t.setFilter(filter);
|
||||
id.insert(sep, ID.substring(setStart, setLimit));
|
||||
}
|
||||
t.ID = id.toString();
|
||||
return t;
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
public static final Transliterator getInstance(String ID, int direction) {
|
||||
return getInstance(ID, direction, -1, null);
|
||||
}
|
||||
|
||||
public static final Transliterator getInstance(String ID) {
|
||||
return getInstance(ID, FORWARD);
|
||||
return getInstance(ID, FORWARD, -1, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a transliterator given a compound ID (possibly degenerate,
|
||||
* with no ID_DELIM). If idSplitPoint >= 0 and adoptedSplitTrans !=
|
||||
* 0, then insert adoptedSplitTrans in the compound ID at offset
|
||||
* idSplitPoint. Otherwise idSplitPoint should be -1 and
|
||||
* adoptedSplitTrans should be 0. The resultant transliterator will
|
||||
* be an atomic (non-compound) transliterator if this is indicated by
|
||||
* ID. Otherwise it will be a compound transliterator.
|
||||
*/
|
||||
private static Transliterator getInstance(String ID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans) {
|
||||
Vector list = new Vector();
|
||||
int[] ignored = new int[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
parseCompoundID(ID, regenID, dir, idSplitPoint, adoptedSplitTrans,
|
||||
list, ignored);
|
||||
|
||||
Transliterator t = null;
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
t = new NullTransliterator();
|
||||
break;
|
||||
case 1:
|
||||
t = (Transliterator) list.elementAt(0);
|
||||
break;
|
||||
default:
|
||||
t = new CompoundTransliterator(dir, list);
|
||||
break;
|
||||
}
|
||||
t.setID(regenID.toString());
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -854,6 +837,428 @@ public abstract class Transliterator {
|
||||
return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// The base class implementation of toRules munges the ID into
|
||||
// the correct format. That is: foo => ::foo
|
||||
// KEEP in sync with rbt_pars
|
||||
return "::" + getID() + ID_DELIM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a compound ID (possibly a degenerate one, containing no
|
||||
* ID_DELIM). If idSplitPoint >= 0 and adoptedSplitTrans != 0, then
|
||||
* insert adoptedSplitTrans in the compound ID at offset idSplitPoint.
|
||||
* Otherwise idSplitPoint should be -1 and adoptedSplitTrans should be
|
||||
* 0. Return in the result vector the instantiated transliterator
|
||||
* objects (one of these will be adoptedSplitTrans, if the latter was
|
||||
* specified). These will be in order of id, so if dir is REVERSE,
|
||||
* then the caller will have to reverse the order.
|
||||
*
|
||||
* @param regenID regenerated ID, reversed if appropriate, which
|
||||
* should be applied to the final created transliterator
|
||||
* @param splitTransIndex output parameter to receive the index in
|
||||
* 'result' at which the adoptedSplitTrans is stored, or -1 if
|
||||
* adoptedSplitTrans == 0
|
||||
*/
|
||||
private static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
regenID.setLength(0);
|
||||
splitTransIndex[0] = -1;
|
||||
int pos = 0;
|
||||
int i;
|
||||
while (pos < id.length()) {
|
||||
// We compare (pos >= split), not (pos == split), so we can
|
||||
// skip over whitespace (see below).
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelimiter = new boolean[1];
|
||||
Transliterator t =
|
||||
parseID(id, regenID, p, sawDelimiter, dir, true);
|
||||
|
||||
if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
|
||||
// TODO
|
||||
//throw new IllegalArgumentException("Invalid ID " + id);
|
||||
throw new IllegalArgumentException("Invalid ID " + id +
|
||||
" p[0]=" + p[0] +
|
||||
" pos=" + pos +
|
||||
" id.length()=" + id.length() +
|
||||
" sawDelimite[0]=" + sawDelimiter[0] +
|
||||
"");
|
||||
}
|
||||
pos = p[0];
|
||||
// The return value may be NULL when, for instance, creating a
|
||||
// REVERSE transliterator of ID "Latin-Greek()".
|
||||
if (t != null) {
|
||||
result.addElement(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle case of idSplitPoint == id.length()
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a single ID, possibly including an inline filter, and return
|
||||
* the resultant transliterator object. NOTE: If 'create' is false,
|
||||
* then the amount of syntax checking is limited. However, the 'pos'
|
||||
* parameter will be updated correctly, assuming the input string is
|
||||
* valid.
|
||||
*
|
||||
* A trailing /;? \s* / is skipped. The parameter sawDelimiter
|
||||
* indicates whether the ';' was seen or not. Upon return, if pos is
|
||||
* advanced, it will either point to a non-whitespace character past
|
||||
* the trailing ';', if any, or be equal to length().
|
||||
*
|
||||
* @param ID the ID string
|
||||
* @param regenID regenerated ID, reversed if appropriate, which
|
||||
* should be applied to the final created transliterator. This method
|
||||
* will append to this parameter for FORWARD direction and insert
|
||||
* additional text at offset 0 for REVERSE direction. If create is
|
||||
* false then this parameter is not used.
|
||||
* @param pos INPUT-OUTPUT parameter. On input, the position of the
|
||||
* first character to parse. On output, the position after the last
|
||||
* character parsed. This will be a semicolon or ID.length(). In the
|
||||
* case of an error this value will be unchanged.
|
||||
* @param create if true, create and return the result. If false,
|
||||
* only scan the ID, and return NULL.
|
||||
* @return a newly created transliterator, or NULL. NULL is returned
|
||||
* in all cases if create is false. If create is true, then NULL is
|
||||
* returned on error, or if the ID is effectively empty.
|
||||
* E.g. "Latin-Greek()" with dir == REVERSE. Do NOT check for NULL to
|
||||
* determine if there was an error. Instead, check to see if pos
|
||||
* moved.
|
||||
*/
|
||||
private static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
int limit, preDelimLimit,
|
||||
revStart, revLimit=0,
|
||||
idStart, idLimit,
|
||||
setStart, setLimit;
|
||||
|
||||
UnicodeSet[] filter = new UnicodeSet[1];
|
||||
int[] indices = new int[4];
|
||||
|
||||
if (!parseIDBounds(ID, pos[0], false, indices, filter)) {
|
||||
return null;
|
||||
}
|
||||
limit = indices[0];
|
||||
setStart = indices[1];
|
||||
setLimit = indices[2];
|
||||
revStart = indices[3];
|
||||
|
||||
idStart = pos[0];
|
||||
idLimit = limit;
|
||||
|
||||
if (revStart >= 0 && revStart < limit) {
|
||||
int revSetStart, revSetLimit;
|
||||
UnicodeSet[] revFilter = new UnicodeSet[1];
|
||||
if (!parseIDBounds(ID, revStart+1, true, indices, revFilter)) {
|
||||
return null;
|
||||
}
|
||||
revLimit = indices[0];
|
||||
revSetStart = indices[1];
|
||||
revSetLimit = indices[2];
|
||||
// we ignore indices[3]
|
||||
|
||||
// revStart points to '('
|
||||
if (dir == REVERSE) {
|
||||
idStart = revStart+1;
|
||||
idLimit = revLimit;
|
||||
setStart = revSetStart;
|
||||
setLimit = revSetLimit;
|
||||
filter[0] = revFilter[0];
|
||||
} else {
|
||||
idLimit = revStart;
|
||||
}
|
||||
// assert(revLimit < ID.length() && ID.charAt(revLimit) == ')');
|
||||
limit = revLimit+1;
|
||||
} else {
|
||||
// Ignore () exprs outside of this atomic ID, that is, in
|
||||
// "Greek-Latin; Title()", ignore the "()" after Title when
|
||||
// parsing Greek-Latin.
|
||||
revStart = -1;
|
||||
}
|
||||
|
||||
// Advance limit past /\s*;?\s*/
|
||||
preDelimLimit = limit;
|
||||
limit = skipSpaces(ID, limit);
|
||||
sawDelimiter[0] = (limit < ID.length() && ID.charAt(limit) == ID_DELIM);
|
||||
if (sawDelimiter[0]) {
|
||||
limit = skipSpaces(ID, ++limit);
|
||||
}
|
||||
|
||||
if (!create) {
|
||||
// TODO Improve performance by scanning the UnicodeSet pattern
|
||||
// without actually constructing it, if create is false. That
|
||||
// is, create a method like this one for UnicodeSet.
|
||||
pos[0] = limit;
|
||||
return null;
|
||||
}
|
||||
|
||||
// 'id' is the ID with the filter pattern removed and with
|
||||
// whitespace deleted. In a Foo(Bar) ID, id is Foo for FORWARD
|
||||
// and Bar for REVERSE.
|
||||
String str;
|
||||
str = ID.substring(setLimit, idLimit);
|
||||
StringBuffer id = new StringBuffer(ID.substring(idStart, setStart));
|
||||
id.append(str);
|
||||
|
||||
// Delete whitespace
|
||||
int i;
|
||||
for (i=0; i<id.length(); ++i) {
|
||||
if (UCharacter.isWhitespace(id.charAt(i))) {
|
||||
id.deleteCharAt(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A). This
|
||||
// is only done if the id is NOT of the form Foo(Bar). Record the
|
||||
// position of the separator.
|
||||
//
|
||||
// For both A-B and Foo(Bar) ids, detect the special case of Null,
|
||||
// whose inverse is itself. Given an ID with no separator "Foo",
|
||||
// an abbreviation for "Any-Foo", consider the inverse to be
|
||||
// "Foo-Any".
|
||||
int sep = id.toString().indexOf(ID_SEP);
|
||||
if (sep < 0 && id.toString().equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
// Handle "Null"
|
||||
sep = id.length();
|
||||
} else if (dir == REVERSE &&
|
||||
id.toString().equalsIgnoreCase(NullTransliterator._ID)) {
|
||||
// Reverse of "Any-Null" => "Null"
|
||||
id.delete(0, sep+1);
|
||||
sep = id.length();
|
||||
} else if (dir == REVERSE && revStart < 0) {
|
||||
if (sep >= 0) {
|
||||
str = id.substring(0, sep);
|
||||
id.delete(0, sep+1);
|
||||
} else {
|
||||
str = "Any";
|
||||
}
|
||||
sep = id.length();
|
||||
id.append(ID_SEP).append(str);
|
||||
} else if (sep < 0 && id.length() > 0) {
|
||||
// Don't do anything for empty IDs -- we handle these specially below
|
||||
str = "Any-";
|
||||
sep = str.length() - 1;
|
||||
id.insert(0, str);
|
||||
}
|
||||
|
||||
Transliterator t = null;
|
||||
|
||||
// If we have a reverse part of the ID, e.g., Foo(Bar), then we
|
||||
// need to check for an empty part, which represents a Null
|
||||
// transliterator. We return 0 (not a NullTransliterator). If we
|
||||
// are not of the form Foo(Bar) then an empty string is illegal.
|
||||
if (revStart >= 0 && id.length() == 0) {
|
||||
// Ignore any filters; filters on Null are meaningless (and we
|
||||
// can't attach them to 0 anyway)
|
||||
filter = null;
|
||||
}
|
||||
|
||||
else {
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
||||
synchronized (registry) {
|
||||
t = registry.get(id.toString(), s);
|
||||
// Need to enclose this in a block to prevent deadlock when
|
||||
// instantiating aliases (below).
|
||||
}
|
||||
|
||||
if (s.length() != 0) {
|
||||
// assert(t==0);
|
||||
// Instantiate an alias
|
||||
t = getInstance(s.toString(), FORWARD);
|
||||
}
|
||||
|
||||
if (t == null) {
|
||||
// Creation failed; the ID is invalid or is an alias
|
||||
filter[0] = null;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Set the filter, if any
|
||||
t.setFilter(filter[0]);
|
||||
}
|
||||
|
||||
// Set the ID. This is normally just a substring of the input
|
||||
// ID, but for reverse transliterators we need to munge A-B to
|
||||
// B-A or Foo(Bar) to Bar(Foo).
|
||||
if (dir == FORWARD) {
|
||||
id.setLength(0);
|
||||
id.append(ID.substring(pos[0], preDelimLimit));
|
||||
} else if (revStart < 0) {
|
||||
id.insert(sep, ID.substring(setStart, setLimit));
|
||||
} else {
|
||||
// Change Foo(Bar) to Bar(Foo)
|
||||
str = ID.substring(pos[0], revStart);
|
||||
str = str.trim();
|
||||
id.setLength(0);
|
||||
id.append(ID.substring(revStart+1, revLimit));
|
||||
// TODO make this more efficient
|
||||
id = new StringBuffer(id.toString().trim());
|
||||
id.append('(').append(str).append(')');
|
||||
}
|
||||
// TODO make this more efficient
|
||||
id = new StringBuffer(id.toString().trim());
|
||||
|
||||
if (t != null) {
|
||||
t.setID(id.toString());
|
||||
}
|
||||
|
||||
// Regenerate ID of a compound entity
|
||||
if (dir == FORWARD) {
|
||||
if (regenID.length() != 0) {
|
||||
regenID.append(ID_DELIM);
|
||||
}
|
||||
regenID.append(id);
|
||||
} else {
|
||||
if (regenID.length() != 0) {
|
||||
regenID.insert(0, ID_DELIM);
|
||||
}
|
||||
regenID.insert(0, id);
|
||||
}
|
||||
|
||||
// Indicate success by bumping pos past the final /;?\s*/.
|
||||
pos[0] = limit;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal method used by parseID. Given a piece of a single ID,
|
||||
* find the boundaries of various parts. For IDs of the form
|
||||
* Foo(Bar), this method parses the Foo, then the Bar. In each piece
|
||||
* it locates any inline UnicodeSet pattern [setStart, setLimit)
|
||||
* and finds the limit (this will point to either ';' or ')' or
|
||||
* ID.length()).
|
||||
*
|
||||
* @param ID the ID to be parsed
|
||||
* @param pos the index of ID at which to start
|
||||
* @param withinParens if true, parse the Bar of Foo(Bar), stop at a
|
||||
* close paren, and do not look for an open paren. If true then a
|
||||
* close paren MUST be seen or false is returned; if false then the
|
||||
* ';' delimiter is optional.
|
||||
* @param limit set to the position of ';' or ')' (depending on
|
||||
* withinParens), or ID.length() if no delimiter was found
|
||||
* @param setStart set to the start of an inline filter pattern,
|
||||
* or pos if none
|
||||
* @param setLimit set to the limit of an inline filter pattern,
|
||||
* or pos if none
|
||||
* @param revStart if not withinParens then set to the position of the
|
||||
* first '(', which may be > limit; otherwise set to -1
|
||||
* @param filter set to a newly created UnicodeSet object for the
|
||||
* inline filter pattern, if any; OWNED BY THE CALLER
|
||||
*
|
||||
* @return true if the pattern is valid, false if there is an invalid
|
||||
* UnicodeSet pattern or if withinParens is true and no close paren is
|
||||
* seen.
|
||||
*/
|
||||
private static boolean parseIDBounds(String ID,
|
||||
int pos,
|
||||
boolean withinParens,
|
||||
int[] indices,
|
||||
UnicodeSet[] filter) {
|
||||
int limit;
|
||||
int setStart;
|
||||
int setLimit;
|
||||
int revStart;
|
||||
|
||||
char endDelimiter = withinParens ? ')' : ID_DELIM;
|
||||
limit = ID.indexOf(endDelimiter, pos);
|
||||
if (limit < 0) {
|
||||
if (withinParens) {
|
||||
//return false;
|
||||
throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
|
||||
}
|
||||
limit = ID.length();
|
||||
}
|
||||
setStart = ID.indexOf('[', pos);
|
||||
revStart = withinParens ? -1 : ID.indexOf('(', pos);
|
||||
|
||||
if (setStart >= 0 && setStart < limit &&
|
||||
(revStart < 0 || setStart < revStart)) {
|
||||
ParsePosition ppos = new ParsePosition(setStart);
|
||||
// TODO Improve performance by scanning the UnicodeSet pattern
|
||||
// without actually constructing it, if create is false. That
|
||||
// is, create a method like this one for UnicodeSet.
|
||||
filter[0] = new UnicodeSet();
|
||||
filter[0].applyPattern(ID, ppos, null, true);
|
||||
setLimit = ppos.getIndex();
|
||||
if (limit < setLimit) {
|
||||
limit = ID.indexOf(endDelimiter, setLimit);
|
||||
if (limit < 0) {
|
||||
if (withinParens) {
|
||||
//return false;
|
||||
throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
|
||||
}
|
||||
limit = ID.length();
|
||||
}
|
||||
}
|
||||
if (revStart >= 0 && revStart < setLimit) {
|
||||
revStart = ID.indexOf(')', setLimit);
|
||||
}
|
||||
} else {
|
||||
setStart = setLimit = pos;
|
||||
}
|
||||
indices[0] = limit;
|
||||
indices[1] = setStart;
|
||||
indices[2] = setLimit;
|
||||
indices[3] = revStart;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If pos is the index of a space in str, then advance it over that
|
||||
* space and any immediately subsequent ones.
|
||||
*/
|
||||
private static int skipSpaces(String str,
|
||||
int pos) {
|
||||
while (pos < str.length() &&
|
||||
UCharacter.isWhitespace(str.charAt(pos))) {
|
||||
++pos;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
static Transliterator tempGet(String id, StringBuffer aliasReturn) {
|
||||
aliasReturn.setLength(0);
|
||||
if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
id = NullTransliterator._ID;
|
||||
// Temporary hack to make this work
|
||||
}
|
||||
return internalGetInstance(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
@ -877,7 +1282,7 @@ public abstract class Transliterator {
|
||||
public final Transliterator getInverse() {
|
||||
// Delegate to the ID-based factory: request this object's own ID in
// the REVERSE direction.
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
@ -891,7 +1296,7 @@ public abstract class Transliterator {
|
||||
obj = internalCache.get(ciID);
|
||||
sourceCache = internalCache;
|
||||
}
|
||||
|
||||
|
||||
if (obj != null) {
|
||||
if (obj instanceof RuleBasedTransliterator.Data) {
|
||||
data = (RuleBasedTransliterator.Data) obj;
|
||||
@ -925,7 +1330,7 @@ public abstract class Transliterator {
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r, dir);
|
||||
sourceCache.put(ciID, data);
|
||||
@ -950,7 +1355,7 @@ public abstract class Transliterator {
|
||||
// * Find a path through the composed transliterator graph. This
|
||||
// * will not necessarily be the only path, or the shortest path.
|
||||
// * This is a simple recursive algorithm.
|
||||
// *
|
||||
// *
|
||||
// * <p><code>composedGraph</code> is the links table.
|
||||
// * composedGraph.get(x) should return a String[] array, each of
|
||||
// * which is a node that x is connected to.
|
||||
@ -984,7 +1389,7 @@ public abstract class Transliterator {
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// return false;
|
||||
// }
|
||||
|
||||
@ -1020,7 +1425,7 @@ public abstract class Transliterator {
|
||||
/**
|
||||
* Unregisters a transliterator or class. This may be either
|
||||
* a system transliterator or a user transliterator or class.
|
||||
*
|
||||
*
|
||||
* @param ID the ID of the transliterator or class
|
||||
* @return the <code>Object</code> that was registered with
|
||||
* <code>ID</code>, or <code>null</code> if none was
|
||||
@ -1082,6 +1487,9 @@ public abstract class Transliterator {
|
||||
}
|
||||
|
||||
static {
|
||||
// TODO FINISH
|
||||
registry = new TransliteratorRegistry();
|
||||
|
||||
// The display name cache starts out empty
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
@ -1145,7 +1553,7 @@ public abstract class Transliterator {
|
||||
HangulJamoTransliterator.class, null);
|
||||
registerClass(JamoHangulTransliterator._ID,
|
||||
JamoHangulTransliterator.class, null);
|
||||
|
||||
|
||||
registerClass(HexToUnicodeTransliterator._ID,
|
||||
HexToUnicodeTransliterator.class, null);
|
||||
registerClass(UnicodeToHexTransliterator._ID,
|
||||
|
@ -216,7 +216,7 @@ public class JamoTest extends TransliteratorTest {
|
||||
// "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " +
|
||||
"\ub4f1\uacfc " +
|
||||
"\uac19\uc774 \ud604\uc7ac \ub110\ub9ac \uc0ac\uc6a9\ub418\ub294 " +
|
||||
"\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 ISO/IEC " +
|
||||
"\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 " + //ISO/IEC " +
|
||||
"10646\uc744 \uad6c\ud604\ud558\ub294 \uacf5\uc2dd\uc801\uc778 " +
|
||||
"\ubc29\ubc95\uc785\ub2c8\ub2e4. \uc774\ub294 \ub9ce\uc740 \uc6b4\uc601 " +
|
||||
"\uccb4\uc81c, \uc694\uc998 \uc0ac\uc6a9\ub418\ub294 \ubaa8\ub4e0 " +
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||
* $Date: 2001/09/19 17:44:09 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/09/20 21:21:10 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -961,6 +961,26 @@ public class TransliteratorTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test inverse of Greek-Latin; Title()
|
||||
*/
|
||||
public void TestCompoundInverse() {
|
||||
Transliterator t = Transliterator.getInstance
|
||||
("Greek-Latin; Title()", Transliterator.REVERSE);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance");
|
||||
return;
|
||||
}
|
||||
String exp = "(Title);Latin-Greek";
|
||||
if (t.getID().equals(exp)) {
|
||||
logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
|
||||
t.getID());
|
||||
} else {
|
||||
errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
|
||||
t.getID() + "\", expected \"" + exp + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/03/30 23:33:06 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.12 $ $Date: 2001/03/30 23:33:06 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -48,6 +48,14 @@ public class CompoundTransliterator extends Transliterator {
|
||||
*/
|
||||
private UnicodeFilter[] filters = null;
|
||||
|
||||
/**
|
||||
* For compound RBTs (those with an ::id block before and/or after
|
||||
* the main rule block) we record the index of the RBT here.
|
||||
* Otherwise, this should have a value of -1. We need this
|
||||
* information to implement toRules().
|
||||
*/
|
||||
private int compoundRBTIndex;
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
@ -131,6 +139,72 @@ public class CompoundTransliterator extends Transliterator {
|
||||
this(ID, FORWARD, null);
|
||||
}
|
||||
|
||||
/**
 * Package private constructor that builds a compound transliterator
 * from a vector of component transliterators.  The vector is given in
 * FORWARD order; when dir is REVERSE the components are stored in
 * reversed order.  The object is created with an empty ID and no
 * filter, and no reverse ID is regenerated here: the caller is
 * responsible for fixing up the ID afterwards.
 */
CompoundTransliterator(int dir,
                       Vector list) {
    super("", null);
    // No compound RBT is involved when constructing from a plain list.
    compoundRBTIndex = -1;
    trans = null;
    // fixReverseID == false: assume caller will fixup ID
    init(list, dir, false);
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
* @param list a vector of transliterator objects to be adopted. It
|
||||
* should NOT be empty. The list should be in declared order. That
|
||||
* is, it should be in the FORWARD order; if direction is REVERSE then
|
||||
* the list order will be reversed.
|
||||
* @param direction either FORWARD or REVERSE
|
||||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
* @param status the error code indicating success or failure
|
||||
*/
|
||||
private void init(Vector list,
|
||||
int direction,
|
||||
boolean fixReverseID) {
|
||||
// assert(trans == 0);
|
||||
|
||||
// Allocate array
|
||||
int count = list.size();
|
||||
trans = new Transliterator[count];
|
||||
|
||||
// Move the transliterators from the vector into an array.
|
||||
// Reverse the order if necessary.
|
||||
int i;
|
||||
for (i=0; i<count; ++i) {
|
||||
int j = (direction == FORWARD) ? i : count - 1 - i;
|
||||
trans[i] = (Transliterator) list.elementAt(j);
|
||||
}
|
||||
|
||||
// Fix compoundRBTIndex for REVERSE transliterators
|
||||
if (compoundRBTIndex >= 0 && direction == REVERSE) {
|
||||
compoundRBTIndex = count - 1 - compoundRBTIndex;
|
||||
}
|
||||
|
||||
// If the direction is UTRANS_REVERSE then we may need to fix the
|
||||
// ID.
|
||||
if (direction == REVERSE && fixReverseID) {
|
||||
StringBuffer newID = new StringBuffer();
|
||||
for (i=0; i<count; ++i) {
|
||||
if (i > 0) {
|
||||
newID.append(ID_DELIM);
|
||||
}
|
||||
newID.append(trans[i].getID());
|
||||
}
|
||||
setID(newID.toString());
|
||||
}
|
||||
|
||||
computeMaximumContextLength();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
|
@ -13,7 +13,7 @@ import java.util.*;
|
||||
|
||||
/*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.1 $ $Date: 2001/06/12 23:01:55 $
|
||||
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.2 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public class NormalizationTransliterator extends Transliterator {
|
||||
|
||||
@ -31,25 +31,25 @@ public class NormalizationTransliterator extends Transliterator {
|
||||
* System registration hook.
|
||||
*/
|
||||
static void register() {
|
||||
Transliterator.registerFactory("NFC", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.COMPOSE);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFD", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.DECOMP);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFKC", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.COMPOSE_COMPAT);
|
||||
}
|
||||
});
|
||||
Transliterator.registerFactory("NFKD", new Transliterator.Factory() {
|
||||
Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
|
||||
public Transliterator getInstance() {
|
||||
return NormalizationTransliterator.
|
||||
getInstance(Normalizer.DECOMP_COMPAT);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/NullTransliterator.java,v $
|
||||
* $Date: 2000/06/28 20:49:54 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -21,9 +21,10 @@ public class NullTransliterator extends Transliterator {
|
||||
"\u00A9 IBM Corporation 2000. All rights reserved.";
|
||||
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
* Package accessible IDs for this transliterator.
|
||||
*/
|
||||
static String _ID = "Null";
|
||||
static String SHORT_ID = "Null";
|
||||
static String _ID = "Any-Null";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RemoveTransliterator.java,v $
|
||||
* $Date: 2001/04/04 18:06:53 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -22,7 +22,7 @@ public class RemoveTransliterator extends Transliterator {
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
*/
|
||||
static String _ID = "Remove";
|
||||
static String _ID = "Any-Remove";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
|
@ -4,9 +4,9 @@
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||
* $Date: 2001/09/19 17:43:38 $
|
||||
* $Revision: 1.38 $
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.38 $ $Date: 2001/09/19 17:43:38 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
@ -262,7 +262,7 @@ public abstract class Transliterator {
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
/**
|
||||
* Position structure for incremental transliteration. This data
|
||||
@ -337,7 +337,7 @@ public abstract class Transliterator {
|
||||
*/
|
||||
private String ID;
|
||||
|
||||
/**
|
||||
/**
|
||||
* This transliterator's filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
@ -380,6 +380,12 @@ public abstract class Transliterator {
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
/**
|
||||
* Prefix for resource bundle key for the display name for a
|
||||
* transliterator. The ID is appended to this to form the key.
|
||||
@ -412,6 +418,10 @@ public abstract class Transliterator {
|
||||
private static final String RB_LOCALE_ELEMENTS =
|
||||
"com.ibm.text.resources.LocaleElements";
|
||||
|
||||
protected static final char ID_DELIM = ';';
|
||||
|
||||
protected static final char ID_SEP = '-';
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
@ -509,7 +519,7 @@ public abstract class Transliterator {
|
||||
* pending transliterations, clients should call {@link
|
||||
* #finishTransliteration} after the last call to this
|
||||
* method has been made.
|
||||
*
|
||||
*
|
||||
* @param text the buffer holding transliterated and untransliterated text
|
||||
* @param index the start and limit of the text, the position
|
||||
* of the cursor, and the start and limit of transliteration.
|
||||
@ -771,74 +781,47 @@ public abstract class Transliterator {
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
public static Transliterator getInstance(String ID, int direction) {
|
||||
if (ID.indexOf(';') >= 0) {
|
||||
return new CompoundTransliterator(ID, direction, null);
|
||||
}
|
||||
|
||||
// 'id' is the ID with the filter pattern removed and with
|
||||
// whitespace deleted.
|
||||
StringBuffer id = new StringBuffer(ID);
|
||||
|
||||
// Look for embedded filter pattern
|
||||
UnicodeSet filter = null;
|
||||
int setStart = ID.indexOf('[');
|
||||
int setLimit = 0;
|
||||
if (setStart >= 0) {
|
||||
ParsePosition pos = new ParsePosition(setStart);
|
||||
filter = new UnicodeSet(ID, pos, null);
|
||||
setLimit = pos.getIndex();
|
||||
id.delete(setStart, setLimit);
|
||||
}
|
||||
|
||||
// Delete whitespace
|
||||
int i;
|
||||
for (i=0; i<id.length(); ++i) {
|
||||
if (UCharacter.isWhitespace(id.charAt(i))) {
|
||||
id.deleteCharAt(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A).
|
||||
// Record the position of the separator. Detect the special
|
||||
// case of Null, whose inverse is itself. Given an ID with no
|
||||
// separator "Foo", an abbreviation for "Any-Foo", consider
|
||||
// the inverse to be "Foo-Any".
|
||||
String str = id.toString();
|
||||
int sep = str.indexOf('-');
|
||||
if (str.equalsIgnoreCase(NullTransliterator._ID)) {
|
||||
sep = id.length();
|
||||
} else if (direction == REVERSE) {
|
||||
String left;
|
||||
if (sep >= 0) {
|
||||
left = id.substring(0, sep);
|
||||
id.delete(0, sep+1);
|
||||
} else {
|
||||
left = "Any";
|
||||
}
|
||||
sep = id.length();
|
||||
id.append('-').append(left);
|
||||
} else if (sep < 0) {
|
||||
sep = id.length();
|
||||
}
|
||||
|
||||
Transliterator t = internalGetInstance(id.toString());
|
||||
if (t != null) {
|
||||
if (filter != null) {
|
||||
t.setFilter(filter);
|
||||
id.insert(sep, ID.substring(setStart, setLimit));
|
||||
}
|
||||
t.ID = id.toString();
|
||||
return t;
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
public static final Transliterator getInstance(String ID, int direction) {
|
||||
return getInstance(ID, direction, -1, null);
|
||||
}
|
||||
|
||||
public static final Transliterator getInstance(String ID) {
|
||||
return getInstance(ID, FORWARD);
|
||||
return getInstance(ID, FORWARD, -1, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a transliterator given a compound ID (possibly degenerate,
|
||||
* with no ID_DELIM). If idSplitPoint >= 0 and adoptedSplitTrans !=
|
||||
* 0, then insert adoptedSplitTrans in the compound ID at offset
|
||||
* idSplitPoint. Otherwise idSplitPoint should be -1 and
|
||||
* adoptedSplitTrans should be 0. The resultant transliterator will
|
||||
* be an atomic (non-compound) transliterator if this is indicated by
|
||||
* ID. Otherwise it will be a compound translitertor.
|
||||
*/
|
||||
private static Transliterator getInstance(String ID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans) {
|
||||
Vector list = new Vector();
|
||||
int[] ignored = new int[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
parseCompoundID(ID, regenID, dir, idSplitPoint, adoptedSplitTrans,
|
||||
list, ignored);
|
||||
|
||||
Transliterator t = null;
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
t = new NullTransliterator();
|
||||
break;
|
||||
case 1:
|
||||
t = (Transliterator) list.elementAt(0);
|
||||
break;
|
||||
default:
|
||||
t = new CompoundTransliterator(dir, list);
|
||||
break;
|
||||
}
|
||||
t.setID(regenID.toString());
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -854,6 +837,428 @@ public abstract class Transliterator {
|
||||
return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// The base class implementation of toRules munges the ID into
|
||||
// the correct format. That is: foo => ::foo
|
||||
// KEEP in sync with rbt_pars
|
||||
return "::" + getID() + ID_DELIM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a compound ID (possibly a degenerate one, containing no
|
||||
* ID_DELIM). If idSplitPoint >= 0 and adoptedSplitTrans != 0, then
|
||||
* insert adoptedSplitTrans in the compound ID at offset idSplitPoint.
|
||||
* Otherwise idSplitPoint should be -1 and adoptedSplitTrans should be
|
||||
* 0. Return in the result vector the instantiated transliterator
|
||||
* objects (one of these will be adoptedSplitTrans, if the latter was
|
||||
* specified). These will be in order of id, so if dir is REVERSE,
|
||||
* then the caller will have to reverse the order.
|
||||
*
|
||||
* @param regenID regenerated ID, reversed if appropriate, which
|
||||
* should be applied to the final created transliterator
|
||||
* @param splitTransIndex output parameter to receive the index in
|
||||
* 'result' at which the adoptedSplitTrans is stored, or -1 if
|
||||
* adoptedSplitTrans == 0
|
||||
*/
|
||||
private static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
regenID.setLength(0);
|
||||
splitTransIndex[0] = -1;
|
||||
int pos = 0;
|
||||
int i;
|
||||
while (pos < id.length()) {
|
||||
// We compare (pos >= split), not (pos == split), so we can
|
||||
// skip over whitespace (see below).
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelimiter = new boolean[1];
|
||||
Transliterator t =
|
||||
parseID(id, regenID, p, sawDelimiter, dir, true);
|
||||
|
||||
if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
|
||||
// TODO
|
||||
//throw new IllegalArgumentException("Invalid ID " + id);
|
||||
throw new IllegalArgumentException("Invalid ID " + id +
|
||||
" p[0]=" + p[0] +
|
||||
" pos=" + pos +
|
||||
" id.length()=" + id.length() +
|
||||
" sawDelimite[0]=" + sawDelimiter[0] +
|
||||
"");
|
||||
}
|
||||
pos = p[0];
|
||||
// The return value may be NULL when, for instance, creating a
|
||||
// REVERSE transliterator of ID "Latin-Greek()".
|
||||
if (t != null) {
|
||||
result.addElement(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle case of idSplitPoint == id.length()
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a single ID, possibly including an inline filter, and return
|
||||
* the resultant transliterator object. NOTE: If 'create' is false,
|
||||
* then the amount of syntax checking is limited. However, the 'pos'
|
||||
* parameter will be updated correctly, assuming the input string is
|
||||
* valid.
|
||||
*
|
||||
* A trailing /;? \s* / is skipped. The parameter sawDelimiter
|
||||
* indicates whether the ';' was seen or not. Upon return, if pos is
|
||||
* advanced, it will either point to a non-whitespace character past
|
||||
* the trailing ';', if any, or be equal to length().
|
||||
*
|
||||
* @param ID the ID string
|
||||
* @param regenID regenerated ID, reversed if appropriate, which
|
||||
* should be applied to the final created transliterator. This method
|
||||
* will append to this parameter for FORWARD direction and insert
|
||||
* addition text at offset 0 for REVERSE direction. If create is
|
||||
* false then this parameter is not used.
|
||||
* @param pos INPUT-OUTPUT parameter. On input, the position of the
|
||||
* first character to parse. On output, the position after the last
|
||||
* character parsed. This will be a semicolon or ID.length(). In the
|
||||
* case of an error this value will be unchanged.
|
||||
* @param create if true, create and return the result. If false,
|
||||
* only scan the ID, and return NULL.
|
||||
* @return a newly created transliterator, or NULL. NULL is returned
|
||||
* in all cases if create is false. If create is true, then NULL is
|
||||
* returned on error, or if the ID is effectively empty.
|
||||
* E.g. "Latin-Greek()" with dir == REVERSE. Do NOT check for NULL to
|
||||
* determine if there was an error. Instead, check to see if pos
|
||||
* moved.
|
||||
*/
|
||||
private static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
int limit, preDelimLimit,
|
||||
revStart, revLimit=0,
|
||||
idStart, idLimit,
|
||||
setStart, setLimit;
|
||||
|
||||
UnicodeSet[] filter = new UnicodeSet[1];
|
||||
int[] indices = new int[4];
|
||||
|
||||
if (!parseIDBounds(ID, pos[0], false, indices, filter)) {
|
||||
return null;
|
||||
}
|
||||
limit = indices[0];
|
||||
setStart = indices[1];
|
||||
setLimit = indices[2];
|
||||
revStart = indices[3];
|
||||
|
||||
idStart = pos[0];
|
||||
idLimit = limit;
|
||||
|
||||
if (revStart >= 0 && revStart < limit) {
|
||||
int revSetStart, revSetLimit;
|
||||
UnicodeSet[] revFilter = new UnicodeSet[1];
|
||||
if (!parseIDBounds(ID, revStart+1, true, indices, revFilter)) {
|
||||
return null;
|
||||
}
|
||||
revLimit = indices[0];
|
||||
revSetStart = indices[1];
|
||||
revSetLimit = indices[2];
|
||||
// we ignore indices[3]
|
||||
|
||||
// revStart points to '('
|
||||
if (dir == REVERSE) {
|
||||
idStart = revStart+1;
|
||||
idLimit = revLimit;
|
||||
setStart = revSetStart;
|
||||
setLimit = revSetLimit;
|
||||
filter[0] = revFilter[0];
|
||||
} else {
|
||||
idLimit = revStart;
|
||||
}
|
||||
// assert(revLimit < ID.length() && ID.charAt(revLimit) == ')');
|
||||
limit = revLimit+1;
|
||||
} else {
|
||||
// Ignore () exprs outside of this atomic ID, that is, in
|
||||
// "Greek-Latin; Title()", ignore the "()" after Title when
|
||||
// parsing Greek-Latin.
|
||||
revStart = -1;
|
||||
}
|
||||
|
||||
// Advance limit past /\s*;?\s*/
|
||||
preDelimLimit = limit;
|
||||
limit = skipSpaces(ID, limit);
|
||||
sawDelimiter[0] = (limit < ID.length() && ID.charAt(limit) == ID_DELIM);
|
||||
if (sawDelimiter[0]) {
|
||||
limit = skipSpaces(ID, ++limit);
|
||||
}
|
||||
|
||||
if (!create) {
|
||||
// TODO Improve performance by scanning the UnicodeSet pattern
|
||||
// without actually constructing it, if create is false. That
|
||||
// is, create a method like this one for UnicodeSet.
|
||||
pos[0] = limit;
|
||||
return null;
|
||||
}
|
||||
|
||||
// 'id' is the ID with the filter pattern removed and with
|
||||
// whitespace deleted. In a Foo(Bar) ID, id is Foo for FORWARD
|
||||
// and Bar for REVERSE.
|
||||
String str;
|
||||
str = ID.substring(setLimit, idLimit);
|
||||
StringBuffer id = new StringBuffer(ID.substring(idStart, setStart));
|
||||
id.append(str);
|
||||
|
||||
// Delete whitespace
|
||||
int i;
|
||||
for (i=0; i<id.length(); ++i) {
|
||||
if (UCharacter.isWhitespace(id.charAt(i))) {
|
||||
id.deleteCharAt(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A). This
|
||||
// is only done if the id is NOT of the form Foo(Bar). Record the
|
||||
// position of the separator.
|
||||
//
|
||||
// For both A-B and Foo(Bar) ids, detect the special case of Null,
|
||||
// whose inverse is itself. Given an ID with no separator "Foo",
|
||||
// an abbreviation for "Any-Foo", consider the inverse to be
|
||||
// "Foo-Any".
|
||||
int sep = id.toString().indexOf(ID_SEP);
|
||||
if (sep < 0 && id.toString().equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
// Handle "Null"
|
||||
sep = id.length();
|
||||
} else if (dir == REVERSE &&
|
||||
id.toString().equalsIgnoreCase(NullTransliterator._ID)) {
|
||||
// Reverse of "Any-Null" => "Null"
|
||||
id.delete(0, sep+1);
|
||||
sep = id.length();
|
||||
} else if (dir == REVERSE && revStart < 0) {
|
||||
if (sep >= 0) {
|
||||
str = id.substring(0, sep);
|
||||
id.delete(0, sep+1);
|
||||
} else {
|
||||
str = "Any";
|
||||
}
|
||||
sep = id.length();
|
||||
id.append(ID_SEP).append(str);
|
||||
} else if (sep < 0 && id.length() > 0) {
|
||||
// Don't do anything for empty IDs -- we handle these specially below
|
||||
str = "Any-";
|
||||
sep = str.length() - 1;
|
||||
id.insert(0, str);
|
||||
}
|
||||
|
||||
Transliterator t = null;
|
||||
|
||||
// If we have a reverse part of the ID, e.g., Foo(Bar), then we
|
||||
// need to check for an empty part, which represents a Null
|
||||
// transliterator. We return 0 (not a NullTransliterator). If we
|
||||
// are not of the form Foo(Bar) then an empty string is illegal.
|
||||
if (revStart >= 0 && id.length() == 0) {
|
||||
// Ignore any filters; filters on Null are meaningless (and we
|
||||
// can't attach them to 0 anyway)
|
||||
filter = null;
|
||||
}
|
||||
|
||||
else {
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
||||
synchronized (registry) {
|
||||
t = registry.get(id.toString(), s);
|
||||
// Need to enclose this in a block to prevent deadlock when
|
||||
// instantiating aliases (below).
|
||||
}
|
||||
|
||||
if (s.length() != 0) {
|
||||
// assert(t==0);
|
||||
// Instantiate an alias
|
||||
t = getInstance(s.toString(), FORWARD);
|
||||
}
|
||||
|
||||
if (t == null) {
|
||||
// Creation failed; the ID is invalid or is an alias
|
||||
filter[0] = null;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Set the filter, if any
|
||||
t.setFilter(filter[0]);
|
||||
}
|
||||
|
||||
// Set the ID. This is normally just a substring of the input
|
||||
// ID, but for reverse transliterators we need to munge A-B to
|
||||
// B-A or Foo(Bar) to Bar(Foo).
|
||||
if (dir == FORWARD) {
|
||||
id.setLength(0);
|
||||
id.append(ID.substring(pos[0], preDelimLimit));
|
||||
} else if (revStart < 0) {
|
||||
id.insert(sep, ID.substring(setStart, setLimit));
|
||||
} else {
|
||||
// Change Foo(Bar) to Bar(Foo)
|
||||
str = ID.substring(pos[0], revStart);
|
||||
str = str.trim();
|
||||
id.setLength(0);
|
||||
id.append(ID.substring(revStart+1, revLimit));
|
||||
// TODO make this more efficient
|
||||
id = new StringBuffer(id.toString().trim());
|
||||
id.append('(').append(str).append(')');
|
||||
}
|
||||
// TODO make this more efficient
|
||||
id = new StringBuffer(id.toString().trim());
|
||||
|
||||
if (t != null) {
|
||||
t.setID(id.toString());
|
||||
}
|
||||
|
||||
// Regenerate ID of a compound entity
|
||||
if (dir == FORWARD) {
|
||||
if (regenID.length() != 0) {
|
||||
regenID.append(ID_DELIM);
|
||||
}
|
||||
regenID.append(id);
|
||||
} else {
|
||||
if (regenID.length() != 0) {
|
||||
regenID.insert(0, ID_DELIM);
|
||||
}
|
||||
regenID.insert(0, id);
|
||||
}
|
||||
|
||||
// Indicate success by bumping pos past the final /;?\s*/.
|
||||
pos[0] = limit;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal method used by parseID. Given a piece of a single ID,
|
||||
* find the boundaries of various parts. For IDs of the form
|
||||
* Foo(Bar), this method parses the Foo, then the Bar. In each piece
|
||||
* it locates any inline UnicodeSet pattern [setStart, setLimit)
|
||||
* and finds the limit (this will point to either ';' or ')' or
|
||||
* ID.length()).
|
||||
*
|
||||
* @param ID the ID to be parsed
|
||||
* @param pos the index of ID at which to start
|
||||
* @param withinParens if true, parse the Bar of Foo(Bar), stop at a
|
||||
* close paren, and do not look for an open paren. If true then a
|
||||
* close paren MUST be seen or false is returned; if false then the
|
||||
* ';' delimiter is optional.
|
||||
* @param limit set to the position of ';' or ')' (depending on
|
||||
* withinParens), or ID.length() if no delimiter was found
|
||||
* @param setStart set to the start of an inline filter pattern,
|
||||
* or pos if none
|
||||
* @param setLimit set to the limit of an inline filter pattern,
|
||||
* or pos if none
|
||||
* @param revStart if not withinParens then set to the position of the
|
||||
* first '(', which may be > limit; otherwise set to -1
|
||||
* @param filter set to a newly created UnicodeSet object for the
|
||||
* inline filter pattern, if any; OWNED BY THE CALLER
|
||||
*
|
||||
* @return true if the pattern is valid, false is there is an invalid
|
||||
* UnicodeSet pattern or if withinParens is true and no close paren is
|
||||
* seen.
|
||||
*/
|
||||
private static boolean parseIDBounds(String ID,
|
||||
int pos,
|
||||
boolean withinParens,
|
||||
int[] indices,
|
||||
UnicodeSet[] filter) {
|
||||
int limit;
|
||||
int setStart;
|
||||
int setLimit;
|
||||
int revStart;
|
||||
|
||||
char endDelimiter = withinParens ? ')' : ID_DELIM;
|
||||
limit = ID.indexOf(endDelimiter, pos);
|
||||
if (limit < 0) {
|
||||
if (withinParens) {
|
||||
//return false;
|
||||
throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
|
||||
}
|
||||
limit = ID.length();
|
||||
}
|
||||
setStart = ID.indexOf('[', pos);
|
||||
revStart = withinParens ? -1 : ID.indexOf('(', pos);
|
||||
|
||||
if (setStart >= 0 && setStart < limit &&
|
||||
(revStart < 0 || setStart < revStart)) {
|
||||
ParsePosition ppos = new ParsePosition(setStart);
|
||||
// TODO Improve performance by scanning the UnicodeSet pattern
|
||||
// without actually constructing it, if create is false. That
|
||||
// is, create a method like this one for UnicodeSet.
|
||||
filter[0] = new UnicodeSet();
|
||||
filter[0].applyPattern(ID, ppos, null, true);
|
||||
setLimit = ppos.getIndex();
|
||||
if (limit < setLimit) {
|
||||
limit = ID.indexOf(endDelimiter, setLimit);
|
||||
if (limit < 0) {
|
||||
if (withinParens) {
|
||||
//return false;
|
||||
throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
|
||||
}
|
||||
limit = ID.length();
|
||||
}
|
||||
}
|
||||
if (revStart >= 0 && revStart < setLimit) {
|
||||
revStart = ID.indexOf(')', setLimit);
|
||||
}
|
||||
} else {
|
||||
setStart = setLimit = pos;
|
||||
}
|
||||
indices[0] = limit;
|
||||
indices[1] = setStart;
|
||||
indices[2] = setLimit;
|
||||
indices[3] = revStart;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If pos is the index of a space in str, then advance it over that
|
||||
* space and any immediately subsequent ones.
|
||||
*/
|
||||
private static int skipSpaces(String str,
|
||||
int pos) {
|
||||
while (pos < str.length() &&
|
||||
UCharacter.isWhitespace(str.charAt(pos))) {
|
||||
++pos;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
static Transliterator tempGet(String id, StringBuffer aliasReturn) {
|
||||
aliasReturn.setLength(0);
|
||||
if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
id = NullTransliterator._ID;
|
||||
// Temporary hack to make this work
|
||||
}
|
||||
return internalGetInstance(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
@ -877,7 +1282,7 @@ public abstract class Transliterator {
|
||||
public final Transliterator getInverse() {
|
||||
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
@ -891,7 +1296,7 @@ public abstract class Transliterator {
|
||||
obj = internalCache.get(ciID);
|
||||
sourceCache = internalCache;
|
||||
}
|
||||
|
||||
|
||||
if (obj != null) {
|
||||
if (obj instanceof RuleBasedTransliterator.Data) {
|
||||
data = (RuleBasedTransliterator.Data) obj;
|
||||
@ -925,7 +1330,7 @@ public abstract class Transliterator {
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r, dir);
|
||||
sourceCache.put(ciID, data);
|
||||
@ -950,7 +1355,7 @@ public abstract class Transliterator {
|
||||
// * Find a path through the composed transliterator graph. This
|
||||
// * will not necessarily be the only path, or the shortest path.
|
||||
// * This is a simple recursive algorithm.
|
||||
// *
|
||||
// *
|
||||
// * <p><code>composedGraph</code> is the links table.
|
||||
// * composedGraph.get(x) should return a String[] array, each of
|
||||
// * which is a node that x is connected to.
|
||||
@ -984,7 +1389,7 @@ public abstract class Transliterator {
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// return false;
|
||||
// }
|
||||
|
||||
@ -1020,7 +1425,7 @@ public abstract class Transliterator {
|
||||
/**
|
||||
* Unregisters a transliterator or class. This may be either
|
||||
* a system transliterator or a user transliterator or class.
|
||||
*
|
||||
*
|
||||
* @param ID the ID of the transliterator or class
|
||||
* @return the <code>Object</code> that was registered with
|
||||
* <code>ID</code>, or <code>null</code> if none was
|
||||
@ -1082,6 +1487,9 @@ public abstract class Transliterator {
|
||||
}
|
||||
|
||||
static {
|
||||
// TODO FINISH
|
||||
registry = new TransliteratorRegistry();
|
||||
|
||||
// The display name cache starts out empty
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
@ -1145,7 +1553,7 @@ public abstract class Transliterator {
|
||||
HangulJamoTransliterator.class, null);
|
||||
registerClass(JamoHangulTransliterator._ID,
|
||||
JamoHangulTransliterator.class, null);
|
||||
|
||||
|
||||
registerClass(HexToUnicodeTransliterator._ID,
|
||||
HexToUnicodeTransliterator.class, null);
|
||||
registerClass(UnicodeToHexTransliterator._ID,
|
||||
|
Loading…
Reference in New Issue
Block a user