Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support
X-SVN-Rev: 622
This commit is contained in:
parent
4c3ef3e790
commit
16730bea73
@ -23,7 +23,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.2 $ $Date: 2000/01/18 02:30:49 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -46,9 +46,9 @@ public class CompoundTransliterator extends Transliterator {
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
public CompoundTransliterator(String ID, Transliterator[] transliterators,
|
||||
public CompoundTransliterator(Transliterator[] transliterators,
|
||||
UnicodeFilter filter) {
|
||||
super(ID, filter);
|
||||
super(joinIDs(transliterators), filter);
|
||||
trans = new Transliterator[transliterators.length];
|
||||
System.arraycopy(transliterators, 0, trans, 0, trans.length);
|
||||
}
|
||||
@ -61,9 +61,81 @@ public class CompoundTransliterator extends Transliterator {
|
||||
* @param transliterators array of <code>Transliterator</code>
|
||||
* objects
|
||||
*/
|
||||
public CompoundTransliterator(String ID, Transliterator[] transliterators) {
|
||||
this(ID, transliterators, null);
|
||||
public CompoundTransliterator(Transliterator[] transliterators) {
|
||||
this(transliterators, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits an ID of the form "ID;ID;..." into a compound using each
|
||||
* of the IDs.
|
||||
* @param ID of above form
|
||||
* @param forward if false, does the list in reverse order, and
|
||||
* takes the inverse of each ID.
|
||||
*/
|
||||
public CompoundTransliterator(String ID, int direction,
|
||||
UnicodeFilter filter) {
|
||||
// changed MED
|
||||
// Later, add "rule1[filter];rule2...
|
||||
super(ID, filter);
|
||||
String[] list = split(ID, ';');
|
||||
trans = new Transliterator[list.length];
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
trans[i] = getInstance(list[direction==FORWARD ? i : (list.length-1-i)],
|
||||
direction);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;..." in the given direction, with no filter. Delegates to
 * the three-argument constructor, passing <code>null</code> as the filter.
 *
 * @param ID a list of transliterator IDs separated by ';'
 * @param direction either <code>FORWARD</code> or <code>REVERSE</code>
 */
public CompoundTransliterator(String ID, int direction) {
    this(ID, direction, null);
}

/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;..." in the <code>FORWARD</code> direction, with no filter.
 * Delegates to the three-argument constructor.
 *
 * @param ID a list of transliterator IDs separated by ';'
 */
public CompoundTransliterator(String ID) {
    this(ID, FORWARD, null);
}
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
* join(';', map($_.getID(), transliterators).
|
||||
*/
|
||||
private static String joinIDs(Transliterator[] transliterators) {
|
||||
StringBuffer id = new StringBuffer();
|
||||
for (int i=0; i<transliterators.length; ++i) {
|
||||
if (i > 0) {
|
||||
id.append(';');
|
||||
}
|
||||
id.append(transliterators[i].getID());
|
||||
}
|
||||
return id.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string, as in JavaScript
|
||||
*/
|
||||
private static String[] split(String s, char divider) {
|
||||
// changed MED
|
||||
|
||||
// see how many there are
|
||||
int count = 1;
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) ++count;
|
||||
}
|
||||
|
||||
// make an array with them
|
||||
String[] result = new String[count];
|
||||
int last = 0;
|
||||
int current = 0;
|
||||
int i;
|
||||
for (i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) {
|
||||
result[current++] = s.substring(last,i);
|
||||
last = i+1;
|
||||
}
|
||||
}
|
||||
result[current++] = s.substring(last,i);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of transliterators in this chain.
|
||||
|
@ -196,9 +196,12 @@ import java.text.ParsePosition;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.11 $ $Date: 2000/01/18 02:30:49 $
|
||||
*
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.11 2000/01/18 02:30:49 Alan
|
||||
* Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support
|
||||
*
|
||||
* Revision 1.10 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
@ -221,17 +224,6 @@ import java.text.ParsePosition;
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the forward rules.
|
||||
*/
|
||||
public static final int FORWARD = 0;
|
||||
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the reverse rules.
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
private Data data;
|
||||
|
||||
|
@ -198,9 +198,29 @@ import java.text.MessageFormat;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.6 $ $Date: 2000/01/06 17:38:25 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/18 02:30:49 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
* Direction constant indicating the forward direction in a transliterator,
|
||||
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int FORWARD = 0;
|
||||
|
||||
/**
|
||||
* Direction constant indicating the reverse direction in a transliterator,
|
||||
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
/**
|
||||
* In the <code>keyboardTransliterate()</code>
|
||||
* <code>index[]</code> array, the beginning index, inclusive
|
||||
@ -260,6 +280,8 @@ public abstract class Transliterator {
|
||||
*/
|
||||
private static Hashtable cache;
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
/**
|
||||
* Internal object used to stand for instances of
|
||||
* <code>RuleBasedTransliterator</code> that have not been
|
||||
@ -649,6 +671,12 @@ public abstract class Transliterator {
|
||||
ResourceBundle bundle = ResourceBundle.getBundle(
|
||||
RB_LOCALE_ELEMENTS, inLocale);
|
||||
|
||||
// Use the registered display name, if any
|
||||
String n = (String) displayNameCache.get(ID);
|
||||
if (n != null) {
|
||||
return n;
|
||||
}
|
||||
|
||||
// Use display name for the entire transliterator, if it
|
||||
// exists.
|
||||
try {
|
||||
@ -705,6 +733,43 @@ public abstract class Transliterator {
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object given its ID.
|
||||
* The ID must be either a system transliterator ID or a ID registered
|
||||
* using <code>registerInstance()</code>.
|
||||
*
|
||||
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
|
||||
* @return A <code>Transliterator</code> object with the given ID
|
||||
* @exception IllegalArgumentException if the given ID is invalid.
|
||||
* @see #registerInstance
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
// changed MED
|
||||
public static Transliterator getInstance(String ID, int direction) {
|
||||
if (ID.indexOf(';') >= 0) {
|
||||
return new CompoundTransliterator(ID, direction, null);
|
||||
}
|
||||
if (direction == REVERSE) {
|
||||
int i = ID.indexOf('-');
|
||||
if (i < 0) {
|
||||
throw new IllegalArgumentException("No inverse for: "
|
||||
+ ID);
|
||||
}
|
||||
ID = ID.substring(i+1) + '-' + ID.substring(0, i);
|
||||
}
|
||||
Transliterator t = internalGetInstance(ID);
|
||||
if (t != null) {
|
||||
return t;
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
}
|
||||
|
||||
public static final Transliterator getInstance(String ID) {
|
||||
return getInstance(ID, FORWARD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
@ -725,36 +790,10 @@ public abstract class Transliterator {
|
||||
* transliterator is registered.
|
||||
* @see #registerInstance
|
||||
*/
|
||||
public Transliterator getInverse() {
|
||||
int i = ID.indexOf('-');
|
||||
if (i >= 0) {
|
||||
String inverseID = ID.substring(i+1) + '-' + ID.substring(0, i);
|
||||
return internalGetInstance(inverseID);
|
||||
}
|
||||
return null;
|
||||
public final Transliterator getInverse() {
|
||||
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object given its ID.
|
||||
* The ID must be either a system transliterator ID or a ID registered
|
||||
* using <code>registerInstance()</code>.
|
||||
*
|
||||
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
|
||||
* @return A <code>Transliterator</code> object with the given ID
|
||||
* @exception IllegalArgumentException if the given ID is invalid.
|
||||
* @see #registerInstance
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
public static Transliterator getInstance(String ID) {
|
||||
Transliterator t = internalGetInstance(ID);
|
||||
if (t != null) {
|
||||
return t;
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
@ -828,8 +867,11 @@ public abstract class Transliterator {
|
||||
* @see #registerInstance
|
||||
* @see #unregister
|
||||
*/
|
||||
public static void registerClass(String ID, Class transClass) {
|
||||
cache.put(ID, transClass);
|
||||
public static void registerClass(String ID, Class transClass, String displayName) {
|
||||
cache.put(ID, transClass);
|
||||
if (displayName != null) {
|
||||
displayNameCache.put(ID, displayName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -843,6 +885,7 @@ public abstract class Transliterator {
|
||||
* @see #registerClass
|
||||
*/
|
||||
public static Object unregister(String ID) {
|
||||
displayNameCache.remove(ID);
|
||||
return cache.remove(ID);
|
||||
}
|
||||
|
||||
@ -868,6 +911,7 @@ public abstract class Transliterator {
|
||||
String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
|
||||
|
||||
cache = new Hashtable();
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
for (int i=0; i<ruleBasedIDs.length; ++i) {
|
||||
String ID = ruleBasedIDs[i];
|
||||
@ -881,11 +925,16 @@ public abstract class Transliterator {
|
||||
} catch (MissingResourceException e) {}
|
||||
|
||||
// Register non-rule-based transliterators
|
||||
registerClass(HangulJamoTransliterator._ID,
|
||||
HangulJamoTransliterator.class, null);
|
||||
registerClass(JamoHangulTransliterator._ID,
|
||||
JamoHangulTransliterator.class, null);
|
||||
|
||||
registerClass(HexToUnicodeTransliterator._ID,
|
||||
HexToUnicodeTransliterator.class);
|
||||
HexToUnicodeTransliterator.class, null);
|
||||
registerClass(UnicodeToHexTransliterator._ID,
|
||||
UnicodeToHexTransliterator.class);
|
||||
UnicodeToHexTransliterator.class, null);
|
||||
registerClass(NullTransliterator._ID,
|
||||
NullTransliterator.class);
|
||||
NullTransliterator.class, null);
|
||||
}
|
||||
}
|
||||
|
@ -23,7 +23,7 @@ import java.util.Vector;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.2 $ $Date: 2000/01/18 02:30:49 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
@ -46,9 +46,9 @@ public class CompoundTransliterator extends Transliterator {
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
public CompoundTransliterator(String ID, Transliterator[] transliterators,
|
||||
public CompoundTransliterator(Transliterator[] transliterators,
|
||||
UnicodeFilter filter) {
|
||||
super(ID, filter);
|
||||
super(joinIDs(transliterators), filter);
|
||||
trans = new Transliterator[transliterators.length];
|
||||
System.arraycopy(transliterators, 0, trans, 0, trans.length);
|
||||
}
|
||||
@ -61,9 +61,81 @@ public class CompoundTransliterator extends Transliterator {
|
||||
* @param transliterators array of <code>Transliterator</code>
|
||||
* objects
|
||||
*/
|
||||
public CompoundTransliterator(String ID, Transliterator[] transliterators) {
|
||||
this(ID, transliterators, null);
|
||||
public CompoundTransliterator(Transliterator[] transliterators) {
|
||||
this(transliterators, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits an ID of the form "ID;ID;..." into a compound using each
|
||||
* of the IDs.
|
||||
* @param ID of above form
|
||||
* @param forward if false, does the list in reverse order, and
|
||||
* takes the inverse of each ID.
|
||||
*/
|
||||
public CompoundTransliterator(String ID, int direction,
|
||||
UnicodeFilter filter) {
|
||||
// changed MED
|
||||
// Later, add "rule1[filter];rule2...
|
||||
super(ID, filter);
|
||||
String[] list = split(ID, ';');
|
||||
trans = new Transliterator[list.length];
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
trans[i] = getInstance(list[direction==FORWARD ? i : (list.length-1-i)],
|
||||
direction);
|
||||
}
|
||||
}
|
||||
|
||||
public CompoundTransliterator(String ID, int direction) {
|
||||
this(ID, direction, null);
|
||||
}
|
||||
|
||||
public CompoundTransliterator(String ID) {
|
||||
this(ID, FORWARD, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
* join(';', map($_.getID(), transliterators).
|
||||
*/
|
||||
private static String joinIDs(Transliterator[] transliterators) {
|
||||
StringBuffer id = new StringBuffer();
|
||||
for (int i=0; i<transliterators.length; ++i) {
|
||||
if (i > 0) {
|
||||
id.append(';');
|
||||
}
|
||||
id.append(transliterators[i].getID());
|
||||
}
|
||||
return id.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string, as in JavaScript
|
||||
*/
|
||||
private static String[] split(String s, char divider) {
|
||||
// changed MED
|
||||
|
||||
// see how many there are
|
||||
int count = 1;
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) ++count;
|
||||
}
|
||||
|
||||
// make an array with them
|
||||
String[] result = new String[count];
|
||||
int last = 0;
|
||||
int current = 0;
|
||||
int i;
|
||||
for (i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) {
|
||||
result[current++] = s.substring(last,i);
|
||||
last = i+1;
|
||||
}
|
||||
}
|
||||
result[current++] = s.substring(last,i);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of transliterators in this chain.
|
||||
|
107
icu4j/src/com/ibm/text/HangulJamoTransliterator.java
Executable file
107
icu4j/src/com/ibm/text/HangulJamoTransliterator.java
Executable file
@ -0,0 +1,107 @@
|
||||
package com.ibm.text;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A transliterator that converts Hangul to Jamo
|
||||
*
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Mark Davis
|
||||
* @version $RCSfile: HangulJamoTransliterator.java,v $ $Revision: 1.1 $ $Date: 2000/01/18 02:30:49 $
|
||||
*/
|
||||
public class HangulJamoTransliterator extends Transliterator {
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
*/
|
||||
static String _ID = "Hangul-Jamo";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
public HangulJamoTransliterator() {
|
||||
super(_ID, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
public int transliterate(Replaceable text, int start, int limit) {
|
||||
int[] offsets = { start, limit, start };
|
||||
handleKeyboardTransliterate(text, offsets);
|
||||
return offsets[LIMIT];
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
protected void handleKeyboardTransliterate(Replaceable text,
|
||||
int[] offsets) {
|
||||
int cursor = offsets[CURSOR];
|
||||
int limit = offsets[LIMIT];
|
||||
|
||||
StringBuffer replacement = new StringBuffer();
|
||||
while (cursor < limit) {
|
||||
char c = filteredCharAt(text, cursor);
|
||||
if (decomposeHangul(c, replacement)) {
|
||||
text.replace(cursor, cursor+1, replacement.toString());
|
||||
cursor += replacement.length(); // skip over replacement
|
||||
limit += replacement.length() - 1; // fix up limit
|
||||
} else {
|
||||
++cursor;
|
||||
}
|
||||
}
|
||||
|
||||
offsets[LIMIT] = limit;
|
||||
offsets[CURSOR] = cursor;
|
||||
}
|
||||
|
||||
private char filteredCharAt(Replaceable text, int i) {
|
||||
char c;
|
||||
UnicodeFilter filter = getFilter();
|
||||
return (filter == null) ? text.charAt(i) :
|
||||
(filter.isIn(c = text.charAt(i)) ? c : '\uFFFF');
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
protected int getMaximumContextLength() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static final int
|
||||
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
|
||||
LCount = 19, VCount = 21, TCount = 28,
|
||||
NCount = VCount * TCount, // 588
|
||||
SCount = LCount * NCount; // 11172
|
||||
|
||||
public static boolean decomposeHangul(char s, StringBuffer result) {
|
||||
int SIndex = s - SBase;
|
||||
if (0 > SIndex || SIndex >= SCount) {
|
||||
return false;
|
||||
}
|
||||
int L = LBase + SIndex / NCount;
|
||||
int V = VBase + (SIndex % NCount) / TCount;
|
||||
int T = TBase + SIndex % TCount;
|
||||
result.setLength(0);
|
||||
result.append((char)L);
|
||||
result.append((char)V);
|
||||
if (T != TBase) result.append((char)T);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
130
icu4j/src/com/ibm/text/JamoHangulTransliterator.java
Executable file
130
icu4j/src/com/ibm/text/JamoHangulTransliterator.java
Executable file
@ -0,0 +1,130 @@
|
||||
package com.ibm.text;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A transliterator that converts Jamo to Hangul
|
||||
*
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Mark Davis
|
||||
*/
|
||||
public class JamoHangulTransliterator extends Transliterator {
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
/**
|
||||
* Package accessible ID for this transliterator.
|
||||
*/
|
||||
static String _ID = "Jamo-Hangul";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
public JamoHangulTransliterator() {
|
||||
super(_ID, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
public int transliterate(Replaceable text, int start, int limit) {
|
||||
int[] offsets = { start, limit, start };
|
||||
handleKeyboardTransliterate(text, offsets);
|
||||
return offsets[LIMIT];
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
protected void handleKeyboardTransliterate(Replaceable text,
|
||||
int[] offsets) {
|
||||
/**
|
||||
* Performs transliteration changing Jamo to Hangul
|
||||
*/
|
||||
int cursor = offsets[CURSOR];
|
||||
int limit = offsets[LIMIT];
|
||||
if (cursor >= limit) return;
|
||||
|
||||
// get last character
|
||||
char last = filteredCharAt(text, cursor++);
|
||||
// testing
|
||||
if (limit - cursor > 2) {
|
||||
last = (char)(last + 0);
|
||||
}
|
||||
|
||||
loop:
|
||||
while (cursor < limit) {
|
||||
char c = filteredCharAt(text, cursor);
|
||||
char replacement = composeHangul(last, c);
|
||||
if (replacement != 0) {
|
||||
text.replace(cursor-1, cursor+1, String.valueOf(replacement));
|
||||
last = replacement;
|
||||
// leave cursor where it is
|
||||
--limit; // fix up limit
|
||||
} else {
|
||||
++cursor;
|
||||
}
|
||||
}
|
||||
|
||||
offsets[LIMIT] = limit + 1;
|
||||
offsets[CURSOR] = cursor;
|
||||
}
|
||||
|
||||
private char filteredCharAt(Replaceable text, int i) {
|
||||
char c;
|
||||
UnicodeFilter filter = getFilter();
|
||||
return (filter == null) ? text.charAt(i) :
|
||||
(filter.isIn(c = text.charAt(i)) ? c : '\uFFFF');
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
protected int getMaximumContextLength() {
|
||||
return 3;
|
||||
}
|
||||
|
||||
|
||||
static final int
|
||||
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
|
||||
LCount = 19, VCount = 21, TCount = 28,
|
||||
NCount = VCount * TCount, // 588
|
||||
SCount = LCount * NCount; // 11172
|
||||
|
||||
/**
|
||||
* Return composed character (if it composes)
|
||||
* 0 otherwise
|
||||
*/
|
||||
public static char composeHangul(char last, char ch) {
|
||||
// check to see if two current characters are L and V
|
||||
int LIndex = last - LBase;
|
||||
if (0 <= LIndex && LIndex < LCount) {
|
||||
int VIndex = ch - VBase;
|
||||
if (0 <= VIndex && VIndex < VCount) {
|
||||
// make syllable of form LV
|
||||
return (char)(SBase + (LIndex * VCount + VIndex) * TCount);
|
||||
}
|
||||
}
|
||||
// check to see if two current characters are LV and T
|
||||
int SIndex = last - SBase;
|
||||
if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) {
|
||||
int TIndex = ch - TBase;
|
||||
if (0 <= TIndex && TIndex <= TCount) {
|
||||
// make syllable of form LVT
|
||||
return (char)(last + TIndex);
|
||||
}
|
||||
}
|
||||
// if neither case was true, skip
|
||||
return '\u0000';
|
||||
}
|
||||
}
|
@ -196,9 +196,12 @@ import java.text.ParsePosition;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.11 $ $Date: 2000/01/18 02:30:49 $
|
||||
*
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.11 2000/01/18 02:30:49 Alan
|
||||
* Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support
|
||||
*
|
||||
* Revision 1.10 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
@ -221,17 +224,6 @@ import java.text.ParsePosition;
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the forward rules.
|
||||
*/
|
||||
public static final int FORWARD = 0;
|
||||
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the reverse rules.
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
private Data data;
|
||||
|
||||
|
@ -198,9 +198,29 @@ import java.text.MessageFormat;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.6 $ $Date: 2000/01/06 17:38:25 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/18 02:30:49 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
* Direction constant indicating the forward direction in a transliterator,
|
||||
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int FORWARD = 0;
|
||||
|
||||
/**
|
||||
* Direction constant indicating the reverse direction in a transliterator,
|
||||
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @see RuleBasedTransliterator
|
||||
* @see CompoundTransliterator
|
||||
*/
|
||||
public static final int REVERSE = 1;
|
||||
|
||||
/**
|
||||
* In the <code>keyboardTransliterate()</code>
|
||||
* <code>index[]</code> array, the beginning index, inclusive
|
||||
@ -260,6 +280,8 @@ public abstract class Transliterator {
|
||||
*/
|
||||
private static Hashtable cache;
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
/**
|
||||
* Internal object used to stand for instances of
|
||||
* <code>RuleBasedTransliterator</code> that have not been
|
||||
@ -649,6 +671,12 @@ public abstract class Transliterator {
|
||||
ResourceBundle bundle = ResourceBundle.getBundle(
|
||||
RB_LOCALE_ELEMENTS, inLocale);
|
||||
|
||||
// Use the registered display name, if any
|
||||
String n = (String) displayNameCache.get(ID);
|
||||
if (n != null) {
|
||||
return n;
|
||||
}
|
||||
|
||||
// Use display name for the entire transliterator, if it
|
||||
// exists.
|
||||
try {
|
||||
@ -705,6 +733,43 @@ public abstract class Transliterator {
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object given its ID.
|
||||
* The ID must be either a system transliterator ID or a ID registered
|
||||
* using <code>registerInstance()</code>.
|
||||
*
|
||||
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
|
||||
* @return A <code>Transliterator</code> object with the given ID
|
||||
* @exception IllegalArgumentException if the given ID is invalid.
|
||||
* @see #registerInstance
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
// changed MED
|
||||
public static Transliterator getInstance(String ID, int direction) {
|
||||
if (ID.indexOf(';') >= 0) {
|
||||
return new CompoundTransliterator(ID, direction, null);
|
||||
}
|
||||
if (direction == REVERSE) {
|
||||
int i = ID.indexOf('-');
|
||||
if (i < 0) {
|
||||
throw new IllegalArgumentException("No inverse for: "
|
||||
+ ID);
|
||||
}
|
||||
ID = ID.substring(i+1) + '-' + ID.substring(0, i);
|
||||
}
|
||||
Transliterator t = internalGetInstance(ID);
|
||||
if (t != null) {
|
||||
return t;
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
}
|
||||
|
||||
public static final Transliterator getInstance(String ID) {
|
||||
return getInstance(ID, FORWARD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
@ -725,36 +790,10 @@ public abstract class Transliterator {
|
||||
* transliterator is registered.
|
||||
* @see #registerInstance
|
||||
*/
|
||||
public Transliterator getInverse() {
|
||||
int i = ID.indexOf('-');
|
||||
if (i >= 0) {
|
||||
String inverseID = ID.substring(i+1) + '-' + ID.substring(0, i);
|
||||
return internalGetInstance(inverseID);
|
||||
}
|
||||
return null;
|
||||
public final Transliterator getInverse() {
|
||||
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object given its ID.
|
||||
* The ID must be either a system transliterator ID or a ID registered
|
||||
* using <code>registerInstance()</code>.
|
||||
*
|
||||
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
|
||||
* @return A <code>Transliterator</code> object with the given ID
|
||||
* @exception IllegalArgumentException if the given ID is invalid.
|
||||
* @see #registerInstance
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
public static Transliterator getInstance(String ID) {
|
||||
Transliterator t = internalGetInstance(ID);
|
||||
if (t != null) {
|
||||
return t;
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported transliterator: "
|
||||
+ ID);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
@ -828,8 +867,11 @@ public abstract class Transliterator {
|
||||
* @see #registerInstance
|
||||
* @see #unregister
|
||||
*/
|
||||
public static void registerClass(String ID, Class transClass) {
|
||||
cache.put(ID, transClass);
|
||||
public static void registerClass(String ID, Class transClass, String displayName) {
|
||||
cache.put(ID, transClass);
|
||||
if (displayName != null) {
|
||||
displayNameCache.put(ID, displayName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -843,6 +885,7 @@ public abstract class Transliterator {
|
||||
* @see #registerClass
|
||||
*/
|
||||
public static Object unregister(String ID) {
|
||||
displayNameCache.remove(ID);
|
||||
return cache.remove(ID);
|
||||
}
|
||||
|
||||
@ -868,6 +911,7 @@ public abstract class Transliterator {
|
||||
String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
|
||||
|
||||
cache = new Hashtable();
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
for (int i=0; i<ruleBasedIDs.length; ++i) {
|
||||
String ID = ruleBasedIDs[i];
|
||||
@ -881,11 +925,16 @@ public abstract class Transliterator {
|
||||
} catch (MissingResourceException e) {}
|
||||
|
||||
// Register non-rule-based transliterators
|
||||
registerClass(HangulJamoTransliterator._ID,
|
||||
HangulJamoTransliterator.class, null);
|
||||
registerClass(JamoHangulTransliterator._ID,
|
||||
JamoHangulTransliterator.class, null);
|
||||
|
||||
registerClass(HexToUnicodeTransliterator._ID,
|
||||
HexToUnicodeTransliterator.class);
|
||||
HexToUnicodeTransliterator.class, null);
|
||||
registerClass(UnicodeToHexTransliterator._ID,
|
||||
UnicodeToHexTransliterator.class);
|
||||
UnicodeToHexTransliterator.class, null);
|
||||
registerClass(NullTransliterator._ID,
|
||||
NullTransliterator.class);
|
||||
NullTransliterator.class, null);
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
6213
icu4j/src/com/ibm/text/resources/TransliterationRule$Kanji$OnRomaji.java
Executable file
6213
icu4j/src/com/ibm/text/resources/TransliterationRule$Kanji$OnRomaji.java
Executable file
File diff suppressed because it is too large
Load Diff
@ -11,9 +11,11 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
|
||||
// VARIABLES
|
||||
|
||||
+ "initial=[\u1100-\u115F];"
|
||||
+ "medial=[\u1160-\u11A7];"
|
||||
+ "final=[\u11A8-\u11F9];" // added - aliu
|
||||
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
+ "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
|
||||
+ "ye=[yeYE];"
|
||||
+ "ywe=[yweYWE];"
|
||||
+ "yw=[ywYW];"
|
||||
@ -44,10 +46,13 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
// X[{vowel}>CHOSEONG (initial)
|
||||
// X>JONGSEONG (final)
|
||||
|
||||
// special insertion for funny sequences of vowels
|
||||
|
||||
+ "({medial}) ({vowel}) > \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
// special insertion for funny sequences of vowels, and for empty consonant
|
||||
|
||||
+ "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
|
||||
+ "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
||||
|
||||
// Fix casing.
|
||||
// Because Korean is caseless, we just want to treat everything as
|
||||
// lowercase.
|
||||
@ -169,7 +174,7 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
+ "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
|
||||
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
// because we must have CC (or C<non-letter>), not CV.
|
||||
@ -195,27 +200,61 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
+ "({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
|
||||
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS (vowels) and FINALS
|
||||
// MEDIALS after INITIALS
|
||||
|
||||
+ "({initial}) yu <> \u1172;" // hangul jungseong yu
|
||||
+ "({initial}) yo <> \u116d;" // hangul jungseong yo
|
||||
+ "({initial}) yi <> \u1174;" // hangul jungseong yi
|
||||
+ "({initial}) yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "({initial}) ye <> \u1168;" // hangul jungseong ye
|
||||
+ "({initial}) yae <> \u1164;" // hangul jungseong yae
|
||||
+ "({initial}) ya <> \u1163;" // hangul jungseong ya
|
||||
+ "({initial}) wi <> \u1171;" // hangul jungseong wi
|
||||
+ "({initial}) weo <> \u116f;" // hangul jungseong weo
|
||||
+ "({initial}) we <> \u1170;" // hangul jungseong we
|
||||
+ "({initial}) wae <> \u116b;" // hangul jungseong wae
|
||||
+ "({initial}) wa <> \u116a;" // hangul jungseong wa
|
||||
+ "({initial}) u <> \u116e;" // hangul jungseong u
|
||||
+ "({initial}) oe <> \u116c;" // hangul jungseong oe
|
||||
+ "({initial}) o <> \u1169;" // hangul jungseong o
|
||||
+ "({initial}) i <> \u1175;" // hangul jungseong i
|
||||
+ "({initial}) eu <> \u1173;" // hangul jungseong eu
|
||||
+ "({initial}) eo <> \u1165;" // hangul jungseong eo
|
||||
+ "({initial}) e <> \u1166;" // hangul jungseong e
|
||||
+ "({initial}) ae <> \u1162;" // hangul jungseong ae
|
||||
+ "({initial}) a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// MEDIALS (vowels) not after INITIALs
|
||||
|
||||
+ "yu > \u110B\u1172;" // hangul jungseong yu
|
||||
+ "yo > \u110B\u116d;" // hangul jungseong yo
|
||||
+ "yi > \u110B\u1174;" // hangul jungseong yi
|
||||
+ "yeo > \u110B\u1167;" // hangul jungseong yeo
|
||||
+ "ye > \u110B\u1168;" // hangul jungseong ye
|
||||
+ "yae > \u110B\u1164;" // hangul jungseong yae
|
||||
+ "ya > \u110B\u1163;" // hangul jungseong ya
|
||||
+ "wi > \u110B\u1171;" // hangul jungseong wi
|
||||
+ "weo > \u110B\u116f;" // hangul jungseong weo
|
||||
+ "we > \u110B\u1170;" // hangul jungseong we
|
||||
+ "wae > \u110B\u116b;" // hangul jungseong wae
|
||||
+ "wa > \u110B\u116a;" // hangul jungseong wa
|
||||
+ "u > \u110B\u116e;" // hangul jungseong u
|
||||
+ "oe > \u110B\u116c;" // hangul jungseong oe
|
||||
+ "o > \u110B\u1169;" // hangul jungseong o
|
||||
+ "i > \u110B\u1175;" // hangul jungseong i
|
||||
+ "eu > \u110B\u1173;" // hangul jungseong eu
|
||||
+ "eo > \u110B\u1165;" // hangul jungseong eo
|
||||
+ "e > \u110B\u1166;" // hangul jungseong e
|
||||
+ "ae > \u110B\u1162;" // hangul jungseong ae
|
||||
+ "a > \u110B\u1161;" // hangul jungseong a
|
||||
|
||||
|
||||
+ "yu <> \u1172;" // hangul jungseong yu
|
||||
+ "yo <> \u116d;" // hangul jungseong yo
|
||||
+ "yi <> \u1174;" // hangul jungseong yi
|
||||
+ "yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "ye <> \u1168;" // hangul jungseong ye
|
||||
+ "yae <> \u1164;" // hangul jungseong yae
|
||||
+ "ya <> \u1163;" // hangul jungseong ya
|
||||
+ "wi <> \u1171;" // hangul jungseong wi
|
||||
+ "weo <> \u116f;" // hangul jungseong weo
|
||||
+ "we <> \u1170;" // hangul jungseong we
|
||||
+ "wae <> \u116b;" // hangul jungseong wae
|
||||
+ "wa <> \u116a;" // hangul jungseong wa
|
||||
+ "u <> \u116e;" // hangul jungseong u
|
||||
// FINALS
|
||||
|
||||
+ "t <> \u11c0;" // hangul jongseong thieuth
|
||||
+ "ss <> \u11bb;" // hangul jongseong ssangsios
|
||||
+ "s <> \u11ba;" // hangul jongseong sios
|
||||
+ "p <> \u11c1;" // hangul jongseong phieuph
|
||||
+ "oe <> \u116c;" // hangul jungseong oe
|
||||
+ "o <> \u1169;" // hangul jungseong o
|
||||
+ "nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
||||
+ "nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
||||
+ "ng <> \u11bc;" // hangul jongseong ieung
|
||||
@ -232,21 +271,15 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
+ "k <> \u11bf;" // hangul jongseong khieukh
|
||||
+ "jj <> \u110d;" // hangul choseong ssangcieuc
|
||||
+ "j <> \u11bd;" // hangul jongseong cieuc
|
||||
+ "i <> \u1175;" // hangul jungseong i
|
||||
+ "h <> \u11c2;" // hangul jongseong hieuh
|
||||
+ "gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
||||
+ "gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "g <> \u11a8;" // hangul jongseong kiyeok
|
||||
+ "eu <> \u1173;" // hangul jungseong eu
|
||||
+ "eo <> \u1165;" // hangul jungseong eo
|
||||
+ "e <> \u1166;" // hangul jungseong e
|
||||
+ "dd <> \u1104;" // hangul choseong ssangtikeut
|
||||
+ "d <> \u11ae;" // hangul jongseong tikeut
|
||||
+ "c <> \u11be;" // hangul jongseong chieuch
|
||||
+ "bs <> \u11b9;" // hangul jongseong pieup-sios
|
||||
+ "b <> \u11b8;" // hangul jongseong pieup
|
||||
+ "ae <> \u1162;" // hangul jungseong ae
|
||||
+ "a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// extra English letters
|
||||
// {moved to bottom - aliu}
|
||||
@ -265,6 +298,10 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
+ "F > |p;"
|
||||
//{ + "c > |k;" } masked
|
||||
+ "C > |k;"
|
||||
|
||||
+ "y > \u1172;" // hangul jungseong yu
|
||||
+ "w > \u1171;" // hangul jungseong wi
|
||||
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
|
Loading…
Reference in New Issue
Block a user