Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support

X-SVN-Rev: 622
This commit is contained in:
Alan Liu 2000-01-18 02:30:49 +00:00
parent 4c3ef3e790
commit 16730bea73
11 changed files with 27188 additions and 20474 deletions

View File

@ -23,7 +23,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.2 $ $Date: 2000/01/18 02:30:49 $
*/
public class CompoundTransliterator extends Transliterator {
@ -46,9 +46,9 @@ public class CompoundTransliterator extends Transliterator {
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
public CompoundTransliterator(String ID, Transliterator[] transliterators,
public CompoundTransliterator(Transliterator[] transliterators,
UnicodeFilter filter) {
super(ID, filter);
super(joinIDs(transliterators), filter);
trans = new Transliterator[transliterators.length];
System.arraycopy(transliterators, 0, trans, 0, trans.length);
}
@ -61,9 +61,81 @@ public class CompoundTransliterator extends Transliterator {
* @param transliterators array of <code>Transliterator</code>
* objects
*/
public CompoundTransliterator(String ID, Transliterator[] transliterators) {
this(ID, transliterators, null);
public CompoundTransliterator(Transliterator[] transliterators) {
this(transliterators, null);
}
/**
* Splits an ID of the form "ID;ID;..." into a compound using each
* of the IDs.
* @param ID of above form
* @param direction either FORWARD or REVERSE; if REVERSE, does the list
* in reverse order, and takes the inverse of each ID.
*/
public CompoundTransliterator(String ID, int direction,
                              UnicodeFilter filter) {
    super(ID, filter);
    // TODO (from original note): later, allow a filter per element,
    // as in "rule1[filter];rule2..."
    String[] ids = split(ID, ';');
    int n = ids.length;
    trans = new Transliterator[n];
    if (direction == FORWARD) {
        // Forward: instantiate the pieces in the order they were written.
        for (int k = 0; k < n; ++k) {
            trans[k] = getInstance(ids[k], direction);
        }
    } else {
        // Otherwise: walk the pieces back to front; getInstance() is
        // handed the same direction so each piece is resolved accordingly.
        for (int k = 0; k < n; ++k) {
            trans[k] = getInstance(ids[n - 1 - k], direction);
        }
    }
}
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;..." and a direction, with no filter. Delegates to the
 * three-argument constructor, passing <tt>null</tt> as the filter.
 * @param ID of above form
 * @param direction the direction constant handed on unchanged
 */
public CompoundTransliterator(String ID, int direction) {
this(ID, direction, null);
}
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;...", using the FORWARD direction and no filter. Delegates to
 * the three-argument constructor.
 * @param ID of above form
 */
public CompoundTransliterator(String ID) {
this(ID, FORWARD, null);
}
/**
* Return the IDs of the given list of transliterators, concatenated
* with ';' delimiting them. Equivalent to the perlish expression
* join(';', map($_.getID(), transliterators)).
*/
private static String joinIDs(Transliterator[] transliterators) {
    StringBuffer joined = new StringBuffer();
    // The separator is empty before the first ID and ";" before every
    // later one, so no delimiter is ever emitted at the front.
    String separator = "";
    for (int k = 0; k < transliterators.length; ++k) {
        joined.append(separator);
        joined.append(transliterators[k].getID());
        separator = ";";
    }
    return joined.toString();
}
/**
* Splits a string, as in JavaScript
*/
private static String[] split(String s, char divider) {
    // First pass: the number of pieces is one more than the number of
    // dividers, so an empty string still yields a single empty piece.
    int pieces = 1;
    for (int at = s.indexOf(divider); at >= 0; at = s.indexOf(divider, at + 1)) {
        ++pieces;
    }
    // Second pass: cut out the text between consecutive dividers.
    String[] parts = new String[pieces];
    int begin = 0;
    for (int k = 0; k < pieces - 1; ++k) {
        int end = s.indexOf(divider, begin);
        parts[k] = s.substring(begin, end);
        begin = end + 1;
    }
    // Whatever follows the last divider (possibly nothing) is the final piece.
    parts[pieces - 1] = s.substring(begin);
    return parts;
}
/**
* Returns the number of transliterators in this chain.

View File

@ -196,9 +196,12 @@ import java.text.ParsePosition;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.11 $ $Date: 2000/01/18 02:30:49 $
*
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.11 2000/01/18 02:30:49 Alan
* Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support
*
* Revision 1.10 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
@ -221,17 +224,6 @@ import java.text.ParsePosition;
* Improve masking checking; turn it off by default, for better performance
*/
public class RuleBasedTransliterator extends Transliterator {
/**
* Direction constant passed to constructor to create a transliterator
* using the forward rules.
*/
public static final int FORWARD = 0;
/**
* Direction constant passed to constructor to create a transliterator
* using the reverse rules.
*/
public static final int REVERSE = 1;
private Data data;

View File

@ -198,9 +198,29 @@ import java.text.MessageFormat;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.6 $ $Date: 2000/01/06 17:38:25 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/18 02:30:49 $
*/
public abstract class Transliterator {
/**
* Direction constant indicating the forward direction in a transliterator,
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @see RuleBasedTransliterator
* @see CompoundTransliterator
*/
public static final int FORWARD = 0;
/**
* Direction constant indicating the reverse direction in a transliterator,
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @see RuleBasedTransliterator
* @see CompoundTransliterator
*/
public static final int REVERSE = 1;
/**
* In the <code>keyboardTransliterate()</code>
* <code>index[]</code> array, the beginning index, inclusive
@ -260,6 +280,8 @@ public abstract class Transliterator {
*/
private static Hashtable cache;
private static Hashtable displayNameCache;
/**
* Internal object used to stand for instances of
* <code>RuleBasedTransliterator</code> that have not been
@ -649,6 +671,12 @@ public abstract class Transliterator {
ResourceBundle bundle = ResourceBundle.getBundle(
RB_LOCALE_ELEMENTS, inLocale);
// Use the registered display name, if any
String n = (String) displayNameCache.get(ID);
if (n != null) {
return n;
}
// Use display name for the entire transliterator, if it
// exists.
try {
@ -705,6 +733,43 @@ public abstract class Transliterator {
this.filter = filter;
}
/**
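* Returns a <code>Transliterator</code> object given its ID and a direction.
* The ID must be either a system transliterator ID or an ID registered
* using <code>registerInstance()</code>. An ID containing ';' is treated
* as a compound ID of the form "ID;ID;...".
*
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>;
* for <code>REVERSE</code>, a simple ID of the form "A-B" is looked up
* as "B-A"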
* @return A <code>Transliterator</code> object with the given ID
* @exception IllegalArgumentException if the given ID is invalid.
* @see #registerInstance
* @see #getAvailableIDs
* @see #getID
*/
// changed MED
public static Transliterator getInstance(String ID, int direction) {
    // A ';' marks a compound ID; CompoundTransliterator resolves each
    // piece (and the direction) itself.
    if (ID.indexOf(';') >= 0) {
        return new CompoundTransliterator(ID, direction, null);
    }
    String lookupID = ID;
    if (direction == REVERSE) {
        // The inverse of "A-B" is looked up as "B-A"; an ID without a
        // '-' cannot be inverted.
        int dash = ID.indexOf('-');
        if (dash < 0) {
            throw new IllegalArgumentException("No inverse for: "
                                               + ID);
        }
        lookupID = ID.substring(dash + 1) + '-' + ID.substring(0, dash);
    }
    Transliterator found = internalGetInstance(lookupID);
    if (found == null) {
        throw new IllegalArgumentException("Unsupported transliterator: "
                                           + lookupID);
    }
    return found;
}
/**
 * Returns a <code>Transliterator</code> object for the given ID in the
 * <code>FORWARD</code> direction. Equivalent to
 * <code>getInstance(ID, FORWARD)</code>.
 *
 * @param ID the ID to look up
 * @return the result of <code>getInstance(ID, FORWARD)</code>
 */
public static final Transliterator getInstance(String ID) {
return getInstance(ID, FORWARD);
}
/**
* Returns this transliterator's inverse. See the class
* documentation for details. This implementation simply inverts
@ -725,36 +790,10 @@ public abstract class Transliterator {
* transliterator is registered.
* @see #registerInstance
*/
public Transliterator getInverse() {
int i = ID.indexOf('-');
if (i >= 0) {
String inverseID = ID.substring(i+1) + '-' + ID.substring(0, i);
return internalGetInstance(inverseID);
}
return null;
public final Transliterator getInverse() {
return getInstance(ID, REVERSE);
}
/**
* Returns a <code>Transliterator</code> object given its ID.
* The ID must be either a system transliterator ID or a ID registered
* using <code>registerInstance()</code>.
*
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
* @return A <code>Transliterator</code> object with the given ID
* @exception IllegalArgumentException if the given ID is invalid.
* @see #registerInstance
* @see #getAvailableIDs
* @see #getID
*/
public static Transliterator getInstance(String ID) {
Transliterator t = internalGetInstance(ID);
if (t != null) {
return t;
}
throw new IllegalArgumentException("Unsupported transliterator: "
+ ID);
}
/**
* Returns a transliterator object given its ID. Unlike getInstance(),
* this method returns null if it cannot make use of the given ID.
@ -828,8 +867,11 @@ public abstract class Transliterator {
* @see #registerInstance
* @see #unregister
*/
public static void registerClass(String ID, Class transClass) {
cache.put(ID, transClass);
public static void registerClass(String ID, Class transClass, String displayName) {
cache.put(ID, transClass);
if (displayName != null) {
displayNameCache.put(ID, displayName);
}
}
/**
@ -843,6 +885,7 @@ public abstract class Transliterator {
* @see #registerClass
*/
public static Object unregister(String ID) {
    // Drop any display name stored under this ID, then remove the
    // registry entry itself and hand back whatever it held.
    displayNameCache.remove(ID);
    Object previous = cache.remove(ID);
    return previous;
}
@ -868,6 +911,7 @@ public abstract class Transliterator {
String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
cache = new Hashtable();
displayNameCache = new Hashtable();
for (int i=0; i<ruleBasedIDs.length; ++i) {
String ID = ruleBasedIDs[i];
@ -881,11 +925,16 @@ public abstract class Transliterator {
} catch (MissingResourceException e) {}
// Register non-rule-based transliterators
registerClass(HangulJamoTransliterator._ID,
HangulJamoTransliterator.class, null);
registerClass(JamoHangulTransliterator._ID,
JamoHangulTransliterator.class, null);
registerClass(HexToUnicodeTransliterator._ID,
HexToUnicodeTransliterator.class);
HexToUnicodeTransliterator.class, null);
registerClass(UnicodeToHexTransliterator._ID,
UnicodeToHexTransliterator.class);
UnicodeToHexTransliterator.class, null);
registerClass(NullTransliterator._ID,
NullTransliterator.class);
NullTransliterator.class, null);
}
}

View File

@ -23,7 +23,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.2 $ $Date: 2000/01/18 02:30:49 $
*/
public class CompoundTransliterator extends Transliterator {
@ -46,9 +46,9 @@ public class CompoundTransliterator extends Transliterator {
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
public CompoundTransliterator(String ID, Transliterator[] transliterators,
public CompoundTransliterator(Transliterator[] transliterators,
UnicodeFilter filter) {
super(ID, filter);
super(joinIDs(transliterators), filter);
trans = new Transliterator[transliterators.length];
System.arraycopy(transliterators, 0, trans, 0, trans.length);
}
@ -61,9 +61,81 @@ public class CompoundTransliterator extends Transliterator {
* @param transliterators array of <code>Transliterator</code>
* objects
*/
public CompoundTransliterator(String ID, Transliterator[] transliterators) {
this(ID, transliterators, null);
public CompoundTransliterator(Transliterator[] transliterators) {
this(transliterators, null);
}
/**
* Splits an ID of the form "ID;ID;..." into a compound using each
* of the IDs.
* @param ID of above form
* @param direction either FORWARD or REVERSE; if REVERSE, does the list
* in reverse order, and takes the inverse of each ID.
*/
public CompoundTransliterator(String ID, int direction,
                              UnicodeFilter filter) {
    super(ID, filter);
    // TODO (from original note): later, allow a filter per element,
    // as in "rule1[filter];rule2..."
    String[] ids = split(ID, ';');
    int n = ids.length;
    trans = new Transliterator[n];
    if (direction == FORWARD) {
        // Forward: instantiate the pieces in the order they were written.
        for (int k = 0; k < n; ++k) {
            trans[k] = getInstance(ids[k], direction);
        }
    } else {
        // Otherwise: walk the pieces back to front; getInstance() is
        // handed the same direction so each piece is resolved accordingly.
        for (int k = 0; k < n; ++k) {
            trans[k] = getInstance(ids[n - 1 - k], direction);
        }
    }
}
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;..." and a direction, with no filter. Delegates to the
 * three-argument constructor, passing <tt>null</tt> as the filter.
 * @param ID of above form
 * @param direction the direction constant handed on unchanged
 */
public CompoundTransliterator(String ID, int direction) {
this(ID, direction, null);
}
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;...", using the FORWARD direction and no filter. Delegates to
 * the three-argument constructor.
 * @param ID of above form
 */
public CompoundTransliterator(String ID) {
this(ID, FORWARD, null);
}
/**
* Return the IDs of the given list of transliterators, concatenated
* with ';' delimiting them. Equivalent to the perlish expression
* join(';', map($_.getID(), transliterators)).
*/
private static String joinIDs(Transliterator[] transliterators) {
    StringBuffer joined = new StringBuffer();
    // The separator is empty before the first ID and ";" before every
    // later one, so no delimiter is ever emitted at the front.
    String separator = "";
    for (int k = 0; k < transliterators.length; ++k) {
        joined.append(separator);
        joined.append(transliterators[k].getID());
        separator = ";";
    }
    return joined.toString();
}
/**
* Splits a string, as in JavaScript
*/
private static String[] split(String s, char divider) {
    // First pass: the number of pieces is one more than the number of
    // dividers, so an empty string still yields a single empty piece.
    int pieces = 1;
    for (int at = s.indexOf(divider); at >= 0; at = s.indexOf(divider, at + 1)) {
        ++pieces;
    }
    // Second pass: cut out the text between consecutive dividers.
    String[] parts = new String[pieces];
    int begin = 0;
    for (int k = 0; k < pieces - 1; ++k) {
        int end = s.indexOf(divider, begin);
        parts[k] = s.substring(begin, end);
        begin = end + 1;
    }
    // Whatever follows the last divider (possibly nothing) is the final piece.
    parts[pieces - 1] = s.substring(begin);
    return parts;
}
/**
* Returns the number of transliterators in this chain.

View File

@ -0,0 +1,107 @@
package com.ibm.text;
import java.util.*;
/**
* A transliterator that converts Hangul to Jamo
*
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Mark Davis
* @version $RCSfile: HangulJamoTransliterator.java,v $ $Revision: 1.1 $ $Date: 2000/01/18 02:30:49 $
*/
public class HangulJamoTransliterator extends Transliterator {
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

    /**
     * Package accessible ID for this transliterator. Declared
     * <code>final</code> so that the value passed to the superclass
     * constructor can never be reassigned afterwards.
     */
    static final String _ID = "Hangul-Jamo";

    /**
     * Constructs a transliterator with the ID "Hangul-Jamo" and no filter.
     */
    public HangulJamoTransliterator() {
        super(_ID, null);
    }

    /**
     * Transliterates a segment of a string.  <code>Transliterator</code> API.
     * @param text the string to be transliterated
     * @param start the beginning index, inclusive; <code>0 &lt;= start
     * &lt;= limit</code>.
     * @param limit the ending index, exclusive; <code>start &lt;= limit
     * &lt;= text.length()</code>.
     * @return the new limit index
     */
    public int transliterate(Replaceable text, int start, int limit) {
        // NOTE(review): the literal order below presumably matches the
        // START, LIMIT, CURSOR index constants of Transliterator -- confirm.
        int[] offsets = { start, limit, start };
        handleKeyboardTransliterate(text, offsets);
        return offsets[LIMIT];
    }

    /**
     * Implements {@link Transliterator#handleKeyboardTransliterate}.
     * Walks the text from <code>offsets[CURSOR]</code> up to
     * <code>offsets[LIMIT]</code>, replacing every character that
     * {@link #decomposeHangul} can decompose by its Jamo sequence.
     * On return, <code>offsets[LIMIT]</code> has been adjusted for the
     * growth of the text and <code>offsets[CURSOR]</code> holds the final
     * cursor position.
     * @param text the text to modify in place
     * @param offsets the index array; the CURSOR and LIMIT entries are
     * read and updated
     */
    protected void handleKeyboardTransliterate(Replaceable text,
                                               int[] offsets) {
        int cursor = offsets[CURSOR];
        int limit = offsets[LIMIT];

        // Reused across iterations; decomposeHangul() clears it before
        // filling it.
        StringBuffer replacement = new StringBuffer();
        while (cursor < limit) {
            char c = filteredCharAt(text, cursor);
            if (decomposeHangul(c, replacement)) {
                int added = replacement.length();
                text.replace(cursor, cursor + 1, replacement.toString());
                cursor += added;        // skip over replacement
                limit += added - 1;     // one char became 'added' chars
            } else {
                ++cursor;
            }
        }

        offsets[LIMIT] = limit;
        offsets[CURSOR] = cursor;
    }

    /**
     * Returns the character at index <code>i</code>, or U+FFFF if this
     * transliterator has a filter and the filter does not contain that
     * character.
     */
    private char filteredCharAt(Replaceable text, int i) {
        UnicodeFilter filter = getFilter();
        char c = text.charAt(i);
        if (filter == null || filter.isIn(c)) {
            return c;
        }
        return '\uFFFF';
    }

    /**
     * Return the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.
     * @return maximum number of preceding context characters this
     * transliterator needs to examine; always 0 here
     */
    protected int getMaximumContextLength() {
        return 0;
    }

    // Bases and counts used for the arithmetic in decomposeHangul():
    // syllable base, leading/vowel/trailing Jamo bases, and the size of
    // each index range.
    static final int
        SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   // 588
        SCount = LCount * NCount;   // 11172

    /**
     * Decomposes a precomposed Hangul syllable into its Jamo.
     * @param s the character to decompose
     * @param result receives the decomposition: it is cleared and then
     * filled with the leading consonant, the vowel and, if the syllable
     * has one, the trailing consonant. It is left untouched when
     * <code>s</code> is not in the syllable range.
     * @return true if <code>s</code> lies in the range of
     * <code>SCount</code> syllables starting at <code>SBase</code> and
     * was decomposed, false otherwise
     */
    public static boolean decomposeHangul(char s, StringBuffer result) {
        int SIndex = s - SBase;
        if (0 > SIndex || SIndex >= SCount) {
            return false;
        }
        int L = LBase + SIndex / NCount;
        int V = VBase + (SIndex % NCount) / TCount;
        int T = TBase + SIndex % TCount;
        result.setLength(0);
        result.append((char)L);
        result.append((char)V);
        // A trailing index of zero means "no trailing consonant".
        if (T != TBase) {
            result.append((char)T);
        }
        return true;
    }
}

View File

@ -0,0 +1,130 @@
package com.ibm.text;
import java.util.*;
/**
* A transliterator that converts Jamo to Hangul
*
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Mark Davis
*/
public class JamoHangulTransliterator extends Transliterator {
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

    /**
     * Package accessible ID for this transliterator. Declared
     * <code>final</code> so that the value passed to the superclass
     * constructor can never be reassigned afterwards.
     */
    static final String _ID = "Jamo-Hangul";

    /**
     * Constructs a transliterator with the ID "Jamo-Hangul" and no filter.
     */
    public JamoHangulTransliterator() {
        super(_ID, null);
    }

    /**
     * Transliterates a segment of a string.  <code>Transliterator</code> API.
     * @param text the string to be transliterated
     * @param start the beginning index, inclusive; <code>0 &lt;= start
     * &lt;= limit</code>.
     * @param limit the ending index, exclusive; <code>start &lt;= limit
     * &lt;= text.length()</code>.
     * @return the new limit index
     */
    public int transliterate(Replaceable text, int start, int limit) {
        // NOTE(review): the literal order below presumably matches the
        // START, LIMIT, CURSOR index constants of Transliterator -- confirm.
        int[] offsets = { start, limit, start };
        handleKeyboardTransliterate(text, offsets);
        return offsets[LIMIT];
    }

    /**
     * Implements {@link Transliterator#handleKeyboardTransliterate}.
     * Performs transliteration changing Jamo to Hangul: walks the text
     * from <code>offsets[CURSOR]</code> up to <code>offsets[LIMIT]</code>
     * and replaces each adjacent pair that {@link #composeHangul} can
     * combine by the composed syllable. On return,
     * <code>offsets[LIMIT]</code> has been reduced by the number of
     * compositions performed and <code>offsets[CURSOR]</code> holds the
     * final cursor position. If the range is empty the offsets are left
     * unchanged.
     * @param text the text to modify in place
     * @param offsets the index array; the CURSOR and LIMIT entries are
     * read and updated
     */
    protected void handleKeyboardTransliterate(Replaceable text,
                                               int[] offsets) {
        int cursor = offsets[CURSOR];
        int limit = offsets[LIMIT];
        if (cursor >= limit) {
            return;
        }

        // 'last' always holds the character immediately before 'cursor'.
        char last = filteredCharAt(text, cursor++);

        while (cursor < limit) {
            char c = filteredCharAt(text, cursor);
            char replacement = composeHangul(last, c);
            if (replacement != 0) {
                // Two characters collapse into one at cursor-1, so the
                // cursor already points at the next unexamined character.
                text.replace(cursor-1, cursor+1, String.valueOf(replacement));
                last = replacement;
                --limit; // fix up limit
            } else {
                // No composition: the character just examined becomes the
                // new predecessor. Without this, a pair that follows any
                // non-composing character would never be combined.
                last = c;
                ++cursor;
            }
        }

        // 'limit' has been kept in step with every replacement, so it is
        // already the exclusive end of the processed range.
        offsets[LIMIT] = limit;
        offsets[CURSOR] = cursor;
    }

    /**
     * Returns the character at index <code>i</code>, or U+FFFF if this
     * transliterator has a filter and the filter does not contain that
     * character.
     */
    private char filteredCharAt(Replaceable text, int i) {
        UnicodeFilter filter = getFilter();
        char c = text.charAt(i);
        if (filter == null || filter.isIn(c)) {
            return c;
        }
        return '\uFFFF';
    }

    /**
     * Return the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.
     * @return maximum number of preceding context characters this
     * transliterator needs to examine; always 3 here
     */
    protected int getMaximumContextLength() {
        return 3;
    }

    // Bases and counts used for the arithmetic in composeHangul():
    // syllable base, leading/vowel/trailing Jamo bases, and the size of
    // each index range.
    static final int
        SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   // 588
        SCount = LCount * NCount;   // 11172

    /**
     * Return composed character (if it composes)
     * 0 otherwise.
     * Two combinations are recognised: a leading consonant followed by a
     * vowel (giving an LV syllable), and an LV syllable followed by a
     * trailing consonant (giving an LVT syllable).
     * @param last the preceding character
     * @param ch the current character
     * @return the composed syllable, or U+0000 if the pair does not compose
     */
    public static char composeHangul(char last, char ch) {
        // check to see if two current characters are L and V
        int LIndex = last - LBase;
        if (0 <= LIndex && LIndex < LCount) {
            int VIndex = ch - VBase;
            if (0 <= VIndex && VIndex < VCount) {
                // make syllable of form LV
                return (char)(SBase + (LIndex * VCount + VIndex) * TCount);
            }
        }

        // check to see if two current characters are LV and T
        int SIndex = last - SBase;
        if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) {
            int TIndex = ch - TBase;
            // Index 0 stands for "no trailing consonant" and index TCount
            // would spill into the next LV syllable, so both ends of the
            // range are excluded.
            if (0 < TIndex && TIndex < TCount) {
                // make syllable of form LVT
                return (char)(last + TIndex);
            }
        }

        // if neither case was true, skip
        return '\u0000';
    }
}

View File

@ -196,9 +196,12 @@ import java.text.ParsePosition;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.11 $ $Date: 2000/01/18 02:30:49 $
*
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.11 2000/01/18 02:30:49 Alan
* Add Jamo-Hangul, Hangul-Jamo, fix rules, add compound ID support
*
* Revision 1.10 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
@ -221,17 +224,6 @@ import java.text.ParsePosition;
* Improve masking checking; turn it off by default, for better performance
*/
public class RuleBasedTransliterator extends Transliterator {
/**
* Direction constant passed to constructor to create a transliterator
* using the forward rules.
*/
public static final int FORWARD = 0;
/**
* Direction constant passed to constructor to create a transliterator
* using the reverse rules.
*/
public static final int REVERSE = 1;
private Data data;

View File

@ -198,9 +198,29 @@ import java.text.MessageFormat;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.6 $ $Date: 2000/01/06 17:38:25 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/18 02:30:49 $
*/
public abstract class Transliterator {
/**
* Direction constant indicating the forward direction in a transliterator,
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @see RuleBasedTransliterator
* @see CompoundTransliterator
*/
public static final int FORWARD = 0;
/**
* Direction constant indicating the reverse direction in a transliterator,
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @see RuleBasedTransliterator
* @see CompoundTransliterator
*/
public static final int REVERSE = 1;
/**
* In the <code>keyboardTransliterate()</code>
* <code>index[]</code> array, the beginning index, inclusive
@ -260,6 +280,8 @@ public abstract class Transliterator {
*/
private static Hashtable cache;
private static Hashtable displayNameCache;
/**
* Internal object used to stand for instances of
* <code>RuleBasedTransliterator</code> that have not been
@ -649,6 +671,12 @@ public abstract class Transliterator {
ResourceBundle bundle = ResourceBundle.getBundle(
RB_LOCALE_ELEMENTS, inLocale);
// Use the registered display name, if any
String n = (String) displayNameCache.get(ID);
if (n != null) {
return n;
}
// Use display name for the entire transliterator, if it
// exists.
try {
@ -705,6 +733,43 @@ public abstract class Transliterator {
this.filter = filter;
}
/**
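* Returns a <code>Transliterator</code> object given its ID and a direction.
* The ID must be either a system transliterator ID or an ID registered
* using <code>registerInstance()</code>. An ID containing ';' is treated
* as a compound ID of the form "ID;ID;...".
*
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>;
* for <code>REVERSE</code>, a simple ID of the form "A-B" is looked up
* as "B-A"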
* @return A <code>Transliterator</code> object with the given ID
* @exception IllegalArgumentException if the given ID is invalid.
* @see #registerInstance
* @see #getAvailableIDs
* @see #getID
*/
// changed MED
public static Transliterator getInstance(String ID, int direction) {
    // A ';' marks a compound ID; CompoundTransliterator resolves each
    // piece (and the direction) itself.
    if (ID.indexOf(';') >= 0) {
        return new CompoundTransliterator(ID, direction, null);
    }
    String lookupID = ID;
    if (direction == REVERSE) {
        // The inverse of "A-B" is looked up as "B-A"; an ID without a
        // '-' cannot be inverted.
        int dash = ID.indexOf('-');
        if (dash < 0) {
            throw new IllegalArgumentException("No inverse for: "
                                               + ID);
        }
        lookupID = ID.substring(dash + 1) + '-' + ID.substring(0, dash);
    }
    Transliterator found = internalGetInstance(lookupID);
    if (found == null) {
        throw new IllegalArgumentException("Unsupported transliterator: "
                                           + lookupID);
    }
    return found;
}
/**
 * Returns a <code>Transliterator</code> object for the given ID in the
 * <code>FORWARD</code> direction. Equivalent to
 * <code>getInstance(ID, FORWARD)</code>.
 *
 * @param ID the ID to look up
 * @return the result of <code>getInstance(ID, FORWARD)</code>
 */
public static final Transliterator getInstance(String ID) {
return getInstance(ID, FORWARD);
}
/**
* Returns this transliterator's inverse. See the class
* documentation for details. This implementation simply inverts
@ -725,36 +790,10 @@ public abstract class Transliterator {
* transliterator is registered.
* @see #registerInstance
*/
public Transliterator getInverse() {
int i = ID.indexOf('-');
if (i >= 0) {
String inverseID = ID.substring(i+1) + '-' + ID.substring(0, i);
return internalGetInstance(inverseID);
}
return null;
public final Transliterator getInverse() {
return getInstance(ID, REVERSE);
}
/**
* Returns a <code>Transliterator</code> object given its ID.
* The ID must be either a system transliterator ID or a ID registered
* using <code>registerInstance()</code>.
*
* @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
* @return A <code>Transliterator</code> object with the given ID
* @exception IllegalArgumentException if the given ID is invalid.
* @see #registerInstance
* @see #getAvailableIDs
* @see #getID
*/
public static Transliterator getInstance(String ID) {
Transliterator t = internalGetInstance(ID);
if (t != null) {
return t;
}
throw new IllegalArgumentException("Unsupported transliterator: "
+ ID);
}
/**
* Returns a transliterator object given its ID. Unlike getInstance(),
* this method returns null if it cannot make use of the given ID.
@ -828,8 +867,11 @@ public abstract class Transliterator {
* @see #registerInstance
* @see #unregister
*/
public static void registerClass(String ID, Class transClass) {
cache.put(ID, transClass);
public static void registerClass(String ID, Class transClass, String displayName) {
cache.put(ID, transClass);
if (displayName != null) {
displayNameCache.put(ID, displayName);
}
}
/**
@ -843,6 +885,7 @@ public abstract class Transliterator {
* @see #registerClass
*/
public static Object unregister(String ID) {
    // Drop any display name stored under this ID, then remove the
    // registry entry itself and hand back whatever it held.
    displayNameCache.remove(ID);
    Object previous = cache.remove(ID);
    return previous;
}
@ -868,6 +911,7 @@ public abstract class Transliterator {
String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
cache = new Hashtable();
displayNameCache = new Hashtable();
for (int i=0; i<ruleBasedIDs.length; ++i) {
String ID = ruleBasedIDs[i];
@ -881,11 +925,16 @@ public abstract class Transliterator {
} catch (MissingResourceException e) {}
// Register non-rule-based transliterators
registerClass(HangulJamoTransliterator._ID,
HangulJamoTransliterator.class, null);
registerClass(JamoHangulTransliterator._ID,
JamoHangulTransliterator.class, null);
registerClass(HexToUnicodeTransliterator._ID,
HexToUnicodeTransliterator.class);
HexToUnicodeTransliterator.class, null);
registerClass(UnicodeToHexTransliterator._ID,
UnicodeToHexTransliterator.class);
UnicodeToHexTransliterator.class, null);
registerClass(NullTransliterator._ID,
NullTransliterator.class);
NullTransliterator.class, null);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -11,9 +11,11 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
// VARIABLES
+ "initial=[\u1100-\u115F];"
+ "medial=[\u1160-\u11A7];"
+ "final=[\u11A8-\u11F9];" // added - aliu
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
+ "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
+ "ye=[yeYE];"
+ "ywe=[yweYWE];"
+ "yw=[ywYW];"
@ -44,10 +46,13 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
// X[{vowel}>CHOSEONG (initial)
// X>JONGSEONG (final)
// special insertion for funny sequences of vowels
+ "({medial}) ({vowel}) > \u110B;" // HANGUL CHOSEONG IEUNG
// special insertion for funny sequences of vowels, and for empty consonant
+ "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
+ "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
// Fix casing.
// Because Korean is caseless, we just want to treat everything as
// lowercase.
@ -169,7 +174,7 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
+ "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
// If we have gotten through to these rules, and we start with
// a consonant, then the remaining mappings would be to F,
// because must have CC (or C<non-letter>), not CV.
@ -195,27 +200,61 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
+ "({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
// MEDIALS (vowels) and FINALS
// MEDIALS after INITIALS
+ "({initial}) yu <> \u1172;" // hangul jungseong yu
+ "({initial}) yo <> \u116d;" // hangul jungseong yo
+ "({initial}) yi <> \u1174;" // hangul jungseong yi
+ "({initial}) yeo <> \u1167;" // hangul jungseong yeo
+ "({initial}) ye <> \u1168;" // hangul jungseong ye
+ "({initial}) yae <> \u1164;" // hangul jungseong yae
+ "({initial}) ya <> \u1163;" // hangul jungseong ya
+ "({initial}) wi <> \u1171;" // hangul jungseong wi
+ "({initial}) weo <> \u116f;" // hangul jungseong weo
+ "({initial}) we <> \u1170;" // hangul jungseong we
+ "({initial}) wae <> \u116b;" // hangul jungseong wae
+ "({initial}) wa <> \u116a;" // hangul jungseong wa
+ "({initial}) u <> \u116e;" // hangul jungseong u
+ "({initial}) oe <> \u116c;" // hangul jungseong oe
+ "({initial}) o <> \u1169;" // hangul jungseong o
+ "({initial}) i <> \u1175;" // hangul jungseong i
+ "({initial}) eu <> \u1173;" // hangul jungseong eu
+ "({initial}) eo <> \u1165;" // hangul jungseong eo
+ "({initial}) e <> \u1166;" // hangul jungseong e
+ "({initial}) ae <> \u1162;" // hangul jungseong ae
+ "({initial}) a <> \u1161;" // hangul jungseong a
// MEDIALS (vowels) not after INITIALs
+ "yu > \u110B\u1172;" // hangul jungseong yu
+ "yo > \u110B\u116d;" // hangul jungseong yo
+ "yi > \u110B\u1174;" // hangul jungseong yi
+ "yeo > \u110B\u1167;" // hangul jungseong yeo
+ "ye > \u110B\u1168;" // hangul jungseong ye
+ "yae > \u110B\u1164;" // hangul jungseong yae
+ "ya > \u110B\u1163;" // hangul jungseong ya
+ "wi > \u110B\u1171;" // hangul jungseong wi
+ "weo > \u110B\u116f;" // hangul jungseong weo
+ "we > \u110B\u1170;" // hangul jungseong we
+ "wae > \u110B\u116b;" // hangul jungseong wae
+ "wa > \u110B\u116a;" // hangul jungseong wa
+ "u > \u110B\u116e;" // hangul jungseong u
+ "oe > \u110B\u116c;" // hangul jungseong oe
+ "o > \u110B\u1169;" // hangul jungseong o
+ "i > \u110B\u1175;" // hangul jungseong i
+ "eu > \u110B\u1173;" // hangul jungseong eu
+ "eo > \u110B\u1165;" // hangul jungseong eo
+ "e > \u110B\u1166;" // hangul jungseong e
+ "ae > \u110B\u1162;" // hangul jungseong ae
+ "a > \u110B\u1161;" // hangul jungseong a
+ "yu <> \u1172;" // hangul jungseong yu
+ "yo <> \u116d;" // hangul jungseong yo
+ "yi <> \u1174;" // hangul jungseong yi
+ "yeo <> \u1167;" // hangul jungseong yeo
+ "ye <> \u1168;" // hangul jungseong ye
+ "yae <> \u1164;" // hangul jungseong yae
+ "ya <> \u1163;" // hangul jungseong ya
+ "wi <> \u1171;" // hangul jungseong wi
+ "weo <> \u116f;" // hangul jungseong weo
+ "we <> \u1170;" // hangul jungseong we
+ "wae <> \u116b;" // hangul jungseong wae
+ "wa <> \u116a;" // hangul jungseong wa
+ "u <> \u116e;" // hangul jungseong u
// FINALS
+ "t <> \u11c0;" // hangul jongseong thieuth
+ "ss <> \u11bb;" // hangul jongseong ssangsios
+ "s <> \u11ba;" // hangul jongseong sios
+ "p <> \u11c1;" // hangul jongseong phieuph
+ "oe <> \u116c;" // hangul jungseong oe
+ "o <> \u1169;" // hangul jungseong o
+ "nj <> \u11ac;" // hangul jongseong nieun-cieuc
+ "nh <> \u11ad;" // hangul jongseong nieun-hieuh
+ "ng <> \u11bc;" // hangul jongseong ieung
@ -232,21 +271,15 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
+ "k <> \u11bf;" // hangul jongseong khieukh
+ "jj <> \u110d;" // hangul choseong ssangcieuc
+ "j <> \u11bd;" // hangul jongseong cieuc
+ "i <> \u1175;" // hangul jungseong i
+ "h <> \u11c2;" // hangul jongseong hieuh
+ "gs <> \u11aa;" // hangul jongseong kiyeok-sios
+ "gg <> \u11a9;" // hangul jongseong ssangkiyeok
+ "g <> \u11a8;" // hangul jongseong kiyeok
+ "eu <> \u1173;" // hangul jungseong eu
+ "eo <> \u1165;" // hangul jungseong eo
+ "e <> \u1166;" // hangul jungseong e
+ "dd <> \u1104;" // hangul choseong ssangtikeut
+ "d <> \u11ae;" // hangul jongseong tikeut
+ "c <> \u11be;" // hangul jongseong chieuch
+ "bs <> \u11b9;" // hangul jongseong pieup-sios
+ "b <> \u11b8;" // hangul jongseong pieup
+ "ae <> \u1162;" // hangul jungseong ae
+ "a <> \u1161;" // hangul jungseong a
// extra English letters
// {moved to bottom - aliu}
@ -265,6 +298,10 @@ public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
+ "F > |p;"
//{ + "c > |k;" } masked
+ "C > |k;"
+ "y > \u1172;" // hangul jungseong yu
+ "w > \u1171;" // hangul jungseong wi
// ====================================
// Normal final rule: remove '