Modify rule syntax

X-SVN-Rev: 1210
This commit is contained in:
Alan Liu 2000-04-21 21:17:08 +00:00
parent 13de7186f7
commit 2b1cdd4f74
16 changed files with 18309 additions and 18170 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
* $Date: 2000/04/19 17:35:23 $
* $Revision: 1.20 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.21 $
*
*****************************************************************************************
*/
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.20 $ $Date: 2000/04/19 17:35:23 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -559,10 +559,20 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* The last available stand-in for variables. This is discovered
* dynamically. At any point during parsing, available variables are
* <code>variableNext..variableLimit-1</code>.
* <code>variableNext..variableLimit-1</code>. During variable definition
* we use the special value variableLimit-1 as a placeholder.
*/
private char variableLimit;
/**
* When we encounter an undefined variable, we do not immediately signal
* an error, in case we are defining this variable, e.g., "$a = [a-z];".
* Instead, we save the name of the undefined variable, and substitute
* in the placeholder char variableLimit - 1, and decrement
* variableLimit.
*/
private String undefinedVariableName;
// Operators
private static final char VARIABLE_DEF_OP = '=';
private static final char FORWARD_RULE_OP = '>';
@ -577,17 +587,15 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char END_OF_RULE = ';';
private static final char RULE_COMMENT_CHAR = '#';
private static final char VARIABLE_REF_OPEN = '{';
private static final char VARIABLE_REF_CLOSE = '}';
private static final char CONTEXT_OPEN = '(';
private static final char CONTEXT_CLOSE = ')';
private static final char VARIABLE_REF = '$'; // also segment refs
private static final char CONTEXT_ANTE = '{'; // ante{key
private static final char CONTEXT_POST = '}'; // key}post
private static final char SET_OPEN = '[';
private static final char SET_CLOSE = ']';
private static final char CURSOR_POS = '|';
// Segments of the input string are delimited by "(" and ")". In the
// output string these segments are referenced as "$1" through "$9".
private static final char SEGMENT_REF = '$';
private static final char SEGMENT_OPEN = '(';
private static final char SEGMENT_CLOSE = ')';
@ -703,7 +711,6 @@ public class RuleBasedTransliterator extends Transliterator {
RuleBasedTransliterator.Parser parser) {
int start = pos;
StringBuffer buf = new StringBuffer();
int postClose = -1; // position of post context close ')' in text
main:
while (pos < limit) {
@ -756,18 +763,11 @@ public class RuleBasedTransliterator extends Transliterator {
--pos; // Backup to point to operator
break main;
}
// Handle segment definitions "$(" ")$" and references "$1"
// .. "$9".
if (c == SEGMENT_REF) {
// After a SEGMENT_REF, must see SEGMENT_OPEN,
// SEGMENT_CLOSE, or a digit 1 to 9, with no intervening
// whitespace
if (pos == limit) {
syntaxError("Trailing " + c, rule, start);
}
c = rule.charAt(pos++);
if (c == SEGMENT_OPEN || c == SEGMENT_CLOSE) {
// Parse "$(", "$)"
switch (c) {
case SEGMENT_OPEN:
case SEGMENT_CLOSE:
// Handle segment definitions "(" and ")"
// Parse "(", ")"
if (segments == null) {
segments = new Vector();
}
@ -777,65 +777,63 @@ public class RuleBasedTransliterator extends Transliterator {
rule, start);
}
segments.addElement(new Integer(buf.length()));
} else {
// Parse "$1" "$2" .. "$9"
int r = Character.digit(c, 10);
if (r < 1 || r > 9) {
syntaxError("Illegal char after " + SEGMENT_REF,
rule, start);
break;
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF:
// Handle variable references and segment references "$1" .. "$9"
{
// A variable reference must be followed immediately
// by a Unicode identifier start and zero or more
// Unicode identifier part characters, or by a digit
// 1..9 if it is a segment reference.
if (pos == limit) {
syntaxError("Trailing " + c, rule, start);
}
// Parse "$1" "$2" .. "$9"
c = rule.charAt(pos++);
int r = Character.digit(c, 10);
if (r >= 1 && r <= 9) {
if (r > maxRef) {
maxRef = r;
}
buf.append((char) (parser.data.segmentBase + r - 1));
} else if (Character.isUnicodeIdentifierStart(c)) {
int j = pos;
while (j < limit &&
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
++j;
}
continue;
}
switch (c) {
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF_OPEN:
{
int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
if (pos == j || j < 0) { // empty or unterminated
syntaxError("Malformed variable reference", rule, start);
}
String name = rule.substring(pos, j);
pos = j+1;
String name = rule.substring(pos-1, j);
pos = j;
// If this is a variable definition statement, then the LHS
// variable will be undefined. In that case getVariableDef()
// will return the special placeholder variableLimit-1.
buf.append(parser.getVariableDef(name));
} else {
syntaxError("Illegal char after " + VARIABLE_REF,
rule, start);
}
}
break;
case CONTEXT_OPEN:
case CONTEXT_ANTE:
if (ante >= 0) {
syntaxError("Multiple ante contexts", rule, start);
}
ante = buf.length();
break;
case CONTEXT_POST:
if (post >= 0) {
syntaxError("Multiple post contexts", rule, start);
}
// Ignore CONTEXT_OPEN if buffer length is zero -- that means
// this is the optional opening delimiter for the ante context.
if (buf.length() > 0) {
post = buf.length();
}
break;
case CONTEXT_CLOSE:
if (postClose >= 0) {
syntaxError("Unexpected " + c, rule, start);
}
if (post >= 0) {
// This is probably the optional closing delimiter
// for the post context; save the pos and check later.
postClose = buf.length();
} else if (ante >= 0) {
syntaxError("Multiple ante contexts", rule, start);
} else {
ante = buf.length();
}
break;
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
pos = pp.getIndex();
break;
case VARIABLE_REF_CLOSE:
case SET_CLOSE:
syntaxError("Unquoted " + c, rule, start);
case CURSOR_POS:
@ -850,11 +848,6 @@ public class RuleBasedTransliterator extends Transliterator {
}
}
// Check context close parameters
if (postClose >= 0 && postClose != buf.length()) {
syntaxError("Extra text after ]", rule, start);
}
text = buf.toString();
return pos;
}
@ -906,6 +899,7 @@ public class RuleBasedTransliterator extends Transliterator {
RuleHalf left = new RuleHalf();
RuleHalf right = new RuleHalf();
undefinedVariableName = null;
pos = left.parse(rule, pos, limit, this);
if (pos == limit ||
@ -936,17 +930,31 @@ public class RuleBasedTransliterator extends Transliterator {
// or a set (already parsed). If RHS is longer than one
// character, it is either a multi-character string, or multiple
// sets, or a mixture of chars and sets -- syntax error.
// We expect to see a single undefined variable (the one being
// defined).
if (undefinedVariableName == null) {
syntaxError("Missing '$' or duplicate definition", rule, start);
}
if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
syntaxError("Malformed LHS", rule, start);
}
if (right.text.length() != 1) {
syntaxError("Malformed RHS", rule, start);
}
if (data.variableNames.get(left.text) != null) {
syntaxError("Duplicate definition of {" +
left.text + "}", rule, start);
}
data.variableNames.put(left.text, new Character(right.text.charAt(0)));
data.variableNames.put(undefinedVariableName,
new Character(right.text.charAt(0)));
++variableLimit;
return pos;
}
// If this is not a variable definition rule, we shouldn't have
// any undefined variable names.
if (undefinedVariableName != null) {
syntaxError("Undefined variable $" + undefinedVariableName,
rule, start);
}
// If the direction we want doesn't match the rule
// direction, do nothing.
if (operator != FWDREV_RULE_OP &&
@ -1041,7 +1049,18 @@ public class RuleBasedTransliterator extends Transliterator {
private char getVariableDef(String name) {
Character ch = (Character) data.variableNames.get(name);
if (ch == null) {
throw new IllegalArgumentException("Undefined variable: "
// We allow one undefined variable so that variable definition
// statements work. For the first undefined variable we return
// the special placeholder variableLimit-1, and save the variable
// name.
if (undefinedVariableName == null) {
undefinedVariableName = name;
if (variableNext >= variableLimit) {
throw new RuntimeException("Private use variables exhausted");
}
return --variableLimit;
}
throw new IllegalArgumentException("Undefined variable $"
+ name);
}
return ch.charValue();
@ -1210,7 +1229,11 @@ public class RuleBasedTransliterator extends Transliterator {
}
}
/* $Log: RuleBasedTransliterator.java,v $
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.21 2000/04/21 21:16:40 alan
* Modify rule syntax
*
* Revision 1.20 2000/04/19 17:35:23 alan
* Update javadoc; fix compile error
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
* $Date: 2000/04/19 16:34:18 $
* $Revision: 1.16 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
*
*****************************************************************************************
*/
@ -44,54 +44,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.16 $ $Date: 2000/04/19 16:34:18 $
*
* $Log: TransliterationRule.java,v $
* Revision 1.16 2000/04/19 16:34:18 alan
* Add segment support.
*
* Revision 1.15 2000/04/12 20:17:45 alan
* Delegate replace operation to rule object
*
* Revision 1.14 2000/03/10 04:07:24 johnf
* Copyright update
*
* Revision 1.13 2000/02/10 07:36:25 johnf
* fixed imports for com.ibm.util.Utility
*
* Revision 1.12 2000/02/03 18:11:19 Alan
* Use array rather than hashtable for char-to-set map
*
* Revision 1.11 2000/01/27 18:59:19 Alan
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
*
* Revision 1.10 2000/01/18 20:36:17 Alan
* Make UnicodeSet inherit from UnicodeFilter
*
* Revision 1.9 2000/01/18 02:38:55 Alan
* Fix filtering bug.
*
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
* Revision 1.6 2000/01/11 02:25:03 Alan
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
*
* Revision 1.5 2000/01/04 21:43:57 Alan
* Add rule indexing, and move masking check to TransliterationRuleSet.
*
* Revision 1.4 1999/12/22 01:40:54 Alan
* Consolidate rule pattern anteContext, key, and postContext into one string.
*
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
* Revision 1.2 1999/12/21 23:58:44 Alan
* Detect a>x masking a>y
*
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
*/
class TransliterationRule {
/**
@ -538,3 +491,54 @@ class TransliterationRule {
keyChar == textChar : set.contains(textChar));
}
}
/**
* $Log: TransliterationRule.java,v $
* Revision 1.17 2000/04/21 21:16:40 alan
* Modify rule syntax
*
* Revision 1.16 2000/04/19 16:34:18 alan
* Add segment support.
*
* Revision 1.15 2000/04/12 20:17:45 alan
* Delegate replace operation to rule object
*
* Revision 1.14 2000/03/10 04:07:24 johnf
* Copyright update
*
* Revision 1.13 2000/02/10 07:36:25 johnf
* fixed imports for com.ibm.util.Utility
*
* Revision 1.12 2000/02/03 18:11:19 Alan
* Use array rather than hashtable for char-to-set map
*
* Revision 1.11 2000/01/27 18:59:19 Alan
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
*
* Revision 1.10 2000/01/18 20:36:17 Alan
* Make UnicodeSet inherit from UnicodeFilter
*
* Revision 1.9 2000/01/18 02:38:55 Alan
* Fix filtering bug.
*
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
* Revision 1.6 2000/01/11 02:25:03 Alan
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
*
* Revision 1.5 2000/01/04 21:43:57 Alan
* Add rule indexing, and move masking check to TransliterationRuleSet.
*
* Revision 1.4 1999/12/22 01:40:54 Alan
* Consolidate rule pattern anteContext, key, and postContext into one string.
*
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
* Revision 1.2 1999/12/21 23:58:44 Alan
* Detect a>x masking a>y
*/

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2000/03/10 04:07:25 $
* $Revision: 1.16 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
*
*****************************************************************************************
*/
@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.16 $ $Date: 2000/03/10 04:07:25 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@ -887,6 +887,47 @@ public class UnicodeSet implements UnicodeFilter {
i = j; // Make i point at closing '}'
}
/* Parse variable references. These are treated as literals. If a
* variable refers to a UnicodeSet, nestedPairs is assigned here.
* Variable references are only parsed if symbols is not null; each
* name is resolved through symbols.lookup().
*/
// TEMPORARY
// TEMPORARY
// TEMPORARY
else if (symbols != null && !isLiteral && c == '$') {
++i;
c = pattern.charAt(i);
int j = i;
if (Character.isUnicodeIdentifierStart(c)) {
++j;
while (j < limit &&
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
++j;
}
}
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference " +
pattern.substring(i-1, limit));
}
String name = pattern.substring(i, j);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
isLiteral = true;
if (obj instanceof Character) {
c = ((Character) obj).charValue();
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j-1; // Make i point at last char of var name
}
// TEMPORARY
// TEMPORARY
// TEMPORARY
/* An opening bracket indicates the first bracket of a nested
* subpattern, either a normal pattern or a category pattern. We
* recognize these here and set nestedPairs accordingly.

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
* $Date: 2000/04/19 17:35:23 $
* $Revision: 1.20 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.21 $
*
*****************************************************************************************
*/
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.20 $ $Date: 2000/04/19 17:35:23 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -559,10 +559,20 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* The last available stand-in for variables. This is discovered
* dynamically. At any point during parsing, available variables are
* <code>variableNext..variableLimit-1</code>.
* <code>variableNext..variableLimit-1</code>. During variable definition
* we use the special value variableLimit-1 as a placeholder.
*/
private char variableLimit;
/**
* When we encounter an undefined variable, we do not immediately signal
* an error, in case we are defining this variable, e.g., "$a = [a-z];".
* Instead, we save the name of the undefined variable, and substitute
* in the placeholder char variableLimit - 1, and decrement
* variableLimit.
*/
private String undefinedVariableName;
// Operators
private static final char VARIABLE_DEF_OP = '=';
private static final char FORWARD_RULE_OP = '>';
@ -577,17 +587,15 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char END_OF_RULE = ';';
private static final char RULE_COMMENT_CHAR = '#';
private static final char VARIABLE_REF_OPEN = '{';
private static final char VARIABLE_REF_CLOSE = '}';
private static final char CONTEXT_OPEN = '(';
private static final char CONTEXT_CLOSE = ')';
private static final char VARIABLE_REF = '$'; // also segment refs
private static final char CONTEXT_ANTE = '{'; // ante{key
private static final char CONTEXT_POST = '}'; // key}post
private static final char SET_OPEN = '[';
private static final char SET_CLOSE = ']';
private static final char CURSOR_POS = '|';
// Segments of the input string are delimited by "(" and ")". In the
// output string these segments are referenced as "$1" through "$9".
private static final char SEGMENT_REF = '$';
private static final char SEGMENT_OPEN = '(';
private static final char SEGMENT_CLOSE = ')';
@ -703,7 +711,6 @@ public class RuleBasedTransliterator extends Transliterator {
RuleBasedTransliterator.Parser parser) {
int start = pos;
StringBuffer buf = new StringBuffer();
int postClose = -1; // position of post context close ')' in text
main:
while (pos < limit) {
@ -756,18 +763,11 @@ public class RuleBasedTransliterator extends Transliterator {
--pos; // Backup to point to operator
break main;
}
// Handle segment definitions "$(" ")$" and references "$1"
// .. "$9".
if (c == SEGMENT_REF) {
// After a SEGMENT_REF, must see SEGMENT_OPEN,
// SEGMENT_CLOSE, or a digit 1 to 9, with no intervening
// whitespace
if (pos == limit) {
syntaxError("Trailing " + c, rule, start);
}
c = rule.charAt(pos++);
if (c == SEGMENT_OPEN || c == SEGMENT_CLOSE) {
// Parse "$(", "$)"
switch (c) {
case SEGMENT_OPEN:
case SEGMENT_CLOSE:
// Handle segment definitions "(" and ")"
// Parse "(", ")"
if (segments == null) {
segments = new Vector();
}
@ -777,65 +777,63 @@ public class RuleBasedTransliterator extends Transliterator {
rule, start);
}
segments.addElement(new Integer(buf.length()));
} else {
// Parse "$1" "$2" .. "$9"
int r = Character.digit(c, 10);
if (r < 1 || r > 9) {
syntaxError("Illegal char after " + SEGMENT_REF,
rule, start);
break;
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF:
// Handle variable references and segment references "$1" .. "$9"
{
// A variable reference must be followed immediately
// by a Unicode identifier start and zero or more
// Unicode identifier part characters, or by a digit
// 1..9 if it is a segment reference.
if (pos == limit) {
syntaxError("Trailing " + c, rule, start);
}
// Parse "$1" "$2" .. "$9"
c = rule.charAt(pos++);
int r = Character.digit(c, 10);
if (r >= 1 && r <= 9) {
if (r > maxRef) {
maxRef = r;
}
buf.append((char) (parser.data.segmentBase + r - 1));
} else if (Character.isUnicodeIdentifierStart(c)) {
int j = pos;
while (j < limit &&
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
++j;
}
continue;
}
switch (c) {
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF_OPEN:
{
int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
if (pos == j || j < 0) { // empty or unterminated
syntaxError("Malformed variable reference", rule, start);
}
String name = rule.substring(pos, j);
pos = j+1;
String name = rule.substring(pos-1, j);
pos = j;
// If this is a variable definition statement, then the LHS
// variable will be undefined. In that case getVariableDef()
// will return the special placeholder variableLimit-1.
buf.append(parser.getVariableDef(name));
} else {
syntaxError("Illegal char after " + VARIABLE_REF,
rule, start);
}
}
break;
case CONTEXT_OPEN:
case CONTEXT_ANTE:
if (ante >= 0) {
syntaxError("Multiple ante contexts", rule, start);
}
ante = buf.length();
break;
case CONTEXT_POST:
if (post >= 0) {
syntaxError("Multiple post contexts", rule, start);
}
// Ignore CONTEXT_OPEN if buffer length is zero -- that means
// this is the optional opening delimiter for the ante context.
if (buf.length() > 0) {
post = buf.length();
}
break;
case CONTEXT_CLOSE:
if (postClose >= 0) {
syntaxError("Unexpected " + c, rule, start);
}
if (post >= 0) {
// This is probably the optional closing delimiter
// for the post context; save the pos and check later.
postClose = buf.length();
} else if (ante >= 0) {
syntaxError("Multiple ante contexts", rule, start);
} else {
ante = buf.length();
}
break;
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
pos = pp.getIndex();
break;
case VARIABLE_REF_CLOSE:
case SET_CLOSE:
syntaxError("Unquoted " + c, rule, start);
case CURSOR_POS:
@ -850,11 +848,6 @@ public class RuleBasedTransliterator extends Transliterator {
}
}
// Check context close parameters
if (postClose >= 0 && postClose != buf.length()) {
syntaxError("Extra text after ]", rule, start);
}
text = buf.toString();
return pos;
}
@ -906,6 +899,7 @@ public class RuleBasedTransliterator extends Transliterator {
RuleHalf left = new RuleHalf();
RuleHalf right = new RuleHalf();
undefinedVariableName = null;
pos = left.parse(rule, pos, limit, this);
if (pos == limit ||
@ -936,17 +930,31 @@ public class RuleBasedTransliterator extends Transliterator {
// or a set (already parsed). If RHS is longer than one
// character, it is either a multi-character string, or multiple
// sets, or a mixture of chars and sets -- syntax error.
// We expect to see a single undefined variable (the one being
// defined).
if (undefinedVariableName == null) {
syntaxError("Missing '$' or duplicate definition", rule, start);
}
if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
syntaxError("Malformed LHS", rule, start);
}
if (right.text.length() != 1) {
syntaxError("Malformed RHS", rule, start);
}
if (data.variableNames.get(left.text) != null) {
syntaxError("Duplicate definition of {" +
left.text + "}", rule, start);
}
data.variableNames.put(left.text, new Character(right.text.charAt(0)));
data.variableNames.put(undefinedVariableName,
new Character(right.text.charAt(0)));
++variableLimit;
return pos;
}
// If this is not a variable definition rule, we shouldn't have
// any undefined variable names.
if (undefinedVariableName != null) {
syntaxError("Undefined variable $" + undefinedVariableName,
rule, start);
}
// If the direction we want doesn't match the rule
// direction, do nothing.
if (operator != FWDREV_RULE_OP &&
@ -1041,7 +1049,18 @@ public class RuleBasedTransliterator extends Transliterator {
private char getVariableDef(String name) {
Character ch = (Character) data.variableNames.get(name);
if (ch == null) {
throw new IllegalArgumentException("Undefined variable: "
// We allow one undefined variable so that variable definition
// statements work. For the first undefined variable we return
// the special placeholder variableLimit-1, and save the variable
// name.
if (undefinedVariableName == null) {
undefinedVariableName = name;
if (variableNext >= variableLimit) {
throw new RuntimeException("Private use variables exhausted");
}
return --variableLimit;
}
throw new IllegalArgumentException("Undefined variable $"
+ name);
}
return ch.charValue();
@ -1210,7 +1229,11 @@ public class RuleBasedTransliterator extends Transliterator {
}
}
/* $Log: RuleBasedTransliterator.java,v $
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.21 2000/04/21 21:16:40 alan
* Modify rule syntax
*
* Revision 1.20 2000/04/19 17:35:23 alan
* Update javadoc; fix compile error
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
* $Date: 2000/04/19 16:34:18 $
* $Revision: 1.16 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
*
*****************************************************************************************
*/
@ -44,54 +44,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.16 $ $Date: 2000/04/19 16:34:18 $
*
* $Log: TransliterationRule.java,v $
* Revision 1.16 2000/04/19 16:34:18 alan
* Add segment support.
*
* Revision 1.15 2000/04/12 20:17:45 alan
* Delegate replace operation to rule object
*
* Revision 1.14 2000/03/10 04:07:24 johnf
* Copyright update
*
* Revision 1.13 2000/02/10 07:36:25 johnf
* fixed imports for com.ibm.util.Utility
*
* Revision 1.12 2000/02/03 18:11:19 Alan
* Use array rather than hashtable for char-to-set map
*
* Revision 1.11 2000/01/27 18:59:19 Alan
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
*
* Revision 1.10 2000/01/18 20:36:17 Alan
* Make UnicodeSet inherit from UnicodeFilter
*
* Revision 1.9 2000/01/18 02:38:55 Alan
* Fix filtering bug.
*
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
* Revision 1.6 2000/01/11 02:25:03 Alan
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
*
* Revision 1.5 2000/01/04 21:43:57 Alan
* Add rule indexing, and move masking check to TransliterationRuleSet.
*
* Revision 1.4 1999/12/22 01:40:54 Alan
* Consolidate rule pattern anteContext, key, and postContext into one string.
*
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
* Revision 1.2 1999/12/21 23:58:44 Alan
* Detect a>x masking a>y
*
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
*/
class TransliterationRule {
/**
@ -538,3 +491,54 @@ class TransliterationRule {
keyChar == textChar : set.contains(textChar));
}
}
/**
* $Log: TransliterationRule.java,v $
* Revision 1.17 2000/04/21 21:16:40 alan
* Modify rule syntax
*
* Revision 1.16 2000/04/19 16:34:18 alan
* Add segment support.
*
* Revision 1.15 2000/04/12 20:17:45 alan
* Delegate replace operation to rule object
*
* Revision 1.14 2000/03/10 04:07:24 johnf
* Copyright update
*
* Revision 1.13 2000/02/10 07:36:25 johnf
* fixed imports for com.ibm.util.Utility
*
* Revision 1.12 2000/02/03 18:11:19 Alan
* Use array rather than hashtable for char-to-set map
*
* Revision 1.11 2000/01/27 18:59:19 Alan
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
*
* Revision 1.10 2000/01/18 20:36:17 Alan
* Make UnicodeSet inherit from UnicodeFilter
*
* Revision 1.9 2000/01/18 02:38:55 Alan
* Fix filtering bug.
*
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
* Revision 1.6 2000/01/11 02:25:03 Alan
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
*
* Revision 1.5 2000/01/04 21:43:57 Alan
* Add rule indexing, and move masking check to TransliterationRuleSet.
*
* Revision 1.4 1999/12/22 01:40:54 Alan
* Consolidate rule pattern anteContext, key, and postContext into one string.
*
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
* Revision 1.2 1999/12/21 23:58:44 Alan
* Detect a>x masking a>y
*/

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
* $Date: 2000/03/10 04:07:25 $
* $Revision: 1.16 $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
*
*****************************************************************************************
*/
@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.16 $ $Date: 2000/03/10 04:07:25 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@ -887,6 +887,47 @@ public class UnicodeSet implements UnicodeFilter {
i = j; // Make i point at closing '}'
}
/* Parse variable references. These are treated as literals. If a
* variable refers to a UnicodeSet, nestedPairs is assigned here.
* Variable references are only parsed if symbols is not null; each
* name is resolved through symbols.lookup().
*/
// TEMPORARY
// TEMPORARY
// TEMPORARY
else if (symbols != null && !isLiteral && c == '$') {
++i;
c = pattern.charAt(i);
int j = i;
if (Character.isUnicodeIdentifierStart(c)) {
++j;
while (j < limit &&
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
++j;
}
}
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference " +
pattern.substring(i-1, limit));
}
String name = pattern.substring(i, j);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
isLiteral = true;
if (obj instanceof Character) {
c = ((Character) obj).charValue();
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j-1; // Make i point at last char of var name
}
// TEMPORARY
// TEMPORARY
// TEMPORARY
/* An opening bracket indicates the first bracket of a nested
* subpattern, either a normal pattern or a category pattern. We
* recognize these here and set nestedPairs accordingly.

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_KeyboardEscape_Latin1.java,v $
* $Date: 2000/03/10 04:07:30 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -21,120 +21,120 @@ public class TransliterationRule_KeyboardEscape_Latin1 extends ListResourceBundl
public Object[][] getContents() {
return new Object[][] {
{ "Rule",
"esc='';"
+ "grave=`;"
+ "acute='';"
+ "hat=^;"
+ "tilde=~;"
+ "umlaut=:;"
+ "ring=.;"
+ "cedilla=,;"
+ "slash=/;"
+ "super=^;"
"$esc='';"
+ "$grave=`;"
+ "$acute='';"
+ "$hat=^;"
+ "$tilde=~;"
+ "$umlaut=:;"
+ "$ring=.;"
+ "$cedilla=,;"
+ "$slash=/;"
+ "$super=^;"
// Make keyboard entry of {esc} possible
// and of backslash
+ "'\\'{esc}>{esc};"
+ "'\\'$esc>$esc;"
+ "'\\\\'>'\\';"
// Long keys
+ "cur{esc}>\u00A4;"
+ "sec{esc}>\u00A7;"
+ "not{esc}>\u00AC;"
+ "mul{esc}>\u00D7;"
+ "div{esc}>\u00F7;"
+ "cur$esc>\u00A4;"
+ "sec$esc>\u00A7;"
+ "not$esc>\u00AC;"
+ "mul$esc>\u00D7;"
+ "div$esc>\u00F7;"
+ "\\ {esc}>\u00A0;" // non-breaking space
+ "!{esc}>\u00A1;" // inverted exclamation
+ "c/{esc}>\u00A2;" // cent sign
+ "lb{esc}>\u00A3;" // pound sign
+ "'|'{esc}>\u00A6;" // broken vertical bar
+ ":{esc}>\u00A8;" // umlaut
+ "{super}a{esc}>\u00AA;" // feminine ordinal
+ "'<<'{esc}>\u00AB;"
+ "r{esc}>\u00AE;"
+ "--{esc}>\u00AF;"
+ "-{esc}>\u00AD;"
+ "+-{esc}>\u00B1;"
+ "{super}2{esc}>\u00B2;"
+ "{super}3{esc}>\u00B3;"
+ "{acute}{esc}>\u00B4;"
+ "m{esc}>\u00B5;"
+ "para{esc}>\u00B6;"
+ "dot{esc}>\u00B7;"
+ "{cedilla}{esc}>\u00B8;"
+ "{super}1{esc}>\u00B9;"
+ "{super}o{esc}>\u00BA;" // masculine ordinal
+ "'>>'{esc}>\u00BB;"
+ "1/4{esc}>\u00BC;"
+ "1/2{esc}>\u00BD;"
+ "3/4{esc}>\u00BE;"
+ "?{esc}>\u00BF;"
+ "A{grave}{esc}>\u00C0;"
+ "A{acute}{esc}>\u00C1;"
+ "A{hat}{esc}>\u00C2;"
+ "A{tilde}{esc}>\u00C3;"
+ "A{umlaut}{esc}>\u00C4;"
+ "A{ring}{esc}>\u00C5;"
+ "AE{esc}>\u00C6;"
+ "C{cedilla}{esc}>\u00C7;"
+ "E{grave}{esc}>\u00C8;"
+ "E{acute}{esc}>\u00C9;"
+ "E{hat}{esc}>\u00CA;"
+ "E{umlaut}{esc}>\u00CB;"
+ "I{grave}{esc}>\u00CC;"
+ "I{acute}{esc}>\u00CD;"
+ "I{hat}{esc}>\u00CE;"
+ "I{umlaut}{esc}>\u00CF;"
+ "D-{esc}>\u00D0;"
+ "N{tilde}{esc}>\u00D1;"
+ "O{grave}{esc}>\u00D2;"
+ "O{acute}{esc}>\u00D3;"
+ "O{hat}{esc}>\u00D4;"
+ "O{tilde}{esc}>\u00D5;"
+ "O{umlaut}{esc}>\u00D6;"
+ "O{slash}{esc}>\u00D8;"
+ "U{grave}{esc}>\u00D9;"
+ "U{acute}{esc}>\u00DA;"
+ "U{hat}{esc}>\u00DB;"
+ "U{umlaut}{esc}>\u00DC;"
+ "Y{acute}{esc}>\u00DD;"
+ "TH{esc}>\u00DE;"
+ "ss{esc}>\u00DF;"
+ "a{grave}{esc}>\u00E0;"
+ "a{acute}{esc}>\u00E1;"
+ "a{hat}{esc}>\u00E2;"
+ "a{tilde}{esc}>\u00E3;"
+ "a{umlaut}{esc}>\u00E4;"
+ "a{ring}{esc}>\u00E5;"
+ "ae{esc}>\u00E6;"
+ "c{cedilla}{esc}>\u00E7;"
+ "c{esc}>\u00A9;" // copyright - after c{cedilla}
+ "e{grave}{esc}>\u00E8;"
+ "e{acute}{esc}>\u00E9;"
+ "e{hat}{esc}>\u00EA;"
+ "e{umlaut}{esc}>\u00EB;"
+ "i{grave}{esc}>\u00EC;"
+ "i{acute}{esc}>\u00ED;"
+ "i{hat}{esc}>\u00EE;"
+ "i{umlaut}{esc}>\u00EF;"
+ "d-{esc}>\u00F0;"
+ "n{tilde}{esc}>\u00F1;"
+ "o{grave}{esc}>\u00F2;"
+ "o{acute}{esc}>\u00F3;"
+ "o{hat}{esc}>\u00F4;"
+ "o{tilde}{esc}>\u00F5;"
+ "o{umlaut}{esc}>\u00F6;"
+ "o{slash}{esc}>\u00F8;"
+ "o{esc}>\u00B0;"
+ "u{grave}{esc}>\u00F9;"
+ "u{acute}{esc}>\u00FA;"
+ "u{hat}{esc}>\u00FB;"
+ "u{umlaut}{esc}>\u00FC;"
+ "y{acute}{esc}>\u00FD;"
+ "y{esc}>\u00A5;" // yen sign
+ "th{esc}>\u00FE;"
//masked: + "ss{esc}>\u00FF;"
+ "\\ $esc>\u00A0;" // non-breaking space
+ "!$esc>\u00A1;" // inverted exclamation
+ "c/$esc>\u00A2;" // cent sign
+ "lb$esc>\u00A3;" // pound sign
+ "'|'$esc>\u00A6;" // broken vertical bar
+ ":$esc>\u00A8;" // umlaut
+ "$super a$esc>\u00AA;" // feminine ordinal
+ "'<<'$esc>\u00AB;"
+ "r$esc>\u00AE;"
+ "--$esc>\u00AF;"
+ "-$esc>\u00AD;"
+ "+-$esc>\u00B1;"
+ "$super 2$esc>\u00B2;"
+ "$super 3$esc>\u00B3;"
+ "$acute$esc>\u00B4;"
+ "m$esc>\u00B5;"
+ "para$esc>\u00B6;"
+ "dot$esc>\u00B7;"
+ "$cedilla$esc>\u00B8;"
+ "$super 1$esc>\u00B9;"
+ "$super o$esc>\u00BA;" // masculine ordinal
+ "'>>'$esc>\u00BB;"
+ "1/4$esc>\u00BC;"
+ "1/2$esc>\u00BD;"
+ "3/4$esc>\u00BE;"
+ "?$esc>\u00BF;"
+ "A$grave$esc>\u00C0;"
+ "A$acute$esc>\u00C1;"
+ "A$hat$esc>\u00C2;"
+ "A$tilde$esc>\u00C3;"
+ "A$umlaut$esc>\u00C4;"
+ "A$ring$esc>\u00C5;"
+ "AE$esc>\u00C6;"
+ "C$cedilla$esc>\u00C7;"
+ "E$grave$esc>\u00C8;"
+ "E$acute$esc>\u00C9;"
+ "E$hat$esc>\u00CA;"
+ "E$umlaut$esc>\u00CB;"
+ "I$grave$esc>\u00CC;"
+ "I$acute$esc>\u00CD;"
+ "I$hat$esc>\u00CE;"
+ "I$umlaut$esc>\u00CF;"
+ "D-$esc>\u00D0;"
+ "N$tilde$esc>\u00D1;"
+ "O$grave$esc>\u00D2;"
+ "O$acute$esc>\u00D3;"
+ "O$hat$esc>\u00D4;"
+ "O$tilde$esc>\u00D5;"
+ "O$umlaut$esc>\u00D6;"
+ "O$slash$esc>\u00D8;"
+ "U$grave$esc>\u00D9;"
+ "U$acute$esc>\u00DA;"
+ "U$hat$esc>\u00DB;"
+ "U$umlaut$esc>\u00DC;"
+ "Y$acute$esc>\u00DD;"
+ "TH$esc>\u00DE;"
+ "ss$esc>\u00DF;"
+ "a$grave$esc>\u00E0;"
+ "a$acute$esc>\u00E1;"
+ "a$hat$esc>\u00E2;"
+ "a$tilde$esc>\u00E3;"
+ "a$umlaut$esc>\u00E4;"
+ "a$ring$esc>\u00E5;"
+ "ae$esc>\u00E6;"
+ "c$cedilla$esc>\u00E7;"
+ "c$esc>\u00A9;" // copyright - after c{cedilla}
+ "e$grave$esc>\u00E8;"
+ "e$acute$esc>\u00E9;"
+ "e$hat$esc>\u00EA;"
+ "e$umlaut$esc>\u00EB;"
+ "i$grave$esc>\u00EC;"
+ "i$acute$esc>\u00ED;"
+ "i$hat$esc>\u00EE;"
+ "i$umlaut$esc>\u00EF;"
+ "d-$esc>\u00F0;"
+ "n$tilde$esc>\u00F1;"
+ "o$grave$esc>\u00F2;"
+ "o$acute$esc>\u00F3;"
+ "o$hat$esc>\u00F4;"
+ "o$tilde$esc>\u00F5;"
+ "o$umlaut$esc>\u00F6;"
+ "o$slash$esc>\u00F8;"
+ "o$esc>\u00B0;"
+ "u$grave$esc>\u00F9;"
+ "u$acute$esc>\u00FA;"
+ "u$hat$esc>\u00FB;"
+ "u$umlaut$esc>\u00FC;"
+ "y$acute$esc>\u00FD;"
+ "y$esc>\u00A5;" // yen sign
+ "th$esc>\u00FE;"
//masked: + "ss$esc>\u00FF;"
}
};
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Arabic.java,v $
* $Date: 2000/03/10 04:07:30 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -23,52 +23,52 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
{ "Rule",
// To Do: finish adding shadda, add sokoon
"alefmadda=\u0622;"+
"alefuhamza=\u0623;"+
"wauuhamza=\u0624;"+
"alefhamza=\u0625;"+
"yehuhamza=\u0626;"+
"alef=\u0627;"+
"beh=\u0628;"+
"tehmarbuta=\u0629;"+
"teh=\u062A;"+
"theh=\u062B;"+
"geem=\u062C;"+
"hah=\u062D;"+
"kha=\u062E;"+
"dal=\u062F;"+
"dhal=\u0630;"+
"reh=\u0631;"+
"zain=\u0632;"+
"seen=\u0633;"+
"sheen=\u0634;"+
"sad=\u0635;"+
"dad=\u0636;"+
"tah=\u0637;"+
"zah=\u0638;"+
"ein=\u0639;"+
"ghein=\u063A;"+
"feh=\u0641;"+
"qaaf=\u0642;"+
"kaf=\u0643;"+
"lam=\u0644;"+
"meem=\u0645;"+
"noon=\u0646;"+
"heh=\u0647;"+
"wau=\u0648;"+
"yehmaqsura=\u0649;"+
"yeh=\u064A;"+
"peh=\u06A4;"+
"$alefmadda=\u0622;"+
"$alefuhamza=\u0623;"+
"$wauuhamza=\u0624;"+
"$alefhamza=\u0625;"+
"$yehuhamza=\u0626;"+
"$alef=\u0627;"+
"$beh=\u0628;"+
"$tehmarbuta=\u0629;"+
"$teh=\u062A;"+
"$theh=\u062B;"+
"$geem=\u062C;"+
"$hah=\u062D;"+
"$kha=\u062E;"+
"$dal=\u062F;"+
"$dhal=\u0630;"+
"$reh=\u0631;"+
"$zain=\u0632;"+
"$seen=\u0633;"+
"$sheen=\u0634;"+
"$sad=\u0635;"+
"$dad=\u0636;"+
"$tah=\u0637;"+
"$zah=\u0638;"+
"$ein=\u0639;"+
"$ghein=\u063A;"+
"$feh=\u0641;"+
"$qaaf=\u0642;"+
"$kaf=\u0643;"+
"$lam=\u0644;"+
"$meem=\u0645;"+
"$noon=\u0646;"+
"$heh=\u0647;"+
"$wau=\u0648;"+
"$yehmaqsura=\u0649;"+
"$yeh=\u064A;"+
"$peh=\u06A4;"+
"hamza=\u0621;"+
"fathatein=\u064B;"+
"dammatein=\u064C;"+
"kasratein=\u064D;"+
"fatha=\u064E;"+
"damma=\u064F;"+
"kasra=\u0650;"+
"shadda=\u0651;"+
"sokoon=\u0652;"+
"$hamza=\u0621;"+
"$fathatein=\u064B;"+
"$dammatein=\u064C;"+
"$kasratein=\u064D;"+
"$fatha=\u064E;"+
"$damma=\u064F;"+
"$kasra=\u0650;"+
"$shadda=\u0651;"+
"$sokoon=\u0652;"+
// convert English to Arabic
"Arabic>"+
@ -79,56 +79,56 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"+
"\u062c\u0645\u064a\u0644\u0629;"+
"ai>{alefmadda};"+
"ae>{alefuhamza};"+
"ao>{alefhamza};"+
"aa>{alef};"+
"an>{fathatein};"+
"a>{fatha};"+
"b>{beh};"+
"c>{kaf};"+
"{dhal})dh>{shadda};"+
"dh>{dhal};"+
"{dad})dd>{shadda};"+
"dd>{dad};"+
"{dal})d>{shadda};"+
"d>{dal};"+
"e>{ein};"+
"f>{feh};"+
"gh>{ghein};"+
"g>{geem};"+
"hh>{hah};"+
"h>{heh};"+
"ii>{kasratein};"+
"i>{kasra};"+
"j>{geem};"+
"kh>{kha};"+
"k>{kaf};"+
"l>{lam};"+
"m>{meem};"+
"n>{noon};"+
"o>{hamza};"+
"p>{peh};"+
"q>{qaaf};"+
"r>{reh};"+
"sh>{sheen};"+
"ss>{sad};"+
"s>{seen};"+
"th>{theh};"+
"tm>{tehmarbuta};"+
"tt>{tah};"+
"t>{teh};"+
"uu>{dammatein};"+
"u>{damma};"+
"v>{beh};"+
"we>{wauuhamza};"+
"w>{wau};"+
"x>{kaf}{shadda}{seen};"+
"ye>{yehuhamza};"+
"ym>{yehmaqsura};"+
"y>{yeh};"+
"zz>{zah};"+
"z>{zain};"+
"ai>$alefmadda;"+
"ae>$alefuhamza;"+
"ao>$alefhamza;"+
"aa>$alef;"+
"an>$fathatein;"+
"a>$fatha;"+
"b>$beh;"+
"c>$kaf;"+
"$dhal{dh>$shadda;"+
"dh>$dhal;"+
"$dad{dd>$shadda;"+
"dd>$dad;"+
"$dal{d>$shadda;"+
"d>$dal;"+
"e>$ein;"+
"f>$feh;"+
"gh>$ghein;"+
"g>$geem;"+
"hh>$hah;"+
"h>$heh;"+
"ii>$kasratein;"+
"i>$kasra;"+
"j>$geem;"+
"kh>$kha;"+
"k>$kaf;"+
"l>$lam;"+
"m>$meem;"+
"n>$noon;"+
"o>$hamza;"+
"p>$peh;"+
"q>$qaaf;"+
"r>$reh;"+
"sh>$sheen;"+
"ss>$sad;"+
"s>$seen;"+
"th>$theh;"+
"tm>$tehmarbuta;"+
"tt>$tah;"+
"t>$teh;"+
"uu>$dammatein;"+
"u>$damma;"+
"v>$beh;"+
"we>$wauuhamza;"+
"w>$wau;"+
"x>$kaf$shadda$seen;"+
"ye>$yehuhamza;"+
"ym>$yehmaqsura;"+
"y>$yeh;"+
"zz>$zah;"+
"z>$zain;"+
"0>\u0660;"+ // Arabic digit 0
"1>\u0661;"+ // Arabic digit 1
@ -165,88 +165,88 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
// now Arabic to English
"''ai<a){alefmadda};"+
"ai<{alefmadda};"+
"''ae<a){alefuhamza};"+
"ae<{alefuhamza};"+
"''ao<a){alefhamza};"+
"ao<{alefhamza};"+
"''aa<a){alef};"+
"aa<{alef};"+
"''an<a){fathatein};"+
"an<{fathatein};"+
"''a<a){fatha};"+
"a<{fatha};"+
"b<{beh};"+
"''dh<d){dhal};"+
"dh<{dhal};"+
"''dd<d){dad};"+
"dd<{dad};"+
"''d<d){dal};"+
"d<{dal};"+
"''e<a){ein};"+
"''e<w){ein};"+
"''e<y){ein};"+
"e<{ein};"+
"f<{feh};"+
"gh<{ghein};"+
"''hh<d){hah};"+
"''hh<t){hah};"+
"''hh<k){hah};"+
"''hh<s){hah};"+
"hh<{hah};"+
"''h<d){heh};"+
"''h<t){heh};"+
"''h<k){heh};"+
"''h<s){heh};"+
"h<{heh};"+
"''ii<i){kasratein};"+
"ii<{kasratein};"+
"''i<i){kasra};"+
"i<{kasra};"+
"j<{geem};"+
"kh<{kha};"+
"x<{kaf}{shadda}{seen};"+
"k<{kaf};"+
"l<{lam};"+
"''m<y){meem};"+
"''m<t){meem};"+
"m<{meem};"+
"n<{noon};"+
"''o<a){hamza};"+
"o<{hamza};"+
"p<{peh};"+
"q<{qaaf};"+
"r<{reh};"+
"sh<{sheen};"+
"''ss<s){sad};"+
"ss<{sad};"+
"''s<s){seen};"+
"s<{seen};"+
"th<{theh};"+
"tm<{tehmarbuta};"+
"''tt<t){tah};"+
"tt<{tah};"+
"''t<t){teh};"+
"t<{teh};"+
"''uu<u){dammatein};"+
"uu<{dammatein};"+
"''u<u){damma};"+
"u<{damma};"+
"we<{wauuhamza};"+
"w<{wau};"+
"ye<{yehuhamza};"+
"ym<{yehmaqsura};"+
"''y<y){yeh};"+
"y<{yeh};"+
"''zz<z){zah};"+
"zz<{zah};"+
"''z<z){zain};"+
"z<{zain};"+
"''ai<a{$alefmadda;"+
"ai<$alefmadda;"+
"''ae<a{$alefuhamza;"+
"ae<$alefuhamza;"+
"''ao<a{$alefhamza;"+
"ao<$alefhamza;"+
"''aa<a{$alef;"+
"aa<$alef;"+
"''an<a{$fathatein;"+
"an<$fathatein;"+
"''a<a{$fatha;"+
"a<$fatha;"+
"b<$beh;"+
"''dh<d{$dhal;"+
"dh<$dhal;"+
"''dd<d{$dad;"+
"dd<$dad;"+
"''d<d{$dal;"+
"d<$dal;"+
"''e<a{$ein;"+
"''e<w{$ein;"+
"''e<y{$ein;"+
"e<$ein;"+
"f<$feh;"+
"gh<$ghein;"+
"''hh<d{$hah;"+
"''hh<t{$hah;"+
"''hh<k{$hah;"+
"''hh<s{$hah;"+
"hh<$hah;"+
"''h<d{$heh;"+
"''h<t{$heh;"+
"''h<k{$heh;"+
"''h<s{$heh;"+
"h<$heh;"+
"''ii<i{$kasratein;"+
"ii<$kasratein;"+
"''i<i{$kasra;"+
"i<$kasra;"+
"j<$geem;"+
"kh<$kha;"+
"x<$kaf$shadda$seen;"+
"k<$kaf;"+
"l<$lam;"+
"''m<y{$meem;"+
"''m<t{$meem;"+
"m<$meem;"+
"n<$noon;"+
"''o<a{$hamza;"+
"o<$hamza;"+
"p<$peh;"+
"q<$qaaf;"+
"r<$reh;"+
"sh<$sheen;"+
"''ss<s{$sad;"+
"ss<$sad;"+
"''s<s{$seen;"+
"s<$seen;"+
"th<$theh;"+
"tm<$tehmarbuta;"+
"''tt<t{$tah;"+
"tt<$tah;"+
"''t<t{$teh;"+
"t<$teh;"+
"''uu<u{$dammatein;"+
"uu<$dammatein;"+
"''u<u{$damma;"+
"u<$damma;"+
"we<$wauuhamza;"+
"w<$wau;"+
"ye<$yehuhamza;"+
"ym<$yehmaqsura;"+
"''y<y{$yeh;"+
"y<$yeh;"+
"''zz<z{$zah;"+
"zz<$zah;"+
"''z<z{$zain;"+
"z<$zain;"+
"dh<dh){shadda};"+
"dd<dd){shadda};"+
"''d<d){shadda};"
"dh<dh{$shadda;"+
"dd<dd{$shadda;"+
"''d<d{$shadda;"
}
};
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Cyrillic.java,v $
* $Date: 2000/03/10 04:07:30 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -29,102 +29,102 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
mappings.
*/
+ "S-hacek=\u0160;"
+ "s-hacek=\u0161;"
+ "$S_hacek=\u0160;"
+ "$s_hacek=\u0161;"
+ "YO=\u0401;"
+ "J=\u0408;"
+ "A=\u0410;"
+ "B=\u0411;"
+ "V=\u0412;"
+ "G=\u0413;"
+ "D=\u0414;"
+ "YE=\u0415;"
+ "ZH=\u0416;"
+ "Z=\u0417;"
+ "YI=\u0418;"
+ "Y=\u0419;"
+ "K=\u041A;"
+ "L=\u041B;"
+ "M=\u041C;"
+ "N=\u041D;"
+ "O=\u041E;"
+ "P=\u041F;"
+ "R=\u0420;"
+ "S=\u0421;"
+ "T=\u0422;"
+ "U=\u0423;"
+ "F=\u0424;"
+ "KH=\u0425;"
+ "TS=\u0426;"
+ "CH=\u0427;"
+ "SH=\u0428;"
+ "SHCH=\u0429;"
+ "HARD=\u042A;"
+ "I=\u042B;"
+ "SOFT=\u042C;"
+ "E=\u042D;"
+ "YU=\u042E;"
+ "YA=\u042F;"
+ "$YO=\u0401;"
+ "$J=\u0408;"
+ "$A=\u0410;"
+ "$B=\u0411;"
+ "$V=\u0412;"
+ "$G=\u0413;"
+ "$D=\u0414;"
+ "$YE=\u0415;"
+ "$ZH=\u0416;"
+ "$Z=\u0417;"
+ "$YI=\u0418;"
+ "$Y=\u0419;"
+ "$K=\u041A;"
+ "$L=\u041B;"
+ "$M=\u041C;"
+ "$N=\u041D;"
+ "$O=\u041E;"
+ "$P=\u041F;"
+ "$R=\u0420;"
+ "$S=\u0421;"
+ "$T=\u0422;"
+ "$U=\u0423;"
+ "$F=\u0424;"
+ "$KH=\u0425;"
+ "$TS=\u0426;"
+ "$CH=\u0427;"
+ "$SH=\u0428;"
+ "$SHCH=\u0429;"
+ "$HARD=\u042A;"
+ "$I=\u042B;"
+ "$SOFT=\u042C;"
+ "$E=\u042D;"
+ "$YU=\u042E;"
+ "$YA=\u042F;"
// Lowercase
+ "a=\u0430;"
+ "b=\u0431;"
+ "v=\u0432;"
+ "g=\u0433;"
+ "d=\u0434;"
+ "ye=\u0435;"
+ "zh=\u0436;"
+ "z=\u0437;"
+ "yi=\u0438;"
+ "y=\u0439;"
+ "k=\u043a;"
+ "l=\u043b;"
+ "m=\u043c;"
+ "n=\u043d;"
+ "o=\u043e;"
+ "p=\u043f;"
+ "r=\u0440;"
+ "s=\u0441;"
+ "t=\u0442;"
+ "u=\u0443;"
+ "f=\u0444;"
+ "kh=\u0445;"
+ "ts=\u0446;"
+ "ch=\u0447;"
+ "sh=\u0448;"
+ "shch=\u0449;"
+ "hard=\u044a;"
+ "i=\u044b;"
+ "soft=\u044c;"
+ "e=\u044d;"
+ "yu=\u044e;"
+ "ya=\u044f;"
+ "$a=\u0430;"
+ "$b=\u0431;"
+ "$v=\u0432;"
+ "$g=\u0433;"
+ "$d=\u0434;"
+ "$ye=\u0435;"
+ "$zh=\u0436;"
+ "$z=\u0437;"
+ "$yi=\u0438;"
+ "$y=\u0439;"
+ "$k=\u043a;"
+ "$l=\u043b;"
+ "$m=\u043c;"
+ "$n=\u043d;"
+ "$o=\u043e;"
+ "$p=\u043f;"
+ "$r=\u0440;"
+ "$s=\u0441;"
+ "$t=\u0442;"
+ "$u=\u0443;"
+ "$f=\u0444;"
+ "$kh=\u0445;"
+ "$ts=\u0446;"
+ "$ch=\u0447;"
+ "$sh=\u0448;"
+ "$shch=\u0449;"
+ "$hard=\u044a;"
+ "$i=\u044b;"
+ "$soft=\u044c;"
+ "$e=\u044d;"
+ "$yu=\u044e;"
+ "$ya=\u044f;"
+ "yo=\u0451;"
+ "j=\u0458;"
+ "$yo=\u0451;"
+ "$j=\u0458;"
// variables
// some are duplicated so lowercasing works
+ "csoft=[eiyEIY];"
+ "CSOFT=[eiyEIY];"
+ "$csoft=[eiyEIY];"
+ "$CSOFT=[eiyEIY];"
+ "BECOMES_H=[{HARD}{hard}];"
+ "becomes_h=[{HARD}{hard}];"
+ "$BECOMES_H=[$HARD$hard];"
+ "$becomes_h=[$HARD$hard];"
+ "BECOMES_S=[{S}{s}];"
+ "becomes_s=[{S}{s}];"
+ "$BECOMES_S=[$S$s];"
+ "$becomes_s=[$S$s];"
+ "BECOMES_C=[{CH}{ch}];"
+ "becomes_c=[{CH}{ch}];"
+ "$BECOMES_C=[$CH$ch];"
+ "$becomes_c=[$CH$ch];"
+ "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
+ "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
+ "$BECOMES_VOWEL=[$A$E$I$O$U$a$e$i$o$u];"
+ "$becomes_vowel=[$A$E$I$O$U$a$e$i$o$u];"
+ "letter=[[:Lu:][:Ll:]];"
+ "lower=[[:Ll:]];"
+ "$letter=[[:Lu:][:Ll:]];"
+ "$lower=[[:Ll:]];"
/*
Modified to combine display transliterator and typing transliterator.
@ -137,17 +137,17 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
// Special titlecase forms, not duplicated
// #############################################
+ "Ch>{CH};" + "Ch<{CH}({lower};"
+ "Kh>{KH};" + "Kh<{KH}({lower};"
+ "Shch>{SHCH};" + "Shch<{SHCH}({lower};"
+ "Sh>{SH};" + "Sh<{SH}({lower};"
+ "Ts>{TS};" + "Ts<{TS}({lower};"
+ "Zh>{ZH};" + "Zh<{ZH}({lower};"
+ "Yi>{YI};" //+ "Yi<{YI}({lower};"
+ "Ye>{YE};" //+ "Ye<{YE}({lower};"
+ "Yo>{YO};" //+ "Yo<{YO}({lower};"
+ "Yu>{YU};" //+ "Yu<{YU}({lower};"
+ "Ya>{YA};" //+ "Ya<{YA}({lower};"
+ "Ch>$CH;" + "Ch<$CH}$lower;"
+ "Kh>$KH;" + "Kh<$KH}$lower;"
+ "Shch>$SHCH;" + "Shch<$SHCH}$lower;"
+ "Sh>$SH;" + "Sh<$SH}$lower;"
+ "Ts>$TS;" + "Ts<$TS}$lower;"
+ "Zh>$ZH;" + "Zh<$ZH}$lower;"
+ "Yi>$YI;" //+ "Yi<$YI}$lower;"
+ "Ye>$YE;" //+ "Ye<$YE}$lower;"
+ "Yo>$YO;" //+ "Yo<$YO}$lower;"
+ "Yu>$YU;" //+ "Yu<$YU}$lower;"
+ "Ya>$YA;" //+ "Ya<$YA}$lower;"
// #############################################
// Rules to Duplicate
@ -156,77 +156,77 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
// variant spellings in English
+ "SHTCH>{SHCH};"
+ "TCH>{CH};"
+ "TH>{Z};"
+ "Q>{K};"
+ "WH>{V};"
+ "W>{V};"
+ "X>{K}{S};" //+ "X<{K}{S};"
+ "SHTCH>$SHCH;"
+ "TCH>$CH;"
+ "TH>$Z;"
+ "Q>$K;"
+ "WH>$V;"
+ "W>$V;"
+ "X>$K$S;" //+ "X<$K$S;"
// Separate letters that would otherwise join
+ "SH''<{SH}({BECOMES_C};"
+ "T''<{T}({BECOMES_S};"
+ "SH''<$SH}$BECOMES_C;"
+ "T''<$T}$BECOMES_S;"
+ "K''<{K}({BECOMES_H};"
+ "S''<{S}({BECOMES_H};"
+ "T''<{T}({BECOMES_H};"
+ "Z''<{Z}({BECOMES_H};"
+ "K''<$K}$BECOMES_H;"
+ "S''<$S}$BECOMES_H;"
+ "T''<$T}$BECOMES_H;"
+ "Z''<$Z}$BECOMES_H;"
+ "Y''<{Y}({BECOMES_VOWEL};"
+ "Y''<$Y}$BECOMES_VOWEL;"
// Main letters
+ "A<>{A};"
+ "B<>{B};"
+ "CH<>{CH};"
+ "D<>{D};"
+ "E<>{E};"
+ "F<>{F};"
+ "G<>{G};"
+ "\u00cc<>{YI};"
+ "I<>{I};"
+ "KH<>{KH};"
+ "K<>{K};"
+ "L<>{L};"
+ "M<>{M};"
+ "N<>{N};"
+ "O<>{O};"
+ "P<>{P};"
+ "R<>{R};"
+ "SHCH<>{SHCH};"
+ "SH>{SH};" //+ "SH<{SH};"
+ "{S-hacek}<>{SH};"
+ "S<>{S};"
+ "TS<>{TS};"
+ "T<>{T};"
+ "U<>{U};"
+ "V<>{V};"
+ "A<>$A;"
+ "B<>$B;"
+ "CH<>$CH;"
+ "D<>$D;"
+ "E<>$E;"
+ "F<>$F;"
+ "G<>$G;"
+ "\u00cc<>$YI;"
+ "I<>$I;"
+ "KH<>$KH;"
+ "K<>$K;"
+ "L<>$L;"
+ "M<>$M;"
+ "N<>$N;"
+ "O<>$O;"
+ "P<>$P;"
+ "R<>$R;"
+ "SHCH<>$SHCH;"
+ "SH>$SH;" //+ "SH<$SH;"
+ "$S_hacek<>$SH;"
+ "S<>$S;"
+ "TS<>$TS;"
+ "T<>$T;"
+ "U<>$U;"
+ "V<>$V;"
//\u00cc\u00c0\u00c8\u00d2\u00d9
+ "YE>{YE};" //+ "YE<{YE};"
+ "\u00c8<>{YE};"
+ "YO>{YO};" //+ "YO<{YO};"
+ "\u00d2<>{YO};"
+ "YU>{YU};" //+ "YU<{YU};"
+ "\u00d9<>{YU};"
+ "YA>{YA};" //+ "YA<{YA};"
+ "\u00c0<>{YA};"
+ "Y<>{Y};"
+ "ZH<>{ZH};"
+ "Z<>{Z};"
+ "YE>$YE;" //+ "YE<$YE;"
+ "\u00c8<>$YE;"
+ "YO>$YO;" //+ "YO<$YO;"
+ "\u00d2<>$YO;"
+ "YU>$YU;" //+ "YU<$YU;"
+ "\u00d9<>$YU;"
+ "YA>$YA;" //+ "YA<$YA;"
+ "\u00c0<>$YA;"
+ "Y<>$Y;"
+ "ZH<>$ZH;"
+ "Z<>$Z;"
+ "H<>{HARD};"
+ "\u0178<>{SOFT};"
+ "H<>$HARD;"
+ "\u0178<>$SOFT;"
// Non-russian
+ "J<>{J};"
+ "J<>$J;"
// variant spellings in English
+ "C({csoft}>{S};"
+ "C>{K};"
+ "C}$csoft>$S;"
+ "C>$K;"
// #############################################
// Duplicated Rules
@ -235,77 +235,77 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
// variant spellings in english
+ "shtch>{shch};"
+ "tch>{ch};"
+ "th>{z};"
+ "q>{k};"
+ "wh>{v};"
+ "w>{v};"
+ "x>{k}{s};" //+ "x<{k}{s};"
+ "shtch>$shch;"
+ "tch>$ch;"
+ "th>$z;"
+ "q>$k;"
+ "wh>$v;"
+ "w>$v;"
+ "x>$k$s;" //+ "x<$k$s;"
// separate letters that would otherwise join
+ "sh''<{sh}({becomes_c};"
+ "t''<{t}({becomes_s};"
+ "sh''<$sh}$becomes_c;"
+ "t''<$t}$becomes_s;"
+ "k''<{k}({becomes_h};"
+ "s''<{s}({becomes_h};"
+ "t''<{t}({becomes_h};"
+ "z''<{z}({becomes_h};"
+ "k''<$k}$becomes_h;"
+ "s''<$s}$becomes_h;"
+ "t''<$t}$becomes_h;"
+ "z''<$z}$becomes_h;"
+ "y''<{y}({becomes_vowel};"
+ "y''<$y}$becomes_vowel;"
// main letters
+ "a<>{a};"
+ "b<>{b};"
+ "ch<>{ch};"
+ "d<>{d};"
+ "e<>{e};"
+ "f<>{f};"
+ "g<>{g};"
+ "\u00ec<>{yi};"
+ "i<>{i};"
+ "kh<>{kh};"
+ "k<>{k};"
+ "l<>{l};"
+ "m<>{m};"
+ "n<>{n};"
+ "o<>{o};"
+ "p<>{p};"
+ "r<>{r};"
+ "shch<>{shch};"
+ "sh>{sh};" //+ "sh<{sh};"
+ "{s-hacek}<>{sh};"
+ "s<>{s};"
+ "ts<>{ts};"
+ "t<>{t};"
+ "u<>{u};"
+ "v<>{v};"
+ "a<>$a;"
+ "b<>$b;"
+ "ch<>$ch;"
+ "d<>$d;"
+ "e<>$e;"
+ "f<>$f;"
+ "g<>$g;"
+ "\u00ec<>$yi;"
+ "i<>$i;"
+ "kh<>$kh;"
+ "k<>$k;"
+ "l<>$l;"
+ "m<>$m;"
+ "n<>$n;"
+ "o<>$o;"
+ "p<>$p;"
+ "r<>$r;"
+ "shch<>$shch;"
+ "sh>$sh;" //+ "sh<$sh;"
+ "$s_hacek<>$sh;"
+ "s<>$s;"
+ "ts<>$ts;"
+ "t<>$t;"
+ "u<>$u;"
+ "v<>$v;"
//\u00ec\u00e0\u00e8\u00f2\u00f9
+ "ye>{ye};" //+ "ye<{ye};"
+ "\u00e8<>{ye};"
+ "yo>{yo};" //+ "yo<{yo};"
+ "\u00f2<>{yo};"
+ "yu>{yu};" //+ "yu<{yu};"
+ "\u00f9<>{yu};"
+ "ya>{ya};" //+ "ya<{ya};"
+ "\u00e0<>{ya};"
+ "y<>{y};"
+ "zh<>{zh};"
+ "z<>{z};"
+ "ye>$ye;" //+ "ye<$ye;"
+ "\u00e8<>$ye;"
+ "yo>$yo;" //+ "yo<$yo;"
+ "\u00f2<>$yo;"
+ "yu>$yu;" //+ "yu<$yu;"
+ "\u00f9<>$yu;"
+ "ya>$ya;" //+ "ya<$ya;"
+ "\u00e0<>$ya;"
+ "y<>$y;"
+ "zh<>$zh;"
+ "z<>$z;"
+ "h<>{hard};"
+ "\u00ff<>{soft};"
+ "h<>$hard;"
+ "\u00ff<>$soft;"
// non-russian
+ "j<>{j};"
+ "j<>$j;"
// variant spellings in english
+ "c({csoft}>{s};"
+ "c>{k};"
+ "c}$csoft>$s;"
+ "c>$k;"

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Devanagari.java,v $
* $Date: 2000/03/10 04:07:31 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -54,91 +54,92 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
//#####################################################################
//consonants
"candrabindu=\u0901;"
+ "bindu=\u0902;"
+ "visarga=\u0903;"
"$candrabindu=\u0901;"
+ "$bindu=\u0902;"
+ "$visarga=\u0903;"
// w<vowel> represents the stand-alone form
+ "wa=\u0905;"
+ "waa=\u0906;"
+ "wi=\u0907;"
+ "wii=\u0908;"
+ "wu=\u0909;"
+ "wuu=\u090A;"
+ "wr=\u090B;"
+ "wl=\u090C;"
+ "we=\u090F;"
+ "wai=\u0910;"
+ "wo=\u0913;"
+ "wau=\u0914;"
+ "$wa=\u0905;"
+ "$waa=\u0906;"
+ "$wi=\u0907;"
+ "$wii=\u0908;"
+ "$wu=\u0909;"
+ "$wuu=\u090A;"
+ "$wr=\u090B;"
+ "$wl=\u090C;"
+ "$we=\u090F;"
+ "$wai=\u0910;"
+ "$wo=\u0913;"
+ "$wau=\u0914;"
+ "ka=\u0915;"
+ "kha=\u0916;"
+ "ga=\u0917;"
+ "gha=\u0918;"
+ "nga=\u0919;"
+ "$ka=\u0915;"
+ "$kha=\u0916;"
+ "$ga=\u0917;"
+ "$gha=\u0918;"
+ "$nga=\u0919;"
+ "ca=\u091A;"
+ "cha=\u091B;"
+ "ja=\u091C;"
+ "jha=\u091D;"
+ "nya=\u091E;"
+ "$ca=\u091A;"
+ "$cha=\u091B;"
+ "$ja=\u091C;"
+ "$jha=\u091D;"
+ "$nya=\u091E;"
+ "tta=\u091F;"
+ "ttha=\u0920;"
+ "dda=\u0921;"
+ "ddha=\u0922;"
+ "nna=\u0923;"
+ "$tta=\u091F;"
+ "$ttha=\u0920;"
+ "$dda=\u0921;"
+ "$ddha=\u0922;"
+ "$nna=\u0923;"
+ "ta=\u0924;"
+ "tha=\u0925;"
+ "da=\u0926;"
+ "dha=\u0927;"
+ "na=\u0928;"
+ "$ta=\u0924;"
+ "$tha=\u0925;"
+ "$da=\u0926;"
+ "$dha=\u0927;"
+ "$na=\u0928;"
+ "pa=\u092A;"
+ "pha=\u092B;"
+ "ba=\u092C;"
+ "bha=\u092D;"
+ "ma=\u092E;"
+ "$pa=\u092A;"
+ "$pha=\u092B;"
+ "$ba=\u092C;"
+ "$bha=\u092D;"
+ "$ma=\u092E;"
+ "ya=\u092F;"
+ "ra=\u0930;"
+ "rra=\u0931;"
+ "la=\u0933;"
+ "va=\u0935;"
+ "$ya=\u092F;"
+ "$ra=\u0930;"
+ "$rra=\u0931;"
+ "$la=\u0933;"
+ "$va=\u0935;"
+ "sha=\u0936;"
+ "ssa=\u0937;"
+ "sa=\u0938;"
+ "ha=\u0939;"
+ "$sha=\u0936;"
+ "$ssa=\u0937;"
+ "$sa=\u0938;"
+ "$ha=\u0939;"
// <vowel> represents the dependent form
+ "aa=\u093E;"
+ "i=\u093F;"
+ "ii=\u0940;"
+ "u=\u0941;"
+ "uu=\u0942;"
+ "rh=\u0943;"
+ "lh=\u0944;"
+ "e=\u0947;"
+ "ai=\u0948;"
+ "o=\u094B;"
+ "au=\u094C;"
+ "$aa=\u093E;"
+ "$i=\u093F;"
+ "$ii=\u0940;"
+ "$u=\u0941;"
+ "$uu=\u0942;"
+ "$rh=\u0943;"
+ "$lh=\u0944;"
+ "$e=\u0947;"
+ "$ai=\u0948;"
+ "$o=\u094B;"
+ "$au=\u094C;"
+ "virama=\u094D;"
+ "$virama=\u094D;"
+ "wrr=\u0960;"
+ "rrh=\u0962;"
+ "$wrr=\u0960;"
+ "$rrh=\u0962;"
+ "danda=\u0964;"
+ "doubleDanda=\u0965;"
+ "depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
+ "depVowelBelow=[\u0941-\u0944];"
+ "endThing=[{danda}{doubleDanda}\u0000-\u08FF\u0980-\uFFFF];"
+ "$danda=\u0964;"
+ "$doubleDanda=\u0965;"
+ "$depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
+ "$depVowelBelow=[\u0941-\u0944];"
+ "$endThing=[$danda$doubleDanda \u0000-\u08FF\u0980-\uFFFF];"
+ "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
+ "%=[bcdfghjklmnpqrstvwxyz];"
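// Disabled -- '&' and '%' are illegal variable names under the $name syntax and
// need to be renamed. NOTE(review): the reverse (Devanagari-to-Latin) rules
// below still reference '&' and '%' (e.g. "kh<$kha}&;" and "''aa<%{$waa;");
// with these definitions commented out they presumably no longer match the
// intended sets -- restore the sets under legal names and update those rules.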
// + "$&=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o];"
// + "$%=[bcdfghjklmnpqrstvwxyz];"
//#####################################################################
// convert from Latin letters to Native letters
@ -147,106 +148,106 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
// special forms with no good conversion
+ "mm>{bindu};"
+ "x>{visarga};"
+ "mm>$bindu;"
+ "x>$visarga;"
// convert to independent forms at start of word or syllable:
// e.g. keai -> $ka$e$wai; k'ai -> $ka$wai; (ai) -> ($wai)
// Moved up [LIU]
+ "aa>{waa};"
+ "ai>{wai};"
+ "au>{wau};"
+ "ii>{wii};"
+ "i>{wi};"
+ "uu>{wuu};"
+ "u>{wu};"
+ "rrh>{wrr};"
+ "rh>{wr};"
+ "lh>{wl};"
+ "e>{we};"
+ "o>{wo};"
+ "a>{wa};"
+ "aa>$waa;"
+ "ai>$wai;"
+ "au>$wau;"
+ "ii>$wii;"
+ "i>$wi;"
+ "uu>$wuu;"
+ "u>$wu;"
+ "rrh>$wrr;"
+ "rh>$wr;"
+ "lh>$wl;"
+ "e>$we;"
+ "o>$wo;"
+ "a>$wa;"
// normal consonants
+ "kh>{kha}|{virama};"
+ "k>{ka}|{virama};"
+ "q>{ka}|{virama};"
+ "gh>{gha}|{virama};"
+ "g>{ga}|{virama};"
+ "ng>{nga}|{virama};"
+ "ch>{cha}|{virama};"
+ "c>{ca}|{virama};"
+ "jh>{jha}|{virama};"
+ "j>{ja}|{virama};"
+ "ny>{nya}|{virama};"
+ "tth>{ttha}|{virama};"
+ "tt>{tta}|{virama};"
+ "ddh>{ddha}|{virama};"
+ "dd>{dda}|{virama};"
+ "nn>{nna}|{virama};"
+ "th>{tha}|{virama};"
+ "t>{ta}|{virama};"
+ "dh>{dha}|{virama};"
+ "d>{da}|{virama};"
+ "n>{na}|{virama};"
+ "ph>{pha}|{virama};"
+ "p>{pa}|{virama};"
+ "bh>{bha}|{virama};"
+ "b>{ba}|{virama};"
+ "m>{ma}|{virama};"
+ "y>{ya}|{virama};"
+ "r>{ra}|{virama};"
+ "l>{la}|{virama};"
+ "v>{va}|{virama};"
+ "f>{va}|{virama};"
+ "w>{va}|{virama};"
+ "sh>{sha}|{virama};"
+ "ss>{ssa}|{virama};"
+ "s>{sa}|{virama};"
+ "z>{sa}|{virama};"
+ "h>{ha}|{virama};"
+ "kh>$kha|$virama;"
+ "k>$ka|$virama;"
+ "q>$ka|$virama;"
+ "gh>$gha|$virama;"
+ "g>$ga|$virama;"
+ "ng>$nga|$virama;"
+ "ch>$cha|$virama;"
+ "c>$ca|$virama;"
+ "jh>$jha|$virama;"
+ "j>$ja|$virama;"
+ "ny>$nya|$virama;"
+ "tth>$ttha|$virama;"
+ "tt>$tta|$virama;"
+ "ddh>$ddha|$virama;"
+ "dd>$dda|$virama;"
+ "nn>$nna|$virama;"
+ "th>$tha|$virama;"
+ "t>$ta|$virama;"
+ "dh>$dha|$virama;"
+ "d>$da|$virama;"
+ "n>$na|$virama;"
+ "ph>$pha|$virama;"
+ "p>$pa|$virama;"
+ "bh>$bha|$virama;"
+ "b>$ba|$virama;"
+ "m>$ma|$virama;"
+ "y>$ya|$virama;"
+ "r>$ra|$virama;"
+ "l>$la|$virama;"
+ "v>$va|$virama;"
+ "f>$va|$virama;"
+ "w>$va|$virama;"
+ "sh>$sha|$virama;"
+ "ss>$ssa|$virama;"
+ "s>$sa|$virama;"
+ "z>$sa|$virama;"
+ "h>$ha|$virama;"
+ ".>{danda};"
+ "{danda}.>{doubleDanda};"
+ "{depVowelAbove})~>{bindu};"
+ "{depVowelBelow})~>{candrabindu};"
+ ".>$danda;"
+ "$danda.>$doubleDanda;"
+ "$depVowelAbove{~>$bindu;"
+ "$depVowelBelow{~>$candrabindu;"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> $ka$virama ai -> $ka$ai
+ "{virama}aa>{aa};"
+ "{virama}ai>{ai};"
+ "{virama}au>{au};"
+ "{virama}ii>{ii};"
+ "{virama}i>{i};"
+ "{virama}uu>{uu};"
+ "{virama}u>{u};"
+ "{virama}rrh>{rrh};"
+ "{virama}rh>{rh};"
+ "{virama}lh>{lh};"
+ "{virama}e>{e};"
+ "{virama}o>{o};"
+ "{virama}a>;"
+ "$virama aa>$aa;"
+ "$virama ai>$ai;"
+ "$virama au>$au;"
+ "$virama ii>$ii;"
+ "$virama i>$i;"
+ "$virama uu>$uu;"
+ "$virama u>$u;"
+ "$virama rrh>$rrh;"
+ "$virama rh>$rh;"
+ "$virama lh>$lh;"
+ "$virama e>$e;"
+ "$virama o>$o;"
+ "$virama a>;"
// otherwise convert independent forms when separated by ': k'ai -> $ka$virama$wai
+ "{virama}''aa>{waa};"
+ "{virama}''ai>{wai};"
+ "{virama}''au>{wau};"
+ "{virama}''ii>{wii};"
+ "{virama}''i>{wi};"
+ "{virama}''uu>{wuu};"
+ "{virama}''u>{wu};"
+ "{virama}''rrh>{wrr};"
+ "{virama}''rh>{wr};"
+ "{virama}''lh>{wl};"
+ "{virama}''e>{we};"
+ "{virama}''o>{wo};"
+ "{virama}''a>{wa};"
+ "$virama''aa>$waa;"
+ "$virama''ai>$wai;"
+ "$virama''au>$wau;"
+ "$virama''ii>$wii;"
+ "$virama''i>$wi;"
+ "$virama''uu>$wuu;"
+ "$virama''u>$wu;"
+ "$virama''rrh>$wrr;"
+ "$virama''rh>$wr;"
+ "$virama''lh>$wl;"
+ "$virama''e>$we;"
+ "$virama''o>$wo;"
+ "$virama''a>$wa;"
+ "{virama}({endThing}>;"
+ "$virama}$endThing>;"
// convert any left-over apostrophes used for separation
@ -258,163 +259,163 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
// special forms with no good conversion
+ "mm<{bindu};"
+ "x<{visarga};"
+ "mm<$bindu;"
+ "x<$visarga;"
// normal consonants
+ "kh<{kha}(&;"
+ "kha<{kha};"
+ "k''<{ka}{virama}({ha};"
+ "k<{ka}(&;"
+ "ka<{ka};"
+ "gh<{gha}(&;"
+ "gha<{gha};"
+ "g''<{ga}{virama}({ha};"
+ "g<{ga}(&;"
+ "ga<{ga};"
+ "ng<{nga}(&;"
+ "nga<{nga};"
+ "ch<{cha}(&;"
+ "cha<{cha};"
+ "c''<{ca}{virama}({ha};"
+ "c<{ca}(&;"
+ "ca<{ca};"
+ "jh<{jha}(&;"
+ "jha<{jha};"
+ "j''<{ja}{virama}({ha};"
+ "j<{ja}(&;"
+ "ja<{ja};"
+ "ny<{nya}(&;"
+ "nya<{nya};"
+ "tth<{ttha}(&;"
+ "ttha<{ttha};"
+ "tt''<{tta}{virama}({ha};"
+ "tt<{tta}(&;"
+ "tta<{tta};"
+ "ddh<{ddha}(&;"
+ "ddha<{ddha};"
+ "dd''<{dda}(&{ha};"
+ "dd<{dda}(&;"
+ "dda<{dda};"
+ "dh<{dha}(&;"
+ "dha<{dha};"
+ "d''<{da}{virama}({ha};"
+ "d''<{da}{virama}({ddha};"
+ "d''<{da}{virama}({dda};"
+ "d''<{da}{virama}({dha};"
+ "d''<{da}{virama}({da};"
+ "d<{da}(&;"
+ "da<{da};"
+ "th<{tha}(&;"
+ "tha<{tha};"
+ "t''<{ta}{virama}({ha};"
+ "t''<{ta}{virama}({ttha};"
+ "t''<{ta}{virama}({tta};"
+ "t''<{ta}{virama}({tha};"
+ "t''<{ta}{virama}({ta};"
+ "t<{ta}(&;"
+ "ta<{ta};"
+ "n''<{na}{virama}({ga};"
+ "n''<{na}{virama}({ya};"
+ "n<{na}(&;"
+ "na<{na};"
+ "ph<{pha}(&;"
+ "pha<{pha};"
+ "p''<{pa}{virama}({ha};"
+ "p<{pa}(&;"
+ "pa<{pa};"
+ "bh<{bha}(&;"
+ "bha<{bha};"
+ "b''<{ba}{virama}({ha};"
+ "b<{ba}(&;"
+ "ba<{ba};"
+ "m''<{ma}{virama}({ma};"
+ "m''<{ma}{virama}({bindu};"
+ "m<{ma}(&;"
+ "ma<{ma};"
+ "y<{ya}(&;"
+ "ya<{ya};"
+ "r''<{ra}{virama}({ha};"
+ "r<{ra}(&;"
+ "ra<{ra};"
+ "l''<{la}{virama}({ha};"
+ "l<{la}(&;"
+ "la<{la};"
+ "v<{va}(&;"
+ "va<{va};"
+ "sh<{sha}(&;"
+ "sha<{sha};"
+ "ss<{ssa}(&;"
+ "ssa<{ssa};"
+ "s''<{sa}{virama}({ha};"
+ "s''<{sa}{virama}({sha};"
+ "s''<{sa}{virama}({ssa};"
+ "s''<{sa}{virama}({sa};"
+ "s<{sa}(&;"
+ "sa<{sa};"
+ "h<{ha}(&;"
+ "ha<{ha};"
+ "kh<$kha}&;"
+ "kha<$kha;"
+ "k''<$ka$virama}$ha;"
+ "k<$ka}&;"
+ "ka<$ka;"
+ "gh<$gha}&;"
+ "gha<$gha;"
+ "g''<$ga$virama}$ha;"
+ "g<$ga}&;"
+ "ga<$ga;"
+ "ng<$nga}&;"
+ "nga<$nga;"
+ "ch<$cha}&;"
+ "cha<$cha;"
+ "c''<$ca$virama}$ha;"
+ "c<$ca}&;"
+ "ca<$ca;"
+ "jh<$jha}&;"
+ "jha<$jha;"
+ "j''<$ja$virama}$ha;"
+ "j<$ja}&;"
+ "ja<$ja;"
+ "ny<$nya}&;"
+ "nya<$nya;"
+ "tth<$ttha}&;"
+ "ttha<$ttha;"
+ "tt''<$tta$virama}$ha;"
+ "tt<$tta}&;"
+ "tta<$tta;"
+ "ddh<$ddha}&;"
+ "ddha<$ddha;"
+ "dd''<$dda}&$ha;"
+ "dd<$dda}&;"
+ "dda<$dda;"
+ "dh<$dha}&;"
+ "dha<$dha;"
+ "d''<$da$virama}$ha;"
+ "d''<$da$virama}$ddha;"
+ "d''<$da$virama}$dda;"
+ "d''<$da$virama}$dha;"
+ "d''<$da$virama}$da;"
+ "d<$da}&;"
+ "da<$da;"
+ "th<$tha}&;"
+ "tha<$tha;"
+ "t''<$ta$virama}$ha;"
+ "t''<$ta$virama}$ttha;"
+ "t''<$ta$virama}$tta;"
+ "t''<$ta$virama}$tha;"
+ "t''<$ta$virama}$ta;"
+ "t<$ta}&;"
+ "ta<$ta;"
+ "n''<$na$virama}$ga;"
+ "n''<$na$virama}$ya;"
+ "n<$na}&;"
+ "na<$na;"
+ "ph<$pha}&;"
+ "pha<$pha;"
+ "p''<$pa$virama}$ha;"
+ "p<$pa}&;"
+ "pa<$pa;"
+ "bh<$bha}&;"
+ "bha<$bha;"
+ "b''<$ba$virama}$ha;"
+ "b<$ba}&;"
+ "ba<$ba;"
+ "m''<$ma$virama}$ma;"
+ "m''<$ma$virama}$bindu;"
+ "m<$ma}&;"
+ "ma<$ma;"
+ "y<$ya}&;"
+ "ya<$ya;"
+ "r''<$ra$virama}$ha;"
+ "r<$ra}&;"
+ "ra<$ra;"
+ "l''<$la$virama}$ha;"
+ "l<$la}&;"
+ "la<$la;"
+ "v<$va}&;"
+ "va<$va;"
+ "sh<$sha}&;"
+ "sha<$sha;"
+ "ss<$ssa}&;"
+ "ssa<$ssa;"
+ "s''<$sa$virama}$ha;"
+ "s''<$sa$virama}$sha;"
+ "s''<$sa$virama}$ssa;"
+ "s''<$sa$virama}$sa;"
+ "s<$sa}&;"
+ "sa<$sa;"
+ "h<$ha}&;"
+ "ha<$ha;"
// dependent vowels (should never occur except following consonants)
+ "aa<{aa};"
+ "ai<{ai};"
+ "au<{au};"
+ "ii<{ii};"
+ "i<{i};"
+ "uu<{uu};"
+ "u<{u};"
+ "rrh<{rrh};"
+ "rh<{rh};"
+ "lh<{lh};"
+ "e<{e};"
+ "o<{o};"
+ "aa<$aa;"
+ "ai<$ai;"
+ "au<$au;"
+ "ii<$ii;"
+ "i<$i;"
+ "uu<$uu;"
+ "u<$u;"
+ "rrh<$rrh;"
+ "rh<$rh;"
+ "lh<$lh;"
+ "e<$e;"
+ "o<$o;"
// independent vowels (when following consonants)
+ "''aa<a){waa};"
+ "''aa<%){waa};"
+ "''ai<a){wai};"
+ "''ai<%){wai};"
+ "''au<a){wau};"
+ "''au<%){wau};"
+ "''ii<a){wii};"
+ "''ii<%){wii};"
+ "''i<a){wi};"
+ "''i<%){wi};"
+ "''uu<a){wuu};"
+ "''uu<%){wuu};"
+ "''u<a){wu};"
+ "''u<%){wu};"
+ "''rrh<%){wrr};"
+ "''rh<%){wr};"
+ "''lh<%){wl};"
+ "''e<%){we};"
+ "''o<%){wo};"
+ "''a<a){wa};"
+ "''a<%){wa};"
+ "''aa<a{$waa;"
+ "''aa<%{$waa;"
+ "''ai<a{$wai;"
+ "''ai<%{$wai;"
+ "''au<a{$wau;"
+ "''au<%{$wau;"
+ "''ii<a{$wii;"
+ "''ii<%{$wii;"
+ "''i<a{$wi;"
+ "''i<%{$wi;"
+ "''uu<a{$wuu;"
+ "''uu<%{$wuu;"
+ "''u<a{$wu;"
+ "''u<%{$wu;"
+ "''rrh<%{$wrr;"
+ "''rh<%{$wr;"
+ "''lh<%{$wl;"
+ "''e<%{$we;"
+ "''o<%{$wo;"
+ "''a<a{$wa;"
+ "''a<%{$wa;"
// independent vowels (otherwise)
+ "aa<{waa};"
+ "ai<{wai};"
+ "au<{wau};"
+ "ii<{wii};"
+ "i<{wi};"
+ "uu<{wuu};"
+ "u<{wu};"
+ "rrh<{wrr};"
+ "rh<{wr};"
+ "lh<{wl};"
+ "e<{we};"
+ "o<{wo};"
+ "a<{wa};"
+ "aa<$waa;"
+ "ai<$wai;"
+ "au<$wau;"
+ "ii<$wii;"
+ "i<$wi;"
+ "uu<$wuu;"
+ "u<$wu;"
+ "rrh<$wrr;"
+ "rh<$wr;"
+ "lh<$wl;"
+ "e<$we;"
+ "o<$wo;"
+ "a<$wa;"
// blow away any remaining viramas
+ "<{virama};"
+ "<$virama;"
}
};
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Greek.java,v $
* $Date: 2000/03/10 04:07:31 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -56,131 +56,133 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// and for conditionals.
// ==============================================
+ "$quote=\";"
// Latin Letters
+ "E-MACRON=\u0112;"
+ "e-macron=\u0113;"
+ "O-MACRON=\u014C;"
+ "o-macron=\u014D;"
+ "Y-UMLAUT=\u0178;"
+ "y-umlaut=\u00FF;"
+ "$E_MACRON=\u0112;"
+ "$e_macron=\u0113;"
+ "$O_MACRON=\u014C;"
+ "$o_macron=\u014D;"
+ "$Y_UMLAUT=\u0178;"
+ "$y_umlaut=\u00FF;"
//! // with real accents.
//! + "E-MACRON-ACUTE=\u0112\u0301;"
//! + "e-macron-acute=\u0113\u0301;"
//! + "O-MACRON-ACUTE=\u014C\u0301;"
//! + "o-macron-acute=\u014D\u0301;"
//! + "y-umlaut-acute=\u00FF\u0301;"
//! + "\u00ef-acute=\u00ef\u0301;"
//! + "\u00fc-acute=\u00fc\u0301;"
//! + "$E_MACRON_ACUTE=\u0112\u0301;"
//! + "$e_macron_acute=\u0113\u0301;"
//! + "$O_MACRON_ACUTE=\u014C\u0301;"
//! + "$o_macron_acute=\u014D\u0301;"
//! + "$y_umlaut_acute=\u00FF\u0301;"
//! + "$u00ef_acute=\u00ef\u0301;"
//! + "$u00fc_acute=\u00fc\u0301;"
//! //
// single letter equivalents
+ "E-MACRON-ACUTE=\u00CA;"
+ "e-macron-acute=\u00EA;"
+ "O-MACRON-ACUTE=\u00D4;"
+ "o-macron-acute=\u00F4;"
+ "y-umlaut-acute=\u0177;"
+ "\u00ef-acute=\u00EE;"
+ "\u00fc-acute=\u00FB;"
+ "$E_MACRON_ACUTE=\u00CA;"
+ "$e_macron_acute=\u00EA;"
+ "$O_MACRON_ACUTE=\u00D4;"
+ "$o_macron_acute=\u00F4;"
+ "$y_umlaut_acute=\u0177;"
+ "$u00ef_acute=\u00EE;"
+ "$u00fc_acute=\u00FB;"
// Greek Letters
+ "ALPHA=\u0391;"
+ "BETA=\u0392;"
+ "GAMMA=\u0393;"
+ "DELTA=\u0394;"
+ "EPSILON=\u0395;"
+ "ZETA=\u0396;"
+ "ETA=\u0397;"
+ "THETA=\u0398;"
+ "IOTA=\u0399;"
+ "KAPPA=\u039A;"
+ "LAMBDA=\u039B;"
+ "MU=\u039C;"
+ "NU=\u039D;"
+ "XI=\u039E;"
+ "OMICRON=\u039F;"
+ "PI=\u03A0;"
+ "RHO=\u03A1;"
+ "SIGMA=\u03A3;"
+ "TAU=\u03A4;"
+ "YPSILON=\u03A5;"
+ "PHI=\u03A6;"
+ "CHI=\u03A7;"
+ "PSI=\u03A8;"
+ "OMEGA=\u03A9;"
+ "$ALPHA=\u0391;"
+ "$BETA=\u0392;"
+ "$GAMMA=\u0393;"
+ "$DELTA=\u0394;"
+ "$EPSILON=\u0395;"
+ "$ZETA=\u0396;"
+ "$ETA=\u0397;"
+ "$THETA=\u0398;"
+ "$IOTA=\u0399;"
+ "$KAPPA=\u039A;"
+ "$LAMBDA=\u039B;"
+ "$MU=\u039C;"
+ "$NU=\u039D;"
+ "$XI=\u039E;"
+ "$OMICRON=\u039F;"
+ "$PI=\u03A0;"
+ "$RHO=\u03A1;"
+ "$SIGMA=\u03A3;"
+ "$TAU=\u03A4;"
+ "$YPSILON=\u03A5;"
+ "$PHI=\u03A6;"
+ "$CHI=\u03A7;"
+ "$PSI=\u03A8;"
+ "$OMEGA=\u03A9;"
+ "ALPHA+=\u0386;"
+ "EPSILON+=\u0388;"
+ "ETA+=\u0389;"
+ "IOTA+=\u038A;"
+ "OMICRON+=\u038C;"
+ "YPSILON+=\u038E;"
+ "OMEGA+=\u038F;"
+ "IOTA_DIAERESIS=\u03AA;"
+ "YPSILON_DIAERESIS=\u03AB;"
+ "$ALPHA2=\u0386;"
+ "$EPSILON2=\u0388;"
+ "$ETA2=\u0389;"
+ "$IOTA2=\u038A;"
+ "$OMICRON2=\u038C;"
+ "$YPSILON2=\u038E;"
+ "$OMEGA2=\u038F;"
+ "$IOTA_DIAERESIS=\u03AA;"
+ "$YPSILON_DIAERESIS=\u03AB;"
+ "alpha=\u03B1;"
+ "beta=\u03B2;"
+ "gamma=\u03B3;"
+ "delta=\u03B4;"
+ "epsilon=\u03B5;"
+ "zeta=\u03B6;"
+ "eta=\u03B7;"
+ "theta=\u03B8;"
+ "iota=\u03B9;"
+ "kappa=\u03BA;"
+ "lambda=\u03BB;"
+ "mu=\u03BC;"
+ "nu=\u03BD;"
+ "xi=\u03BE;"
+ "omicron=\u03BF;"
+ "pi=\u03C0;"
+ "rho=\u03C1;"
+ "sigma=\u03C3;"
+ "tau=\u03C4;"
+ "ypsilon=\u03C5;"
+ "phi=\u03C6;"
+ "chi=\u03C7;"
+ "psi=\u03C8;"
+ "omega=\u03C9;"
+ "$alpha=\u03B1;"
+ "$beta=\u03B2;"
+ "$gamma=\u03B3;"
+ "$delta=\u03B4;"
+ "$epsilon=\u03B5;"
+ "$zeta=\u03B6;"
+ "$eta=\u03B7;"
+ "$theta=\u03B8;"
+ "$iota=\u03B9;"
+ "$kappa=\u03BA;"
+ "$lambda=\u03BB;"
+ "$mu=\u03BC;"
+ "$nu=\u03BD;"
+ "$xi=\u03BE;"
+ "$omicron=\u03BF;"
+ "$pi=\u03C0;"
+ "$rho=\u03C1;"
+ "$sigma=\u03C3;"
+ "$tau=\u03C4;"
+ "$ypsilon=\u03C5;"
+ "$phi=\u03C6;"
+ "$chi=\u03C7;"
+ "$psi=\u03C8;"
+ "$omega=\u03C9;"
//forms
+ "alpha+=\u03AC;"
+ "epsilon+=\u03AD;"
+ "eta+=\u03AE;"
+ "iota+=\u03AF;"
+ "omicron+=\u03CC;"
+ "ypsilon+=\u03CD;"
+ "omega+=\u03CE;"
+ "iota_diaeresis=\u03CA;"
+ "ypsilon_diaeresis=\u03CB;"
+ "iota_diaeresis+=\u0390;"
+ "ypsilon_diaeresis+=\u03B0;"
+ "sigma+=\u03C2;"
+ "$alpha2=\u03AC;"
+ "$epsilon2=\u03AD;"
+ "$eta2=\u03AE;"
+ "$iota2=\u03AF;"
+ "$omicron2=\u03CC;"
+ "$ypsilon2=\u03CD;"
+ "$omega2=\u03CE;"
+ "$iota_diaeresis=\u03CA;"
+ "$ypsilon_diaeresis=\u03CB;"
+ "$iota_diaeresis2=\u0390;"
+ "$ypsilon_diaeresis2=\u03B0;"
+ "$sigma2=\u03C2;"
// Variables for conditional mappings
// Use lowercase for all variable names, to allow cut/paste below.
+ "letter=[~[:Lu:][:Ll:]];"
+ "lower=[[:Ll:]];"
+ "softener=[eiyEIY];"
+ "vowel=[aeiouAEIOU"
+ "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
+ "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
+ "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
+ "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
+ "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
+ "{iota_diaeresis}{ypsilon_diaeresis}"
+ "{iota_diaeresis+}{ypsilon_diaeresis+}"
+ "$letter=[~[:Lu:][:Ll:]];"
+ "$lower=[[:Ll:]];"
+ "$softener=[eiyEIY];"
+ "$vowel=[aeiouAEIOU"
+ "$ALPHA$EPSILON$ETA$IOTA$OMICRON$YPSILON$OMEGA"
+ "$ALPHA2$EPSILON2$ETA2$IOTA2$OMICRON2$YPSILON2$OMEGA2"
+ "$IOTA_DIAERESIS$YPSILON_DIAERESIS"
+ "$alpha$epsilon$eta$iota$omicron$ypsilon$omega"
+ "$alpha2$epsilon2$eta2$iota2$omicron2$ypsilon2$omega2"
+ "$iota_diaeresis$ypsilon_diaeresis"
+ "$iota_diaeresis2$ypsilon_diaeresis2"
+ "];"
+ "n-gamma=[GKXCgkxc];"
+ "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
+ "pp=[Pp];"
+ "$n_gamma=[GKXCgkxc];"
+ "$gamma_n=[$GAMMA$KAPPA$CHI$XI$gamma$kappa$chi$xi];"
+ "$pp=[Pp];"
// ==============================================
// Rules
@ -189,10 +191,10 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// not be copied when duplicating the lowercase
// ==============================================
+ "Th <> {THETA}({lower};"
+ "Ph <> {PHI}({lower};"
+ "Ch <> {CHI}({lower};"
//masked: + "Ps<{PHI}({lower};"
+ "Th <> $THETA}$lower;"
+ "Ph <> $PHI}$lower;"
+ "Ch <> $CHI}$lower;"
//masked: + "Ps<$PHI}$lower;"
// Because there are no uppercase forms for final sigma,
// we had to move all the sigma rules up here.
@ -203,8 +205,8 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// use special form for s
+ "''S <> ({pp}) {SIGMA} ;" // handle PS
+ "S <> {SIGMA};"
+ "''S <> $pp{$SIGMA;" // handle PS
+ "S <> $SIGMA;"
// The following are a bit tricky. 's' takes two forms in Greek:
// final or non-final.
@ -213,29 +215,29 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// We use 's to separate p and s (otherwise ps is one letter)
// so, we break out the following forms:
+ "''s < ({pp}) {sigma} ({letter});"
+ "s < {sigma} ({letter});"
+ "~s < {sigma} ;"
+ "''s < $pp{$sigma}$letter;"
+ "s < $sigma}$letter;"
+ "~s < $sigma;"
+ "~s < {sigma+} ({letter});"
+ "''s < ({pp}) {sigma+} ;"
+ "s < {sigma+} ;"
+ "~s < $sigma2}$letter;"
+ "''s < $pp{$sigma2;"
+ "s < $sigma2;"
+ "~s ({letter}) > {sigma+};"
+ "~s > {sigma};"
+ "''s ({letter}) > {sigma};"
+ "''s > {sigma+};"
+ "s ({letter}) > {sigma};"
+ "s > {sigma+};"
+ "~s }$letter>$sigma2;"
+ "~s > $sigma;"
+ "''s }$letter>$sigma;"
+ "''s > $sigma2;"
+ "s }$letter>$sigma;"
+ "s > $sigma2;"
// because there are no uppercase forms, had to move these up too.
+ "i\"`>{iota_diaeresis+};"
+ "y\"`>{ypsilon_diaeresis+};"
+ "i$quote`>$iota_diaeresis2;"
+ "y$quote`>$ypsilon_diaeresis2;"
+ "{\u00ef-acute} <> {iota_diaeresis+};"
+ "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
+ "{y-umlaut-acute} <> {ypsilon_diaeresis+};"
+ "$u00ef_acute<>$iota_diaeresis2;"
+ "$u00fc_acute<>$vowel{$ypsilon_diaeresis2;"
+ "$y_umlaut_acute<>$ypsilon_diaeresis2;"
// ==============================================
// Uppercase Forms.
@ -244,62 +246,62 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// Typing variants, in case the keyboard doesn't have accents
+ "A`>{ALPHA+};"
+ "E`>{EPSILON+};"
+ "EE`>{ETA+};"
+ "EE>{ETA};"
+ "I`>{IOTA+};"
+ "O`>{OMICRON+};"
+ "OO`>{OMEGA+};"
+ "OO>{OMEGA};"
+ "I\">{IOTA_DIAERESIS};"
+ "Y\">{YPSILON_DIAERESIS};"
+ "A`>$ALPHA2;"
+ "E`>$EPSILON2;"
+ "EE`>$ETA2;"
+ "EE>$ETA;"
+ "I`>$IOTA2;"
+ "O`>$OMICRON2;"
+ "OO`>$OMEGA2;"
+ "OO>$OMEGA;"
+ "I$quote>$IOTA_DIAERESIS;"
+ "Y$quote>$YPSILON_DIAERESIS;"
// Basic Letters
+ "A<>{ALPHA};"
+ "\u00c1<>{ALPHA+};"
+ "B<>{BETA};"
+ "N ({n-gamma}) <> {GAMMA} ({gamma-n});"
+ "G<>{GAMMA};"
+ "D<>{DELTA};"
+ "''E <> ([Ee]){EPSILON};" // handle EE
+ "E<>{EPSILON};"
+ "\u00c9<>{EPSILON+};"
+ "Z<>{ZETA};"
+ "{E-MACRON-ACUTE}<>{ETA+};"
+ "{E-MACRON}<>{ETA};"
+ "TH<>{THETA};"
+ "I<>{IOTA};"
+ "\u00cd<>{IOTA+};"
+ "\u00cf<>{IOTA_DIAERESIS};"
+ "K<>{KAPPA};"
+ "L<>{LAMBDA};"
+ "M<>{MU};"
+ "N'' <> {NU} ({gamma-n});"
+ "N<>{NU};"
+ "X<>{XI};"
+ "''O <> ([Oo]) {OMICRON};" // handle OO
+ "O<>{OMICRON};"
+ "\u00d3<>{OMICRON+};"
+ "PH<>{PHI};" // needs ordering before P
+ "PS<>{PSI};" // needs ordering before P
+ "P<>{PI};"
+ "R<>{RHO};"
+ "T<>{TAU};"
+ "U <> ({vowel}) {YPSILON};"
+ "\u00da <> ({vowel}) {YPSILON+};"
+ "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
+ "Y<>{YPSILON};"
+ "\u00dd<>{YPSILON+};"
+ "{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
+ "CH<>{CHI};"
+ "{O-MACRON-ACUTE}<>{OMEGA+};"
+ "{O-MACRON}<>{OMEGA};"
+ "A<>$ALPHA;"
+ "\u00c1<>$ALPHA2;"
+ "B<>$BETA;"
+ "N }$n_gamma<>$GAMMA}$gamma_n;"
+ "G<>$GAMMA;"
+ "D<>$DELTA;"
+ "''E <> [Ee]{$EPSILON;" // handle EE
+ "E<>$EPSILON;"
+ "\u00c9<>$EPSILON2;"
+ "Z<>$ZETA;"
+ "$E_MACRON_ACUTE<>$ETA2;"
+ "$E_MACRON<>$ETA;"
+ "TH<>$THETA;"
+ "I<>$IOTA;"
+ "\u00cd<>$IOTA2;"
+ "\u00cf<>$IOTA_DIAERESIS;"
+ "K<>$KAPPA;"
+ "L<>$LAMBDA;"
+ "M<>$MU;"
+ "N'' <> $NU}$gamma_n;"
+ "N<>$NU;"
+ "X<>$XI;"
+ "''O <> [Oo]{ $OMICRON;" // handle OO
+ "O<>$OMICRON;"
+ "\u00d3<>$OMICRON2;"
+ "PH<>$PHI;" // needs ordering before P
+ "PS<>$PSI;" // needs ordering before P
+ "P<>$PI;"
+ "R<>$RHO;"
+ "T<>$TAU;"
+ "U <> $vowel{$YPSILON;"
+ "\u00da <> $vowel{$YPSILON2;"
+ "\u00dc <> $vowel{$YPSILON_DIAERESIS;"
+ "Y<>$YPSILON;"
+ "\u00dd<>$YPSILON2;"
+ "$Y_UMLAUT<>$YPSILON_DIAERESIS;"
+ "CH<>$CHI;"
+ "$O_MACRON_ACUTE<>$OMEGA2;"
+ "$O_MACRON<>$OMEGA;"
// Extra English Letters. Mapped for completeness
+ "C({softener})>|S;"
+ "C}$softener>|S;"
+ "C>|K;"
+ "F>|PH;"
+ "H>|CH;"
@ -314,62 +316,62 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
// typing variants, in case the keyboard doesn't have accents
+ "a`>{alpha+};"
+ "e`>{epsilon+};"
+ "ee`>{eta+};"
+ "ee>{eta};"
+ "i`>{iota+};"
+ "o`>{omicron+};"
+ "oo`>{omega+};"
+ "oo>{omega};"
+ "i\">{iota_diaeresis};"
+ "y\">{ypsilon_diaeresis};"
+ "a`>$alpha2;"
+ "e`>$epsilon2;"
+ "ee`>$eta2;"
+ "ee>$eta;"
+ "i`>$iota2;"
+ "o`>$omicron2;"
+ "oo`>$omega2;"
+ "oo>$omega;"
+ "i$quote>$iota_diaeresis;"
+ "y$quote>$ypsilon_diaeresis;"
// basic letters
+ "a<>{alpha};"
+ "\u00e1<>{alpha+};"
+ "b<>{beta};"
+ "n ({n-gamma}) <> {gamma} ({gamma-n});"
+ "g<>{gamma};"
+ "d<>{delta};"
+ "''e <> ([Ee]){epsilon};" // handle EE
+ "e<>{epsilon};"
+ "\u00e9<>{epsilon+};"
+ "z<>{zeta};"
+ "{e-macron-acute}<>{eta+};"
+ "{e-macron}<>{eta};"
+ "th<>{theta};"
+ "i<>{iota};"
+ "\u00ed<>{iota+};"
+ "\u00ef<>{iota_diaeresis};"
+ "k<>{kappa};"
+ "l<>{lambda};"
+ "m<>{mu};"
+ "n'' <> {nu} ({gamma-n});"
+ "n<>{nu};"
+ "x<>{xi};"
+ "''o <> ([Oo]) {omicron};" // handle OO
+ "o<>{omicron};"
+ "\u00f3<>{omicron+};"
+ "ph<>{phi};" // needs ordering before p
+ "ps<>{psi};" // needs ordering before p
+ "p<>{pi};"
+ "r<>{rho};"
+ "t<>{tau};"
+ "u <> ({vowel}){ypsilon};"
+ "\u00fa <> ({vowel}){ypsilon+};"
+ "\u00fc <> ({vowel}){ypsilon_diaeresis};"
+ "y<>{ypsilon};"
+ "\u00fd<>{ypsilon+};"
+ "{y-umlaut}<>{ypsilon_diaeresis};"
+ "ch<>{chi};"
+ "{o-macron-acute}<>{omega+};"
+ "{o-macron}<>{omega};"
+ "a<>$alpha;"
+ "\u00e1<>$alpha2;"
+ "b<>$beta;"
+ "n }$n_gamma<>$gamma}$gamma_n;"
+ "g<>$gamma;"
+ "d<>$delta;"
+ "''e <> [Ee]{$epsilon;" // handle EE
+ "e<>$epsilon;"
+ "\u00e9<>$epsilon2;"
+ "z<>$zeta;"
+ "$e_macron_acute<>$eta2;"
+ "$e_macron<>$eta;"
+ "th<>$theta;"
+ "i<>$iota;"
+ "\u00ed<>$iota2;"
+ "\u00ef<>$iota_diaeresis;"
+ "k<>$kappa;"
+ "l<>$lambda;"
+ "m<>$mu;"
+ "n'' <> $nu}$gamma_n;"
+ "n<>$nu;"
+ "x<>$xi;"
+ "''o <> [Oo]{ $omicron;" // handle OO
+ "o<>$omicron;"
+ "\u00f3<>$omicron2;"
+ "ph<>$phi;" // needs ordering before p
+ "ps<>$psi;" // needs ordering before p
+ "p<>$pi;"
+ "r<>$rho;"
+ "t<>$tau;"
+ "u <> $vowel{$ypsilon;"
+ "\u00fa <> $vowel{$ypsilon2;"
+ "\u00fc <> $vowel{$ypsilon_diaeresis;"
+ "y<>$ypsilon;"
+ "\u00fd<>$ypsilon2;"
+ "$y_umlaut<>$ypsilon_diaeresis;"
+ "ch<>$chi;"
+ "$o_macron_acute<>$omega2;"
+ "$o_macron<>$omega;"
// extra english letters. mapped for completeness
+ "c({softener})>|s;"
+ "c}$softener>|s;"
+ "c>|k;"
+ "f>|ph;"
+ "h>|ch;"

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Hebrew.java,v $
* $Date: 2000/03/10 04:07:31 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -23,66 +23,66 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
{ "Rule",
//variable names, derived from the Unicode names.
"POINT_SHEVA=\u05B0;"
+ "POINT_HATAF_SEGOL=\u05B1;"
+ "POINT_HATAF_PATAH=\u05B2;"
+ "POINT_HATAF_QAMATS=\u05B3;"
+ "POINT_HIRIQ=\u05B4;"
+ "POINT_TSERE=\u05B5;"
+ "POINT_SEGOL=\u05B6;"
+ "POINT_PATAH=\u05B7;"
+ "POINT_QAMATS=\u05B8;"
+ "POINT_HOLAM=\u05B9;"
+ "POINT_QUBUTS=\u05BB;"
+ "POINT_DAGESH_OR_MAPIQ=\u05BC;"
+ "POINT_METEG=\u05BD;"
+ "PUNCTUATION_MAQAF=\u05BE;"
+ "POINT_RAFE=\u05BF;"
+ "PUNCTUATION_PASEQ=\u05C0;"
+ "POINT_SHIN_DOT=\u05C1;"
+ "POINT_SIN_DOT=\u05C2;"
+ "PUNCTUATION_SOF_PASUQ=\u05C3;"
+ "ALEF=\u05D0;"
+ "BET=\u05D1;"
+ "GIMEL=\u05D2;"
+ "DALET=\u05D3;"
+ "HE=\u05D4;"
+ "VAV=\u05D5;"
+ "ZAYIN=\u05D6;"
+ "HET=\u05D7;"
+ "TET=\u05D8;"
+ "YOD=\u05D9;"
+ "FINAL_KAF=\u05DA;"
+ "KAF=\u05DB;"
+ "LAMED=\u05DC;"
+ "FINAL_MEM=\u05DD;"
+ "MEM=\u05DE;"
+ "FINAL_NUN=\u05DF;"
+ "NUN=\u05E0;"
+ "SAMEKH=\u05E1;"
+ "AYIN=\u05E2;"
+ "FINAL_PE=\u05E3;"
+ "PE=\u05E4;"
+ "FINAL_TSADI=\u05E5;"
+ "TSADI=\u05E6;"
+ "QOF=\u05E7;"
+ "RESH=\u05E8;"
+ "SHIN=\u05E9;"
+ "TAV=\u05EA;"
+ "YIDDISH_DOUBLE_VAV=\u05F0;"
+ "YIDDISH_VAV_YOD=\u05F1;"
+ "YIDDISH_DOUBLE_YOD=\u05F2;"
+ "PUNCTUATION_GERESH=\u05F3;"
+ "PUNCTUATION_GERSHAYIM=\u05F4;"
"$POINT_SHEVA=\u05B0;"
+ "$POINT_HATAF_SEGOL=\u05B1;"
+ "$POINT_HATAF_PATAH=\u05B2;"
+ "$POINT_HATAF_QAMATS=\u05B3;"
+ "$POINT_HIRIQ=\u05B4;"
+ "$POINT_TSERE=\u05B5;"
+ "$POINT_SEGOL=\u05B6;"
+ "$POINT_PATAH=\u05B7;"
+ "$POINT_QAMATS=\u05B8;"
+ "$POINT_HOLAM=\u05B9;"
+ "$POINT_QUBUTS=\u05BB;"
+ "$POINT_DAGESH_OR_MAPIQ=\u05BC;"
+ "$POINT_METEG=\u05BD;"
+ "$PUNCTUATION_MAQAF=\u05BE;"
+ "$POINT_RAFE=\u05BF;"
+ "$PUNCTUATION_PASEQ=\u05C0;"
+ "$POINT_SHIN_DOT=\u05C1;"
+ "$POINT_SIN_DOT=\u05C2;"
+ "$PUNCTUATION_SOF_PASUQ=\u05C3;"
+ "$ALEF=\u05D0;"
+ "$BET=\u05D1;"
+ "$GIMEL=\u05D2;"
+ "$DALET=\u05D3;"
+ "$HE=\u05D4;"
+ "$VAV=\u05D5;"
+ "$ZAYIN=\u05D6;"
+ "$HET=\u05D7;"
+ "$TET=\u05D8;"
+ "$YOD=\u05D9;"
+ "$FINAL_KAF=\u05DA;"
+ "$KAF=\u05DB;"
+ "$LAMED=\u05DC;"
+ "$FINAL_MEM=\u05DD;"
+ "$MEM=\u05DE;"
+ "$FINAL_NUN=\u05DF;"
+ "$NUN=\u05E0;"
+ "$SAMEKH=\u05E1;"
+ "$AYIN=\u05E2;"
+ "$FINAL_PE=\u05E3;"
+ "$PE=\u05E4;"
+ "$FINAL_TSADI=\u05E5;"
+ "$TSADI=\u05E6;"
+ "$QOF=\u05E7;"
+ "$RESH=\u05E8;"
+ "$SHIN=\u05E9;"
+ "$TAV=\u05EA;"
+ "$YIDDISH_DOUBLE_VAV=\u05F0;"
+ "$YIDDISH_VAV_YOD=\u05F1;"
+ "$YIDDISH_DOUBLE_YOD=\u05F2;"
+ "$PUNCTUATION_GERESH=\u05F3;"
+ "$PUNCTUATION_GERSHAYIM=\u05F4;"
//wildcards
//The values can be anything we don't use in this file: start at E000.
+ "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
+ "$letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
+ "softvowel=[eiyEIY];"
+ "$softvowel=[eiyEIY];"
+ "vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
+ "$vowellike=[$ALEF$AYIN$YOD$VAV];"
//?>{POINT_SHEVA}
//?>{POINT_HATAF_SEGOL}
@ -104,117 +104,117 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
//?>{POINT_SIN_DOT}
//?>{PUNCTUATION_SOF_PASUQ}
+ "a>{ALEF};"
+ "A>{ALEF};"
+ "a>$ALEF;"
+ "A>$ALEF;"
+ "b>{BET};"
+ "B>{BET};"
+ "b>$BET;"
+ "B>$BET;"
+ "c({softvowel}>{SAMEKH};"
+ "C({softvowel}>{SAMEKH};"
+ "c({letter}>{KAF};"
+ "C({letter}>{KAF};"
+ "c>{FINAL_KAF};"
+ "C>{FINAL_KAF};"
+ "c}$softvowel>$SAMEKH;"
+ "C}$softvowel>$SAMEKH;"
+ "c}$letter>$KAF;"
+ "C}$letter>$KAF;"
+ "c>$FINAL_KAF;"
+ "C>$FINAL_KAF;"
+ "d>{DALET};"
+ "D>{DALET};"
+ "d>$DALET;"
+ "D>$DALET;"
+ "e>{AYIN};"
+ "E>{AYIN};"
+ "e>$AYIN;"
+ "E>$AYIN;"
+ "f({letter}>{PE};"
+ "f>{FINAL_PE};"
+ "F({letter}>{PE};"
+ "F>{FINAL_PE};"
+ "f}$letter>$PE;"
+ "f>$FINAL_PE;"
+ "F}$letter>$PE;"
+ "F>$FINAL_PE;"
+ "g>{GIMEL};"
+ "G>{GIMEL};"
+ "g>$GIMEL;"
+ "G>$GIMEL;"
+ "h>{HE};"
+ "H>{HE};"
+ "h>$HE;"
+ "H>$HE;"
+ "i>{YOD};"
+ "I>{YOD};"
+ "i>$YOD;"
+ "I>$YOD;"
+ "j>{DALET}{SHIN};"
+ "J>{DALET}{SHIN};"
+ "j>$DALET$SHIN;"
+ "J>$DALET$SHIN;"
+ "kH>{HET};"
+ "kh>{HET};"
+ "KH>{HET};"
+ "Kh>{HET};"
+ "k({letter}>{KAF};"
+ "K({letter}>{KAF};"
+ "k>{FINAL_KAF};"
+ "K>{FINAL_KAF};"
+ "kH>$HET;"
+ "kh>$HET;"
+ "KH>$HET;"
+ "Kh>$HET;"
+ "k}$letter>$KAF;"
+ "K}$letter>$KAF;"
+ "k>$FINAL_KAF;"
+ "K>$FINAL_KAF;"
+ "l>{LAMED};"
+ "L>{LAMED};"
+ "l>$LAMED;"
+ "L>$LAMED;"
+ "m({letter}>{MEM};"
+ "m>{FINAL_MEM};"
+ "M({letter}>{MEM};"
+ "M>{FINAL_MEM};"
+ "m}$letter>$MEM;"
+ "m>$FINAL_MEM;"
+ "M}$letter>$MEM;"
+ "M>$FINAL_MEM;"
+ "n({letter}>{NUN};"
+ "n>{FINAL_NUN};"
+ "N({letter}>{NUN};"
+ "N>{FINAL_NUN};"
+ "n}$letter>$NUN;"
+ "n>$FINAL_NUN;"
+ "N}$letter>$NUN;"
+ "N>$FINAL_NUN;"
+ "o>{VAV};"
+ "O>{VAV};"
+ "o>$VAV;"
+ "O>$VAV;"
+ "p({letter}>{PE};"
+ "p>{FINAL_PE};"
+ "P({letter}>{PE};"
+ "P>{FINAL_PE};"
+ "p}$letter>$PE;"
+ "p>$FINAL_PE;"
+ "P}$letter>$PE;"
+ "P>$FINAL_PE;"
+ "q>{QOF};"
+ "Q>{QOF};"
+ "q>$QOF;"
+ "Q>$QOF;"
+ "r>{RESH};"
+ "R>{RESH};"
+ "r>$RESH;"
+ "R>$RESH;"
+ "sH>{SHIN};"
+ "sh>{SHIN};"
+ "SH>{SHIN};"
+ "Sh>{SHIN};"
+ "s>{SAMEKH};"
+ "S>{SAMEKH};"
+ "sH>$SHIN;"
+ "sh>$SHIN;"
+ "SH>$SHIN;"
+ "Sh>$SHIN;"
+ "s>$SAMEKH;"
+ "S>$SAMEKH;"
+ "th>{TAV};"
+ "tH>{TAV};"
+ "TH>{TAV};"
+ "Th>{TAV};"
+ "tS({letter}>{TSADI};"
+ "ts({letter}>{TSADI};"
+ "Ts({letter}>{TSADI};"
+ "TS({letter}>{TSADI};"
+ "tS>{FINAL_TSADI};"
+ "ts>{FINAL_TSADI};"
+ "Ts>{FINAL_TSADI};"
+ "TS>{FINAL_TSADI};"
+ "t>{TET};"
+ "T>{TET};"
+ "th>$TAV;"
+ "tH>$TAV;"
+ "TH>$TAV;"
+ "Th>$TAV;"
+ "tS}$letter>$TSADI;"
+ "ts}$letter>$TSADI;"
+ "Ts}$letter>$TSADI;"
+ "TS}$letter>$TSADI;"
+ "tS>$FINAL_TSADI;"
+ "ts>$FINAL_TSADI;"
+ "Ts>$FINAL_TSADI;"
+ "TS>$FINAL_TSADI;"
+ "t>$TET;"
+ "T>$TET;"
+ "u>{VAV};"
+ "U>{VAV};"
+ "u>$VAV;"
+ "U>$VAV;"
+ "v>{VAV};"
+ "V>{VAV};"
+ "v>$VAV;"
+ "V>$VAV;"
+ "w>{VAV};"
+ "W>{VAV};"
+ "w>$VAV;"
+ "W>$VAV;"
+ "x>{KAF}{SAMEKH};"
+ "X>{KAF}{SAMEKH};"
+ "x>$KAF$SAMEKH;"
+ "X>$KAF$SAMEKH;"
+ "y>{YOD};"
+ "Y>{YOD};"
+ "y>$YOD;"
+ "Y>$YOD;"
+ "z>{ZAYIN};"
+ "Z>{ZAYIN};"
+ "z>$ZAYIN;"
+ "Z>$ZAYIN;"
//#?>{YIDDISH_DOUBLE_VAV}
//?>{YIDDISH_VAV_YOD}
@ -244,39 +244,39 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
//{POINT_SIN_DOT}>@
//{PUNCTUATION_SOF_PASUQ}>@
+ "a<{ALEF};"
+ "e<{AYIN};"
+ "b<{BET};"
+ "d<{DALET};"
+ "k<{FINAL_KAF};"
+ "m<{FINAL_MEM};"
+ "n<{FINAL_NUN};"
+ "p<{FINAL_PE};"
+ "ts<{FINAL_TSADI};"
+ "g<{GIMEL};"
+ "kh<{HET};"
+ "h<{HE};"
+ "k''<{KAF}({HE};"
+ "k<{KAF};"
+ "l<{LAMED};"
+ "m<{MEM};"
+ "n<{NUN};"
+ "p<{PE};"
+ "q<{QOF};"
+ "r<{RESH};"
+ "s''<{SAMEKH}({HE};"
+ "s<{SAMEKH};"
+ "sh<{SHIN};"
+ "th<{TAV};"
+ "t''<{TET}({HE};"
+ "t''<{TET}({SAMEKH};"
+ "t''<{TET}({SHIN};"
+ "t<{TET};"
+ "ts<{TSADI};"
+ "v<{VAV}({vowellike};"
+ "u<{VAV};"
+ "y<{YOD};"
+ "z<{ZAYIN};"
+ "a<$ALEF;"
+ "e<$AYIN;"
+ "b<$BET;"
+ "d<$DALET;"
+ "k<$FINAL_KAF;"
+ "m<$FINAL_MEM;"
+ "n<$FINAL_NUN;"
+ "p<$FINAL_PE;"
+ "ts<$FINAL_TSADI;"
+ "g<$GIMEL;"
+ "kh<$HET;"
+ "h<$HE;"
+ "k''<$KAF}$HE;"
+ "k<$KAF;"
+ "l<$LAMED;"
+ "m<$MEM;"
+ "n<$NUN;"
+ "p<$PE;"
+ "q<$QOF;"
+ "r<$RESH;"
+ "s''<$SAMEKH}$HE;"
+ "s<$SAMEKH;"
+ "sh<$SHIN;"
+ "th<$TAV;"
+ "t''<$TET}$HE;"
+ "t''<$TET}$SAMEKH;"
+ "t''<$TET}$SHIN;"
+ "t<$TET;"
+ "ts<$TSADI;"
+ "v<$VAV}$vowellike;"
+ "u<$VAV;"
+ "y<$YOD;"
+ "z<$ZAYIN;"
//{YIDDISH_DOUBLE_VAV}>@
//{YIDDISH_VAV_YOD}>@

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Jamo.java,v $
* $Date: 2000/03/10 04:07:31 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -23,19 +23,19 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
// VARIABLES
+ "initial=[\u1100-\u115F];"
+ "medial=[\u1160-\u11A7];"
+ "final=[\u11A8-\u11F9];" // added - aliu
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
+ "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
+ "ye=[yeYE];"
+ "ywe=[yweYWE];"
+ "yw=[ywYW];"
+ "nl=[nlNL];"
+ "gnl=[gnlGNL];"
+ "lsgb=[lsgbLSGB];"
+ "ywao=[ywaoYWAO];"
+ "bl=[blBL];"
+ "$initial=[\u1100-\u115F];"
+ "$medial=[\u1160-\u11A7];"
+ "$final=[\u11A8-\u11F9];" // added - aliu
+ "$vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
+ "$consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ$medial$final];"
+ "$ye=[yeYE];"
+ "$ywe=[yweYWE];"
+ "$yw=[ywYW];"
+ "$nl=[nlNL];"
+ "$gnl=[gnlGNL];"
+ "$lsgb=[lsgbLSGB];"
+ "$ywao=[ywaoYWAO];"
+ "$bl=[blBL];"
// RULES
@ -60,8 +60,8 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
// special insertion for funny sequences of vowels, and for empty consonant
+ "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
+ "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
+ "'' < $consonant{\u110B;" // insert a break between any consonant and the empty consonant.
+ "$medial{}$vowel<>\u110B;" // HANGUL CHOSEONG IEUNG
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
@ -144,57 +144,57 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
// from Hangul to Latin. Catch every letter that can be the
// LAST of a digraph (or multigraph) AND first of an initial
+ "'' < (l) (\u11c0;" // hangul jongseong thieuth
+ "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
+ "'' < (l) (\u11c1;" // hangul jongseong phieuph
+ "'' < (l) (\u11b7;" // hangul jongseong mieum
+ "'' < (n) (\u11bd;" // hangul jongseong cieuc
+ "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
+ "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
+ "'' < ({bl}) (\u11b8;" // hangul jongseong pieup
+ "'' < (d) (\u11ae;" // hangul jongseong tikeut
+ "'' < l{ }\u11c0;" // hangul jongseong thieuth
+ "'' < $lsgb{}\u11ba;" // hangul jongseong sios
+ "'' < l{ }\u11c1;" // hangul jongseong phieuph
+ "'' < l{ }\u11b7;" // hangul jongseong mieum
+ "'' < n{ }\u11bd;" // hangul jongseong cieuc
+ "'' < $nl{}\u11c2;" // hangul jongseong hieuh
+ "'' < $gnl{}\u11a9;" // hangul jongseong ssangkiyeok
+ "'' < $bl{}\u11b8;" // hangul jongseong pieup
+ "'' < d{ }\u11ae;" // hangul jongseong tikeut
+ "'' < ({ye}) (\u116e;" // hangul jungseong u
+ "'' < ({ywe}) (\u1169;" // hangul jungseong o
+ "'' < ({yw}) (\u1175;" // hangul jungseong i
+ "'' < ({ywao}) (\u1166;" // hangul jungseong e
+ "'' < ({yw}) (\u1161;" // hangul jungseong a
+ "'' < $ye{}\u116e;" // hangul jungseong u
+ "'' < $ywe{}\u1169;" // hangul jungseong o
+ "'' < $yw{}\u1175;" // hangul jungseong i
+ "'' < $ywao{}\u1166;" // hangul jungseong e
+ "'' < $yw{}\u1161;" // hangul jungseong a
+ "'' < (l) (\u1110;" // hangul choseong thieuth
+ "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
+ "'' < ({lsgb}) (\u1109;" // hangul choseong sios
+ "'' < (l) (\u1111;" // hangul choseong phieuph
+ "'' < (l) (\u1106;" // hangul choseong mieum
+ "'' < (n) (\u110c;" // hangul choseong cieuc
+ "'' < (n) (\u110d;"
+ "'' < ({nl}) (\u1112;" // hangul choseong hieuh
+ "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
+ "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
+ "'' < (d) (\u1103;" // hangul choseong tikeut
+ "'' < (d) (\u1104;"
+ "'' < ({bl}) (\u1107;" // hangul choseong pieup
+ "'' < ({bl}) (\u1108;"
+ "'' < l{ }\u1110;" // hangul choseong thieuth
+ "'' < $lsgb{}\u110a;" // hangul choseong ssangsios
+ "'' < $lsgb{}\u1109;" // hangul choseong sios
+ "'' < l{ }\u1111;" // hangul choseong phieuph
+ "'' < l{ }\u1106;" // hangul choseong mieum
+ "'' < n{ }\u110c;" // hangul choseong cieuc
+ "'' < n{ }\u110d;"
+ "'' < $nl{}\u1112;" // hangul choseong hieuh
+ "'' < $gnl{}\u1101;" // hangul choseong ssangkiyeok
+ "'' < $gnl{}\u1100;" // hangul choseong kiyeok
+ "'' < d{ }\u1103;" // hangul choseong tikeut
+ "'' < d{ }\u1104;"
+ "'' < $bl{}\u1107;" // hangul choseong pieup
+ "'' < $bl{}\u1108;"
// INITIALS
+ "t ({vowel}) <> \u1110;" // hangul choseong thieuth
+ "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
+ "s ({vowel}) <> \u1109;" // hangul choseong sios
+ "p ({vowel}) <> \u1111;" // hangul choseong phieuph
+ "n ({vowel}) <> \u1102;" // hangul choseong nieun
+ "m ({vowel}) <> \u1106;" // hangul choseong mieum
+ "l ({vowel}) <> \u1105;" // hangul choseong rieul
+ "k ({vowel}) <> \u110f;" // hangul choseong khieukh
+ "j ({vowel}) <> \u110c;" // hangul choseong cieuc
+ "h ({vowel}) <> \u1112;" // hangul choseong hieuh
+ "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
+ "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
+ "d ({vowel}) <> \u1103;" // hangul choseong tikeut
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
+ "bb ({vowel}) <> \u1108;"
+ "jj ({vowel}) <> \u110d;"
+ "dd ({vowel}) <> \u1104;"
+ "t }$vowel<>\u1110;" // hangul choseong thieuth
+ "ss }$vowel<>\u110a;" // hangul choseong ssangsios
+ "s }$vowel<>\u1109;" // hangul choseong sios
+ "p }$vowel<>\u1111;" // hangul choseong phieuph
+ "n }$vowel<>\u1102;" // hangul choseong nieun
+ "m }$vowel<>\u1106;" // hangul choseong mieum
+ "l }$vowel<>\u1105;" // hangul choseong rieul
+ "k }$vowel<>\u110f;" // hangul choseong khieukh
+ "j }$vowel<>\u110c;" // hangul choseong cieuc
+ "h }$vowel<>\u1112;" // hangul choseong hieuh
+ "gg }$vowel<>\u1101;" // hangul choseong ssangkiyeok
+ "g }$vowel<>\u1100;" // hangul choseong kiyeok
+ "d }$vowel<>\u1103;" // hangul choseong tikeut
+ "c }$vowel<>\u110e;" // hangul choseong chieuch
+ "b }$vowel<>\u1107;" // hangul choseong pieup
+ "bb }$vowel<>\u1108;"
+ "jj }$vowel<>\u110d;"
+ "dd }$vowel<>\u1104;"
// If we have gotten through to these rules, and we start with
// a consonant, then the remaining mappings would be to F,
@ -209,45 +209,45 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
+ "jj > \u1108\u110d;" // hangul choseong ssangcieuc
+ "dd > \u1108\u1104;" // hangul choseong ssangtikeut
+ "({final}) t > \u1110\u116e;" // hangul choseong thieuth
+ "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
+ "({final}) s > \u1109\u116e;" // hangul choseong sios
+ "({final}) p > \u1111\u116e;" // hangul choseong phieuph
+ "({final}) n > \u1102\u116e;" // hangul choseong nieun
+ "({final}) m > \u1106\u116e;" // hangul choseong mieum
+ "({final}) l > \u1105\u116e;" // hangul choseong rieul
+ "({final}) k > \u110f\u116e;" // hangul choseong khieukh
+ "({final}) j > \u110c\u116e;" // hangul choseong cieuc
+ "({final}) h > \u1112\u116e;" // hangul choseong hieuh
+ "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
+ "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
+ "({final}) d > \u1103\u116e;" // hangul choseong tikeut
+ "({final}) c > \u110e\u116e;" // hangul choseong chieuch
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
+ "$final{ t > \u1110\u116e;" // hangul choseong thieuth
+ "$final{ ss > \u110a\u116e;" // hangul choseong ssangsios
+ "$final{ s > \u1109\u116e;" // hangul choseong sios
+ "$final{ p > \u1111\u116e;" // hangul choseong phieuph
+ "$final{ n > \u1102\u116e;" // hangul choseong nieun
+ "$final{ m > \u1106\u116e;" // hangul choseong mieum
+ "$final{ l > \u1105\u116e;" // hangul choseong rieul
+ "$final{ k > \u110f\u116e;" // hangul choseong khieukh
+ "$final{ j > \u110c\u116e;" // hangul choseong cieuc
+ "$final{ h > \u1112\u116e;" // hangul choseong hieuh
+ "$final{ gg > \u1101\u116e;" // hangul choseong ssangkiyeok
+ "$final{ g > \u1100\u116e;" // hangul choseong kiyeok
+ "$final{ d > \u1103\u116e;" // hangul choseong tikeut
+ "$final{ c > \u110e\u116e;" // hangul choseong chieuch
+ "$final{ b > \u1107\u116e;" // hangul choseong pieup
// MEDIALS after INITIALS
+ "({initial}) yu <> \u1172;" // hangul jungseong yu
+ "({initial}) yo <> \u116d;" // hangul jungseong yo
+ "({initial}) yi <> \u1174;" // hangul jungseong yi
+ "({initial}) yeo <> \u1167;" // hangul jungseong yeo
+ "({initial}) ye <> \u1168;" // hangul jungseong ye
+ "({initial}) yae <> \u1164;" // hangul jungseong yae
+ "({initial}) ya <> \u1163;" // hangul jungseong ya
+ "({initial}) wi <> \u1171;" // hangul jungseong wi
+ "({initial}) weo <> \u116f;" // hangul jungseong weo
+ "({initial}) we <> \u1170;" // hangul jungseong we
+ "({initial}) wae <> \u116b;" // hangul jungseong wae
+ "({initial}) wa <> \u116a;" // hangul jungseong wa
+ "({initial}) u <> \u116e;" // hangul jungseong u
+ "({initial}) oe <> \u116c;" // hangul jungseong oe
+ "({initial}) o <> \u1169;" // hangul jungseong o
+ "({initial}) i <> \u1175;" // hangul jungseong i
+ "({initial}) eu <> \u1173;" // hangul jungseong eu
+ "({initial}) eo <> \u1165;" // hangul jungseong eo
+ "({initial}) e <> \u1166;" // hangul jungseong e
+ "({initial}) ae <> \u1162;" // hangul jungseong ae
+ "({initial}) a <> \u1161;" // hangul jungseong a
+ "$initial{ yu <> \u1172;" // hangul jungseong yu
+ "$initial{ yo <> \u116d;" // hangul jungseong yo
+ "$initial{ yi <> \u1174;" // hangul jungseong yi
+ "$initial{ yeo <> \u1167;" // hangul jungseong yeo
+ "$initial{ ye <> \u1168;" // hangul jungseong ye
+ "$initial{ yae <> \u1164;" // hangul jungseong yae
+ "$initial{ ya <> \u1163;" // hangul jungseong ya
+ "$initial{ wi <> \u1171;" // hangul jungseong wi
+ "$initial{ weo <> \u116f;" // hangul jungseong weo
+ "$initial{ we <> \u1170;" // hangul jungseong we
+ "$initial{ wae <> \u116b;" // hangul jungseong wae
+ "$initial{ wa <> \u116a;" // hangul jungseong wa
+ "$initial{ u <> \u116e;" // hangul jungseong u
+ "$initial{ oe <> \u116c;" // hangul jungseong oe
+ "$initial{ o <> \u1169;" // hangul jungseong o
+ "$initial{ i <> \u1175;" // hangul jungseong i
+ "$initial{ eu <> \u1173;" // hangul jungseong eu
+ "$initial{ eo <> \u1165;" // hangul jungseong eo
+ "$initial{ e <> \u1166;" // hangul jungseong e
+ "$initial{ ae <> \u1162;" // hangul jungseong ae
+ "$initial{ a <> \u1161;" // hangul jungseong a
// MEDIALS (vowels) not after INITIALs

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_StraightQuotes_CurlyQuotes.java,v $
* $Date: 2000/03/10 04:07:31 $
* $Revision: 1.2 $
* $Date: 2000/04/21 21:17:08 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -22,74 +22,74 @@ public class TransliterationRule_StraightQuotes_CurlyQuotes extends ListResource
return new Object[][] {
{ "Rule",
// Rewritten using character codes [LIU]
"white=[[:Zs:][:Zl:][:Zp:]];"
+ "black=[^{white}];"
+ "open=[:Ps:];"
+ "dquote=\";"
"$white=[[:Zs:][:Zl:][:Zp:]];"
+ "$black=[^$white];"
+ "$open=[:Ps:];"
+ "$dquote=\";"
+ "lAng=\u3008;"
+ "ldAng=\u300A;"
+ "lBrk='[';"
+ "lBrc='{';"
+ "$lAng=\u3008;"
+ "$ldAng=\u300A;"
+ "$lBrk='[';"
+ "$lBrc='{';"
+ "lquote=\u2018;"
+ "rquote=\u2019;"
+ "ldquote=\u201C;"
+ "rdquote=\u201D;"
+ "$lquote=\u2018;"
+ "$rquote=\u2019;"
+ "$ldquote=\u201C;"
+ "$rdquote=\u201D;"
+ "ldguill=\u00AB;"
+ "rdguill=\u00BB;"
+ "lguill=\u2039;"
+ "rguill=\u203A;"
+ "$ldguill=\u00AB;"
+ "$rdguill=\u00BB;"
+ "$lguill=\u2039;"
+ "$rguill=\u203A;"
+ "mdash=\u2014;"
+ "$mdash=\u2014;"
//#######################################
// Conversions from input
//#######################################
// join single quotes
+ "{lquote}''>{ldquote};"
+ "{lquote}{lquote}>{ldquote};"
+ "{rquote}''>{rdquote};"
+ "{rquote}{rquote}>{rdquote};"
+ "$lquote''>$ldquote;"
+ "$lquote$lquote>$ldquote;"
+ "$rquote''>$rdquote;"
+ "$rquote$rquote>$rdquote;"
//smart single quotes
+ "{white})''>{lquote};"
+ "{open})''>{lquote};"
+ "{black})''>{rquote};"
+ "''>{lquote};"
+ "$white{''>$lquote;"
+ "$open{''>$lquote;"
+ "$black{''>$rquote;"
+ "''>$lquote;"
//smart doubles
+ "{white}){dquote}>{ldquote};"
+ "{open}){dquote}>{ldquote};"
+ "{black}){dquote}>{rdquote};"
+ "{dquote}>{ldquote};"
+ "$white{$dquote>$ldquote;"
+ "$open{$dquote>$ldquote;"
+ "$black{$dquote>$rdquote;"
+ "$dquote>$ldquote;"
// join single guillemets
+ "{rguill}{rguill}>{rdguill};"
+ "'>>'>{rdguill};"
+ "{lguill}{lguill}>{ldguill};"
+ "'<<'>{ldguill};"
+ "$rguill$rguill>$rdguill;"
+ "'>>'>$rdguill;"
+ "$lguill$lguill>$ldguill;"
+ "'<<'>$ldguill;"
// prevent double spaces
+ "\\ )\\ >;"
+ "\\ {\\ >;"
// join hyphens into dash
+ "-->{mdash};"
+ "-->$mdash;"
//#######################################
// Conversions back to input
//#######################################
//smart quotes
+ "''<{lquote};"
+ "''<{rquote};"
+ "{dquote}<{ldquote};"
+ "{dquote}<{rdquote};"
+ "''<$lquote;"
+ "''<$rquote;"
+ "$dquote<$ldquote;"
+ "$dquote<$rdquote;"
//hyphens
+ "--<{mdash};"
+ "--<$mdash;"
}
};
}