Modify rule syntax
X-SVN-Rev: 1210
This commit is contained in:
parent
13de7186f7
commit
2b1cdd4f74
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/04/19 17:35:23 $
|
||||
* $Revision: 1.20 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.21 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.20 $ $Date: 2000/04/19 17:35:23 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
@ -559,10 +559,20 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
/**
|
||||
* The last available stand-in for variables. This is discovered
|
||||
* dynamically. At any point during parsing, available variables are
|
||||
* <code>variableNext..variableLimit-1</code>.
|
||||
* <code>variableNext..variableLimit-1</code>. During variable definition
|
||||
* we use the special value variableLimit-1 as a placeholder.
|
||||
*/
|
||||
private char variableLimit;
|
||||
|
||||
/**
|
||||
* When we encounter an undefined variable, we do not immediately signal
|
||||
* an error, in case we are defining this variable, e.g., "$a = [a-z];".
|
||||
* Instead, we save the name of the undefined variable, and substitute
|
||||
* in the placeholder char variableLimit - 1, and decrement
|
||||
* variableLimit.
|
||||
*/
|
||||
private String undefinedVariableName;
|
||||
|
||||
// Operators
|
||||
private static final char VARIABLE_DEF_OP = '=';
|
||||
private static final char FORWARD_RULE_OP = '>';
|
||||
@ -577,17 +587,15 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
private static final char END_OF_RULE = ';';
|
||||
private static final char RULE_COMMENT_CHAR = '#';
|
||||
|
||||
private static final char VARIABLE_REF_OPEN = '{';
|
||||
private static final char VARIABLE_REF_CLOSE = '}';
|
||||
private static final char CONTEXT_OPEN = '(';
|
||||
private static final char CONTEXT_CLOSE = ')';
|
||||
private static final char VARIABLE_REF = '$'; // also segment refs
|
||||
private static final char CONTEXT_ANTE = '{'; // ante{key
|
||||
private static final char CONTEXT_POST = '}'; // key}post
|
||||
private static final char SET_OPEN = '[';
|
||||
private static final char SET_CLOSE = ']';
|
||||
private static final char CURSOR_POS = '|';
|
||||
|
||||
// Segments of the input string are delimited by "$(" and "$)". In the
|
||||
// output string these segments are referenced as "$1" through "$9".
|
||||
private static final char SEGMENT_REF = '$';
|
||||
private static final char SEGMENT_OPEN = '(';
|
||||
private static final char SEGMENT_CLOSE = ')';
|
||||
|
||||
@ -703,7 +711,6 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
RuleBasedTransliterator.Parser parser) {
|
||||
int start = pos;
|
||||
StringBuffer buf = new StringBuffer();
|
||||
int postClose = -1; // position of post context close ')' in text
|
||||
|
||||
main:
|
||||
while (pos < limit) {
|
||||
@ -756,86 +763,77 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
--pos; // Backup to point to operator
|
||||
break main;
|
||||
}
|
||||
// Handle segment definitions "$(" "$)" and references "$1"
|
||||
// .. "$9".
|
||||
if (c == SEGMENT_REF) {
|
||||
// After a SEGMENT_REF, must see SEGMENT_OPEN,
|
||||
// SEGMENT_CLOSE, or a digit 1 to 9, with no intervening
|
||||
// whitespace
|
||||
if (pos == limit) {
|
||||
syntaxError("Trailing " + c, rule, start);
|
||||
}
|
||||
c = rule.charAt(pos++);
|
||||
if (c == SEGMENT_OPEN || c == SEGMENT_CLOSE) {
|
||||
// Parse "$(", "$)"
|
||||
if (segments == null) {
|
||||
segments = new Vector();
|
||||
}
|
||||
if ((c == SEGMENT_OPEN) !=
|
||||
(segments.size() % 2 == 0)) {
|
||||
syntaxError("Mismatched segment delimiters",
|
||||
rule, start);
|
||||
}
|
||||
segments.addElement(new Integer(buf.length()));
|
||||
} else {
|
||||
// Parse "$1" "$2" .. "$9"
|
||||
int r = Character.digit(c, 10);
|
||||
if (r < 1 || r > 9) {
|
||||
syntaxError("Illegal char after " + SEGMENT_REF,
|
||||
rule, start);
|
||||
}
|
||||
if (r > maxRef) {
|
||||
maxRef = r;
|
||||
}
|
||||
buf.append((char) (parser.data.segmentBase + r - 1));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
switch (c) {
|
||||
case SEGMENT_OPEN:
|
||||
case SEGMENT_CLOSE:
|
||||
// Handle segment definitions "(" and ")"
|
||||
// Parse "(", ")"
|
||||
if (segments == null) {
|
||||
segments = new Vector();
|
||||
}
|
||||
if ((c == SEGMENT_OPEN) !=
|
||||
(segments.size() % 2 == 0)) {
|
||||
syntaxError("Mismatched segment delimiters",
|
||||
rule, start);
|
||||
}
|
||||
segments.addElement(new Integer(buf.length()));
|
||||
break;
|
||||
case END_OF_RULE:
|
||||
--pos; // Backup to point to END_OF_RULE
|
||||
break main;
|
||||
case VARIABLE_REF_OPEN:
|
||||
case VARIABLE_REF:
|
||||
// Handle variable references and segment references "$1" .. "$9"
|
||||
{
|
||||
int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
|
||||
if (pos == j || j < 0) { // empty or unterminated
|
||||
syntaxError("Malformed variable reference", rule, start);
|
||||
// A variable reference must be followed immediately
|
||||
// by a Unicode identifier start and zero or more
|
||||
// Unicode identifier part characters, or by a digit
|
||||
// 1..9 if it is a segment reference.
|
||||
if (pos == limit) {
|
||||
syntaxError("Trailing " + c, rule, start);
|
||||
}
|
||||
// Parse "$1" "$2" .. "$9"
|
||||
c = rule.charAt(pos++);
|
||||
int r = Character.digit(c, 10);
|
||||
if (r >= 1 && r <= 9) {
|
||||
if (r > maxRef) {
|
||||
maxRef = r;
|
||||
}
|
||||
buf.append((char) (parser.data.segmentBase + r - 1));
|
||||
} else if (Character.isUnicodeIdentifierStart(c)) {
|
||||
int j = pos;
|
||||
while (j < limit &&
|
||||
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
|
||||
++j;
|
||||
}
|
||||
String name = rule.substring(pos-1, j);
|
||||
pos = j;
|
||||
// If this is a variable definition statement, then the LHS
|
||||
// variable will be undefined. In that case getVariableDef()
|
||||
// will return the special placeholder variableLimit-1.
|
||||
buf.append(parser.getVariableDef(name));
|
||||
} else {
|
||||
syntaxError("Illegal char after " + VARIABLE_REF,
|
||||
rule, start);
|
||||
}
|
||||
String name = rule.substring(pos, j);
|
||||
pos = j+1;
|
||||
buf.append(parser.getVariableDef(name));
|
||||
}
|
||||
break;
|
||||
case CONTEXT_OPEN:
|
||||
case CONTEXT_ANTE:
|
||||
if (ante >= 0) {
|
||||
syntaxError("Multiple ante contexts", rule, start);
|
||||
}
|
||||
ante = buf.length();
|
||||
break;
|
||||
case CONTEXT_POST:
|
||||
if (post >= 0) {
|
||||
syntaxError("Multiple post contexts", rule, start);
|
||||
}
|
||||
// Ignore CONTEXT_OPEN if buffer length is zero -- that means
|
||||
// this is the optional opening delimiter for the ante context.
|
||||
if (buf.length() > 0) {
|
||||
post = buf.length();
|
||||
}
|
||||
break;
|
||||
case CONTEXT_CLOSE:
|
||||
if (postClose >= 0) {
|
||||
syntaxError("Unexpected " + c, rule, start);
|
||||
}
|
||||
if (post >= 0) {
|
||||
// This is probably the optional closing delimiter
|
||||
// for the post context; save the pos and check later.
|
||||
postClose = buf.length();
|
||||
} else if (ante >= 0) {
|
||||
syntaxError("Multiple ante contexts", rule, start);
|
||||
} else {
|
||||
ante = buf.length();
|
||||
}
|
||||
post = buf.length();
|
||||
break;
|
||||
case SET_OPEN:
|
||||
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
|
||||
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
|
||||
pos = pp.getIndex();
|
||||
break;
|
||||
case VARIABLE_REF_CLOSE:
|
||||
case SET_CLOSE:
|
||||
syntaxError("Unquoted " + c, rule, start);
|
||||
case CURSOR_POS:
|
||||
@ -850,11 +848,6 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
}
|
||||
|
||||
// Check context close parameters
|
||||
if (postClose >= 0 && postClose != buf.length()) {
|
||||
syntaxError("Extra text after ]", rule, start);
|
||||
}
|
||||
|
||||
text = buf.toString();
|
||||
return pos;
|
||||
}
|
||||
@ -906,6 +899,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
RuleHalf left = new RuleHalf();
|
||||
RuleHalf right = new RuleHalf();
|
||||
|
||||
undefinedVariableName = null;
|
||||
pos = left.parse(rule, pos, limit, this);
|
||||
|
||||
if (pos == limit ||
|
||||
@ -936,17 +930,31 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
// or a set (already parsed). If RHS is longer than one
|
||||
// character, it is either a multi-character string, or multiple
|
||||
// sets, or a mixture of chars and sets -- syntax error.
|
||||
|
||||
// We expect to see a single undefined variable (the one being
|
||||
// defined).
|
||||
if (undefinedVariableName == null) {
|
||||
syntaxError("Missing '$' or duplicate definition", rule, start);
|
||||
}
|
||||
if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
|
||||
syntaxError("Malformed LHS", rule, start);
|
||||
}
|
||||
if (right.text.length() != 1) {
|
||||
syntaxError("Malformed RHS", rule, start);
|
||||
}
|
||||
if (data.variableNames.get(left.text) != null) {
|
||||
syntaxError("Duplicate definition of {" +
|
||||
left.text + "}", rule, start);
|
||||
}
|
||||
data.variableNames.put(left.text, new Character(right.text.charAt(0)));
|
||||
data.variableNames.put(undefinedVariableName,
|
||||
new Character(right.text.charAt(0)));
|
||||
++variableLimit;
|
||||
return pos;
|
||||
}
|
||||
|
||||
// If this is not a variable definition rule, we shouldn't have
|
||||
// any undefined variable names.
|
||||
if (undefinedVariableName != null) {
|
||||
syntaxError("Undefined variable $" + undefinedVariableName,
|
||||
rule, start);
|
||||
}
|
||||
|
||||
// If the direction we want doesn't match the rule
|
||||
// direction, do nothing.
|
||||
if (operator != FWDREV_RULE_OP &&
|
||||
@ -1041,7 +1049,18 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
private char getVariableDef(String name) {
|
||||
Character ch = (Character) data.variableNames.get(name);
|
||||
if (ch == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
// We allow one undefined variable so that variable definition
|
||||
// statements work. For the first undefined variable we return
|
||||
// the special placeholder variableLimit-1, and save the variable
|
||||
// name.
|
||||
if (undefinedVariableName == null) {
|
||||
undefinedVariableName = name;
|
||||
if (variableNext >= variableLimit) {
|
||||
throw new RuntimeException("Private use variables exhausted");
|
||||
}
|
||||
return --variableLimit;
|
||||
}
|
||||
throw new IllegalArgumentException("Undefined variable $"
|
||||
+ name);
|
||||
}
|
||||
return ch.charValue();
|
||||
@ -1210,7 +1229,11 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
}
|
||||
|
||||
/* $Log: RuleBasedTransliterator.java,v $
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.21 2000/04/21 21:16:40 alan
|
||||
* Modify rule syntax
|
||||
*
|
||||
* Revision 1.20 2000/04/19 17:35:23 alan
|
||||
* Update javadoc; fix compile error
|
||||
*
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
|
||||
* $Date: 2000/04/19 16:34:18 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -44,54 +44,7 @@ import com.ibm.util.Utility;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.16 $ $Date: 2000/04/19 16:34:18 $
|
||||
*
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.16 2000/04/19 16:34:18 alan
|
||||
* Add segment support.
|
||||
*
|
||||
* Revision 1.15 2000/04/12 20:17:45 alan
|
||||
* Delegate replace operation to rule object
|
||||
*
|
||||
* Revision 1.14 2000/03/10 04:07:24 johnf
|
||||
* Copyright update
|
||||
*
|
||||
* Revision 1.13 2000/02/10 07:36:25 johnf
|
||||
* fixed imports for com.ibm.util.Utility
|
||||
*
|
||||
* Revision 1.12 2000/02/03 18:11:19 Alan
|
||||
* Use array rather than hashtable for char-to-set map
|
||||
*
|
||||
* Revision 1.11 2000/01/27 18:59:19 Alan
|
||||
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
|
||||
*
|
||||
* Revision 1.10 2000/01/18 20:36:17 Alan
|
||||
* Make UnicodeSet inherit from UnicodeFilter
|
||||
*
|
||||
* Revision 1.9 2000/01/18 02:38:55 Alan
|
||||
* Fix filtering bug.
|
||||
*
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
* Revision 1.6 2000/01/11 02:25:03 Alan
|
||||
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
|
||||
*
|
||||
* Revision 1.5 2000/01/04 21:43:57 Alan
|
||||
* Add rule indexing, and move masking check to TransliterationRuleSet.
|
||||
*
|
||||
* Revision 1.4 1999/12/22 01:40:54 Alan
|
||||
* Consolidate rule pattern anteContext, key, and postContext into one string.
|
||||
*
|
||||
* Revision 1.3 1999/12/22 01:05:54 Alan
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*
|
||||
* Revision 1.2 1999/12/21 23:58:44 Alan
|
||||
* Detect a>x masking a>y
|
||||
*
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
class TransliterationRule {
|
||||
/**
|
||||
@ -538,3 +491,54 @@ class TransliterationRule {
|
||||
keyChar == textChar : set.contains(textChar));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.17 2000/04/21 21:16:40 alan
|
||||
* Modify rule syntax
|
||||
*
|
||||
* Revision 1.16 2000/04/19 16:34:18 alan
|
||||
* Add segment support.
|
||||
*
|
||||
* Revision 1.15 2000/04/12 20:17:45 alan
|
||||
* Delegate replace operation to rule object
|
||||
*
|
||||
* Revision 1.14 2000/03/10 04:07:24 johnf
|
||||
* Copyright update
|
||||
*
|
||||
* Revision 1.13 2000/02/10 07:36:25 johnf
|
||||
* fixed imports for com.ibm.util.Utility
|
||||
*
|
||||
* Revision 1.12 2000/02/03 18:11:19 Alan
|
||||
* Use array rather than hashtable for char-to-set map
|
||||
*
|
||||
* Revision 1.11 2000/01/27 18:59:19 Alan
|
||||
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
|
||||
*
|
||||
* Revision 1.10 2000/01/18 20:36:17 Alan
|
||||
* Make UnicodeSet inherit from UnicodeFilter
|
||||
*
|
||||
* Revision 1.9 2000/01/18 02:38:55 Alan
|
||||
* Fix filtering bug.
|
||||
*
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
* Revision 1.6 2000/01/11 02:25:03 Alan
|
||||
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
|
||||
*
|
||||
* Revision 1.5 2000/01/04 21:43:57 Alan
|
||||
* Add rule indexing, and move masking check to TransliterationRuleSet.
|
||||
*
|
||||
* Revision 1.4 1999/12/22 01:40:54 Alan
|
||||
* Consolidate rule pattern anteContext, key, and postContext into one string.
|
||||
*
|
||||
* Revision 1.3 1999/12/22 01:05:54 Alan
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*
|
||||
* Revision 1.2 1999/12/21 23:58:44 Alan
|
||||
* Detect a>x masking a>y
|
||||
*/
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
|
||||
* $Date: 2000/03/10 04:07:25 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -241,7 +241,7 @@ import java.text.*;
|
||||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.16 $ $Date: 2000/03/10 04:07:25 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
public class UnicodeSet implements UnicodeFilter {
|
||||
/**
|
||||
@ -887,6 +887,47 @@ public class UnicodeSet implements UnicodeFilter {
|
||||
i = j; // Make i point at closing '}'
|
||||
}
|
||||
|
||||
/* Parse variable references. These are treated as literals. If a
|
||||
* variable refers to a UnicodeSet, nestedPairs is assigned here.
|
||||
* Variable names are only parsed if varNameToChar is not null.
|
||||
* Set variables are only looked up if varCharToSet is not null.
|
||||
*/
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
else if (symbols != null && !isLiteral && c == '$') {
|
||||
++i;
|
||||
c = pattern.charAt(i);
|
||||
int j = i;
|
||||
if (Character.isUnicodeIdentifierStart(c)) {
|
||||
++j;
|
||||
while (j < limit &&
|
||||
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
|
||||
++j;
|
||||
}
|
||||
}
|
||||
if (i == j || j < 0) { // empty or unterminated
|
||||
throw new IllegalArgumentException("Illegal variable reference " +
|
||||
pattern.substring(i-1, limit));
|
||||
}
|
||||
String name = pattern.substring(i, j);
|
||||
Object obj = symbols.lookup(name);
|
||||
if (obj == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
+ name);
|
||||
}
|
||||
isLiteral = true;
|
||||
if (obj instanceof Character) {
|
||||
c = ((Character) obj).charValue();
|
||||
} else {
|
||||
nestedPairs = ((UnicodeSet) obj).pairs.toString();
|
||||
}
|
||||
i = j-1; // Make i point at last char of var name
|
||||
}
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
|
||||
/* An opening bracket indicates the first bracket of a nested
|
||||
* subpattern, either a normal pattern or a category pattern. We
|
||||
* recognize these here and set nestedPairs accordingly.
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/04/19 17:35:23 $
|
||||
* $Revision: 1.20 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.21 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.20 $ $Date: 2000/04/19 17:35:23 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
@ -559,10 +559,20 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
/**
|
||||
* The last available stand-in for variables. This is discovered
|
||||
* dynamically. At any point during parsing, available variables are
|
||||
* <code>variableNext..variableLimit-1</code>.
|
||||
* <code>variableNext..variableLimit-1</code>. During variable definition
|
||||
* we use the special value variableLimit-1 as a placeholder.
|
||||
*/
|
||||
private char variableLimit;
|
||||
|
||||
/**
|
||||
* When we encounter an undefined variable, we do not immediately signal
|
||||
* an error, in case we are defining this variable, e.g., "$a = [a-z];".
|
||||
* Instead, we save the name of the undefined variable, and substitute
|
||||
* in the placeholder char variableLimit - 1, and decrement
|
||||
* variableLimit.
|
||||
*/
|
||||
private String undefinedVariableName;
|
||||
|
||||
// Operators
|
||||
private static final char VARIABLE_DEF_OP = '=';
|
||||
private static final char FORWARD_RULE_OP = '>';
|
||||
@ -577,17 +587,15 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
private static final char END_OF_RULE = ';';
|
||||
private static final char RULE_COMMENT_CHAR = '#';
|
||||
|
||||
private static final char VARIABLE_REF_OPEN = '{';
|
||||
private static final char VARIABLE_REF_CLOSE = '}';
|
||||
private static final char CONTEXT_OPEN = '(';
|
||||
private static final char CONTEXT_CLOSE = ')';
|
||||
private static final char VARIABLE_REF = '$'; // also segment refs
|
||||
private static final char CONTEXT_ANTE = '{'; // ante{key
|
||||
private static final char CONTEXT_POST = '}'; // key}post
|
||||
private static final char SET_OPEN = '[';
|
||||
private static final char SET_CLOSE = ']';
|
||||
private static final char CURSOR_POS = '|';
|
||||
|
||||
// Segments of the input string are delimited by "$(" and "$)". In the
|
||||
// output string these segments are referenced as "$1" through "$9".
|
||||
private static final char SEGMENT_REF = '$';
|
||||
private static final char SEGMENT_OPEN = '(';
|
||||
private static final char SEGMENT_CLOSE = ')';
|
||||
|
||||
@ -703,7 +711,6 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
RuleBasedTransliterator.Parser parser) {
|
||||
int start = pos;
|
||||
StringBuffer buf = new StringBuffer();
|
||||
int postClose = -1; // position of post context close ')' in text
|
||||
|
||||
main:
|
||||
while (pos < limit) {
|
||||
@ -756,86 +763,77 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
--pos; // Backup to point to operator
|
||||
break main;
|
||||
}
|
||||
// Handle segment definitions "$(" "$)" and references "$1"
|
||||
// .. "$9".
|
||||
if (c == SEGMENT_REF) {
|
||||
// After a SEGMENT_REF, must see SEGMENT_OPEN,
|
||||
// SEGMENT_CLOSE, or a digit 1 to 9, with no intervening
|
||||
// whitespace
|
||||
if (pos == limit) {
|
||||
syntaxError("Trailing " + c, rule, start);
|
||||
}
|
||||
c = rule.charAt(pos++);
|
||||
if (c == SEGMENT_OPEN || c == SEGMENT_CLOSE) {
|
||||
// Parse "$(", "$)"
|
||||
if (segments == null) {
|
||||
segments = new Vector();
|
||||
}
|
||||
if ((c == SEGMENT_OPEN) !=
|
||||
(segments.size() % 2 == 0)) {
|
||||
syntaxError("Mismatched segment delimiters",
|
||||
rule, start);
|
||||
}
|
||||
segments.addElement(new Integer(buf.length()));
|
||||
} else {
|
||||
// Parse "$1" "$2" .. "$9"
|
||||
int r = Character.digit(c, 10);
|
||||
if (r < 1 || r > 9) {
|
||||
syntaxError("Illegal char after " + SEGMENT_REF,
|
||||
rule, start);
|
||||
}
|
||||
if (r > maxRef) {
|
||||
maxRef = r;
|
||||
}
|
||||
buf.append((char) (parser.data.segmentBase + r - 1));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
switch (c) {
|
||||
case SEGMENT_OPEN:
|
||||
case SEGMENT_CLOSE:
|
||||
// Handle segment definitions "(" and ")"
|
||||
// Parse "(", ")"
|
||||
if (segments == null) {
|
||||
segments = new Vector();
|
||||
}
|
||||
if ((c == SEGMENT_OPEN) !=
|
||||
(segments.size() % 2 == 0)) {
|
||||
syntaxError("Mismatched segment delimiters",
|
||||
rule, start);
|
||||
}
|
||||
segments.addElement(new Integer(buf.length()));
|
||||
break;
|
||||
case END_OF_RULE:
|
||||
--pos; // Backup to point to END_OF_RULE
|
||||
break main;
|
||||
case VARIABLE_REF_OPEN:
|
||||
case VARIABLE_REF:
|
||||
// Handle variable references and segment references "$1" .. "$9"
|
||||
{
|
||||
int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
|
||||
if (pos == j || j < 0) { // empty or unterminated
|
||||
syntaxError("Malformed variable reference", rule, start);
|
||||
// A variable reference must be followed immediately
|
||||
// by a Unicode identifier start and zero or more
|
||||
// Unicode identifier part characters, or by a digit
|
||||
// 1..9 if it is a segment reference.
|
||||
if (pos == limit) {
|
||||
syntaxError("Trailing " + c, rule, start);
|
||||
}
|
||||
// Parse "$1" "$2" .. "$9"
|
||||
c = rule.charAt(pos++);
|
||||
int r = Character.digit(c, 10);
|
||||
if (r >= 1 && r <= 9) {
|
||||
if (r > maxRef) {
|
||||
maxRef = r;
|
||||
}
|
||||
buf.append((char) (parser.data.segmentBase + r - 1));
|
||||
} else if (Character.isUnicodeIdentifierStart(c)) {
|
||||
int j = pos;
|
||||
while (j < limit &&
|
||||
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
|
||||
++j;
|
||||
}
|
||||
String name = rule.substring(pos-1, j);
|
||||
pos = j;
|
||||
// If this is a variable definition statement, then the LHS
|
||||
// variable will be undefined. In that case getVariableDef()
|
||||
// will return the special placeholder variableLimit-1.
|
||||
buf.append(parser.getVariableDef(name));
|
||||
} else {
|
||||
syntaxError("Illegal char after " + VARIABLE_REF,
|
||||
rule, start);
|
||||
}
|
||||
String name = rule.substring(pos, j);
|
||||
pos = j+1;
|
||||
buf.append(parser.getVariableDef(name));
|
||||
}
|
||||
break;
|
||||
case CONTEXT_OPEN:
|
||||
case CONTEXT_ANTE:
|
||||
if (ante >= 0) {
|
||||
syntaxError("Multiple ante contexts", rule, start);
|
||||
}
|
||||
ante = buf.length();
|
||||
break;
|
||||
case CONTEXT_POST:
|
||||
if (post >= 0) {
|
||||
syntaxError("Multiple post contexts", rule, start);
|
||||
}
|
||||
// Ignore CONTEXT_OPEN if buffer length is zero -- that means
|
||||
// this is the optional opening delimiter for the ante context.
|
||||
if (buf.length() > 0) {
|
||||
post = buf.length();
|
||||
}
|
||||
break;
|
||||
case CONTEXT_CLOSE:
|
||||
if (postClose >= 0) {
|
||||
syntaxError("Unexpected " + c, rule, start);
|
||||
}
|
||||
if (post >= 0) {
|
||||
// This is probably the optional closing delimiter
|
||||
// for the post context; save the pos and check later.
|
||||
postClose = buf.length();
|
||||
} else if (ante >= 0) {
|
||||
syntaxError("Multiple ante contexts", rule, start);
|
||||
} else {
|
||||
ante = buf.length();
|
||||
}
|
||||
post = buf.length();
|
||||
break;
|
||||
case SET_OPEN:
|
||||
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
|
||||
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
|
||||
pos = pp.getIndex();
|
||||
break;
|
||||
case VARIABLE_REF_CLOSE:
|
||||
case SET_CLOSE:
|
||||
syntaxError("Unquoted " + c, rule, start);
|
||||
case CURSOR_POS:
|
||||
@ -850,11 +848,6 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
}
|
||||
|
||||
// Check context close parameters
|
||||
if (postClose >= 0 && postClose != buf.length()) {
|
||||
syntaxError("Extra text after ]", rule, start);
|
||||
}
|
||||
|
||||
text = buf.toString();
|
||||
return pos;
|
||||
}
|
||||
@ -906,6 +899,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
RuleHalf left = new RuleHalf();
|
||||
RuleHalf right = new RuleHalf();
|
||||
|
||||
undefinedVariableName = null;
|
||||
pos = left.parse(rule, pos, limit, this);
|
||||
|
||||
if (pos == limit ||
|
||||
@ -936,17 +930,31 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
// or a set (already parsed). If RHS is longer than one
|
||||
// character, it is either a multi-character string, or multiple
|
||||
// sets, or a mixture of chars and sets -- syntax error.
|
||||
|
||||
// We expect to see a single undefined variable (the one being
|
||||
// defined).
|
||||
if (undefinedVariableName == null) {
|
||||
syntaxError("Missing '$' or duplicate definition", rule, start);
|
||||
}
|
||||
if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
|
||||
syntaxError("Malformed LHS", rule, start);
|
||||
}
|
||||
if (right.text.length() != 1) {
|
||||
syntaxError("Malformed RHS", rule, start);
|
||||
}
|
||||
if (data.variableNames.get(left.text) != null) {
|
||||
syntaxError("Duplicate definition of {" +
|
||||
left.text + "}", rule, start);
|
||||
}
|
||||
data.variableNames.put(left.text, new Character(right.text.charAt(0)));
|
||||
data.variableNames.put(undefinedVariableName,
|
||||
new Character(right.text.charAt(0)));
|
||||
++variableLimit;
|
||||
return pos;
|
||||
}
|
||||
|
||||
// If this is not a variable definition rule, we shouldn't have
|
||||
// any undefined variable names.
|
||||
if (undefinedVariableName != null) {
|
||||
syntaxError("Undefined variable $" + undefinedVariableName,
|
||||
rule, start);
|
||||
}
|
||||
|
||||
// If the direction we want doesn't match the rule
|
||||
// direction, do nothing.
|
||||
if (operator != FWDREV_RULE_OP &&
|
||||
@ -1041,7 +1049,18 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
private char getVariableDef(String name) {
|
||||
Character ch = (Character) data.variableNames.get(name);
|
||||
if (ch == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
// We allow one undefined variable so that variable definition
|
||||
// statements work. For the first undefined variable we return
|
||||
// the special placeholder variableLimit-1, and save the variable
|
||||
// name.
|
||||
if (undefinedVariableName == null) {
|
||||
undefinedVariableName = name;
|
||||
if (variableNext >= variableLimit) {
|
||||
throw new RuntimeException("Private use variables exhausted");
|
||||
}
|
||||
return --variableLimit;
|
||||
}
|
||||
throw new IllegalArgumentException("Undefined variable $"
|
||||
+ name);
|
||||
}
|
||||
return ch.charValue();
|
||||
@ -1210,7 +1229,11 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
}
|
||||
|
||||
/* $Log: RuleBasedTransliterator.java,v $
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.21 2000/04/21 21:16:40 alan
|
||||
* Modify rule syntax
|
||||
*
|
||||
* Revision 1.20 2000/04/19 17:35:23 alan
|
||||
* Update javadoc; fix compile error
|
||||
*
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
|
||||
* $Date: 2000/04/19 16:34:18 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -44,54 +44,7 @@ import com.ibm.util.Utility;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.16 $ $Date: 2000/04/19 16:34:18 $
|
||||
*
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.16 2000/04/19 16:34:18 alan
|
||||
* Add segment support.
|
||||
*
|
||||
* Revision 1.15 2000/04/12 20:17:45 alan
|
||||
* Delegate replace operation to rule object
|
||||
*
|
||||
* Revision 1.14 2000/03/10 04:07:24 johnf
|
||||
* Copyright update
|
||||
*
|
||||
* Revision 1.13 2000/02/10 07:36:25 johnf
|
||||
* fixed imports for com.ibm.util.Utility
|
||||
*
|
||||
* Revision 1.12 2000/02/03 18:11:19 Alan
|
||||
* Use array rather than hashtable for char-to-set map
|
||||
*
|
||||
* Revision 1.11 2000/01/27 18:59:19 Alan
|
||||
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
|
||||
*
|
||||
* Revision 1.10 2000/01/18 20:36:17 Alan
|
||||
* Make UnicodeSet inherit from UnicodeFilter
|
||||
*
|
||||
* Revision 1.9 2000/01/18 02:38:55 Alan
|
||||
* Fix filtering bug.
|
||||
*
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
* Revision 1.6 2000/01/11 02:25:03 Alan
|
||||
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
|
||||
*
|
||||
* Revision 1.5 2000/01/04 21:43:57 Alan
|
||||
* Add rule indexing, and move masking check to TransliterationRuleSet.
|
||||
*
|
||||
* Revision 1.4 1999/12/22 01:40:54 Alan
|
||||
* Consolidate rule pattern anteContext, key, and postContext into one string.
|
||||
*
|
||||
* Revision 1.3 1999/12/22 01:05:54 Alan
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*
|
||||
* Revision 1.2 1999/12/21 23:58:44 Alan
|
||||
* Detect a>x masking a>y
|
||||
*
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
class TransliterationRule {
|
||||
/**
|
||||
@ -538,3 +491,54 @@ class TransliterationRule {
|
||||
keyChar == textChar : set.contains(textChar));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.17 2000/04/21 21:16:40 alan
|
||||
* Modify rule syntax
|
||||
*
|
||||
* Revision 1.16 2000/04/19 16:34:18 alan
|
||||
* Add segment support.
|
||||
*
|
||||
* Revision 1.15 2000/04/12 20:17:45 alan
|
||||
* Delegate replace operation to rule object
|
||||
*
|
||||
* Revision 1.14 2000/03/10 04:07:24 johnf
|
||||
* Copyright update
|
||||
*
|
||||
* Revision 1.13 2000/02/10 07:36:25 johnf
|
||||
* fixed imports for com.ibm.util.Utility
|
||||
*
|
||||
* Revision 1.12 2000/02/03 18:11:19 Alan
|
||||
* Use array rather than hashtable for char-to-set map
|
||||
*
|
||||
* Revision 1.11 2000/01/27 18:59:19 Alan
|
||||
* Use Position rather than int[] and move all subclass overrides to one method (handleTransliterate)
|
||||
*
|
||||
* Revision 1.10 2000/01/18 20:36:17 Alan
|
||||
* Make UnicodeSet inherit from UnicodeFilter
|
||||
*
|
||||
* Revision 1.9 2000/01/18 02:38:55 Alan
|
||||
* Fix filtering bug.
|
||||
*
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
* Revision 1.6 2000/01/11 02:25:03 Alan
|
||||
* Rewrite UnicodeSet and RBT parsers for better performance and new syntax
|
||||
*
|
||||
* Revision 1.5 2000/01/04 21:43:57 Alan
|
||||
* Add rule indexing, and move masking check to TransliterationRuleSet.
|
||||
*
|
||||
* Revision 1.4 1999/12/22 01:40:54 Alan
|
||||
* Consolidate rule pattern anteContext, key, and postContext into one string.
|
||||
*
|
||||
* Revision 1.3 1999/12/22 01:05:54 Alan
|
||||
* Improve masking checking; turn it off by default, for better performance
|
||||
*
|
||||
* Revision 1.2 1999/12/21 23:58:44 Alan
|
||||
* Detect a>x masking a>y
|
||||
*/
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
|
||||
* $Date: 2000/03/10 04:07:25 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2000/04/21 21:16:40 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -241,7 +241,7 @@ import java.text.*;
|
||||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.16 $ $Date: 2000/03/10 04:07:25 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
|
||||
*/
|
||||
public class UnicodeSet implements UnicodeFilter {
|
||||
/**
|
||||
@ -887,6 +887,47 @@ public class UnicodeSet implements UnicodeFilter {
|
||||
i = j; // Make i point at closing '}'
|
||||
}
|
||||
|
||||
/* Parse variable references. These are treated as literals. If a
|
||||
* variable refers to a UnicodeSet, nestedPairs is assigned here.
|
||||
* Variable references are only parsed if symbols is not null. Each name
|
||||
* is resolved via symbols.lookup() to either a Character or a UnicodeSet.
|
||||
*/
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
else if (symbols != null && !isLiteral && c == '$') {
|
||||
++i;
|
||||
c = pattern.charAt(i);
|
||||
int j = i;
|
||||
if (Character.isUnicodeIdentifierStart(c)) {
|
||||
++j;
|
||||
while (j < limit &&
|
||||
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
|
||||
++j;
|
||||
}
|
||||
}
|
||||
if (i == j || j < 0) { // empty or unterminated
|
||||
throw new IllegalArgumentException("Illegal variable reference " +
|
||||
pattern.substring(i-1, limit));
|
||||
}
|
||||
String name = pattern.substring(i, j);
|
||||
Object obj = symbols.lookup(name);
|
||||
if (obj == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
+ name);
|
||||
}
|
||||
isLiteral = true;
|
||||
if (obj instanceof Character) {
|
||||
c = ((Character) obj).charValue();
|
||||
} else {
|
||||
nestedPairs = ((UnicodeSet) obj).pairs.toString();
|
||||
}
|
||||
i = j-1; // Make i point at last char of var name
|
||||
}
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
// TEMPORARY
|
||||
|
||||
/* An opening bracket indicates the first bracket of a nested
|
||||
* subpattern, either a normal pattern or a category pattern. We
|
||||
* recognize these here and set nestedPairs accordingly.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_KeyboardEscape_Latin1.java,v $
|
||||
* $Date: 2000/03/10 04:07:30 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -21,120 +21,120 @@ public class TransliterationRule_KeyboardEscape_Latin1 extends ListResourceBundl
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
"esc='';"
|
||||
+ "grave=`;"
|
||||
+ "acute='';"
|
||||
+ "hat=^;"
|
||||
+ "tilde=~;"
|
||||
+ "umlaut=:;"
|
||||
+ "ring=.;"
|
||||
+ "cedilla=,;"
|
||||
+ "slash=/;"
|
||||
+ "super=^;"
|
||||
"$esc='';"
|
||||
+ "$grave=`;"
|
||||
+ "$acute='';"
|
||||
+ "$hat=^;"
|
||||
+ "$tilde=~;"
|
||||
+ "$umlaut=:;"
|
||||
+ "$ring=.;"
|
||||
+ "$cedilla=,;"
|
||||
+ "$slash=/;"
|
||||
+ "$super=^;"
|
||||
|
||||
// Make keyboard entry of {esc} possible
|
||||
// and of backslash
|
||||
+ "'\\'{esc}>{esc};"
|
||||
+ "'\\'$esc>$esc;"
|
||||
+ "'\\\\'>'\\';"
|
||||
|
||||
// Long keys
|
||||
+ "cur{esc}>\u00A4;"
|
||||
+ "sec{esc}>\u00A7;"
|
||||
+ "not{esc}>\u00AC;"
|
||||
+ "mul{esc}>\u00D7;"
|
||||
+ "div{esc}>\u00F7;"
|
||||
+ "cur$esc>\u00A4;"
|
||||
+ "sec$esc>\u00A7;"
|
||||
+ "not$esc>\u00AC;"
|
||||
+ "mul$esc>\u00D7;"
|
||||
+ "div$esc>\u00F7;"
|
||||
|
||||
+ "\\ {esc}>\u00A0;" // non-breaking space
|
||||
+ "!{esc}>\u00A1;" // inverted exclamation
|
||||
+ "c/{esc}>\u00A2;" // cent sign
|
||||
+ "lb{esc}>\u00A3;" // pound sign
|
||||
+ "'|'{esc}>\u00A6;" // broken vertical bar
|
||||
+ ":{esc}>\u00A8;" // umlaut
|
||||
+ "{super}a{esc}>\u00AA;" // feminine ordinal
|
||||
+ "'<<'{esc}>\u00AB;"
|
||||
+ "r{esc}>\u00AE;"
|
||||
+ "--{esc}>\u00AF;"
|
||||
+ "-{esc}>\u00AD;"
|
||||
+ "+-{esc}>\u00B1;"
|
||||
+ "{super}2{esc}>\u00B2;"
|
||||
+ "{super}3{esc}>\u00B3;"
|
||||
+ "{acute}{esc}>\u00B4;"
|
||||
+ "m{esc}>\u00B5;"
|
||||
+ "para{esc}>\u00B6;"
|
||||
+ "dot{esc}>\u00B7;"
|
||||
+ "{cedilla}{esc}>\u00B8;"
|
||||
+ "{super}1{esc}>\u00B9;"
|
||||
+ "{super}o{esc}>\u00BA;" // masculine ordinal
|
||||
+ "'>>'{esc}>\u00BB;"
|
||||
+ "1/4{esc}>\u00BC;"
|
||||
+ "1/2{esc}>\u00BD;"
|
||||
+ "3/4{esc}>\u00BE;"
|
||||
+ "?{esc}>\u00BF;"
|
||||
+ "A{grave}{esc}>\u00C0;"
|
||||
+ "A{acute}{esc}>\u00C1;"
|
||||
+ "A{hat}{esc}>\u00C2;"
|
||||
+ "A{tilde}{esc}>\u00C3;"
|
||||
+ "A{umlaut}{esc}>\u00C4;"
|
||||
+ "A{ring}{esc}>\u00C5;"
|
||||
+ "AE{esc}>\u00C6;"
|
||||
+ "C{cedilla}{esc}>\u00C7;"
|
||||
+ "E{grave}{esc}>\u00C8;"
|
||||
+ "E{acute}{esc}>\u00C9;"
|
||||
+ "E{hat}{esc}>\u00CA;"
|
||||
+ "E{umlaut}{esc}>\u00CB;"
|
||||
+ "I{grave}{esc}>\u00CC;"
|
||||
+ "I{acute}{esc}>\u00CD;"
|
||||
+ "I{hat}{esc}>\u00CE;"
|
||||
+ "I{umlaut}{esc}>\u00CF;"
|
||||
+ "D-{esc}>\u00D0;"
|
||||
+ "N{tilde}{esc}>\u00D1;"
|
||||
+ "O{grave}{esc}>\u00D2;"
|
||||
+ "O{acute}{esc}>\u00D3;"
|
||||
+ "O{hat}{esc}>\u00D4;"
|
||||
+ "O{tilde}{esc}>\u00D5;"
|
||||
+ "O{umlaut}{esc}>\u00D6;"
|
||||
+ "O{slash}{esc}>\u00D8;"
|
||||
+ "U{grave}{esc}>\u00D9;"
|
||||
+ "U{acute}{esc}>\u00DA;"
|
||||
+ "U{hat}{esc}>\u00DB;"
|
||||
+ "U{umlaut}{esc}>\u00DC;"
|
||||
+ "Y{acute}{esc}>\u00DD;"
|
||||
+ "TH{esc}>\u00DE;"
|
||||
+ "ss{esc}>\u00DF;"
|
||||
+ "a{grave}{esc}>\u00E0;"
|
||||
+ "a{acute}{esc}>\u00E1;"
|
||||
+ "a{hat}{esc}>\u00E2;"
|
||||
+ "a{tilde}{esc}>\u00E3;"
|
||||
+ "a{umlaut}{esc}>\u00E4;"
|
||||
+ "a{ring}{esc}>\u00E5;"
|
||||
+ "ae{esc}>\u00E6;"
|
||||
+ "c{cedilla}{esc}>\u00E7;"
|
||||
+ "c{esc}>\u00A9;" // copyright - after c{cedilla}
|
||||
+ "e{grave}{esc}>\u00E8;"
|
||||
+ "e{acute}{esc}>\u00E9;"
|
||||
+ "e{hat}{esc}>\u00EA;"
|
||||
+ "e{umlaut}{esc}>\u00EB;"
|
||||
+ "i{grave}{esc}>\u00EC;"
|
||||
+ "i{acute}{esc}>\u00ED;"
|
||||
+ "i{hat}{esc}>\u00EE;"
|
||||
+ "i{umlaut}{esc}>\u00EF;"
|
||||
+ "d-{esc}>\u00F0;"
|
||||
+ "n{tilde}{esc}>\u00F1;"
|
||||
+ "o{grave}{esc}>\u00F2;"
|
||||
+ "o{acute}{esc}>\u00F3;"
|
||||
+ "o{hat}{esc}>\u00F4;"
|
||||
+ "o{tilde}{esc}>\u00F5;"
|
||||
+ "o{umlaut}{esc}>\u00F6;"
|
||||
+ "o{slash}{esc}>\u00F8;"
|
||||
+ "o{esc}>\u00B0;"
|
||||
+ "u{grave}{esc}>\u00F9;"
|
||||
+ "u{acute}{esc}>\u00FA;"
|
||||
+ "u{hat}{esc}>\u00FB;"
|
||||
+ "u{umlaut}{esc}>\u00FC;"
|
||||
+ "y{acute}{esc}>\u00FD;"
|
||||
+ "y{esc}>\u00A5;" // yen sign
|
||||
+ "th{esc}>\u00FE;"
|
||||
//masked: + "ss{esc}>\u00FF;"
|
||||
+ "\\ $esc>\u00A0;" // non-breaking space
|
||||
+ "!$esc>\u00A1;" // inverted exclamation
|
||||
+ "c/$esc>\u00A2;" // cent sign
|
||||
+ "lb$esc>\u00A3;" // pound sign
|
||||
+ "'|'$esc>\u00A6;" // broken vertical bar
|
||||
+ ":$esc>\u00A8;" // umlaut
|
||||
+ "$super a$esc>\u00AA;" // feminine ordinal
|
||||
+ "'<<'$esc>\u00AB;"
|
||||
+ "r$esc>\u00AE;"
|
||||
+ "--$esc>\u00AF;"
|
||||
+ "-$esc>\u00AD;"
|
||||
+ "+-$esc>\u00B1;"
|
||||
+ "$super 2$esc>\u00B2;"
|
||||
+ "$super 3$esc>\u00B3;"
|
||||
+ "$acute$esc>\u00B4;"
|
||||
+ "m$esc>\u00B5;"
|
||||
+ "para$esc>\u00B6;"
|
||||
+ "dot$esc>\u00B7;"
|
||||
+ "$cedilla$esc>\u00B8;"
|
||||
+ "$super 1$esc>\u00B9;"
|
||||
+ "$super o$esc>\u00BA;" // masculine ordinal
|
||||
+ "'>>'$esc>\u00BB;"
|
||||
+ "1/4$esc>\u00BC;"
|
||||
+ "1/2$esc>\u00BD;"
|
||||
+ "3/4$esc>\u00BE;"
|
||||
+ "?$esc>\u00BF;"
|
||||
+ "A$grave$esc>\u00C0;"
|
||||
+ "A$acute$esc>\u00C1;"
|
||||
+ "A$hat$esc>\u00C2;"
|
||||
+ "A$tilde$esc>\u00C3;"
|
||||
+ "A$umlaut$esc>\u00C4;"
|
||||
+ "A$ring$esc>\u00C5;"
|
||||
+ "AE$esc>\u00C6;"
|
||||
+ "C$cedilla$esc>\u00C7;"
|
||||
+ "E$grave$esc>\u00C8;"
|
||||
+ "E$acute$esc>\u00C9;"
|
||||
+ "E$hat$esc>\u00CA;"
|
||||
+ "E$umlaut$esc>\u00CB;"
|
||||
+ "I$grave$esc>\u00CC;"
|
||||
+ "I$acute$esc>\u00CD;"
|
||||
+ "I$hat$esc>\u00CE;"
|
||||
+ "I$umlaut$esc>\u00CF;"
|
||||
+ "D-$esc>\u00D0;"
|
||||
+ "N$tilde$esc>\u00D1;"
|
||||
+ "O$grave$esc>\u00D2;"
|
||||
+ "O$acute$esc>\u00D3;"
|
||||
+ "O$hat$esc>\u00D4;"
|
||||
+ "O$tilde$esc>\u00D5;"
|
||||
+ "O$umlaut$esc>\u00D6;"
|
||||
+ "O$slash$esc>\u00D8;"
|
||||
+ "U$grave$esc>\u00D9;"
|
||||
+ "U$acute$esc>\u00DA;"
|
||||
+ "U$hat$esc>\u00DB;"
|
||||
+ "U$umlaut$esc>\u00DC;"
|
||||
+ "Y$acute$esc>\u00DD;"
|
||||
+ "TH$esc>\u00DE;"
|
||||
+ "ss$esc>\u00DF;"
|
||||
+ "a$grave$esc>\u00E0;"
|
||||
+ "a$acute$esc>\u00E1;"
|
||||
+ "a$hat$esc>\u00E2;"
|
||||
+ "a$tilde$esc>\u00E3;"
|
||||
+ "a$umlaut$esc>\u00E4;"
|
||||
+ "a$ring$esc>\u00E5;"
|
||||
+ "ae$esc>\u00E6;"
|
||||
+ "c$cedilla$esc>\u00E7;"
|
||||
+ "c$esc>\u00A9;" // copyright - after c{cedilla}
|
||||
+ "e$grave$esc>\u00E8;"
|
||||
+ "e$acute$esc>\u00E9;"
|
||||
+ "e$hat$esc>\u00EA;"
|
||||
+ "e$umlaut$esc>\u00EB;"
|
||||
+ "i$grave$esc>\u00EC;"
|
||||
+ "i$acute$esc>\u00ED;"
|
||||
+ "i$hat$esc>\u00EE;"
|
||||
+ "i$umlaut$esc>\u00EF;"
|
||||
+ "d-$esc>\u00F0;"
|
||||
+ "n$tilde$esc>\u00F1;"
|
||||
+ "o$grave$esc>\u00F2;"
|
||||
+ "o$acute$esc>\u00F3;"
|
||||
+ "o$hat$esc>\u00F4;"
|
||||
+ "o$tilde$esc>\u00F5;"
|
||||
+ "o$umlaut$esc>\u00F6;"
|
||||
+ "o$slash$esc>\u00F8;"
|
||||
+ "o$esc>\u00B0;"
|
||||
+ "u$grave$esc>\u00F9;"
|
||||
+ "u$acute$esc>\u00FA;"
|
||||
+ "u$hat$esc>\u00FB;"
|
||||
+ "u$umlaut$esc>\u00FC;"
|
||||
+ "y$acute$esc>\u00FD;"
|
||||
+ "y$esc>\u00A5;" // yen sign
|
||||
+ "th$esc>\u00FE;"
|
||||
//masked: + "ss$esc>\u00FF;"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Arabic.java,v $
|
||||
* $Date: 2000/03/10 04:07:30 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -23,52 +23,52 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
|
||||
{ "Rule",
|
||||
// To Do: finish adding shadda, add sokoon
|
||||
|
||||
"alefmadda=\u0622;"+
|
||||
"alefuhamza=\u0623;"+
|
||||
"wauuhamza=\u0624;"+
|
||||
"alefhamza=\u0625;"+
|
||||
"yehuhamza=\u0626;"+
|
||||
"alef=\u0627;"+
|
||||
"beh=\u0628;"+
|
||||
"tehmarbuta=\u0629;"+
|
||||
"teh=\u062A;"+
|
||||
"theh=\u062B;"+
|
||||
"geem=\u062C;"+
|
||||
"hah=\u062D;"+
|
||||
"kha=\u062E;"+
|
||||
"dal=\u062F;"+
|
||||
"dhal=\u0630;"+
|
||||
"reh=\u0631;"+
|
||||
"zain=\u0632;"+
|
||||
"seen=\u0633;"+
|
||||
"sheen=\u0634;"+
|
||||
"sad=\u0635;"+
|
||||
"dad=\u0636;"+
|
||||
"tah=\u0637;"+
|
||||
"zah=\u0638;"+
|
||||
"ein=\u0639;"+
|
||||
"ghein=\u063A;"+
|
||||
"feh=\u0641;"+
|
||||
"qaaf=\u0642;"+
|
||||
"kaf=\u0643;"+
|
||||
"lam=\u0644;"+
|
||||
"meem=\u0645;"+
|
||||
"noon=\u0646;"+
|
||||
"heh=\u0647;"+
|
||||
"wau=\u0648;"+
|
||||
"yehmaqsura=\u0649;"+
|
||||
"yeh=\u064A;"+
|
||||
"peh=\u06A4;"+
|
||||
"$alefmadda=\u0622;"+
|
||||
"$alefuhamza=\u0623;"+
|
||||
"$wauuhamza=\u0624;"+
|
||||
"$alefhamza=\u0625;"+
|
||||
"$yehuhamza=\u0626;"+
|
||||
"$alef=\u0627;"+
|
||||
"$beh=\u0628;"+
|
||||
"$tehmarbuta=\u0629;"+
|
||||
"$teh=\u062A;"+
|
||||
"$theh=\u062B;"+
|
||||
"$geem=\u062C;"+
|
||||
"$hah=\u062D;"+
|
||||
"$kha=\u062E;"+
|
||||
"$dal=\u062F;"+
|
||||
"$dhal=\u0630;"+
|
||||
"$reh=\u0631;"+
|
||||
"$zain=\u0632;"+
|
||||
"$seen=\u0633;"+
|
||||
"$sheen=\u0634;"+
|
||||
"$sad=\u0635;"+
|
||||
"$dad=\u0636;"+
|
||||
"$tah=\u0637;"+
|
||||
"$zah=\u0638;"+
|
||||
"$ein=\u0639;"+
|
||||
"$ghein=\u063A;"+
|
||||
"$feh=\u0641;"+
|
||||
"$qaaf=\u0642;"+
|
||||
"$kaf=\u0643;"+
|
||||
"$lam=\u0644;"+
|
||||
"$meem=\u0645;"+
|
||||
"$noon=\u0646;"+
|
||||
"$heh=\u0647;"+
|
||||
"$wau=\u0648;"+
|
||||
"$yehmaqsura=\u0649;"+
|
||||
"$yeh=\u064A;"+
|
||||
"$peh=\u06A4;"+
|
||||
|
||||
"hamza=\u0621;"+
|
||||
"fathatein=\u064B;"+
|
||||
"dammatein=\u064C;"+
|
||||
"kasratein=\u064D;"+
|
||||
"fatha=\u064E;"+
|
||||
"damma=\u064F;"+
|
||||
"kasra=\u0650;"+
|
||||
"shadda=\u0651;"+
|
||||
"sokoon=\u0652;"+
|
||||
"$hamza=\u0621;"+
|
||||
"$fathatein=\u064B;"+
|
||||
"$dammatein=\u064C;"+
|
||||
"$kasratein=\u064D;"+
|
||||
"$fatha=\u064E;"+
|
||||
"$damma=\u064F;"+
|
||||
"$kasra=\u0650;"+
|
||||
"$shadda=\u0651;"+
|
||||
"$sokoon=\u0652;"+
|
||||
|
||||
// convert English to Arabic
|
||||
"Arabic>"+
|
||||
@ -79,56 +79,56 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
|
||||
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"+
|
||||
"\u062c\u0645\u064a\u0644\u0629;"+
|
||||
|
||||
"ai>{alefmadda};"+
|
||||
"ae>{alefuhamza};"+
|
||||
"ao>{alefhamza};"+
|
||||
"aa>{alef};"+
|
||||
"an>{fathatein};"+
|
||||
"a>{fatha};"+
|
||||
"b>{beh};"+
|
||||
"c>{kaf};"+
|
||||
"{dhal})dh>{shadda};"+
|
||||
"dh>{dhal};"+
|
||||
"{dad})dd>{shadda};"+
|
||||
"dd>{dad};"+
|
||||
"{dal})d>{shadda};"+
|
||||
"d>{dal};"+
|
||||
"e>{ein};"+
|
||||
"f>{feh};"+
|
||||
"gh>{ghein};"+
|
||||
"g>{geem};"+
|
||||
"hh>{hah};"+
|
||||
"h>{heh};"+
|
||||
"ii>{kasratein};"+
|
||||
"i>{kasra};"+
|
||||
"j>{geem};"+
|
||||
"kh>{kha};"+
|
||||
"k>{kaf};"+
|
||||
"l>{lam};"+
|
||||
"m>{meem};"+
|
||||
"n>{noon};"+
|
||||
"o>{hamza};"+
|
||||
"p>{peh};"+
|
||||
"q>{qaaf};"+
|
||||
"r>{reh};"+
|
||||
"sh>{sheen};"+
|
||||
"ss>{sad};"+
|
||||
"s>{seen};"+
|
||||
"th>{theh};"+
|
||||
"tm>{tehmarbuta};"+
|
||||
"tt>{tah};"+
|
||||
"t>{teh};"+
|
||||
"uu>{dammatein};"+
|
||||
"u>{damma};"+
|
||||
"v>{beh};"+
|
||||
"we>{wauuhamza};"+
|
||||
"w>{wau};"+
|
||||
"x>{kaf}{shadda}{seen};"+
|
||||
"ye>{yehuhamza};"+
|
||||
"ym>{yehmaqsura};"+
|
||||
"y>{yeh};"+
|
||||
"zz>{zah};"+
|
||||
"z>{zain};"+
|
||||
"ai>$alefmadda;"+
|
||||
"ae>$alefuhamza;"+
|
||||
"ao>$alefhamza;"+
|
||||
"aa>$alef;"+
|
||||
"an>$fathatein;"+
|
||||
"a>$fatha;"+
|
||||
"b>$beh;"+
|
||||
"c>$kaf;"+
|
||||
"$dhal{dh>$shadda;"+
|
||||
"dh>$dhal;"+
|
||||
"$dad{dd>$shadda;"+
|
||||
"dd>$dad;"+
|
||||
"$dal{d>$shadda;"+
|
||||
"d>$dal;"+
|
||||
"e>$ein;"+
|
||||
"f>$feh;"+
|
||||
"gh>$ghein;"+
|
||||
"g>$geem;"+
|
||||
"hh>$hah;"+
|
||||
"h>$heh;"+
|
||||
"ii>$kasratein;"+
|
||||
"i>$kasra;"+
|
||||
"j>$geem;"+
|
||||
"kh>$kha;"+
|
||||
"k>$kaf;"+
|
||||
"l>$lam;"+
|
||||
"m>$meem;"+
|
||||
"n>$noon;"+
|
||||
"o>$hamza;"+
|
||||
"p>$peh;"+
|
||||
"q>$qaaf;"+
|
||||
"r>$reh;"+
|
||||
"sh>$sheen;"+
|
||||
"ss>$sad;"+
|
||||
"s>$seen;"+
|
||||
"th>$theh;"+
|
||||
"tm>$tehmarbuta;"+
|
||||
"tt>$tah;"+
|
||||
"t>$teh;"+
|
||||
"uu>$dammatein;"+
|
||||
"u>$damma;"+
|
||||
"v>$beh;"+
|
||||
"we>$wauuhamza;"+
|
||||
"w>$wau;"+
|
||||
"x>$kaf$shadda$seen;"+
|
||||
"ye>$yehuhamza;"+
|
||||
"ym>$yehmaqsura;"+
|
||||
"y>$yeh;"+
|
||||
"zz>$zah;"+
|
||||
"z>$zain;"+
|
||||
|
||||
"0>\u0660;"+ // Arabic digit 0
|
||||
"1>\u0661;"+ // Arabic digit 1
|
||||
@ -165,88 +165,88 @@ public class TransliterationRule_Latin_Arabic extends ListResourceBundle {
|
||||
|
||||
// now Arabic to English
|
||||
|
||||
"''ai<a){alefmadda};"+
|
||||
"ai<{alefmadda};"+
|
||||
"''ae<a){alefuhamza};"+
|
||||
"ae<{alefuhamza};"+
|
||||
"''ao<a){alefhamza};"+
|
||||
"ao<{alefhamza};"+
|
||||
"''aa<a){alef};"+
|
||||
"aa<{alef};"+
|
||||
"''an<a){fathatein};"+
|
||||
"an<{fathatein};"+
|
||||
"''a<a){fatha};"+
|
||||
"a<{fatha};"+
|
||||
"b<{beh};"+
|
||||
"''dh<d){dhal};"+
|
||||
"dh<{dhal};"+
|
||||
"''dd<d){dad};"+
|
||||
"dd<{dad};"+
|
||||
"''d<d){dal};"+
|
||||
"d<{dal};"+
|
||||
"''e<a){ein};"+
|
||||
"''e<w){ein};"+
|
||||
"''e<y){ein};"+
|
||||
"e<{ein};"+
|
||||
"f<{feh};"+
|
||||
"gh<{ghein};"+
|
||||
"''hh<d){hah};"+
|
||||
"''hh<t){hah};"+
|
||||
"''hh<k){hah};"+
|
||||
"''hh<s){hah};"+
|
||||
"hh<{hah};"+
|
||||
"''h<d){heh};"+
|
||||
"''h<t){heh};"+
|
||||
"''h<k){heh};"+
|
||||
"''h<s){heh};"+
|
||||
"h<{heh};"+
|
||||
"''ii<i){kasratein};"+
|
||||
"ii<{kasratein};"+
|
||||
"''i<i){kasra};"+
|
||||
"i<{kasra};"+
|
||||
"j<{geem};"+
|
||||
"kh<{kha};"+
|
||||
"x<{kaf}{shadda}{seen};"+
|
||||
"k<{kaf};"+
|
||||
"l<{lam};"+
|
||||
"''m<y){meem};"+
|
||||
"''m<t){meem};"+
|
||||
"m<{meem};"+
|
||||
"n<{noon};"+
|
||||
"''o<a){hamza};"+
|
||||
"o<{hamza};"+
|
||||
"p<{peh};"+
|
||||
"q<{qaaf};"+
|
||||
"r<{reh};"+
|
||||
"sh<{sheen};"+
|
||||
"''ss<s){sad};"+
|
||||
"ss<{sad};"+
|
||||
"''s<s){seen};"+
|
||||
"s<{seen};"+
|
||||
"th<{theh};"+
|
||||
"tm<{tehmarbuta};"+
|
||||
"''tt<t){tah};"+
|
||||
"tt<{tah};"+
|
||||
"''t<t){teh};"+
|
||||
"t<{teh};"+
|
||||
"''uu<u){dammatein};"+
|
||||
"uu<{dammatein};"+
|
||||
"''u<u){damma};"+
|
||||
"u<{damma};"+
|
||||
"we<{wauuhamza};"+
|
||||
"w<{wau};"+
|
||||
"ye<{yehuhamza};"+
|
||||
"ym<{yehmaqsura};"+
|
||||
"''y<y){yeh};"+
|
||||
"y<{yeh};"+
|
||||
"''zz<z){zah};"+
|
||||
"zz<{zah};"+
|
||||
"''z<z){zain};"+
|
||||
"z<{zain};"+
|
||||
"''ai<a{$alefmadda;"+
|
||||
"ai<$alefmadda;"+
|
||||
"''ae<a{$alefuhamza;"+
|
||||
"ae<$alefuhamza;"+
|
||||
"''ao<a{$alefhamza;"+
|
||||
"ao<$alefhamza;"+
|
||||
"''aa<a{$alef;"+
|
||||
"aa<$alef;"+
|
||||
"''an<a{$fathatein;"+
|
||||
"an<$fathatein;"+
|
||||
"''a<a{$fatha;"+
|
||||
"a<$fatha;"+
|
||||
"b<$beh;"+
|
||||
"''dh<d{$dhal;"+
|
||||
"dh<$dhal;"+
|
||||
"''dd<d{$dad;"+
|
||||
"dd<$dad;"+
|
||||
"''d<d{$dal;"+
|
||||
"d<$dal;"+
|
||||
"''e<a{$ein;"+
|
||||
"''e<w{$ein;"+
|
||||
"''e<y{$ein;"+
|
||||
"e<$ein;"+
|
||||
"f<$feh;"+
|
||||
"gh<$ghein;"+
|
||||
"''hh<d{$hah;"+
|
||||
"''hh<t{$hah;"+
|
||||
"''hh<k{$hah;"+
|
||||
"''hh<s{$hah;"+
|
||||
"hh<$hah;"+
|
||||
"''h<d{$heh;"+
|
||||
"''h<t{$heh;"+
|
||||
"''h<k{$heh;"+
|
||||
"''h<s{$heh;"+
|
||||
"h<$heh;"+
|
||||
"''ii<i{$kasratein;"+
|
||||
"ii<$kasratein;"+
|
||||
"''i<i{$kasra;"+
|
||||
"i<$kasra;"+
|
||||
"j<$geem;"+
|
||||
"kh<$kha;"+
|
||||
"x<$kaf$shadda$seen;"+
|
||||
"k<$kaf;"+
|
||||
"l<$lam;"+
|
||||
"''m<y{$meem;"+
|
||||
"''m<t{$meem;"+
|
||||
"m<$meem;"+
|
||||
"n<$noon;"+
|
||||
"''o<a{$hamza;"+
|
||||
"o<$hamza;"+
|
||||
"p<$peh;"+
|
||||
"q<$qaaf;"+
|
||||
"r<$reh;"+
|
||||
"sh<$sheen;"+
|
||||
"''ss<s{$sad;"+
|
||||
"ss<$sad;"+
|
||||
"''s<s{$seen;"+
|
||||
"s<$seen;"+
|
||||
"th<$theh;"+
|
||||
"tm<$tehmarbuta;"+
|
||||
"''tt<t{$tah;"+
|
||||
"tt<$tah;"+
|
||||
"''t<t{$teh;"+
|
||||
"t<$teh;"+
|
||||
"''uu<u{$dammatein;"+
|
||||
"uu<$dammatein;"+
|
||||
"''u<u{$damma;"+
|
||||
"u<$damma;"+
|
||||
"we<$wauuhamza;"+
|
||||
"w<$wau;"+
|
||||
"ye<$yehuhamza;"+
|
||||
"ym<$yehmaqsura;"+
|
||||
"''y<y{$yeh;"+
|
||||
"y<$yeh;"+
|
||||
"''zz<z{$zah;"+
|
||||
"zz<$zah;"+
|
||||
"''z<z{$zain;"+
|
||||
"z<$zain;"+
|
||||
|
||||
"dh<dh){shadda};"+
|
||||
"dd<dd){shadda};"+
|
||||
"''d<d){shadda};"
|
||||
"dh<dh{$shadda;"+
|
||||
"dd<dd{$shadda;"+
|
||||
"''d<d{$shadda;"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Cyrillic.java,v $
|
||||
* $Date: 2000/03/10 04:07:30 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -29,102 +29,102 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
|
||||
mappings.
|
||||
*/
|
||||
|
||||
+ "S-hacek=\u0160;"
|
||||
+ "s-hacek=\u0161;"
|
||||
+ "$S_hacek=\u0160;"
|
||||
+ "$s_hacek=\u0161;"
|
||||
|
||||
+ "YO=\u0401;"
|
||||
+ "J=\u0408;"
|
||||
+ "A=\u0410;"
|
||||
+ "B=\u0411;"
|
||||
+ "V=\u0412;"
|
||||
+ "G=\u0413;"
|
||||
+ "D=\u0414;"
|
||||
+ "YE=\u0415;"
|
||||
+ "ZH=\u0416;"
|
||||
+ "Z=\u0417;"
|
||||
+ "YI=\u0418;"
|
||||
+ "Y=\u0419;"
|
||||
+ "K=\u041A;"
|
||||
+ "L=\u041B;"
|
||||
+ "M=\u041C;"
|
||||
+ "N=\u041D;"
|
||||
+ "O=\u041E;"
|
||||
+ "P=\u041F;"
|
||||
+ "R=\u0420;"
|
||||
+ "S=\u0421;"
|
||||
+ "T=\u0422;"
|
||||
+ "U=\u0423;"
|
||||
+ "F=\u0424;"
|
||||
+ "KH=\u0425;"
|
||||
+ "TS=\u0426;"
|
||||
+ "CH=\u0427;"
|
||||
+ "SH=\u0428;"
|
||||
+ "SHCH=\u0429;"
|
||||
+ "HARD=\u042A;"
|
||||
+ "I=\u042B;"
|
||||
+ "SOFT=\u042C;"
|
||||
+ "E=\u042D;"
|
||||
+ "YU=\u042E;"
|
||||
+ "YA=\u042F;"
|
||||
+ "$YO=\u0401;"
|
||||
+ "$J=\u0408;"
|
||||
+ "$A=\u0410;"
|
||||
+ "$B=\u0411;"
|
||||
+ "$V=\u0412;"
|
||||
+ "$G=\u0413;"
|
||||
+ "$D=\u0414;"
|
||||
+ "$YE=\u0415;"
|
||||
+ "$ZH=\u0416;"
|
||||
+ "$Z=\u0417;"
|
||||
+ "$YI=\u0418;"
|
||||
+ "$Y=\u0419;"
|
||||
+ "$K=\u041A;"
|
||||
+ "$L=\u041B;"
|
||||
+ "$M=\u041C;"
|
||||
+ "$N=\u041D;"
|
||||
+ "$O=\u041E;"
|
||||
+ "$P=\u041F;"
|
||||
+ "$R=\u0420;"
|
||||
+ "$S=\u0421;"
|
||||
+ "$T=\u0422;"
|
||||
+ "$U=\u0423;"
|
||||
+ "$F=\u0424;"
|
||||
+ "$KH=\u0425;"
|
||||
+ "$TS=\u0426;"
|
||||
+ "$CH=\u0427;"
|
||||
+ "$SH=\u0428;"
|
||||
+ "$SHCH=\u0429;"
|
||||
+ "$HARD=\u042A;"
|
||||
+ "$I=\u042B;"
|
||||
+ "$SOFT=\u042C;"
|
||||
+ "$E=\u042D;"
|
||||
+ "$YU=\u042E;"
|
||||
+ "$YA=\u042F;"
|
||||
|
||||
// Lowercase
|
||||
|
||||
+ "a=\u0430;"
|
||||
+ "b=\u0431;"
|
||||
+ "v=\u0432;"
|
||||
+ "g=\u0433;"
|
||||
+ "d=\u0434;"
|
||||
+ "ye=\u0435;"
|
||||
+ "zh=\u0436;"
|
||||
+ "z=\u0437;"
|
||||
+ "yi=\u0438;"
|
||||
+ "y=\u0439;"
|
||||
+ "k=\u043a;"
|
||||
+ "l=\u043b;"
|
||||
+ "m=\u043c;"
|
||||
+ "n=\u043d;"
|
||||
+ "o=\u043e;"
|
||||
+ "p=\u043f;"
|
||||
+ "r=\u0440;"
|
||||
+ "s=\u0441;"
|
||||
+ "t=\u0442;"
|
||||
+ "u=\u0443;"
|
||||
+ "f=\u0444;"
|
||||
+ "kh=\u0445;"
|
||||
+ "ts=\u0446;"
|
||||
+ "ch=\u0447;"
|
||||
+ "sh=\u0448;"
|
||||
+ "shch=\u0449;"
|
||||
+ "hard=\u044a;"
|
||||
+ "i=\u044b;"
|
||||
+ "soft=\u044c;"
|
||||
+ "e=\u044d;"
|
||||
+ "yu=\u044e;"
|
||||
+ "ya=\u044f;"
|
||||
+ "$a=\u0430;"
|
||||
+ "$b=\u0431;"
|
||||
+ "$v=\u0432;"
|
||||
+ "$g=\u0433;"
|
||||
+ "$d=\u0434;"
|
||||
+ "$ye=\u0435;"
|
||||
+ "$zh=\u0436;"
|
||||
+ "$z=\u0437;"
|
||||
+ "$yi=\u0438;"
|
||||
+ "$y=\u0439;"
|
||||
+ "$k=\u043a;"
|
||||
+ "$l=\u043b;"
|
||||
+ "$m=\u043c;"
|
||||
+ "$n=\u043d;"
|
||||
+ "$o=\u043e;"
|
||||
+ "$p=\u043f;"
|
||||
+ "$r=\u0440;"
|
||||
+ "$s=\u0441;"
|
||||
+ "$t=\u0442;"
|
||||
+ "$u=\u0443;"
|
||||
+ "$f=\u0444;"
|
||||
+ "$kh=\u0445;"
|
||||
+ "$ts=\u0446;"
|
||||
+ "$ch=\u0447;"
|
||||
+ "$sh=\u0448;"
|
||||
+ "$shch=\u0449;"
|
||||
+ "$hard=\u044a;"
|
||||
+ "$i=\u044b;"
|
||||
+ "$soft=\u044c;"
|
||||
+ "$e=\u044d;"
|
||||
+ "$yu=\u044e;"
|
||||
+ "$ya=\u044f;"
|
||||
|
||||
+ "yo=\u0451;"
|
||||
+ "j=\u0458;"
|
||||
+ "$yo=\u0451;"
|
||||
+ "$j=\u0458;"
|
||||
|
||||
// variables
|
||||
// some are duplicated so lowercasing works
|
||||
|
||||
+ "csoft=[eiyEIY];"
|
||||
+ "CSOFT=[eiyEIY];"
|
||||
+ "$csoft=[eiyEIY];"
|
||||
+ "$CSOFT=[eiyEIY];"
|
||||
|
||||
+ "BECOMES_H=[{HARD}{hard}];"
|
||||
+ "becomes_h=[{HARD}{hard}];"
|
||||
+ "$BECOMES_H=[$HARD$hard];"
|
||||
+ "$becomes_h=[$HARD$hard];"
|
||||
|
||||
+ "BECOMES_S=[{S}{s}];"
|
||||
+ "becomes_s=[{S}{s}];"
|
||||
+ "$BECOMES_S=[$S$s];"
|
||||
+ "$becomes_s=[$S$s];"
|
||||
|
||||
+ "BECOMES_C=[{CH}{ch}];"
|
||||
+ "becomes_c=[{CH}{ch}];"
|
||||
+ "$BECOMES_C=[$CH$ch];"
|
||||
+ "$becomes_c=[$CH$ch];"
|
||||
|
||||
+ "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
+ "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
+ "$BECOMES_VOWEL=[$A$E$I$O$U$a$e$i$o$u];"
|
||||
+ "$becomes_vowel=[$A$E$I$O$U$a$e$i$o$u];"
|
||||
|
||||
+ "letter=[[:Lu:][:Ll:]];"
|
||||
+ "lower=[[:Ll:]];"
|
||||
+ "$letter=[[:Lu:][:Ll:]];"
|
||||
+ "$lower=[[:Ll:]];"
|
||||
|
||||
/*
|
||||
Modified to combine display transliterator and typing transliterator.
|
||||
@ -137,17 +137,17 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
+ "Ch>{CH};" + "Ch<{CH}({lower};"
|
||||
+ "Kh>{KH};" + "Kh<{KH}({lower};"
|
||||
+ "Shch>{SHCH};" + "Shch<{SHCH}({lower};"
|
||||
+ "Sh>{SH};" + "Sh<{SH}({lower};"
|
||||
+ "Ts>{TS};" + "Ts<{TS}({lower};"
|
||||
+ "Zh>{ZH};" + "Zh<{ZH}({lower};"
|
||||
+ "Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
+ "Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
+ "Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
+ "Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
+ "Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
+ "Ch>$CH;" + "Ch<$CH}$lower;"
|
||||
+ "Kh>$KH;" + "Kh<$KH}$lower;"
|
||||
+ "Shch>$SHCH;" + "Shch<$SHCH}$lower;"
|
||||
+ "Sh>$SH;" + "Sh<$SH}$lower;"
|
||||
+ "Ts>$TS;" + "Ts<$TS}$lower;"
|
||||
+ "Zh>$ZH;" + "Zh<$ZH}$lower;"
|
||||
+ "Yi>$YI;" //+ "Yi<$YI}$lower;"
|
||||
+ "Ye>$YE;" //+ "Ye<$YE}$lower;"
|
||||
+ "Yo>$YO;" //+ "Yo<$YO}$lower;"
|
||||
+ "Yu>$YU;" //+ "Yu<$YU}$lower;"
|
||||
+ "Ya>$YA;" //+ "Ya<$YA}$lower;"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
@ -156,77 +156,77 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
+ "SHTCH>{SHCH};"
|
||||
+ "TCH>{CH};"
|
||||
+ "TH>{Z};"
|
||||
+ "Q>{K};"
|
||||
+ "WH>{V};"
|
||||
+ "W>{V};"
|
||||
+ "X>{K}{S};" //+ "X<{K}{S};"
|
||||
+ "SHTCH>$SHCH;"
|
||||
+ "TCH>$CH;"
|
||||
+ "TH>$Z;"
|
||||
+ "Q>$K;"
|
||||
+ "WH>$V;"
|
||||
+ "W>$V;"
|
||||
+ "X>$K$S;" //+ "X<$K$S;"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
+ "SH''<{SH}({BECOMES_C};"
|
||||
+ "T''<{T}({BECOMES_S};"
|
||||
+ "SH''<$SH}$BECOMES_C;"
|
||||
+ "T''<$T}$BECOMES_S;"
|
||||
|
||||
+ "K''<{K}({BECOMES_H};"
|
||||
+ "S''<{S}({BECOMES_H};"
|
||||
+ "T''<{T}({BECOMES_H};"
|
||||
+ "Z''<{Z}({BECOMES_H};"
|
||||
+ "K''<$K}$BECOMES_H;"
|
||||
+ "S''<$S}$BECOMES_H;"
|
||||
+ "T''<$T}$BECOMES_H;"
|
||||
+ "Z''<$Z}$BECOMES_H;"
|
||||
|
||||
+ "Y''<{Y}({BECOMES_VOWEL};"
|
||||
+ "Y''<$Y}$BECOMES_VOWEL;"
|
||||
|
||||
// Main letters
|
||||
|
||||
+ "A<>{A};"
|
||||
+ "B<>{B};"
|
||||
+ "CH<>{CH};"
|
||||
+ "D<>{D};"
|
||||
+ "E<>{E};"
|
||||
+ "F<>{F};"
|
||||
+ "G<>{G};"
|
||||
+ "\u00cc<>{YI};"
|
||||
+ "I<>{I};"
|
||||
+ "KH<>{KH};"
|
||||
+ "K<>{K};"
|
||||
+ "L<>{L};"
|
||||
+ "M<>{M};"
|
||||
+ "N<>{N};"
|
||||
+ "O<>{O};"
|
||||
+ "P<>{P};"
|
||||
+ "R<>{R};"
|
||||
+ "SHCH<>{SHCH};"
|
||||
+ "SH>{SH};" //+ "SH<{SH};"
|
||||
+ "{S-hacek}<>{SH};"
|
||||
+ "S<>{S};"
|
||||
+ "TS<>{TS};"
|
||||
+ "T<>{T};"
|
||||
+ "U<>{U};"
|
||||
+ "V<>{V};"
|
||||
+ "A<>$A;"
|
||||
+ "B<>$B;"
|
||||
+ "CH<>$CH;"
|
||||
+ "D<>$D;"
|
||||
+ "E<>$E;"
|
||||
+ "F<>$F;"
|
||||
+ "G<>$G;"
|
||||
+ "\u00cc<>$YI;"
|
||||
+ "I<>$I;"
|
||||
+ "KH<>$KH;"
|
||||
+ "K<>$K;"
|
||||
+ "L<>$L;"
|
||||
+ "M<>$M;"
|
||||
+ "N<>$N;"
|
||||
+ "O<>$O;"
|
||||
+ "P<>$P;"
|
||||
+ "R<>$R;"
|
||||
+ "SHCH<>$SHCH;"
|
||||
+ "SH>$SH;" //+ "SH<$SH;"
|
||||
+ "$S_hacek<>$SH;"
|
||||
+ "S<>$S;"
|
||||
+ "TS<>$TS;"
|
||||
+ "T<>$T;"
|
||||
+ "U<>$U;"
|
||||
+ "V<>$V;"
|
||||
//\u00cc\u00c0\u00c8\u00d2\u00d9
|
||||
+ "YE>{YE};" //+ "YE<{YE};"
|
||||
+ "\u00c8<>{YE};"
|
||||
+ "YO>{YO};" //+ "YO<{YO};"
|
||||
+ "\u00d2<>{YO};"
|
||||
+ "YU>{YU};" //+ "YU<{YU};"
|
||||
+ "\u00d9<>{YU};"
|
||||
+ "YA>{YA};" //+ "YA<{YA};"
|
||||
+ "\u00c0<>{YA};"
|
||||
+ "Y<>{Y};"
|
||||
+ "ZH<>{ZH};"
|
||||
+ "Z<>{Z};"
|
||||
+ "YE>$YE;" //+ "YE<$YE;"
|
||||
+ "\u00c8<>$YE;"
|
||||
+ "YO>$YO;" //+ "YO<$YO;"
|
||||
+ "\u00d2<>$YO;"
|
||||
+ "YU>$YU;" //+ "YU<$YU;"
|
||||
+ "\u00d9<>$YU;"
|
||||
+ "YA>$YA;" //+ "YA<$YA;"
|
||||
+ "\u00c0<>$YA;"
|
||||
+ "Y<>$Y;"
|
||||
+ "ZH<>$ZH;"
|
||||
+ "Z<>$Z;"
|
||||
|
||||
+ "H<>{HARD};"
|
||||
+ "\u0178<>{SOFT};"
|
||||
+ "H<>$HARD;"
|
||||
+ "\u0178<>$SOFT;"
|
||||
|
||||
// Non-russian
|
||||
|
||||
+ "J<>{J};"
|
||||
+ "J<>$J;"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
+ "C({csoft}>{S};"
|
||||
+ "C>{K};"
|
||||
+ "C}$csoft>$S;"
|
||||
+ "C>$K;"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
@ -235,77 +235,77 @@ public class TransliterationRule_Latin_Cyrillic extends ListResourceBundle {
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
+ "shtch>{shch};"
|
||||
+ "tch>{ch};"
|
||||
+ "th>{z};"
|
||||
+ "q>{k};"
|
||||
+ "wh>{v};"
|
||||
+ "w>{v};"
|
||||
+ "x>{k}{s};" //+ "x<{k}{s};"
|
||||
+ "shtch>$shch;"
|
||||
+ "tch>$ch;"
|
||||
+ "th>$z;"
|
||||
+ "q>$k;"
|
||||
+ "wh>$v;"
|
||||
+ "w>$v;"
|
||||
+ "x>$k$s;" //+ "x<$k$s;"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
+ "sh''<{sh}({becomes_c};"
|
||||
+ "t''<{t}({becomes_s};"
|
||||
+ "sh''<$sh}$becomes_c;"
|
||||
+ "t''<$t}$becomes_s;"
|
||||
|
||||
+ "k''<{k}({becomes_h};"
|
||||
+ "s''<{s}({becomes_h};"
|
||||
+ "t''<{t}({becomes_h};"
|
||||
+ "z''<{z}({becomes_h};"
|
||||
+ "k''<$k}$becomes_h;"
|
||||
+ "s''<$s}$becomes_h;"
|
||||
+ "t''<$t}$becomes_h;"
|
||||
+ "z''<$z}$becomes_h;"
|
||||
|
||||
+ "y''<{y}({becomes_vowel};"
|
||||
+ "y''<$y}$becomes_vowel;"
|
||||
|
||||
// main letters
|
||||
|
||||
+ "a<>{a};"
|
||||
+ "b<>{b};"
|
||||
+ "ch<>{ch};"
|
||||
+ "d<>{d};"
|
||||
+ "e<>{e};"
|
||||
+ "f<>{f};"
|
||||
+ "g<>{g};"
|
||||
+ "\u00ec<>{yi};"
|
||||
+ "i<>{i};"
|
||||
+ "kh<>{kh};"
|
||||
+ "k<>{k};"
|
||||
+ "l<>{l};"
|
||||
+ "m<>{m};"
|
||||
+ "n<>{n};"
|
||||
+ "o<>{o};"
|
||||
+ "p<>{p};"
|
||||
+ "r<>{r};"
|
||||
+ "shch<>{shch};"
|
||||
+ "sh>{sh};" //+ "sh<{sh};"
|
||||
+ "{s-hacek}<>{sh};"
|
||||
+ "s<>{s};"
|
||||
+ "ts<>{ts};"
|
||||
+ "t<>{t};"
|
||||
+ "u<>{u};"
|
||||
+ "v<>{v};"
|
||||
+ "a<>$a;"
|
||||
+ "b<>$b;"
|
||||
+ "ch<>$ch;"
|
||||
+ "d<>$d;"
|
||||
+ "e<>$e;"
|
||||
+ "f<>$f;"
|
||||
+ "g<>$g;"
|
||||
+ "\u00ec<>$yi;"
|
||||
+ "i<>$i;"
|
||||
+ "kh<>$kh;"
|
||||
+ "k<>$k;"
|
||||
+ "l<>$l;"
|
||||
+ "m<>$m;"
|
||||
+ "n<>$n;"
|
||||
+ "o<>$o;"
|
||||
+ "p<>$p;"
|
||||
+ "r<>$r;"
|
||||
+ "shch<>$shch;"
|
||||
+ "sh>$sh;" //+ "sh<$sh;"
|
||||
+ "$s_hacek<>$sh;"
|
||||
+ "s<>$s;"
|
||||
+ "ts<>$ts;"
|
||||
+ "t<>$t;"
|
||||
+ "u<>$u;"
|
||||
+ "v<>$v;"
|
||||
//\u00ec\u00e0\u00e8\u00f2\u00f9
|
||||
+ "ye>{ye};" //+ "ye<{ye};"
|
||||
+ "\u00e8<>{ye};"
|
||||
+ "yo>{yo};" //+ "yo<{yo};"
|
||||
+ "\u00f2<>{yo};"
|
||||
+ "yu>{yu};" //+ "yu<{yu};"
|
||||
+ "\u00f9<>{yu};"
|
||||
+ "ya>{ya};" //+ "ya<{ya};"
|
||||
+ "\u00e0<>{ya};"
|
||||
+ "y<>{y};"
|
||||
+ "zh<>{zh};"
|
||||
+ "z<>{z};"
|
||||
+ "ye>$ye;" //+ "ye<$ye;"
|
||||
+ "\u00e8<>$ye;"
|
||||
+ "yo>$yo;" //+ "yo<$yo;"
|
||||
+ "\u00f2<>$yo;"
|
||||
+ "yu>$yu;" //+ "yu<$yu;"
|
||||
+ "\u00f9<>$yu;"
|
||||
+ "ya>$ya;" //+ "ya<$ya;"
|
||||
+ "\u00e0<>$ya;"
|
||||
+ "y<>$y;"
|
||||
+ "zh<>$zh;"
|
||||
+ "z<>$z;"
|
||||
|
||||
+ "h<>{hard};"
|
||||
+ "\u00ff<>{soft};"
|
||||
+ "h<>$hard;"
|
||||
+ "\u00ff<>$soft;"
|
||||
|
||||
// non-Russian
|
||||
|
||||
+ "j<>{j};"
|
||||
+ "j<>$j;"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
+ "c({csoft}>{s};"
|
||||
+ "c>{k};"
|
||||
+ "c}$csoft>$s;"
|
||||
+ "c>$k;"
|
||||
|
||||
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Devanagari.java,v $
|
||||
* $Date: 2000/03/10 04:07:31 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -54,91 +54,92 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
|
||||
//#####################################################################
|
||||
|
||||
//consonants
|
||||
"candrabindu=\u0901;"
|
||||
+ "bindu=\u0902;"
|
||||
+ "visarga=\u0903;"
|
||||
"$candrabindu=\u0901;"
|
||||
+ "$bindu=\u0902;"
|
||||
+ "$visarga=\u0903;"
|
||||
|
||||
// w<vowel> represents the stand-alone form
|
||||
+ "wa=\u0905;"
|
||||
+ "waa=\u0906;"
|
||||
+ "wi=\u0907;"
|
||||
+ "wii=\u0908;"
|
||||
+ "wu=\u0909;"
|
||||
+ "wuu=\u090A;"
|
||||
+ "wr=\u090B;"
|
||||
+ "wl=\u090C;"
|
||||
+ "we=\u090F;"
|
||||
+ "wai=\u0910;"
|
||||
+ "wo=\u0913;"
|
||||
+ "wau=\u0914;"
|
||||
+ "$wa=\u0905;"
|
||||
+ "$waa=\u0906;"
|
||||
+ "$wi=\u0907;"
|
||||
+ "$wii=\u0908;"
|
||||
+ "$wu=\u0909;"
|
||||
+ "$wuu=\u090A;"
|
||||
+ "$wr=\u090B;"
|
||||
+ "$wl=\u090C;"
|
||||
+ "$we=\u090F;"
|
||||
+ "$wai=\u0910;"
|
||||
+ "$wo=\u0913;"
|
||||
+ "$wau=\u0914;"
|
||||
|
||||
+ "ka=\u0915;"
|
||||
+ "kha=\u0916;"
|
||||
+ "ga=\u0917;"
|
||||
+ "gha=\u0918;"
|
||||
+ "nga=\u0919;"
|
||||
+ "$ka=\u0915;"
|
||||
+ "$kha=\u0916;"
|
||||
+ "$ga=\u0917;"
|
||||
+ "$gha=\u0918;"
|
||||
+ "$nga=\u0919;"
|
||||
|
||||
+ "ca=\u091A;"
|
||||
+ "cha=\u091B;"
|
||||
+ "ja=\u091C;"
|
||||
+ "jha=\u091D;"
|
||||
+ "nya=\u091E;"
|
||||
+ "$ca=\u091A;"
|
||||
+ "$cha=\u091B;"
|
||||
+ "$ja=\u091C;"
|
||||
+ "$jha=\u091D;"
|
||||
+ "$nya=\u091E;"
|
||||
|
||||
+ "tta=\u091F;"
|
||||
+ "ttha=\u0920;"
|
||||
+ "dda=\u0921;"
|
||||
+ "ddha=\u0922;"
|
||||
+ "nna=\u0923;"
|
||||
+ "$tta=\u091F;"
|
||||
+ "$ttha=\u0920;"
|
||||
+ "$dda=\u0921;"
|
||||
+ "$ddha=\u0922;"
|
||||
+ "$nna=\u0923;"
|
||||
|
||||
+ "ta=\u0924;"
|
||||
+ "tha=\u0925;"
|
||||
+ "da=\u0926;"
|
||||
+ "dha=\u0927;"
|
||||
+ "na=\u0928;"
|
||||
+ "$ta=\u0924;"
|
||||
+ "$tha=\u0925;"
|
||||
+ "$da=\u0926;"
|
||||
+ "$dha=\u0927;"
|
||||
+ "$na=\u0928;"
|
||||
|
||||
+ "pa=\u092A;"
|
||||
+ "pha=\u092B;"
|
||||
+ "ba=\u092C;"
|
||||
+ "bha=\u092D;"
|
||||
+ "ma=\u092E;"
|
||||
+ "$pa=\u092A;"
|
||||
+ "$pha=\u092B;"
|
||||
+ "$ba=\u092C;"
|
||||
+ "$bha=\u092D;"
|
||||
+ "$ma=\u092E;"
|
||||
|
||||
+ "ya=\u092F;"
|
||||
+ "ra=\u0930;"
|
||||
+ "rra=\u0931;"
|
||||
+ "la=\u0933;"
|
||||
+ "va=\u0935;"
|
||||
+ "$ya=\u092F;"
|
||||
+ "$ra=\u0930;"
|
||||
+ "$rra=\u0931;"
|
||||
+ "$la=\u0933;"
|
||||
+ "$va=\u0935;"
|
||||
|
||||
+ "sha=\u0936;"
|
||||
+ "ssa=\u0937;"
|
||||
+ "sa=\u0938;"
|
||||
+ "ha=\u0939;"
|
||||
+ "$sha=\u0936;"
|
||||
+ "$ssa=\u0937;"
|
||||
+ "$sa=\u0938;"
|
||||
+ "$ha=\u0939;"
|
||||
|
||||
// <vowel> represents the dependent form
|
||||
+ "aa=\u093E;"
|
||||
+ "i=\u093F;"
|
||||
+ "ii=\u0940;"
|
||||
+ "u=\u0941;"
|
||||
+ "uu=\u0942;"
|
||||
+ "rh=\u0943;"
|
||||
+ "lh=\u0944;"
|
||||
+ "e=\u0947;"
|
||||
+ "ai=\u0948;"
|
||||
+ "o=\u094B;"
|
||||
+ "au=\u094C;"
|
||||
+ "$aa=\u093E;"
|
||||
+ "$i=\u093F;"
|
||||
+ "$ii=\u0940;"
|
||||
+ "$u=\u0941;"
|
||||
+ "$uu=\u0942;"
|
||||
+ "$rh=\u0943;"
|
||||
+ "$lh=\u0944;"
|
||||
+ "$e=\u0947;"
|
||||
+ "$ai=\u0948;"
|
||||
+ "$o=\u094B;"
|
||||
+ "$au=\u094C;"
|
||||
|
||||
+ "virama=\u094D;"
|
||||
+ "$virama=\u094D;"
|
||||
|
||||
+ "wrr=\u0960;"
|
||||
+ "rrh=\u0962;"
|
||||
+ "$wrr=\u0960;"
|
||||
+ "$rrh=\u0962;"
|
||||
|
||||
+ "danda=\u0964;"
|
||||
+ "doubleDanda=\u0965;"
|
||||
+ "depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
|
||||
+ "depVowelBelow=[\u0941-\u0944];"
|
||||
+ "endThing=[{danda}{doubleDanda}\u0000-\u08FF\u0980-\uFFFF];"
|
||||
+ "$danda=\u0964;"
|
||||
+ "$doubleDanda=\u0965;"
|
||||
+ "$depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
|
||||
+ "$depVowelBelow=[\u0941-\u0944];"
|
||||
+ "$endThing=[$danda$doubleDanda \u0000-\u08FF\u0980-\uFFFF];"
|
||||
|
||||
+ "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
|
||||
+ "%=[bcdfghjklmnpqrstvwxyz];"
|
||||
// Unused -- these variable names are illegal and need to be changed
|
||||
// + "$&=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o];"
|
||||
// + "$%=[bcdfghjklmnpqrstvwxyz];"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Latin letters to Native letters
|
||||
@ -147,106 +148,106 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
+ "mm>{bindu};"
|
||||
+ "x>{visarga};"
|
||||
+ "mm>$bindu;"
|
||||
+ "x>$visarga;"
|
||||
|
||||
// convert to independent forms at start of word or syllable:
|
||||
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
// Moved up [LIU]
|
||||
|
||||
+ "aa>{waa};"
|
||||
+ "ai>{wai};"
|
||||
+ "au>{wau};"
|
||||
+ "ii>{wii};"
|
||||
+ "i>{wi};"
|
||||
+ "uu>{wuu};"
|
||||
+ "u>{wu};"
|
||||
+ "rrh>{wrr};"
|
||||
+ "rh>{wr};"
|
||||
+ "lh>{wl};"
|
||||
+ "e>{we};"
|
||||
+ "o>{wo};"
|
||||
+ "a>{wa};"
|
||||
+ "aa>$waa;"
|
||||
+ "ai>$wai;"
|
||||
+ "au>$wau;"
|
||||
+ "ii>$wii;"
|
||||
+ "i>$wi;"
|
||||
+ "uu>$wuu;"
|
||||
+ "u>$wu;"
|
||||
+ "rrh>$wrr;"
|
||||
+ "rh>$wr;"
|
||||
+ "lh>$wl;"
|
||||
+ "e>$we;"
|
||||
+ "o>$wo;"
|
||||
+ "a>$wa;"
|
||||
|
||||
// normal consonants
|
||||
|
||||
+ "kh>{kha}|{virama};"
|
||||
+ "k>{ka}|{virama};"
|
||||
+ "q>{ka}|{virama};"
|
||||
+ "gh>{gha}|{virama};"
|
||||
+ "g>{ga}|{virama};"
|
||||
+ "ng>{nga}|{virama};"
|
||||
+ "ch>{cha}|{virama};"
|
||||
+ "c>{ca}|{virama};"
|
||||
+ "jh>{jha}|{virama};"
|
||||
+ "j>{ja}|{virama};"
|
||||
+ "ny>{nya}|{virama};"
|
||||
+ "tth>{ttha}|{virama};"
|
||||
+ "tt>{tta}|{virama};"
|
||||
+ "ddh>{ddha}|{virama};"
|
||||
+ "dd>{dda}|{virama};"
|
||||
+ "nn>{nna}|{virama};"
|
||||
+ "th>{tha}|{virama};"
|
||||
+ "t>{ta}|{virama};"
|
||||
+ "dh>{dha}|{virama};"
|
||||
+ "d>{da}|{virama};"
|
||||
+ "n>{na}|{virama};"
|
||||
+ "ph>{pha}|{virama};"
|
||||
+ "p>{pa}|{virama};"
|
||||
+ "bh>{bha}|{virama};"
|
||||
+ "b>{ba}|{virama};"
|
||||
+ "m>{ma}|{virama};"
|
||||
+ "y>{ya}|{virama};"
|
||||
+ "r>{ra}|{virama};"
|
||||
+ "l>{la}|{virama};"
|
||||
+ "v>{va}|{virama};"
|
||||
+ "f>{va}|{virama};"
|
||||
+ "w>{va}|{virama};"
|
||||
+ "sh>{sha}|{virama};"
|
||||
+ "ss>{ssa}|{virama};"
|
||||
+ "s>{sa}|{virama};"
|
||||
+ "z>{sa}|{virama};"
|
||||
+ "h>{ha}|{virama};"
|
||||
+ "kh>$kha|$virama;"
|
||||
+ "k>$ka|$virama;"
|
||||
+ "q>$ka|$virama;"
|
||||
+ "gh>$gha|$virama;"
|
||||
+ "g>$ga|$virama;"
|
||||
+ "ng>$nga|$virama;"
|
||||
+ "ch>$cha|$virama;"
|
||||
+ "c>$ca|$virama;"
|
||||
+ "jh>$jha|$virama;"
|
||||
+ "j>$ja|$virama;"
|
||||
+ "ny>$nya|$virama;"
|
||||
+ "tth>$ttha|$virama;"
|
||||
+ "tt>$tta|$virama;"
|
||||
+ "ddh>$ddha|$virama;"
|
||||
+ "dd>$dda|$virama;"
|
||||
+ "nn>$nna|$virama;"
|
||||
+ "th>$tha|$virama;"
|
||||
+ "t>$ta|$virama;"
|
||||
+ "dh>$dha|$virama;"
|
||||
+ "d>$da|$virama;"
|
||||
+ "n>$na|$virama;"
|
||||
+ "ph>$pha|$virama;"
|
||||
+ "p>$pa|$virama;"
|
||||
+ "bh>$bha|$virama;"
|
||||
+ "b>$ba|$virama;"
|
||||
+ "m>$ma|$virama;"
|
||||
+ "y>$ya|$virama;"
|
||||
+ "r>$ra|$virama;"
|
||||
+ "l>$la|$virama;"
|
||||
+ "v>$va|$virama;"
|
||||
+ "f>$va|$virama;"
|
||||
+ "w>$va|$virama;"
|
||||
+ "sh>$sha|$virama;"
|
||||
+ "ss>$ssa|$virama;"
|
||||
+ "s>$sa|$virama;"
|
||||
+ "z>$sa|$virama;"
|
||||
+ "h>$ha|$virama;"
|
||||
|
||||
+ ".>{danda};"
|
||||
+ "{danda}.>{doubleDanda};"
|
||||
+ "{depVowelAbove})~>{bindu};"
|
||||
+ "{depVowelBelow})~>{candrabindu};"
|
||||
+ ".>$danda;"
|
||||
+ "$danda.>$doubleDanda;"
|
||||
+ "$depVowelAbove{~>$bindu;"
|
||||
+ "$depVowelBelow{~>$candrabindu;"
|
||||
|
||||
// convert to dependent forms after consonant with no vowel:
|
||||
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
|
||||
+ "{virama}aa>{aa};"
|
||||
+ "{virama}ai>{ai};"
|
||||
+ "{virama}au>{au};"
|
||||
+ "{virama}ii>{ii};"
|
||||
+ "{virama}i>{i};"
|
||||
+ "{virama}uu>{uu};"
|
||||
+ "{virama}u>{u};"
|
||||
+ "{virama}rrh>{rrh};"
|
||||
+ "{virama}rh>{rh};"
|
||||
+ "{virama}lh>{lh};"
|
||||
+ "{virama}e>{e};"
|
||||
+ "{virama}o>{o};"
|
||||
+ "{virama}a>;"
|
||||
+ "$virama aa>$aa;"
|
||||
+ "$virama ai>$ai;"
|
||||
+ "$virama au>$au;"
|
||||
+ "$virama ii>$ii;"
|
||||
+ "$virama i>$i;"
|
||||
+ "$virama uu>$uu;"
|
||||
+ "$virama u>$u;"
|
||||
+ "$virama rrh>$rrh;"
|
||||
+ "$virama rh>$rh;"
|
||||
+ "$virama lh>$lh;"
|
||||
+ "$virama e>$e;"
|
||||
+ "$virama o>$o;"
|
||||
+ "$virama a>;"
|
||||
|
||||
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
|
||||
+ "{virama}''aa>{waa};"
|
||||
+ "{virama}''ai>{wai};"
|
||||
+ "{virama}''au>{wau};"
|
||||
+ "{virama}''ii>{wii};"
|
||||
+ "{virama}''i>{wi};"
|
||||
+ "{virama}''uu>{wuu};"
|
||||
+ "{virama}''u>{wu};"
|
||||
+ "{virama}''rrh>{wrr};"
|
||||
+ "{virama}''rh>{wr};"
|
||||
+ "{virama}''lh>{wl};"
|
||||
+ "{virama}''e>{we};"
|
||||
+ "{virama}''o>{wo};"
|
||||
+ "{virama}''a>{wa};"
|
||||
+ "$virama''aa>$waa;"
|
||||
+ "$virama''ai>$wai;"
|
||||
+ "$virama''au>$wau;"
|
||||
+ "$virama''ii>$wii;"
|
||||
+ "$virama''i>$wi;"
|
||||
+ "$virama''uu>$wuu;"
|
||||
+ "$virama''u>$wu;"
|
||||
+ "$virama''rrh>$wrr;"
|
||||
+ "$virama''rh>$wr;"
|
||||
+ "$virama''lh>$wl;"
|
||||
+ "$virama''e>$we;"
|
||||
+ "$virama''o>$wo;"
|
||||
+ "$virama''a>$wa;"
|
||||
|
||||
+ "{virama}({endThing}>;"
|
||||
+ "$virama}$endThing>;"
|
||||
|
||||
// convert any left-over apostrophes used for separation
|
||||
|
||||
@ -258,163 +259,163 @@ public class TransliterationRule_Latin_Devanagari extends ListResourceBundle {
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
+ "mm<{bindu};"
|
||||
+ "x<{visarga};"
|
||||
+ "mm<$bindu;"
|
||||
+ "x<$visarga;"
|
||||
|
||||
// normal consonants
|
||||
|
||||
+ "kh<{kha}(&;"
|
||||
+ "kha<{kha};"
|
||||
+ "k''<{ka}{virama}({ha};"
|
||||
+ "k<{ka}(&;"
|
||||
+ "ka<{ka};"
|
||||
+ "gh<{gha}(&;"
|
||||
+ "gha<{gha};"
|
||||
+ "g''<{ga}{virama}({ha};"
|
||||
+ "g<{ga}(&;"
|
||||
+ "ga<{ga};"
|
||||
+ "ng<{nga}(&;"
|
||||
+ "nga<{nga};"
|
||||
+ "ch<{cha}(&;"
|
||||
+ "cha<{cha};"
|
||||
+ "c''<{ca}{virama}({ha};"
|
||||
+ "c<{ca}(&;"
|
||||
+ "ca<{ca};"
|
||||
+ "jh<{jha}(&;"
|
||||
+ "jha<{jha};"
|
||||
+ "j''<{ja}{virama}({ha};"
|
||||
+ "j<{ja}(&;"
|
||||
+ "ja<{ja};"
|
||||
+ "ny<{nya}(&;"
|
||||
+ "nya<{nya};"
|
||||
+ "tth<{ttha}(&;"
|
||||
+ "ttha<{ttha};"
|
||||
+ "tt''<{tta}{virama}({ha};"
|
||||
+ "tt<{tta}(&;"
|
||||
+ "tta<{tta};"
|
||||
+ "ddh<{ddha}(&;"
|
||||
+ "ddha<{ddha};"
|
||||
+ "dd''<{dda}(&{ha};"
|
||||
+ "dd<{dda}(&;"
|
||||
+ "dda<{dda};"
|
||||
+ "dh<{dha}(&;"
|
||||
+ "dha<{dha};"
|
||||
+ "d''<{da}{virama}({ha};"
|
||||
+ "d''<{da}{virama}({ddha};"
|
||||
+ "d''<{da}{virama}({dda};"
|
||||
+ "d''<{da}{virama}({dha};"
|
||||
+ "d''<{da}{virama}({da};"
|
||||
+ "d<{da}(&;"
|
||||
+ "da<{da};"
|
||||
+ "th<{tha}(&;"
|
||||
+ "tha<{tha};"
|
||||
+ "t''<{ta}{virama}({ha};"
|
||||
+ "t''<{ta}{virama}({ttha};"
|
||||
+ "t''<{ta}{virama}({tta};"
|
||||
+ "t''<{ta}{virama}({tha};"
|
||||
+ "t''<{ta}{virama}({ta};"
|
||||
+ "t<{ta}(&;"
|
||||
+ "ta<{ta};"
|
||||
+ "n''<{na}{virama}({ga};"
|
||||
+ "n''<{na}{virama}({ya};"
|
||||
+ "n<{na}(&;"
|
||||
+ "na<{na};"
|
||||
+ "ph<{pha}(&;"
|
||||
+ "pha<{pha};"
|
||||
+ "p''<{pa}{virama}({ha};"
|
||||
+ "p<{pa}(&;"
|
||||
+ "pa<{pa};"
|
||||
+ "bh<{bha}(&;"
|
||||
+ "bha<{bha};"
|
||||
+ "b''<{ba}{virama}({ha};"
|
||||
+ "b<{ba}(&;"
|
||||
+ "ba<{ba};"
|
||||
+ "m''<{ma}{virama}({ma};"
|
||||
+ "m''<{ma}{virama}({bindu};"
|
||||
+ "m<{ma}(&;"
|
||||
+ "ma<{ma};"
|
||||
+ "y<{ya}(&;"
|
||||
+ "ya<{ya};"
|
||||
+ "r''<{ra}{virama}({ha};"
|
||||
+ "r<{ra}(&;"
|
||||
+ "ra<{ra};"
|
||||
+ "l''<{la}{virama}({ha};"
|
||||
+ "l<{la}(&;"
|
||||
+ "la<{la};"
|
||||
+ "v<{va}(&;"
|
||||
+ "va<{va};"
|
||||
+ "sh<{sha}(&;"
|
||||
+ "sha<{sha};"
|
||||
+ "ss<{ssa}(&;"
|
||||
+ "ssa<{ssa};"
|
||||
+ "s''<{sa}{virama}({ha};"
|
||||
+ "s''<{sa}{virama}({sha};"
|
||||
+ "s''<{sa}{virama}({ssa};"
|
||||
+ "s''<{sa}{virama}({sa};"
|
||||
+ "s<{sa}(&;"
|
||||
+ "sa<{sa};"
|
||||
+ "h<{ha}(&;"
|
||||
+ "ha<{ha};"
|
||||
+ "kh<$kha}&;"
|
||||
+ "kha<$kha;"
|
||||
+ "k''<$ka$virama}$ha;"
|
||||
+ "k<$ka}&;"
|
||||
+ "ka<$ka;"
|
||||
+ "gh<$gha}&;"
|
||||
+ "gha<$gha;"
|
||||
+ "g''<$ga$virama}$ha;"
|
||||
+ "g<$ga}&;"
|
||||
+ "ga<$ga;"
|
||||
+ "ng<$nga}&;"
|
||||
+ "nga<$nga;"
|
||||
+ "ch<$cha}&;"
|
||||
+ "cha<$cha;"
|
||||
+ "c''<$ca$virama}$ha;"
|
||||
+ "c<$ca}&;"
|
||||
+ "ca<$ca;"
|
||||
+ "jh<$jha}&;"
|
||||
+ "jha<$jha;"
|
||||
+ "j''<$ja$virama}$ha;"
|
||||
+ "j<$ja}&;"
|
||||
+ "ja<$ja;"
|
||||
+ "ny<$nya}&;"
|
||||
+ "nya<$nya;"
|
||||
+ "tth<$ttha}&;"
|
||||
+ "ttha<$ttha;"
|
||||
+ "tt''<$tta$virama}$ha;"
|
||||
+ "tt<$tta}&;"
|
||||
+ "tta<$tta;"
|
||||
+ "ddh<$ddha}&;"
|
||||
+ "ddha<$ddha;"
|
||||
+ "dd''<$dda}&$ha;"
|
||||
+ "dd<$dda}&;"
|
||||
+ "dda<$dda;"
|
||||
+ "dh<$dha}&;"
|
||||
+ "dha<$dha;"
|
||||
+ "d''<$da$virama}$ha;"
|
||||
+ "d''<$da$virama}$ddha;"
|
||||
+ "d''<$da$virama}$dda;"
|
||||
+ "d''<$da$virama}$dha;"
|
||||
+ "d''<$da$virama}$da;"
|
||||
+ "d<$da}&;"
|
||||
+ "da<$da;"
|
||||
+ "th<$tha}&;"
|
||||
+ "tha<$tha;"
|
||||
+ "t''<$ta$virama}$ha;"
|
||||
+ "t''<$ta$virama}$ttha;"
|
||||
+ "t''<$ta$virama}$tta;"
|
||||
+ "t''<$ta$virama}$tha;"
|
||||
+ "t''<$ta$virama}$ta;"
|
||||
+ "t<$ta}&;"
|
||||
+ "ta<$ta;"
|
||||
+ "n''<$na$virama}$ga;"
|
||||
+ "n''<$na$virama}$ya;"
|
||||
+ "n<$na}&;"
|
||||
+ "na<$na;"
|
||||
+ "ph<$pha}&;"
|
||||
+ "pha<$pha;"
|
||||
+ "p''<$pa$virama}$ha;"
|
||||
+ "p<$pa}&;"
|
||||
+ "pa<$pa;"
|
||||
+ "bh<$bha}&;"
|
||||
+ "bha<$bha;"
|
||||
+ "b''<$ba$virama}$ha;"
|
||||
+ "b<$ba}&;"
|
||||
+ "ba<$ba;"
|
||||
+ "m''<$ma$virama}$ma;"
|
||||
+ "m''<$ma$virama}$bindu;"
|
||||
+ "m<$ma}&;"
|
||||
+ "ma<$ma;"
|
||||
+ "y<$ya}&;"
|
||||
+ "ya<$ya;"
|
||||
+ "r''<$ra$virama}$ha;"
|
||||
+ "r<$ra}&;"
|
||||
+ "ra<$ra;"
|
||||
+ "l''<$la$virama}$ha;"
|
||||
+ "l<$la}&;"
|
||||
+ "la<$la;"
|
||||
+ "v<$va}&;"
|
||||
+ "va<$va;"
|
||||
+ "sh<$sha}&;"
|
||||
+ "sha<$sha;"
|
||||
+ "ss<$ssa}&;"
|
||||
+ "ssa<$ssa;"
|
||||
+ "s''<$sa$virama}$ha;"
|
||||
+ "s''<$sa$virama}$sha;"
|
||||
+ "s''<$sa$virama}$ssa;"
|
||||
+ "s''<$sa$virama}$sa;"
|
||||
+ "s<$sa}&;"
|
||||
+ "sa<$sa;"
|
||||
+ "h<$ha}&;"
|
||||
+ "ha<$ha;"
|
||||
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
|
||||
+ "aa<{aa};"
|
||||
+ "ai<{ai};"
|
||||
+ "au<{au};"
|
||||
+ "ii<{ii};"
|
||||
+ "i<{i};"
|
||||
+ "uu<{uu};"
|
||||
+ "u<{u};"
|
||||
+ "rrh<{rrh};"
|
||||
+ "rh<{rh};"
|
||||
+ "lh<{lh};"
|
||||
+ "e<{e};"
|
||||
+ "o<{o};"
|
||||
+ "aa<$aa;"
|
||||
+ "ai<$ai;"
|
||||
+ "au<$au;"
|
||||
+ "ii<$ii;"
|
||||
+ "i<$i;"
|
||||
+ "uu<$uu;"
|
||||
+ "u<$u;"
|
||||
+ "rrh<$rrh;"
|
||||
+ "rh<$rh;"
|
||||
+ "lh<$lh;"
|
||||
+ "e<$e;"
|
||||
+ "o<$o;"
|
||||
|
||||
// independent vowels (when following consonants)
|
||||
|
||||
+ "''aa<a){waa};"
|
||||
+ "''aa<%){waa};"
|
||||
+ "''ai<a){wai};"
|
||||
+ "''ai<%){wai};"
|
||||
+ "''au<a){wau};"
|
||||
+ "''au<%){wau};"
|
||||
+ "''ii<a){wii};"
|
||||
+ "''ii<%){wii};"
|
||||
+ "''i<a){wi};"
|
||||
+ "''i<%){wi};"
|
||||
+ "''uu<a){wuu};"
|
||||
+ "''uu<%){wuu};"
|
||||
+ "''u<a){wu};"
|
||||
+ "''u<%){wu};"
|
||||
+ "''rrh<%){wrr};"
|
||||
+ "''rh<%){wr};"
|
||||
+ "''lh<%){wl};"
|
||||
+ "''e<%){we};"
|
||||
+ "''o<%){wo};"
|
||||
+ "''a<a){wa};"
|
||||
+ "''a<%){wa};"
|
||||
+ "''aa<a{$waa;"
|
||||
+ "''aa<%{$waa;"
|
||||
+ "''ai<a{$wai;"
|
||||
+ "''ai<%{$wai;"
|
||||
+ "''au<a{$wau;"
|
||||
+ "''au<%{$wau;"
|
||||
+ "''ii<a{$wii;"
|
||||
+ "''ii<%{$wii;"
|
||||
+ "''i<a{$wi;"
|
||||
+ "''i<%{$wi;"
|
||||
+ "''uu<a{$wuu;"
|
||||
+ "''uu<%{$wuu;"
|
||||
+ "''u<a{$wu;"
|
||||
+ "''u<%{$wu;"
|
||||
+ "''rrh<%{$wrr;"
|
||||
+ "''rh<%{$wr;"
|
||||
+ "''lh<%{$wl;"
|
||||
+ "''e<%{$we;"
|
||||
+ "''o<%{$wo;"
|
||||
+ "''a<a{$wa;"
|
||||
+ "''a<%{$wa;"
|
||||
|
||||
|
||||
// independent vowels (otherwise)
|
||||
|
||||
+ "aa<{waa};"
|
||||
+ "ai<{wai};"
|
||||
+ "au<{wau};"
|
||||
+ "ii<{wii};"
|
||||
+ "i<{wi};"
|
||||
+ "uu<{wuu};"
|
||||
+ "u<{wu};"
|
||||
+ "rrh<{wrr};"
|
||||
+ "rh<{wr};"
|
||||
+ "lh<{wl};"
|
||||
+ "e<{we};"
|
||||
+ "o<{wo};"
|
||||
+ "a<{wa};"
|
||||
+ "aa<$waa;"
|
||||
+ "ai<$wai;"
|
||||
+ "au<$wau;"
|
||||
+ "ii<$wii;"
|
||||
+ "i<$wi;"
|
||||
+ "uu<$wuu;"
|
||||
+ "u<$wu;"
|
||||
+ "rrh<$wrr;"
|
||||
+ "rh<$wr;"
|
||||
+ "lh<$wl;"
|
||||
+ "e<$we;"
|
||||
+ "o<$wo;"
|
||||
+ "a<$wa;"
|
||||
|
||||
// blow away any remaining viramas
|
||||
|
||||
+ "<{virama};"
|
||||
+ "<$virama;"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Greek.java,v $
|
||||
* $Date: 2000/03/10 04:07:31 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -55,132 +55,134 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
// Variables, used to make the rules more comprehensible
|
||||
// and for conditionals.
|
||||
// ==============================================
|
||||
|
||||
+ "$quote=\";"
|
||||
|
||||
// Latin Letters
|
||||
|
||||
+ "E-MACRON=\u0112;"
|
||||
+ "e-macron=\u0113;"
|
||||
+ "O-MACRON=\u014C;"
|
||||
+ "o-macron=\u014D;"
|
||||
+ "Y-UMLAUT=\u0178;"
|
||||
+ "y-umlaut=\u00FF;"
|
||||
+ "$E_MACRON=\u0112;"
|
||||
+ "$e_macron=\u0113;"
|
||||
+ "$O_MACRON=\u014C;"
|
||||
+ "$o_macron=\u014D;"
|
||||
+ "$Y_UMLAUT=\u0178;"
|
||||
+ "$y_umlaut=\u00FF;"
|
||||
|
||||
//! // with real accents.
|
||||
//! + "E-MACRON-ACUTE=\u0112\u0301;"
|
||||
//! + "e-macron-acute=\u0113\u0301;"
|
||||
//! + "O-MACRON-ACUTE=\u014C\u0301;"
|
||||
//! + "o-macron-acute=\u014D\u0301;"
|
||||
//! + "y-umlaut-acute=\u00FF\u0301;"
|
||||
//! + "\u00ef-acute=\u00ef\u0301;"
|
||||
//! + "\u00fc-acute=\u00fc\u0301;"
|
||||
//! + "$E_MACRON_ACUTE=\u0112\u0301;"
|
||||
//! + "$e_macron_acute=\u0113\u0301;"
|
||||
//! + "$O_MACRON_ACUTE=\u014C\u0301;"
|
||||
//! + "$o_macron_acute=\u014D\u0301;"
|
||||
//! + "$y_umlaut_acute=\u00FF\u0301;"
|
||||
//! + "$u00ef_acute=\u00ef\u0301;"
|
||||
//! + "$u00fc_acute=\u00fc\u0301;"
|
||||
//! //
|
||||
|
||||
// single letter equivalents
|
||||
|
||||
+ "E-MACRON-ACUTE=\u00CA;"
|
||||
+ "e-macron-acute=\u00EA;"
|
||||
+ "O-MACRON-ACUTE=\u00D4;"
|
||||
+ "o-macron-acute=\u00F4;"
|
||||
+ "y-umlaut-acute=\u0177;"
|
||||
+ "\u00ef-acute=\u00EE;"
|
||||
+ "\u00fc-acute=\u00FB;"
|
||||
+ "$E_MACRON_ACUTE=\u00CA;"
|
||||
+ "$e_macron_acute=\u00EA;"
|
||||
+ "$O_MACRON_ACUTE=\u00D4;"
|
||||
+ "$o_macron_acute=\u00F4;"
|
||||
+ "$y_umlaut_acute=\u0177;"
|
||||
+ "$u00ef_acute=\u00EE;"
|
||||
+ "$u00fc_acute=\u00FB;"
|
||||
|
||||
// Greek Letters
|
||||
|
||||
+ "ALPHA=\u0391;"
|
||||
+ "BETA=\u0392;"
|
||||
+ "GAMMA=\u0393;"
|
||||
+ "DELTA=\u0394;"
|
||||
+ "EPSILON=\u0395;"
|
||||
+ "ZETA=\u0396;"
|
||||
+ "ETA=\u0397;"
|
||||
+ "THETA=\u0398;"
|
||||
+ "IOTA=\u0399;"
|
||||
+ "KAPPA=\u039A;"
|
||||
+ "LAMBDA=\u039B;"
|
||||
+ "MU=\u039C;"
|
||||
+ "NU=\u039D;"
|
||||
+ "XI=\u039E;"
|
||||
+ "OMICRON=\u039F;"
|
||||
+ "PI=\u03A0;"
|
||||
+ "RHO=\u03A1;"
|
||||
+ "SIGMA=\u03A3;"
|
||||
+ "TAU=\u03A4;"
|
||||
+ "YPSILON=\u03A5;"
|
||||
+ "PHI=\u03A6;"
|
||||
+ "CHI=\u03A7;"
|
||||
+ "PSI=\u03A8;"
|
||||
+ "OMEGA=\u03A9;"
|
||||
+ "$ALPHA=\u0391;"
|
||||
+ "$BETA=\u0392;"
|
||||
+ "$GAMMA=\u0393;"
|
||||
+ "$DELTA=\u0394;"
|
||||
+ "$EPSILON=\u0395;"
|
||||
+ "$ZETA=\u0396;"
|
||||
+ "$ETA=\u0397;"
|
||||
+ "$THETA=\u0398;"
|
||||
+ "$IOTA=\u0399;"
|
||||
+ "$KAPPA=\u039A;"
|
||||
+ "$LAMBDA=\u039B;"
|
||||
+ "$MU=\u039C;"
|
||||
+ "$NU=\u039D;"
|
||||
+ "$XI=\u039E;"
|
||||
+ "$OMICRON=\u039F;"
|
||||
+ "$PI=\u03A0;"
|
||||
+ "$RHO=\u03A1;"
|
||||
+ "$SIGMA=\u03A3;"
|
||||
+ "$TAU=\u03A4;"
|
||||
+ "$YPSILON=\u03A5;"
|
||||
+ "$PHI=\u03A6;"
|
||||
+ "$CHI=\u03A7;"
|
||||
+ "$PSI=\u03A8;"
|
||||
+ "$OMEGA=\u03A9;"
|
||||
|
||||
+ "ALPHA+=\u0386;"
|
||||
+ "EPSILON+=\u0388;"
|
||||
+ "ETA+=\u0389;"
|
||||
+ "IOTA+=\u038A;"
|
||||
+ "OMICRON+=\u038C;"
|
||||
+ "YPSILON+=\u038E;"
|
||||
+ "OMEGA+=\u038F;"
|
||||
+ "IOTA_DIAERESIS=\u03AA;"
|
||||
+ "YPSILON_DIAERESIS=\u03AB;"
|
||||
+ "$ALPHA2=\u0386;"
|
||||
+ "$EPSILON2=\u0388;"
|
||||
+ "$ETA2=\u0389;"
|
||||
+ "$IOTA2=\u038A;"
|
||||
+ "$OMICRON2=\u038C;"
|
||||
+ "$YPSILON2=\u038E;"
|
||||
+ "$OMEGA2=\u038F;"
|
||||
+ "$IOTA_DIAERESIS=\u03AA;"
|
||||
+ "$YPSILON_DIAERESIS=\u03AB;"
|
||||
|
||||
+ "alpha=\u03B1;"
|
||||
+ "beta=\u03B2;"
|
||||
+ "gamma=\u03B3;"
|
||||
+ "delta=\u03B4;"
|
||||
+ "epsilon=\u03B5;"
|
||||
+ "zeta=\u03B6;"
|
||||
+ "eta=\u03B7;"
|
||||
+ "theta=\u03B8;"
|
||||
+ "iota=\u03B9;"
|
||||
+ "kappa=\u03BA;"
|
||||
+ "lambda=\u03BB;"
|
||||
+ "mu=\u03BC;"
|
||||
+ "nu=\u03BD;"
|
||||
+ "xi=\u03BE;"
|
||||
+ "omicron=\u03BF;"
|
||||
+ "pi=\u03C0;"
|
||||
+ "rho=\u03C1;"
|
||||
+ "sigma=\u03C3;"
|
||||
+ "tau=\u03C4;"
|
||||
+ "ypsilon=\u03C5;"
|
||||
+ "phi=\u03C6;"
|
||||
+ "chi=\u03C7;"
|
||||
+ "psi=\u03C8;"
|
||||
+ "omega=\u03C9;"
|
||||
+ "$alpha=\u03B1;"
|
||||
+ "$beta=\u03B2;"
|
||||
+ "$gamma=\u03B3;"
|
||||
+ "$delta=\u03B4;"
|
||||
+ "$epsilon=\u03B5;"
|
||||
+ "$zeta=\u03B6;"
|
||||
+ "$eta=\u03B7;"
|
||||
+ "$theta=\u03B8;"
|
||||
+ "$iota=\u03B9;"
|
||||
+ "$kappa=\u03BA;"
|
||||
+ "$lambda=\u03BB;"
|
||||
+ "$mu=\u03BC;"
|
||||
+ "$nu=\u03BD;"
|
||||
+ "$xi=\u03BE;"
|
||||
+ "$omicron=\u03BF;"
|
||||
+ "$pi=\u03C0;"
|
||||
+ "$rho=\u03C1;"
|
||||
+ "$sigma=\u03C3;"
|
||||
+ "$tau=\u03C4;"
|
||||
+ "$ypsilon=\u03C5;"
|
||||
+ "$phi=\u03C6;"
|
||||
+ "$chi=\u03C7;"
|
||||
+ "$psi=\u03C8;"
|
||||
+ "$omega=\u03C9;"
|
||||
|
||||
//forms
|
||||
|
||||
+ "alpha+=\u03AC;"
|
||||
+ "epsilon+=\u03AD;"
|
||||
+ "eta+=\u03AE;"
|
||||
+ "iota+=\u03AF;"
|
||||
+ "omicron+=\u03CC;"
|
||||
+ "ypsilon+=\u03CD;"
|
||||
+ "omega+=\u03CE;"
|
||||
+ "iota_diaeresis=\u03CA;"
|
||||
+ "ypsilon_diaeresis=\u03CB;"
|
||||
+ "iota_diaeresis+=\u0390;"
|
||||
+ "ypsilon_diaeresis+=\u03B0;"
|
||||
+ "sigma+=\u03C2;"
|
||||
+ "$alpha2=\u03AC;"
|
||||
+ "$epsilon2=\u03AD;"
|
||||
+ "$eta2=\u03AE;"
|
||||
+ "$iota2=\u03AF;"
|
||||
+ "$omicron2=\u03CC;"
|
||||
+ "$ypsilon2=\u03CD;"
|
||||
+ "$omega2=\u03CE;"
|
||||
+ "$iota_diaeresis=\u03CA;"
|
||||
+ "$ypsilon_diaeresis=\u03CB;"
|
||||
+ "$iota_diaeresis2=\u0390;"
|
||||
+ "$ypsilon_diaeresis2=\u03B0;"
|
||||
+ "$sigma2=\u03C2;"
|
||||
|
||||
// Variables for conditional mappings
|
||||
|
||||
// Use lowercase for all variable names, to allow cut/paste below.
|
||||
|
||||
+ "letter=[~[:Lu:][:Ll:]];"
|
||||
+ "lower=[[:Ll:]];"
|
||||
+ "softener=[eiyEIY];"
|
||||
+ "vowel=[aeiouAEIOU"
|
||||
+ "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
|
||||
+ "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
|
||||
+ "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
|
||||
+ "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
|
||||
+ "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
|
||||
+ "{iota_diaeresis}{ypsilon_diaeresis}"
|
||||
+ "{iota_diaeresis+}{ypsilon_diaeresis+}"
|
||||
+ "$letter=[~[:Lu:][:Ll:]];"
|
||||
+ "$lower=[[:Ll:]];"
|
||||
+ "$softener=[eiyEIY];"
|
||||
+ "$vowel=[aeiouAEIOU"
|
||||
+ "$ALPHA$EPSILON$ETA$IOTA$OMICRON$YPSILON$OMEGA"
|
||||
+ "$ALPHA2$EPSILON2$ETA2$IOTA2$OMICRON2$YPSILON2$OMEGA2"
|
||||
+ "$IOTA_DIAERESIS$YPSILON_DIAERESIS"
|
||||
+ "$alpha$epsilon$eta$iota$omicron$ypsilon$omega"
|
||||
+ "$alpha2$epsilon2$eta2$iota2$omicron2$ypsilon2$omega2"
|
||||
+ "$iota_diaeresis$ypsilon_diaeresis"
|
||||
+ "$iota_diaeresis2$ypsilon_diaeresis2"
|
||||
+ "];"
|
||||
+ "n-gamma=[GKXCgkxc];"
|
||||
+ "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
|
||||
+ "pp=[Pp];"
|
||||
+ "$n_gamma=[GKXCgkxc];"
|
||||
+ "$gamma_n=[$GAMMA$KAPPA$CHI$XI$gamma$kappa$chi$xi];"
|
||||
+ "$pp=[Pp];"
|
||||
|
||||
// ==============================================
|
||||
// Rules
|
||||
@ -189,10 +191,10 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
// not be copied when duplicating the lowercase
|
||||
// ==============================================
|
||||
|
||||
+ "Th <> {THETA}({lower};"
|
||||
+ "Ph <> {PHI}({lower};"
|
||||
+ "Ch <> {CHI}({lower};"
|
||||
//masked: + "Ps<{PHI}({lower};"
|
||||
+ "Th <> $THETA}$lower;"
|
||||
+ "Ph <> $PHI}$lower;"
|
||||
+ "Ch <> $CHI}$lower;"
|
||||
//masked: + "Ps<$PHI}$lower;"
|
||||
|
||||
// Because there are no uppercase forms for final sigma,
|
||||
// we had to move all the sigma rules up here.
|
||||
@ -203,8 +205,8 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
|
||||
// use special form for s
|
||||
|
||||
+ "''S <> ({pp}) {SIGMA} ;" // handle PS
|
||||
+ "S <> {SIGMA};"
|
||||
+ "''S <> $pp{$SIGMA;" // handle PS
|
||||
+ "S <> $SIGMA;"
|
||||
|
||||
// The following are a bit tricky. 's' takes two forms in Greek
|
||||
// final or non final.
|
||||
@ -213,29 +215,29 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
// We use 's to separate p and s (otherwise ps is one letter)
|
||||
// so, we break out the following forms:
|
||||
|
||||
+ "''s < ({pp}) {sigma} ({letter});"
|
||||
+ "s < {sigma} ({letter});"
|
||||
+ "~s < {sigma} ;"
|
||||
+ "''s < $pp{$sigma}$letter;"
|
||||
+ "s < $sigma}$letter;"
|
||||
+ "~s < $sigma;"
|
||||
|
||||
+ "~s < {sigma+} ({letter});"
|
||||
+ "''s < ({pp}) {sigma+} ;"
|
||||
+ "s < {sigma+} ;"
|
||||
+ "~s < $sigma2}$letter;"
|
||||
+ "''s < $pp{$sigma2;"
|
||||
+ "s < $sigma2;"
|
||||
|
||||
+ "~s ({letter}) > {sigma+};"
|
||||
+ "~s > {sigma};"
|
||||
+ "''s ({letter}) > {sigma};"
|
||||
+ "''s > {sigma+};"
|
||||
+ "s ({letter}) > {sigma};"
|
||||
+ "s > {sigma+};"
|
||||
+ "~s }$letter>$sigma2;"
|
||||
+ "~s > $sigma;"
|
||||
+ "''s }$letter>$sigma;"
|
||||
+ "''s > $sigma2;"
|
||||
+ "s }$letter>$sigma;"
|
||||
+ "s > $sigma2;"
|
||||
|
||||
// because there are no uppercase forms, had to move these up too.
|
||||
|
||||
+ "i\"`>{iota_diaeresis+};"
|
||||
+ "y\"`>{ypsilon_diaeresis+};"
|
||||
+ "i$quote`>$iota_diaeresis2;"
|
||||
+ "y$quote`>$ypsilon_diaeresis2;"
|
||||
|
||||
+ "{\u00ef-acute} <> {iota_diaeresis+};"
|
||||
+ "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
|
||||
+ "{y-umlaut-acute} <> {ypsilon_diaeresis+};"
|
||||
+ "$u00ef_acute<>$iota_diaeresis2;"
|
||||
+ "$u00fc_acute<>$vowel{$ypsilon_diaeresis2;"
|
||||
+ "$y_umlaut_acute<>$ypsilon_diaeresis2;"
|
||||
|
||||
// ==============================================
|
||||
// Uppercase Forms.
|
||||
@ -244,62 +246,62 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
|
||||
// Typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
+ "A`>{ALPHA+};"
|
||||
+ "E`>{EPSILON+};"
|
||||
+ "EE`>{ETA+};"
|
||||
+ "EE>{ETA};"
|
||||
+ "I`>{IOTA+};"
|
||||
+ "O`>{OMICRON+};"
|
||||
+ "OO`>{OMEGA+};"
|
||||
+ "OO>{OMEGA};"
|
||||
+ "I\">{IOTA_DIAERESIS};"
|
||||
+ "Y\">{YPSILON_DIAERESIS};"
|
||||
+ "A`>$ALPHA2;"
|
||||
+ "E`>$EPSILON2;"
|
||||
+ "EE`>$ETA2;"
|
||||
+ "EE>$ETA;"
|
||||
+ "I`>$IOTA2;"
|
||||
+ "O`>$OMICRON2;"
|
||||
+ "OO`>$OMEGA2;"
|
||||
+ "OO>$OMEGA;"
|
||||
+ "I$quote>$IOTA_DIAERESIS;"
|
||||
+ "Y$quote>$YPSILON_DIAERESIS;"
|
||||
|
||||
// Basic Letters
|
||||
|
||||
+ "A<>{ALPHA};"
|
||||
+ "\u00c1<>{ALPHA+};"
|
||||
+ "B<>{BETA};"
|
||||
+ "N ({n-gamma}) <> {GAMMA} ({gamma-n});"
|
||||
+ "G<>{GAMMA};"
|
||||
+ "D<>{DELTA};"
|
||||
+ "''E <> ([Ee]){EPSILON};" // handle EE
|
||||
+ "E<>{EPSILON};"
|
||||
+ "\u00c9<>{EPSILON+};"
|
||||
+ "Z<>{ZETA};"
|
||||
+ "{E-MACRON-ACUTE}<>{ETA+};"
|
||||
+ "{E-MACRON}<>{ETA};"
|
||||
+ "TH<>{THETA};"
|
||||
+ "I<>{IOTA};"
|
||||
+ "\u00cd<>{IOTA+};"
|
||||
+ "\u00cf<>{IOTA_DIAERESIS};"
|
||||
+ "K<>{KAPPA};"
|
||||
+ "L<>{LAMBDA};"
|
||||
+ "M<>{MU};"
|
||||
+ "N'' <> {NU} ({gamma-n});"
|
||||
+ "N<>{NU};"
|
||||
+ "X<>{XI};"
|
||||
+ "''O <> ([Oo]) {OMICRON};" // handle OO
|
||||
+ "O<>{OMICRON};"
|
||||
+ "\u00d3<>{OMICRON+};"
|
||||
+ "PH<>{PHI};" // needs ordering before P
|
||||
+ "PS<>{PSI};" // needs ordering before P
|
||||
+ "P<>{PI};"
|
||||
+ "R<>{RHO};"
|
||||
+ "T<>{TAU};"
|
||||
+ "U <> ({vowel}) {YPSILON};"
|
||||
+ "\u00da <> ({vowel}) {YPSILON+};"
|
||||
+ "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
|
||||
+ "Y<>{YPSILON};"
|
||||
+ "\u00dd<>{YPSILON+};"
|
||||
+ "{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
|
||||
+ "CH<>{CHI};"
|
||||
+ "{O-MACRON-ACUTE}<>{OMEGA+};"
|
||||
+ "{O-MACRON}<>{OMEGA};"
|
||||
+ "A<>$ALPHA;"
|
||||
+ "\u00c1<>$ALPHA2;"
|
||||
+ "B<>$BETA;"
|
||||
+ "N }$n_gamma<>$GAMMA}$gamma_n;"
|
||||
+ "G<>$GAMMA;"
|
||||
+ "D<>$DELTA;"
|
||||
+ "''E <> [Ee]{$EPSILON;" // handle EE
|
||||
+ "E<>$EPSILON;"
|
||||
+ "\u00c9<>$EPSILON2;"
|
||||
+ "Z<>$ZETA;"
|
||||
+ "$E_MACRON_ACUTE<>$ETA2;"
|
||||
+ "$E_MACRON<>$ETA;"
|
||||
+ "TH<>$THETA;"
|
||||
+ "I<>$IOTA;"
|
||||
+ "\u00cd<>$IOTA2;"
|
||||
+ "\u00cf<>$IOTA_DIAERESIS;"
|
||||
+ "K<>$KAPPA;"
|
||||
+ "L<>$LAMBDA;"
|
||||
+ "M<>$MU;"
|
||||
+ "N'' <> $NU}$gamma_n;"
|
||||
+ "N<>$NU;"
|
||||
+ "X<>$XI;"
|
||||
+ "''O <> [Oo]{ $OMICRON;" // handle OO
|
||||
+ "O<>$OMICRON;"
|
||||
+ "\u00d3<>$OMICRON2;"
|
||||
+ "PH<>$PHI;" // needs ordering before P
|
||||
+ "PS<>$PSI;" // needs ordering before P
|
||||
+ "P<>$PI;"
|
||||
+ "R<>$RHO;"
|
||||
+ "T<>$TAU;"
|
||||
+ "U <> $vowel{$YPSILON;"
|
||||
+ "\u00da <> $vowel{$YPSILON2;"
|
||||
+ "\u00dc <> $vowel{$YPSILON_DIAERESIS;"
|
||||
+ "Y<>$YPSILON;"
|
||||
+ "\u00dd<>$YPSILON2;"
|
||||
+ "$Y_UMLAUT<>$YPSILON_DIAERESIS;"
|
||||
+ "CH<>$CHI;"
|
||||
+ "$O_MACRON_ACUTE<>$OMEGA2;"
|
||||
+ "$O_MACRON<>$OMEGA;"
|
||||
|
||||
// Extra English Letters. Mapped for completeness
|
||||
|
||||
+ "C({softener})>|S;"
|
||||
+ "C}$softener>|S;"
|
||||
+ "C>|K;"
|
||||
+ "F>|PH;"
|
||||
+ "H>|CH;"
|
||||
@ -314,62 +316,62 @@ public class TransliterationRule_Latin_Greek extends ListResourceBundle {
|
||||
|
||||
// typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
+ "a`>{alpha+};"
|
||||
+ "e`>{epsilon+};"
|
||||
+ "ee`>{eta+};"
|
||||
+ "ee>{eta};"
|
||||
+ "i`>{iota+};"
|
||||
+ "o`>{omicron+};"
|
||||
+ "oo`>{omega+};"
|
||||
+ "oo>{omega};"
|
||||
+ "i\">{iota_diaeresis};"
|
||||
+ "y\">{ypsilon_diaeresis};"
|
||||
+ "a`>$alpha2;"
|
||||
+ "e`>$epsilon2;"
|
||||
+ "ee`>$eta2;"
|
||||
+ "ee>$eta;"
|
||||
+ "i`>$iota2;"
|
||||
+ "o`>$omicron2;"
|
||||
+ "oo`>$omega2;"
|
||||
+ "oo>$omega;"
|
||||
+ "i$quote>$iota_diaeresis;"
|
||||
+ "y$quote>$ypsilon_diaeresis;"
|
||||
|
||||
// basic letters
|
||||
|
||||
+ "a<>{alpha};"
|
||||
+ "\u00e1<>{alpha+};"
|
||||
+ "b<>{beta};"
|
||||
+ "n ({n-gamma}) <> {gamma} ({gamma-n});"
|
||||
+ "g<>{gamma};"
|
||||
+ "d<>{delta};"
|
||||
+ "''e <> ([Ee]){epsilon};" // handle EE
|
||||
+ "e<>{epsilon};"
|
||||
+ "\u00e9<>{epsilon+};"
|
||||
+ "z<>{zeta};"
|
||||
+ "{e-macron-acute}<>{eta+};"
|
||||
+ "{e-macron}<>{eta};"
|
||||
+ "th<>{theta};"
|
||||
+ "i<>{iota};"
|
||||
+ "\u00ed<>{iota+};"
|
||||
+ "\u00ef<>{iota_diaeresis};"
|
||||
+ "k<>{kappa};"
|
||||
+ "l<>{lambda};"
|
||||
+ "m<>{mu};"
|
||||
+ "n'' <> {nu} ({gamma-n});"
|
||||
+ "n<>{nu};"
|
||||
+ "x<>{xi};"
|
||||
+ "''o <> ([Oo]) {omicron};" // handle OO
|
||||
+ "o<>{omicron};"
|
||||
+ "\u00f3<>{omicron+};"
|
||||
+ "ph<>{phi};" // needs ordering before p
|
||||
+ "ps<>{psi};" // needs ordering before p
|
||||
+ "p<>{pi};"
|
||||
+ "r<>{rho};"
|
||||
+ "t<>{tau};"
|
||||
+ "u <> ({vowel}){ypsilon};"
|
||||
+ "\u00fa <> ({vowel}){ypsilon+};"
|
||||
+ "\u00fc <> ({vowel}){ypsilon_diaeresis};"
|
||||
+ "y<>{ypsilon};"
|
||||
+ "\u00fd<>{ypsilon+};"
|
||||
+ "{y-umlaut}<>{ypsilon_diaeresis};"
|
||||
+ "ch<>{chi};"
|
||||
+ "{o-macron-acute}<>{omega+};"
|
||||
+ "{o-macron}<>{omega};"
|
||||
+ "a<>$alpha;"
|
||||
+ "\u00e1<>$alpha2;"
|
||||
+ "b<>$beta;"
|
||||
+ "n }$n_gamma<>$gamma}$gamma_n;"
|
||||
+ "g<>$gamma;"
|
||||
+ "d<>$delta;"
|
||||
+ "''e <> [Ee]{$epsilon;" // handle EE
|
||||
+ "e<>$epsilon;"
|
||||
+ "\u00e9<>$epsilon2;"
|
||||
+ "z<>$zeta;"
|
||||
+ "$e_macron_acute<>$eta2;"
|
||||
+ "$e_macron<>$eta;"
|
||||
+ "th<>$theta;"
|
||||
+ "i<>$iota;"
|
||||
+ "\u00ed<>$iota2;"
|
||||
+ "\u00ef<>$iota_diaeresis;"
|
||||
+ "k<>$kappa;"
|
||||
+ "l<>$lambda;"
|
||||
+ "m<>$mu;"
|
||||
+ "n'' <> $nu}$gamma_n;"
|
||||
+ "n<>$nu;"
|
||||
+ "x<>$xi;"
|
||||
+ "''o <> [Oo]{ $omicron;" // handle OO
|
||||
+ "o<>$omicron;"
|
||||
+ "\u00f3<>$omicron2;"
|
||||
+ "ph<>$phi;" // needs ordering before p
|
||||
+ "ps<>$psi;" // needs ordering before p
|
||||
+ "p<>$pi;"
|
||||
+ "r<>$rho;"
|
||||
+ "t<>$tau;"
|
||||
+ "u <> $vowel{$ypsilon;"
|
||||
+ "\u00fa <> $vowel{$ypsilon2;"
|
||||
+ "\u00fc <> $vowel{$ypsilon_diaeresis;"
|
||||
+ "y<>$ypsilon;"
|
||||
+ "\u00fd<>$ypsilon2;"
|
||||
+ "$y_umlaut<>$ypsilon_diaeresis;"
|
||||
+ "ch<>$chi;"
|
||||
+ "$o_macron_acute<>$omega2;"
|
||||
+ "$o_macron<>$omega;"
|
||||
|
||||
// extra english letters. mapped for completeness
|
||||
|
||||
+ "c({softener})>|s;"
|
||||
+ "c}$softener>|s;"
|
||||
+ "c>|k;"
|
||||
+ "f>|ph;"
|
||||
+ "h>|ch;"
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Hebrew.java,v $
|
||||
* $Date: 2000/03/10 04:07:31 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -23,66 +23,66 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
|
||||
{ "Rule",
|
||||
//variable names, derived from the Unicode names.
|
||||
|
||||
"POINT_SHEVA=\u05B0;"
|
||||
+ "POINT_HATAF_SEGOL=\u05B1;"
|
||||
+ "POINT_HATAF_PATAH=\u05B2;"
|
||||
+ "POINT_HATAF_QAMATS=\u05B3;"
|
||||
+ "POINT_HIRIQ=\u05B4;"
|
||||
+ "POINT_TSERE=\u05B5;"
|
||||
+ "POINT_SEGOL=\u05B6;"
|
||||
+ "POINT_PATAH=\u05B7;"
|
||||
+ "POINT_QAMATS=\u05B8;"
|
||||
+ "POINT_HOLAM=\u05B9;"
|
||||
+ "POINT_QUBUTS=\u05BB;"
|
||||
+ "POINT_DAGESH_OR_MAPIQ=\u05BC;"
|
||||
+ "POINT_METEG=\u05BD;"
|
||||
+ "PUNCTUATION_MAQAF=\u05BE;"
|
||||
+ "POINT_RAFE=\u05BF;"
|
||||
+ "PUNCTUATION_PASEQ=\u05C0;"
|
||||
+ "POINT_SHIN_DOT=\u05C1;"
|
||||
+ "POINT_SIN_DOT=\u05C2;"
|
||||
+ "PUNCTUATION_SOF_PASUQ=\u05C3;"
|
||||
+ "ALEF=\u05D0;"
|
||||
+ "BET=\u05D1;"
|
||||
+ "GIMEL=\u05D2;"
|
||||
+ "DALET=\u05D3;"
|
||||
+ "HE=\u05D4;"
|
||||
+ "VAV=\u05D5;"
|
||||
+ "ZAYIN=\u05D6;"
|
||||
+ "HET=\u05D7;"
|
||||
+ "TET=\u05D8;"
|
||||
+ "YOD=\u05D9;"
|
||||
+ "FINAL_KAF=\u05DA;"
|
||||
+ "KAF=\u05DB;"
|
||||
+ "LAMED=\u05DC;"
|
||||
+ "FINAL_MEM=\u05DD;"
|
||||
+ "MEM=\u05DE;"
|
||||
+ "FINAL_NUN=\u05DF;"
|
||||
+ "NUN=\u05E0;"
|
||||
+ "SAMEKH=\u05E1;"
|
||||
+ "AYIN=\u05E2;"
|
||||
+ "FINAL_PE=\u05E3;"
|
||||
+ "PE=\u05E4;"
|
||||
+ "FINAL_TSADI=\u05E5;"
|
||||
+ "TSADI=\u05E6;"
|
||||
+ "QOF=\u05E7;"
|
||||
+ "RESH=\u05E8;"
|
||||
+ "SHIN=\u05E9;"
|
||||
+ "TAV=\u05EA;"
|
||||
+ "YIDDISH_DOUBLE_VAV=\u05F0;"
|
||||
+ "YIDDISH_VAV_YOD=\u05F1;"
|
||||
+ "YIDDISH_DOUBLE_YOD=\u05F2;"
|
||||
+ "PUNCTUATION_GERESH=\u05F3;"
|
||||
+ "PUNCTUATION_GERSHAYIM=\u05F4;"
|
||||
"$POINT_SHEVA=\u05B0;"
|
||||
+ "$POINT_HATAF_SEGOL=\u05B1;"
|
||||
+ "$POINT_HATAF_PATAH=\u05B2;"
|
||||
+ "$POINT_HATAF_QAMATS=\u05B3;"
|
||||
+ "$POINT_HIRIQ=\u05B4;"
|
||||
+ "$POINT_TSERE=\u05B5;"
|
||||
+ "$POINT_SEGOL=\u05B6;"
|
||||
+ "$POINT_PATAH=\u05B7;"
|
||||
+ "$POINT_QAMATS=\u05B8;"
|
||||
+ "$POINT_HOLAM=\u05B9;"
|
||||
+ "$POINT_QUBUTS=\u05BB;"
|
||||
+ "$POINT_DAGESH_OR_MAPIQ=\u05BC;"
|
||||
+ "$POINT_METEG=\u05BD;"
|
||||
+ "$PUNCTUATION_MAQAF=\u05BE;"
|
||||
+ "$POINT_RAFE=\u05BF;"
|
||||
+ "$PUNCTUATION_PASEQ=\u05C0;"
|
||||
+ "$POINT_SHIN_DOT=\u05C1;"
|
||||
+ "$POINT_SIN_DOT=\u05C2;"
|
||||
+ "$PUNCTUATION_SOF_PASUQ=\u05C3;"
|
||||
+ "$ALEF=\u05D0;"
|
||||
+ "$BET=\u05D1;"
|
||||
+ "$GIMEL=\u05D2;"
|
||||
+ "$DALET=\u05D3;"
|
||||
+ "$HE=\u05D4;"
|
||||
+ "$VAV=\u05D5;"
|
||||
+ "$ZAYIN=\u05D6;"
|
||||
+ "$HET=\u05D7;"
|
||||
+ "$TET=\u05D8;"
|
||||
+ "$YOD=\u05D9;"
|
||||
+ "$FINAL_KAF=\u05DA;"
|
||||
+ "$KAF=\u05DB;"
|
||||
+ "$LAMED=\u05DC;"
|
||||
+ "$FINAL_MEM=\u05DD;"
|
||||
+ "$MEM=\u05DE;"
|
||||
+ "$FINAL_NUN=\u05DF;"
|
||||
+ "$NUN=\u05E0;"
|
||||
+ "$SAMEKH=\u05E1;"
|
||||
+ "$AYIN=\u05E2;"
|
||||
+ "$FINAL_PE=\u05E3;"
|
||||
+ "$PE=\u05E4;"
|
||||
+ "$FINAL_TSADI=\u05E5;"
|
||||
+ "$TSADI=\u05E6;"
|
||||
+ "$QOF=\u05E7;"
|
||||
+ "$RESH=\u05E8;"
|
||||
+ "$SHIN=\u05E9;"
|
||||
+ "$TAV=\u05EA;"
|
||||
+ "$YIDDISH_DOUBLE_VAV=\u05F0;"
|
||||
+ "$YIDDISH_VAV_YOD=\u05F1;"
|
||||
+ "$YIDDISH_DOUBLE_YOD=\u05F2;"
|
||||
+ "$PUNCTUATION_GERESH=\u05F3;"
|
||||
+ "$PUNCTUATION_GERSHAYIM=\u05F4;"
|
||||
|
||||
//wildcards
|
||||
//The values can be anything we don't use in this file: start at E000.
|
||||
|
||||
+ "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
|
||||
+ "$letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
|
||||
|
||||
+ "softvowel=[eiyEIY];"
|
||||
+ "$softvowel=[eiyEIY];"
|
||||
|
||||
+ "vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
|
||||
+ "$vowellike=[$ALEF$AYIN$YOD$VAV];"
|
||||
|
||||
//?>{POINT_SHEVA}
|
||||
//?>{POINT_HATAF_SEGOL}
|
||||
@ -104,117 +104,117 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
|
||||
//?>{POINT_SIN_DOT}
|
||||
//?>{PUNCTUATION_SOF_PASUQ}
|
||||
|
||||
+ "a>{ALEF};"
|
||||
+ "A>{ALEF};"
|
||||
+ "a>$ALEF;"
|
||||
+ "A>$ALEF;"
|
||||
|
||||
+ "b>{BET};"
|
||||
+ "B>{BET};"
|
||||
+ "b>$BET;"
|
||||
+ "B>$BET;"
|
||||
|
||||
+ "c({softvowel}>{SAMEKH};"
|
||||
+ "C({softvowel}>{SAMEKH};"
|
||||
+ "c({letter}>{KAF};"
|
||||
+ "C({letter}>{KAF};"
|
||||
+ "c>{FINAL_KAF};"
|
||||
+ "C>{FINAL_KAF};"
|
||||
+ "c}$softvowel>$SAMEKH;"
|
||||
+ "C}$softvowel>$SAMEKH;"
|
||||
+ "c}$letter>$KAF;"
|
||||
+ "C}$letter>$KAF;"
|
||||
+ "c>$FINAL_KAF;"
|
||||
+ "C>$FINAL_KAF;"
|
||||
|
||||
+ "d>{DALET};"
|
||||
+ "D>{DALET};"
|
||||
+ "d>$DALET;"
|
||||
+ "D>$DALET;"
|
||||
|
||||
+ "e>{AYIN};"
|
||||
+ "E>{AYIN};"
|
||||
+ "e>$AYIN;"
|
||||
+ "E>$AYIN;"
|
||||
|
||||
+ "f({letter}>{PE};"
|
||||
+ "f>{FINAL_PE};"
|
||||
+ "F({letter}>{PE};"
|
||||
+ "F>{FINAL_PE};"
|
||||
+ "f}$letter>$PE;"
|
||||
+ "f>$FINAL_PE;"
|
||||
+ "F}$letter>$PE;"
|
||||
+ "F>$FINAL_PE;"
|
||||
|
||||
+ "g>{GIMEL};"
|
||||
+ "G>{GIMEL};"
|
||||
+ "g>$GIMEL;"
|
||||
+ "G>$GIMEL;"
|
||||
|
||||
+ "h>{HE};"
|
||||
+ "H>{HE};"
|
||||
+ "h>$HE;"
|
||||
+ "H>$HE;"
|
||||
|
||||
+ "i>{YOD};"
|
||||
+ "I>{YOD};"
|
||||
+ "i>$YOD;"
|
||||
+ "I>$YOD;"
|
||||
|
||||
+ "j>{DALET}{SHIN};"
|
||||
+ "J>{DALET}{SHIN};"
|
||||
+ "j>$DALET$SHIN;"
|
||||
+ "J>$DALET$SHIN;"
|
||||
|
||||
+ "kH>{HET};"
|
||||
+ "kh>{HET};"
|
||||
+ "KH>{HET};"
|
||||
+ "Kh>{HET};"
|
||||
+ "k({letter}>{KAF};"
|
||||
+ "K({letter}>{KAF};"
|
||||
+ "k>{FINAL_KAF};"
|
||||
+ "K>{FINAL_KAF};"
|
||||
+ "kH>$HET;"
|
||||
+ "kh>$HET;"
|
||||
+ "KH>$HET;"
|
||||
+ "Kh>$HET;"
|
||||
+ "k}$letter>$KAF;"
|
||||
+ "K}$letter>$KAF;"
|
||||
+ "k>$FINAL_KAF;"
|
||||
+ "K>$FINAL_KAF;"
|
||||
|
||||
+ "l>{LAMED};"
|
||||
+ "L>{LAMED};"
|
||||
+ "l>$LAMED;"
|
||||
+ "L>$LAMED;"
|
||||
|
||||
+ "m({letter}>{MEM};"
|
||||
+ "m>{FINAL_MEM};"
|
||||
+ "M({letter}>{MEM};"
|
||||
+ "M>{FINAL_MEM};"
|
||||
+ "m}$letter>$MEM;"
|
||||
+ "m>$FINAL_MEM;"
|
||||
+ "M}$letter>$MEM;"
|
||||
+ "M>$FINAL_MEM;"
|
||||
|
||||
+ "n({letter}>{NUN};"
|
||||
+ "n>{FINAL_NUN};"
|
||||
+ "N({letter}>{NUN};"
|
||||
+ "N>{FINAL_NUN};"
|
||||
+ "n}$letter>$NUN;"
|
||||
+ "n>$FINAL_NUN;"
|
||||
+ "N}$letter>$NUN;"
|
||||
+ "N>$FINAL_NUN;"
|
||||
|
||||
+ "o>{VAV};"
|
||||
+ "O>{VAV};"
|
||||
+ "o>$VAV;"
|
||||
+ "O>$VAV;"
|
||||
|
||||
+ "p({letter}>{PE};"
|
||||
+ "p>{FINAL_PE};"
|
||||
+ "P({letter}>{PE};"
|
||||
+ "P>{FINAL_PE};"
|
||||
+ "p}$letter>$PE;"
|
||||
+ "p>$FINAL_PE;"
|
||||
+ "P}$letter>$PE;"
|
||||
+ "P>$FINAL_PE;"
|
||||
|
||||
+ "q>{QOF};"
|
||||
+ "Q>{QOF};"
|
||||
+ "q>$QOF;"
|
||||
+ "Q>$QOF;"
|
||||
|
||||
+ "r>{RESH};"
|
||||
+ "R>{RESH};"
|
||||
+ "r>$RESH;"
|
||||
+ "R>$RESH;"
|
||||
|
||||
+ "sH>{SHIN};"
|
||||
+ "sh>{SHIN};"
|
||||
+ "SH>{SHIN};"
|
||||
+ "Sh>{SHIN};"
|
||||
+ "s>{SAMEKH};"
|
||||
+ "S>{SAMEKH};"
|
||||
+ "sH>$SHIN;"
|
||||
+ "sh>$SHIN;"
|
||||
+ "SH>$SHIN;"
|
||||
+ "Sh>$SHIN;"
|
||||
+ "s>$SAMEKH;"
|
||||
+ "S>$SAMEKH;"
|
||||
|
||||
+ "th>{TAV};"
|
||||
+ "tH>{TAV};"
|
||||
+ "TH>{TAV};"
|
||||
+ "Th>{TAV};"
|
||||
+ "tS({letter}>{TSADI};"
|
||||
+ "ts({letter}>{TSADI};"
|
||||
+ "Ts({letter}>{TSADI};"
|
||||
+ "TS({letter}>{TSADI};"
|
||||
+ "tS>{FINAL_TSADI};"
|
||||
+ "ts>{FINAL_TSADI};"
|
||||
+ "Ts>{FINAL_TSADI};"
|
||||
+ "TS>{FINAL_TSADI};"
|
||||
+ "t>{TET};"
|
||||
+ "T>{TET};"
|
||||
+ "th>$TAV;"
|
||||
+ "tH>$TAV;"
|
||||
+ "TH>$TAV;"
|
||||
+ "Th>$TAV;"
|
||||
+ "tS}$letter>$TSADI;"
|
||||
+ "ts}$letter>$TSADI;"
|
||||
+ "Ts}$letter>$TSADI;"
|
||||
+ "TS}$letter>$TSADI;"
|
||||
+ "tS>$FINAL_TSADI;"
|
||||
+ "ts>$FINAL_TSADI;"
|
||||
+ "Ts>$FINAL_TSADI;"
|
||||
+ "TS>$FINAL_TSADI;"
|
||||
+ "t>$TET;"
|
||||
+ "T>$TET;"
|
||||
|
||||
+ "u>{VAV};"
|
||||
+ "U>{VAV};"
|
||||
+ "u>$VAV;"
|
||||
+ "U>$VAV;"
|
||||
|
||||
+ "v>{VAV};"
|
||||
+ "V>{VAV};"
|
||||
+ "v>$VAV;"
|
||||
+ "V>$VAV;"
|
||||
|
||||
+ "w>{VAV};"
|
||||
+ "W>{VAV};"
|
||||
+ "w>$VAV;"
|
||||
+ "W>$VAV;"
|
||||
|
||||
+ "x>{KAF}{SAMEKH};"
|
||||
+ "X>{KAF}{SAMEKH};"
|
||||
+ "x>$KAF$SAMEKH;"
|
||||
+ "X>$KAF$SAMEKH;"
|
||||
|
||||
+ "y>{YOD};"
|
||||
+ "Y>{YOD};"
|
||||
+ "y>$YOD;"
|
||||
+ "Y>$YOD;"
|
||||
|
||||
+ "z>{ZAYIN};"
|
||||
+ "Z>{ZAYIN};"
|
||||
+ "z>$ZAYIN;"
|
||||
+ "Z>$ZAYIN;"
|
||||
|
||||
//#?>{YIDDISH_DOUBLE_VAV}
|
||||
//?>{YIDDISH_VAV_YOD}
|
||||
@ -244,39 +244,39 @@ public class TransliterationRule_Latin_Hebrew extends ListResourceBundle {
|
||||
//{POINT_SIN_DOT}>@
|
||||
//{PUNCTUATION_SOF_PASUQ}>@
|
||||
|
||||
+ "a<{ALEF};"
|
||||
+ "e<{AYIN};"
|
||||
+ "b<{BET};"
|
||||
+ "d<{DALET};"
|
||||
+ "k<{FINAL_KAF};"
|
||||
+ "m<{FINAL_MEM};"
|
||||
+ "n<{FINAL_NUN};"
|
||||
+ "p<{FINAL_PE};"
|
||||
+ "ts<{FINAL_TSADI};"
|
||||
+ "g<{GIMEL};"
|
||||
+ "kh<{HET};"
|
||||
+ "h<{HE};"
|
||||
+ "k''<{KAF}({HE};"
|
||||
+ "k<{KAF};"
|
||||
+ "l<{LAMED};"
|
||||
+ "m<{MEM};"
|
||||
+ "n<{NUN};"
|
||||
+ "p<{PE};"
|
||||
+ "q<{QOF};"
|
||||
+ "r<{RESH};"
|
||||
+ "s''<{SAMEKH}({HE};"
|
||||
+ "s<{SAMEKH};"
|
||||
+ "sh<{SHIN};"
|
||||
+ "th<{TAV};"
|
||||
+ "t''<{TET}({HE};"
|
||||
+ "t''<{TET}({SAMEKH};"
|
||||
+ "t''<{TET}({SHIN};"
|
||||
+ "t<{TET};"
|
||||
+ "ts<{TSADI};"
|
||||
+ "v<{VAV}({vowellike};"
|
||||
+ "u<{VAV};"
|
||||
+ "y<{YOD};"
|
||||
+ "z<{ZAYIN};"
|
||||
+ "a<$ALEF;"
|
||||
+ "e<$AYIN;"
|
||||
+ "b<$BET;"
|
||||
+ "d<$DALET;"
|
||||
+ "k<$FINAL_KAF;"
|
||||
+ "m<$FINAL_MEM;"
|
||||
+ "n<$FINAL_NUN;"
|
||||
+ "p<$FINAL_PE;"
|
||||
+ "ts<$FINAL_TSADI;"
|
||||
+ "g<$GIMEL;"
|
||||
+ "kh<$HET;"
|
||||
+ "h<$HE;"
|
||||
+ "k''<$KAF}$HE;"
|
||||
+ "k<$KAF;"
|
||||
+ "l<$LAMED;"
|
||||
+ "m<$MEM;"
|
||||
+ "n<$NUN;"
|
||||
+ "p<$PE;"
|
||||
+ "q<$QOF;"
|
||||
+ "r<$RESH;"
|
||||
+ "s''<$SAMEKH}$HE;"
|
||||
+ "s<$SAMEKH;"
|
||||
+ "sh<$SHIN;"
|
||||
+ "th<$TAV;"
|
||||
+ "t''<$TET}$HE;"
|
||||
+ "t''<$TET}$SAMEKH;"
|
||||
+ "t''<$TET}$SHIN;"
|
||||
+ "t<$TET;"
|
||||
+ "ts<$TSADI;"
|
||||
+ "v<$VAV}$vowellike;"
|
||||
+ "u<$VAV;"
|
||||
+ "y<$YOD;"
|
||||
+ "z<$ZAYIN;"
|
||||
|
||||
//{YIDDISH_DOUBLE_VAV}>@
|
||||
//{YIDDISH_VAV_YOD}>@
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Jamo.java,v $
|
||||
* $Date: 2000/03/10 04:07:31 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -23,19 +23,19 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
|
||||
|
||||
// VARIABLES
|
||||
|
||||
+ "initial=[\u1100-\u115F];"
|
||||
+ "medial=[\u1160-\u11A7];"
|
||||
+ "final=[\u11A8-\u11F9];" // added - aliu
|
||||
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
+ "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
|
||||
+ "ye=[yeYE];"
|
||||
+ "ywe=[yweYWE];"
|
||||
+ "yw=[ywYW];"
|
||||
+ "nl=[nlNL];"
|
||||
+ "gnl=[gnlGNL];"
|
||||
+ "lsgb=[lsgbLSGB];"
|
||||
+ "ywao=[ywaoYWAO];"
|
||||
+ "bl=[blBL];"
|
||||
+ "$initial=[\u1100-\u115F];"
|
||||
+ "$medial=[\u1160-\u11A7];"
|
||||
+ "$final=[\u11A8-\u11F9];" // added - aliu
|
||||
+ "$vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
+ "$consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ$medial$final];"
|
||||
+ "$ye=[yeYE];"
|
||||
+ "$ywe=[yweYWE];"
|
||||
+ "$yw=[ywYW];"
|
||||
+ "$nl=[nlNL];"
|
||||
+ "$gnl=[gnlGNL];"
|
||||
+ "$lsgb=[lsgbLSGB];"
|
||||
+ "$ywao=[ywaoYWAO];"
|
||||
+ "$bl=[blBL];"
|
||||
|
||||
// RULES
|
||||
|
||||
@ -60,8 +60,8 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
|
||||
|
||||
// special insertion for funny sequences of vowels, and for empty consonant
|
||||
|
||||
+ "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
|
||||
+ "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
+ "'' < $consonant{\u110B;" // insert a break between any consonant and the empty consonant.
|
||||
+ "$medial{}$vowel<>\u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
||||
|
||||
@ -144,57 +144,57 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
|
||||
// from Hangul to Latin. Catch every letter that can be the
|
||||
// LAST of a digraph (or multigraph) AND first of an initial
|
||||
|
||||
+ "'' < (l) (\u11c0;" // hangul jongseong thieuth
|
||||
+ "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
|
||||
+ "'' < (l) (\u11c1;" // hangul jongseong phieuph
|
||||
+ "'' < (l) (\u11b7;" // hangul jongseong mieum
|
||||
+ "'' < (n) (\u11bd;" // hangul jongseong cieuc
|
||||
+ "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
|
||||
+ "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "'' < ({bl}) (\u11b8;" // hangul jongseong pieup
|
||||
+ "'' < (d) (\u11ae;" // hangul jongseong tikeut
|
||||
+ "'' < l{ }\u11c0;" // hangul jongseong thieuth
|
||||
+ "'' < $lsgb{}\u11ba;" // hangul jongseong sios
|
||||
+ "'' < l{ }\u11c1;" // hangul jongseong phieuph
|
||||
+ "'' < l{ }\u11b7;" // hangul jongseong mieum
|
||||
+ "'' < n{ }\u11bd;" // hangul jongseong cieuc
|
||||
+ "'' < $nl{}\u11c2;" // hangul jongseong hieuh
|
||||
+ "'' < $gnl{}\u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "'' < $bl{}\u11b8;" // hangul jongseong pieup
|
||||
+ "'' < d{ }\u11ae;" // hangul jongseong tikeut
|
||||
|
||||
+ "'' < ({ye}) (\u116e;" // hangul jungseong u
|
||||
+ "'' < ({ywe}) (\u1169;" // hangul jungseong o
|
||||
+ "'' < ({yw}) (\u1175;" // hangul jungseong i
|
||||
+ "'' < ({ywao}) (\u1166;" // hangul jungseong e
|
||||
+ "'' < ({yw}) (\u1161;" // hangul jungseong a
|
||||
+ "'' < $ye{}\u116e;" // hangul jungseong u
|
||||
+ "'' < $ywe{}\u1169;" // hangul jungseong o
|
||||
+ "'' < $yw{}\u1175;" // hangul jungseong i
|
||||
+ "'' < $ywao{}\u1166;" // hangul jungseong e
|
||||
+ "'' < $yw{}\u1161;" // hangul jungseong a
|
||||
|
||||
+ "'' < (l) (\u1110;" // hangul choseong thieuth
|
||||
+ "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
|
||||
+ "'' < ({lsgb}) (\u1109;" // hangul choseong sios
|
||||
+ "'' < (l) (\u1111;" // hangul choseong phieuph
|
||||
+ "'' < (l) (\u1106;" // hangul choseong mieum
|
||||
+ "'' < (n) (\u110c;" // hangul choseong cieuc
|
||||
+ "'' < (n) (\u110d;"
|
||||
+ "'' < ({nl}) (\u1112;" // hangul choseong hieuh
|
||||
+ "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
|
||||
+ "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
|
||||
+ "'' < (d) (\u1103;" // hangul choseong tikeut
|
||||
+ "'' < (d) (\u1104;"
|
||||
+ "'' < ({bl}) (\u1107;" // hangul choseong pieup
|
||||
+ "'' < ({bl}) (\u1108;"
|
||||
+ "'' < l{ }\u1110;" // hangul choseong thieuth
|
||||
+ "'' < $lsgb{}\u110a;" // hangul choseong ssangsios
|
||||
+ "'' < $lsgb{}\u1109;" // hangul choseong sios
|
||||
+ "'' < l{ }\u1111;" // hangul choseong phieuph
|
||||
+ "'' < l{ }\u1106;" // hangul choseong mieum
|
||||
+ "'' < n{ }\u110c;" // hangul choseong cieuc
|
||||
+ "'' < n{ }\u110d;"
|
||||
+ "'' < $nl{}\u1112;" // hangul choseong hieuh
|
||||
+ "'' < $gnl{}\u1101;" // hangul choseong ssangkiyeok
|
||||
+ "'' < $gnl{}\u1100;" // hangul choseong kiyeok
|
||||
+ "'' < d{ }\u1103;" // hangul choseong tikeut
|
||||
+ "'' < d{ }\u1104;"
|
||||
+ "'' < $bl{}\u1107;" // hangul choseong pieup
|
||||
+ "'' < $bl{}\u1108;"
|
||||
|
||||
// INITIALS
|
||||
|
||||
+ "t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
||||
+ "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
||||
+ "s ({vowel}) <> \u1109;" // hangul choseong sios
|
||||
+ "p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
||||
+ "n ({vowel}) <> \u1102;" // hangul choseong nieun
|
||||
+ "m ({vowel}) <> \u1106;" // hangul choseong mieum
|
||||
+ "l ({vowel}) <> \u1105;" // hangul choseong rieul
|
||||
+ "k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
||||
+ "j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
||||
+ "h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
||||
+ "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
||||
+ "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
||||
+ "d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
||||
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
+ "bb ({vowel}) <> \u1108;"
|
||||
+ "jj ({vowel}) <> \u110d;"
|
||||
+ "dd ({vowel}) <> \u1104;"
|
||||
+ "t }$vowel<>\u1110;" // hangul choseong thieuth
|
||||
+ "ss }$vowel<>\u110a;" // hangul choseong ssangsios
|
||||
+ "s }$vowel<>\u1109;" // hangul choseong sios
|
||||
+ "p }$vowel<>\u1111;" // hangul choseong phieuph
|
||||
+ "n }$vowel<>\u1102;" // hangul choseong nieun
|
||||
+ "m }$vowel<>\u1106;" // hangul choseong mieum
|
||||
+ "l }$vowel<>\u1105;" // hangul choseong rieul
|
||||
+ "k }$vowel<>\u110f;" // hangul choseong khieukh
|
||||
+ "j }$vowel<>\u110c;" // hangul choseong cieuc
|
||||
+ "h }$vowel<>\u1112;" // hangul choseong hieuh
|
||||
+ "gg }$vowel<>\u1101;" // hangul choseong ssangkiyeok
|
||||
+ "g }$vowel<>\u1100;" // hangul choseong kiyeok
|
||||
+ "d }$vowel<>\u1103;" // hangul choseong tikeut
|
||||
+ "c }$vowel<>\u110e;" // hangul choseong chieuch
|
||||
+ "b }$vowel<>\u1107;" // hangul choseong pieup
|
||||
+ "bb }$vowel<>\u1108;"
|
||||
+ "jj }$vowel<>\u110d;"
|
||||
+ "dd }$vowel<>\u1104;"
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
@ -209,45 +209,45 @@ public class TransliterationRule_Latin_Jamo extends ListResourceBundle {
|
||||
+ "jj > \u1108\u110d;" // hangul choseong ssangcieuc
|
||||
+ "dd > \u1108\u1104;" // hangul choseong ssangtikeut
|
||||
|
||||
+ "({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
||||
+ "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
+ "({final}) s > \u1109\u116e;" // hangul choseong sios
|
||||
+ "({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
||||
+ "({final}) n > \u1102\u116e;" // hangul choseong nieun
|
||||
+ "({final}) m > \u1106\u116e;" // hangul choseong mieum
|
||||
+ "({final}) l > \u1105\u116e;" // hangul choseong rieul
|
||||
+ "({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
||||
+ "({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
||||
+ "({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
||||
+ "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
+ "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
+ "({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
||||
+ "({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
||||
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
+ "$final{ t > \u1110\u116e;" // hangul choseong thieuth
|
||||
+ "$final{ ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
+ "$final{ s > \u1109\u116e;" // hangul choseong sios
|
||||
+ "$final{ p > \u1111\u116e;" // hangul choseong phieuph
|
||||
+ "$final{ n > \u1102\u116e;" // hangul choseong nieun
|
||||
+ "$final{ m > \u1106\u116e;" // hangul choseong mieum
|
||||
+ "$final{ l > \u1105\u116e;" // hangul choseong rieul
|
||||
+ "$final{ k > \u110f\u116e;" // hangul choseong khieukh
|
||||
+ "$final{ j > \u110c\u116e;" // hangul choseong cieuc
|
||||
+ "$final{ h > \u1112\u116e;" // hangul choseong hieuh
|
||||
+ "$final{ gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
+ "$final{ g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
+ "$final{ d > \u1103\u116e;" // hangul choseong tikeut
|
||||
+ "$final{ c > \u110e\u116e;" // hangul choseong chieuch
|
||||
+ "$final{ b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS after INITIALS
|
||||
|
||||
+ "({initial}) yu <> \u1172;" // hangul jungseong yu
|
||||
+ "({initial}) yo <> \u116d;" // hangul jungseong yo
|
||||
+ "({initial}) yi <> \u1174;" // hangul jungseong yi
|
||||
+ "({initial}) yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "({initial}) ye <> \u1168;" // hangul jungseong ye
|
||||
+ "({initial}) yae <> \u1164;" // hangul jungseong yae
|
||||
+ "({initial}) ya <> \u1163;" // hangul jungseong ya
|
||||
+ "({initial}) wi <> \u1171;" // hangul jungseong wi
|
||||
+ "({initial}) weo <> \u116f;" // hangul jungseong weo
|
||||
+ "({initial}) we <> \u1170;" // hangul jungseong we
|
||||
+ "({initial}) wae <> \u116b;" // hangul jungseong wae
|
||||
+ "({initial}) wa <> \u116a;" // hangul jungseong wa
|
||||
+ "({initial}) u <> \u116e;" // hangul jungseong u
|
||||
+ "({initial}) oe <> \u116c;" // hangul jungseong oe
|
||||
+ "({initial}) o <> \u1169;" // hangul jungseong o
|
||||
+ "({initial}) i <> \u1175;" // hangul jungseong i
|
||||
+ "({initial}) eu <> \u1173;" // hangul jungseong eu
|
||||
+ "({initial}) eo <> \u1165;" // hangul jungseong eo
|
||||
+ "({initial}) e <> \u1166;" // hangul jungseong e
|
||||
+ "({initial}) ae <> \u1162;" // hangul jungseong ae
|
||||
+ "({initial}) a <> \u1161;" // hangul jungseong a
|
||||
+ "$initial{ yu <> \u1172;" // hangul jungseong yu
|
||||
+ "$initial{ yo <> \u116d;" // hangul jungseong yo
|
||||
+ "$initial{ yi <> \u1174;" // hangul jungseong yi
|
||||
+ "$initial{ yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "$initial{ ye <> \u1168;" // hangul jungseong ye
|
||||
+ "$initial{ yae <> \u1164;" // hangul jungseong yae
|
||||
+ "$initial{ ya <> \u1163;" // hangul jungseong ya
|
||||
+ "$initial{ wi <> \u1171;" // hangul jungseong wi
|
||||
+ "$initial{ weo <> \u116f;" // hangul jungseong weo
|
||||
+ "$initial{ we <> \u1170;" // hangul jungseong we
|
||||
+ "$initial{ wae <> \u116b;" // hangul jungseong wae
|
||||
+ "$initial{ wa <> \u116a;" // hangul jungseong wa
|
||||
+ "$initial{ u <> \u116e;" // hangul jungseong u
|
||||
+ "$initial{ oe <> \u116c;" // hangul jungseong oe
|
||||
+ "$initial{ o <> \u1169;" // hangul jungseong o
|
||||
+ "$initial{ i <> \u1175;" // hangul jungseong i
|
||||
+ "$initial{ eu <> \u1173;" // hangul jungseong eu
|
||||
+ "$initial{ eo <> \u1165;" // hangul jungseong eo
|
||||
+ "$initial{ e <> \u1166;" // hangul jungseong e
|
||||
+ "$initial{ ae <> \u1162;" // hangul jungseong ae
|
||||
+ "$initial{ a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// MEDIALS (vowels) not after INITIALs
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_StraightQuotes_CurlyQuotes.java,v $
|
||||
* $Date: 2000/03/10 04:07:31 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2000/04/21 21:17:08 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -22,74 +22,74 @@ public class TransliterationRule_StraightQuotes_CurlyQuotes extends ListResource
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
// Rewritten using character codes [LIU]
|
||||
"white=[[:Zs:][:Zl:][:Zp:]];"
|
||||
+ "black=[^{white}];"
|
||||
+ "open=[:Ps:];"
|
||||
+ "dquote=\";"
|
||||
"$white=[[:Zs:][:Zl:][:Zp:]];"
|
||||
+ "$black=[^$white];"
|
||||
+ "$open=[:Ps:];"
|
||||
+ "$dquote=\";"
|
||||
|
||||
+ "lAng=\u3008;"
|
||||
+ "ldAng=\u300A;"
|
||||
+ "lBrk='[';"
|
||||
+ "lBrc='{';"
|
||||
+ "$lAng=\u3008;"
|
||||
+ "$ldAng=\u300A;"
|
||||
+ "$lBrk='[';"
|
||||
+ "$lBrc='{';"
|
||||
|
||||
+ "lquote=\u2018;"
|
||||
+ "rquote=\u2019;"
|
||||
+ "ldquote=\u201C;"
|
||||
+ "rdquote=\u201D;"
|
||||
+ "$lquote=\u2018;"
|
||||
+ "$rquote=\u2019;"
|
||||
+ "$ldquote=\u201C;"
|
||||
+ "$rdquote=\u201D;"
|
||||
|
||||
+ "ldguill=\u00AB;"
|
||||
+ "rdguill=\u00BB;"
|
||||
+ "lguill=\u2039;"
|
||||
+ "rguill=\u203A;"
|
||||
+ "$ldguill=\u00AB;"
|
||||
+ "$rdguill=\u00BB;"
|
||||
+ "$lguill=\u2039;"
|
||||
+ "$rguill=\u203A;"
|
||||
|
||||
+ "mdash=\u2014;"
|
||||
+ "$mdash=\u2014;"
|
||||
|
||||
//#######################################
|
||||
// Conversions from input
|
||||
//#######################################
|
||||
|
||||
// join single quotes
|
||||
+ "{lquote}''>{ldquote};"
|
||||
+ "{lquote}{lquote}>{ldquote};"
|
||||
+ "{rquote}''>{rdquote};"
|
||||
+ "{rquote}{rquote}>{rdquote};"
|
||||
+ "$lquote''>$ldquote;"
|
||||
+ "$lquote$lquote>$ldquote;"
|
||||
+ "$rquote''>$rdquote;"
|
||||
+ "$rquote$rquote>$rdquote;"
|
||||
|
||||
//smart single quotes
|
||||
+ "{white})''>{lquote};"
|
||||
+ "{open})''>{lquote};"
|
||||
+ "{black})''>{rquote};"
|
||||
+ "''>{lquote};"
|
||||
+ "$white{''>$lquote;"
|
||||
+ "$open{''>$lquote;"
|
||||
+ "$black{''>$rquote;"
|
||||
+ "''>$lquote;"
|
||||
|
||||
//smart doubles
|
||||
+ "{white}){dquote}>{ldquote};"
|
||||
+ "{open}){dquote}>{ldquote};"
|
||||
+ "{black}){dquote}>{rdquote};"
|
||||
+ "{dquote}>{ldquote};"
|
||||
+ "$white{$dquote>$ldquote;"
|
||||
+ "$open{$dquote>$ldquote;"
|
||||
+ "$black{$dquote>$rdquote;"
|
||||
+ "$dquote>$ldquote;"
|
||||
|
||||
// join single guillemets
|
||||
+ "{rguill}{rguill}>{rdguill};"
|
||||
+ "'>>'>{rdguill};"
|
||||
+ "{lguill}{lguill}>{ldguill};"
|
||||
+ "'<<'>{ldguill};"
|
||||
+ "$rguill$rguill>$rdguill;"
|
||||
+ "'>>'>$rdguill;"
|
||||
+ "$lguill$lguill>$ldguill;"
|
||||
+ "'<<'>$ldguill;"
|
||||
|
||||
// prevent double spaces
|
||||
+ "\\ )\\ >;"
|
||||
+ "\\ {\\ >;"
|
||||
|
||||
// join hyphens into dash
|
||||
+ "-->{mdash};"
|
||||
+ "-->$mdash;"
|
||||
|
||||
//#######################################
|
||||
// Conversions back to input
|
||||
//#######################################
|
||||
|
||||
//smart quotes
|
||||
+ "''<{lquote};"
|
||||
+ "''<{rquote};"
|
||||
+ "{dquote}<{ldquote};"
|
||||
+ "{dquote}<{rdquote};"
|
||||
+ "''<$lquote;"
|
||||
+ "''<$rquote;"
|
||||
+ "$dquote<$ldquote;"
|
||||
+ "$dquote<$rdquote;"
|
||||
|
||||
//hyphens
|
||||
+ "--<{mdash};"
|
||||
+ "--<$mdash;"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user