Fix bugs found during ICU port
X-SVN-Rev: 575
This commit is contained in:
parent
95e425918b
commit
fb7313675a
@ -196,9 +196,12 @@ import java.text.ParsePosition;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.9 $ $Date: 2000/01/11 04:12:06 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
|
||||
*
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.10 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.9 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
@ -783,7 +786,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
String name = rule.substring(pos, j);
|
||||
pos = j+1;
|
||||
buf.append(getVariableDef(name).charValue());
|
||||
buf.append(getVariableDef(name));
|
||||
}
|
||||
break;
|
||||
case CONTEXT_OPEN:
|
||||
@ -813,7 +816,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
case SET_OPEN:
|
||||
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
|
||||
buf.append(registerSet(new UnicodeSet(rule, pp,
|
||||
data.variableNames, data.setVariables)).charValue());
|
||||
data.variableNames, data.setVariables)));
|
||||
pos = pp.getIndex();
|
||||
break;
|
||||
case VARIABLE_REF_CLOSE:
|
||||
@ -907,7 +910,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
/**
|
||||
* Throw an exception indicating a syntax error. Search the rule string
|
||||
* for the probably end of the rule. Of course, if the error is that
|
||||
* for the probable end of the rule. Of course, if the error is that
|
||||
* the end of rule marker is missing, then the rule end will not be found.
|
||||
* In any case the rule start will be correctly reported.
|
||||
* @param msg error description
|
||||
@ -928,13 +931,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
* register it in the setVariables hash, and return the substitution
|
||||
* character.
|
||||
*/
|
||||
private final Character registerSet(UnicodeSet set) {
|
||||
private final char registerSet(UnicodeSet set) {
|
||||
if (variableNext >= variableLimit) {
|
||||
throw new RuntimeException("Private use variables exhausted");
|
||||
}
|
||||
Character c = new Character(variableNext++);
|
||||
data.setVariables.put(c, set);
|
||||
return c;
|
||||
return c.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -942,13 +945,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
* names are recognized.
|
||||
* @exception IllegalArgumentException if the name is unknown.
|
||||
*/
|
||||
private Character getVariableDef(String name) {
|
||||
private char getVariableDef(String name) {
|
||||
Character ch = (Character) data.variableNames.get(name);
|
||||
if (ch == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
+ name);
|
||||
}
|
||||
return ch;
|
||||
return ch.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -21,9 +21,12 @@ import java.util.Dictionary;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.7 $ $Date: 2000/01/11 04:12:06 $
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.8 $ $Date: 2000/01/13 23:53:23 $
|
||||
*
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
@ -150,7 +153,7 @@ class TransliterationRule {
|
||||
if (cursorPos < 0) {
|
||||
this.cursorPos = output.length();
|
||||
} else {
|
||||
if (cursorPos > input.length()) {
|
||||
if (cursorPos > output.length()) {
|
||||
throw new IllegalArgumentException("Invalid cursor position");
|
||||
}
|
||||
this.cursorPos = cursorPos;
|
||||
@ -255,9 +258,7 @@ class TransliterationRule {
|
||||
|
||||
/* LIMITATION of the current mask algorithm: Some rule
|
||||
* maskings are currently not detected. For example,
|
||||
* "{Lu}]a>x" masks "A]a>y". To detect these sorts of masking,
|
||||
* we need a subset operator on UnicodeSet objects, which we
|
||||
* currently do not have. This can be added later.
|
||||
* "{Lu}]a>x" masks "A]a>y". This can be added later. TODO
|
||||
*/
|
||||
|
||||
int left = anteContextLength;
|
||||
|
@ -230,7 +230,7 @@ import java.util.Dictionary;
|
||||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.4 $ $Date: 2000/01/11 04:03:54 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/13 23:53:23 $
|
||||
*/
|
||||
public class UnicodeSet {
|
||||
/**
|
||||
@ -352,10 +352,17 @@ public class UnicodeSet {
|
||||
public void applyPattern(String pattern) {
|
||||
ParsePosition pos = new ParsePosition(0);
|
||||
pairs = parse(pattern, pos, null, null);
|
||||
if (pos.getIndex() != pattern.length()) {
|
||||
|
||||
// Skip over trailing whitespace
|
||||
int i = pos.getIndex();
|
||||
int n = pattern.length();
|
||||
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
|
||||
++i;
|
||||
}
|
||||
|
||||
if (i != n) {
|
||||
throw new IllegalArgumentException("Parse of \"" + pattern +
|
||||
"\" failed at " +
|
||||
pos.getIndex());
|
||||
"\" failed at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -196,9 +196,12 @@ import java.text.ParsePosition;
|
||||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.9 $ $Date: 2000/01/11 04:12:06 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
|
||||
*
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.10 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.9 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
@ -783,7 +786,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
}
|
||||
String name = rule.substring(pos, j);
|
||||
pos = j+1;
|
||||
buf.append(getVariableDef(name).charValue());
|
||||
buf.append(getVariableDef(name));
|
||||
}
|
||||
break;
|
||||
case CONTEXT_OPEN:
|
||||
@ -813,7 +816,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
case SET_OPEN:
|
||||
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
|
||||
buf.append(registerSet(new UnicodeSet(rule, pp,
|
||||
data.variableNames, data.setVariables)).charValue());
|
||||
data.variableNames, data.setVariables)));
|
||||
pos = pp.getIndex();
|
||||
break;
|
||||
case VARIABLE_REF_CLOSE:
|
||||
@ -907,7 +910,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
/**
|
||||
* Throw an exception indicating a syntax error. Search the rule string
|
||||
* for the probably end of the rule. Of course, if the error is that
|
||||
* for the probable end of the rule. Of course, if the error is that
|
||||
* the end of rule marker is missing, then the rule end will not be found.
|
||||
* In any case the rule start will be correctly reported.
|
||||
* @param msg error description
|
||||
@ -928,13 +931,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
* register it in the setVariables hash, and return the substitution
|
||||
* character.
|
||||
*/
|
||||
private final Character registerSet(UnicodeSet set) {
|
||||
private final char registerSet(UnicodeSet set) {
|
||||
if (variableNext >= variableLimit) {
|
||||
throw new RuntimeException("Private use variables exhausted");
|
||||
}
|
||||
Character c = new Character(variableNext++);
|
||||
data.setVariables.put(c, set);
|
||||
return c;
|
||||
return c.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -942,13 +945,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
||||
* names are recognized.
|
||||
* @exception IllegalArgumentException if the name is unknown.
|
||||
*/
|
||||
private Character getVariableDef(String name) {
|
||||
private char getVariableDef(String name) {
|
||||
Character ch = (Character) data.variableNames.get(name);
|
||||
if (ch == null) {
|
||||
throw new IllegalArgumentException("Undefined variable: "
|
||||
+ name);
|
||||
}
|
||||
return ch;
|
||||
return ch.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -21,9 +21,12 @@ import java.util.Dictionary;
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.7 $ $Date: 2000/01/11 04:12:06 $
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.8 $ $Date: 2000/01/13 23:53:23 $
|
||||
*
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.8 2000/01/13 23:53:23 Alan
|
||||
* Fix bugs found during ICU port
|
||||
*
|
||||
* Revision 1.7 2000/01/11 04:12:06 Alan
|
||||
* Cleanup, embellish comments
|
||||
*
|
||||
@ -150,7 +153,7 @@ class TransliterationRule {
|
||||
if (cursorPos < 0) {
|
||||
this.cursorPos = output.length();
|
||||
} else {
|
||||
if (cursorPos > input.length()) {
|
||||
if (cursorPos > output.length()) {
|
||||
throw new IllegalArgumentException("Invalid cursor position");
|
||||
}
|
||||
this.cursorPos = cursorPos;
|
||||
@ -255,9 +258,7 @@ class TransliterationRule {
|
||||
|
||||
/* LIMITATION of the current mask algorithm: Some rule
|
||||
* maskings are currently not detected. For example,
|
||||
* "{Lu}]a>x" masks "A]a>y". To detect these sorts of masking,
|
||||
* we need a subset operator on UnicodeSet objects, which we
|
||||
* currently do not have. This can be added later.
|
||||
* "{Lu}]a>x" masks "A]a>y". This can be added later. TODO
|
||||
*/
|
||||
|
||||
int left = anteContextLength;
|
||||
|
@ -230,7 +230,7 @@ import java.util.Dictionary;
|
||||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.4 $ $Date: 2000/01/11 04:03:54 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/13 23:53:23 $
|
||||
*/
|
||||
public class UnicodeSet {
|
||||
/**
|
||||
@ -352,10 +352,17 @@ public class UnicodeSet {
|
||||
public void applyPattern(String pattern) {
|
||||
ParsePosition pos = new ParsePosition(0);
|
||||
pairs = parse(pattern, pos, null, null);
|
||||
if (pos.getIndex() != pattern.length()) {
|
||||
|
||||
// Skip over trailing whitespace
|
||||
int i = pos.getIndex();
|
||||
int n = pattern.length();
|
||||
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
|
||||
++i;
|
||||
}
|
||||
|
||||
if (i != n) {
|
||||
throw new IllegalArgumentException("Parse of \"" + pattern +
|
||||
"\" failed at " +
|
||||
pos.getIndex());
|
||||
"\" failed at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user