Fix bugs found during ICU port

X-SVN-Rev: 575
This commit is contained in:
Alan Liu 2000-01-13 23:53:23 +00:00
parent 95e425918b
commit fb7313675a
6 changed files with 56 additions and 34 deletions

View File

@@ -196,9 +196,12 @@ import java.text.ParsePosition;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.9 $ $Date: 2000/01/11 04:12:06 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
*
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.10 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.9 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
@@ -783,7 +786,7 @@ public class RuleBasedTransliterator extends Transliterator {
}
String name = rule.substring(pos, j);
pos = j+1;
buf.append(getVariableDef(name).charValue());
buf.append(getVariableDef(name));
}
break;
case CONTEXT_OPEN:
@@ -813,7 +816,7 @@ public class RuleBasedTransliterator extends Transliterator {
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
buf.append(registerSet(new UnicodeSet(rule, pp,
data.variableNames, data.setVariables)).charValue());
data.variableNames, data.setVariables)));
pos = pp.getIndex();
break;
case VARIABLE_REF_CLOSE:
@@ -907,7 +910,7 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* Throw an exception indicating a syntax error. Search the rule string
* for the probably end of the rule. Of course, if the error is that
* for the probable end of the rule. Of course, if the error is that
* the end of rule marker is missing, then the rule end will not be found.
* In any case the rule start will be correctly reported.
* @param msg error description
@@ -928,13 +931,13 @@ public class RuleBasedTransliterator extends Transliterator {
* register it in the setVariables hash, and return the substitution
* character.
*/
private final Character registerSet(UnicodeSet set) {
private final char registerSet(UnicodeSet set) {
if (variableNext >= variableLimit) {
throw new RuntimeException("Private use variables exhausted");
}
Character c = new Character(variableNext++);
data.setVariables.put(c, set);
return c;
return c.charValue();
}
/**
@@ -942,13 +945,13 @@ public class RuleBasedTransliterator extends Transliterator {
* names are recognized.
* @exception IllegalArgumentException if the name is unknown.
*/
private Character getVariableDef(String name) {
private char getVariableDef(String name) {
Character ch = (Character) data.variableNames.get(name);
if (ch == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
return ch;
return ch.charValue();
}
/**

View File

@@ -21,9 +21,12 @@ import java.util.Dictionary;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.7 $ $Date: 2000/01/11 04:12:06 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.8 $ $Date: 2000/01/13 23:53:23 $
*
* $Log: TransliterationRule.java,v $
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
@@ -150,7 +153,7 @@ class TransliterationRule {
if (cursorPos < 0) {
this.cursorPos = output.length();
} else {
if (cursorPos > input.length()) {
if (cursorPos > output.length()) {
throw new IllegalArgumentException("Invalid cursor position");
}
this.cursorPos = cursorPos;
@@ -255,9 +258,7 @@ class TransliterationRule {
/* LIMITATION of the current mask algorithm: Some rule
* maskings are currently not detected. For example,
* "{Lu}]a>x" masks "A]a>y". To detect these sorts of masking,
* we need a subset operator on UnicodeSet objects, which we
* currently do not have. This can be added later.
* "{Lu}]a>x" masks "A]a>y". This can be added later. TODO
*/
int left = anteContextLength;

View File

@@ -230,7 +230,7 @@ import java.util.Dictionary;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.4 $ $Date: 2000/01/11 04:03:54 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/13 23:53:23 $
*/
public class UnicodeSet {
/**
@@ -352,10 +352,17 @@ public class UnicodeSet {
public void applyPattern(String pattern) {
ParsePosition pos = new ParsePosition(0);
pairs = parse(pattern, pos, null, null);
if (pos.getIndex() != pattern.length()) {
// Skip over trailing whitespace
int i = pos.getIndex();
int n = pattern.length();
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
++i;
}
if (i != n) {
throw new IllegalArgumentException("Parse of \"" + pattern +
"\" failed at " +
pos.getIndex());
"\" failed at " + i);
}
}

View File

@@ -196,9 +196,12 @@ import java.text.ParsePosition;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.9 $ $Date: 2000/01/11 04:12:06 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
*
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.10 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.9 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
@@ -783,7 +786,7 @@ public class RuleBasedTransliterator extends Transliterator {
}
String name = rule.substring(pos, j);
pos = j+1;
buf.append(getVariableDef(name).charValue());
buf.append(getVariableDef(name));
}
break;
case CONTEXT_OPEN:
@@ -813,7 +816,7 @@ public class RuleBasedTransliterator extends Transliterator {
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
buf.append(registerSet(new UnicodeSet(rule, pp,
data.variableNames, data.setVariables)).charValue());
data.variableNames, data.setVariables)));
pos = pp.getIndex();
break;
case VARIABLE_REF_CLOSE:
@@ -907,7 +910,7 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* Throw an exception indicating a syntax error. Search the rule string
* for the probably end of the rule. Of course, if the error is that
* for the probable end of the rule. Of course, if the error is that
* the end of rule marker is missing, then the rule end will not be found.
* In any case the rule start will be correctly reported.
* @param msg error description
@@ -928,13 +931,13 @@ public class RuleBasedTransliterator extends Transliterator {
* register it in the setVariables hash, and return the substitution
* character.
*/
private final Character registerSet(UnicodeSet set) {
private final char registerSet(UnicodeSet set) {
if (variableNext >= variableLimit) {
throw new RuntimeException("Private use variables exhausted");
}
Character c = new Character(variableNext++);
data.setVariables.put(c, set);
return c;
return c.charValue();
}
/**
@@ -942,13 +945,13 @@ public class RuleBasedTransliterator extends Transliterator {
* names are recognized.
* @exception IllegalArgumentException if the name is unknown.
*/
private Character getVariableDef(String name) {
private char getVariableDef(String name) {
Character ch = (Character) data.variableNames.get(name);
if (ch == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
return ch;
return ch.charValue();
}
/**

View File

@@ -21,9 +21,12 @@ import java.util.Dictionary;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.7 $ $Date: 2000/01/11 04:12:06 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.8 $ $Date: 2000/01/13 23:53:23 $
*
* $Log: TransliterationRule.java,v $
* Revision 1.8 2000/01/13 23:53:23 Alan
* Fix bugs found during ICU port
*
* Revision 1.7 2000/01/11 04:12:06 Alan
* Cleanup, embellish comments
*
@@ -150,7 +153,7 @@ class TransliterationRule {
if (cursorPos < 0) {
this.cursorPos = output.length();
} else {
if (cursorPos > input.length()) {
if (cursorPos > output.length()) {
throw new IllegalArgumentException("Invalid cursor position");
}
this.cursorPos = cursorPos;
@@ -255,9 +258,7 @@ class TransliterationRule {
/* LIMITATION of the current mask algorithm: Some rule
* maskings are currently not detected. For example,
* "{Lu}]a>x" masks "A]a>y". To detect these sorts of masking,
* we need a subset operator on UnicodeSet objects, which we
* currently do not have. This can be added later.
* "{Lu}]a>x" masks "A]a>y". This can be added later. TODO
*/
int left = anteContextLength;

View File

@@ -230,7 +230,7 @@ import java.util.Dictionary;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.4 $ $Date: 2000/01/11 04:03:54 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/13 23:53:23 $
*/
public class UnicodeSet {
/**
@@ -352,10 +352,17 @@ public class UnicodeSet {
public void applyPattern(String pattern) {
ParsePosition pos = new ParsePosition(0);
pairs = parse(pattern, pos, null, null);
if (pos.getIndex() != pattern.length()) {
// Skip over trailing whitespace
int i = pos.getIndex();
int n = pattern.length();
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
++i;
}
if (i != n) {
throw new IllegalArgumentException("Parse of \"" + pattern +
"\" failed at " +
pos.getIndex());
"\" failed at " + i);
}
}