Fix handling of Transliterator.Position fields

X-SVN-Rev: 1711
This commit is contained in:
Alan Liu 2000-06-29 21:59:23 +00:00
parent 1f32d4dd9f
commit 5ad646673b
6 changed files with 108 additions and 158 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
* $Date: 2000/06/28 20:49:54 $
* $Revision: 1.35 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.36 $
*
*****************************************************************************************
*/
@ -252,14 +252,12 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.35 $ $Date: 2000/06/28 20:49:54 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.36 $ $Date: 2000/06/29 21:59:23 $
*/
public class RuleBasedTransliterator extends Transliterator {
private Data data;
static final boolean DEBUG = false;
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@ -324,15 +322,6 @@ public class RuleBasedTransliterator extends Transliterator {
* exz|d no match, advance cursor
* exzd| done
*/
int start = index.contextStart;
int limit = index.limit;
int cursor = index.start;
if (DEBUG) {
System.out.print("\"" +
Utility.escape(rsubstring(text, start, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, limit)) + "\"");
}
/* A rule like
* a>b|a
@ -344,7 +333,7 @@ public class RuleBasedTransliterator extends Transliterator {
* uint32_t.
*/
int loopCount = 0;
int loopLimit = limit - cursor;
int loopLimit = index.limit - index.start;
if (loopLimit >= 0x08000000) {
loopLimit = 0x7FFFFFFF;
} else {
@ -354,12 +343,12 @@ public class RuleBasedTransliterator extends Transliterator {
boolean partial[] = new boolean[1];
partial[0] = false;
while (cursor < limit && loopCount <= loopLimit) {
while (index.start < index.limit && loopCount <= loopLimit) {
TransliterationRule r = incremental ?
data.ruleSet.findIncrementalMatch(text, index.contextStart, limit, cursor,
data.ruleSet.findIncrementalMatch(text, index,
data, partial, getFilter()) :
data.ruleSet.findMatch(text, index.contextStart, limit,
cursor, data, getFilter());
data.ruleSet.findMatch(text, index,
data, getFilter());
/* If we match a rule then apply it by replacing the key
* with the rule output and repositioning the cursor
* appropriately. If we get a partial match, then we
@ -372,46 +361,20 @@ public class RuleBasedTransliterator extends Transliterator {
if (partial[0]) {
break;
} else {
++cursor;
++index.start;
}
} else {
// Delegate replacement to TransliterationRule object
limit += r.replace(text, cursor, data);
// text.replace(cursor, cursor + r.getKeyLength(), r.getOutput());
// limit += r.getOutput().length() - r.getKeyLength();
cursor += r.getCursorPos();
int lenDelta = r.replace(text, index.start, data);
index.limit += lenDelta;
index.contextLimit += lenDelta;
index.start += r.getCursorPos();
++loopCount;
}
}
if (DEBUG) {
System.out.println(" -> \"" +
Utility.escape(rsubstring(text, start, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, limit)) + "\"");
}
index.contextLimit += limit - index.limit;
index.limit = limit;
index.start = cursor;
}
/**
 * FOR DEBUGGING: Collect the characters of a Replaceable that lie in
 * the half-open range [start, limit) into a String.
 *
 * @param r the text to read characters from
 * @param start index of the first character to copy
 * @param limit index one past the last character to copy; if it is
 * not greater than start, the result is the empty string
 * @return the characters r.charAt(start) .. r.charAt(limit-1), in order
 */
private static String rsubstring(Replaceable r, int start, int limit) {
    StringBuffer chars = new StringBuffer();
    // Replaceable is read one character at a time via charAt().
    for (int pos = start; pos < limit; ++pos) {
        chars.append(r.charAt(pos));
    }
    return chars.toString();
}
static class Data {
public Data() {
variableNames = new Hashtable();
@ -1329,6 +1292,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.36 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.35 2000/06/28 20:49:54 alan4j
* Fix handling of Positions fields
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
* $Date: 2000/05/18 21:37:19 $
* $Revision: 1.22 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.23 $
*
*****************************************************************************************
*/
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.22 $ $Date: 2000/05/18 21:37:19 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.23 $ $Date: 2000/06/29 21:59:23 $
*/
class TransliterationRule {
/**
@ -402,13 +402,14 @@ class TransliterationRule {
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
public final boolean matches(Replaceable text, int start, int limit,
int cursor, RuleBasedTransliterator.Data variables,
public final boolean matches(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
// Match anteContext, key, and postContext
cursor -= anteContextLength;
if (cursor < start
|| (cursor + pattern.length()) > limit) {
int cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart
|| (cursor + pattern.length()) > pos.contextLimit) {
return false;
}
for (int i=0; i<pattern.length(); ++i, ++cursor) {
@ -445,10 +446,11 @@ class TransliterationRule {
* @see #PARTIAL_MATCH
* @see #FULL_MATCH
*/
public int getMatchDegree(Replaceable text, int start, int limit,
int cursor, RuleBasedTransliterator.Data variables,
public int getMatchDegree(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
int len = getRegionMatchLength(text, pos,
pattern, variables, filter);
return len < anteContextLength ? MISMATCH :
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
@ -477,16 +479,17 @@ class TransliterationRule {
* match any characters, otherwise the number of characters of text that
* match this rule.
*/
protected static int getRegionMatchLength(Replaceable text, int start,
int limit, int cursor,
String template,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
if (cursor < start) {
protected int getRegionMatchLength(Replaceable text,
Transliterator.Position pos,
String template,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
int cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart) {
return -1;
}
int i;
for (i=0; i<template.length() && cursor<limit; ++i, ++cursor) {
for (i=0; i<template.length() && cursor<pos.contextLimit; ++i, ++cursor) {
if (!charMatches(template.charAt(i), text.charAt(cursor),
variables, filter)) {
return -1;
@ -521,6 +524,9 @@ class TransliterationRule {
/**
* $Log: TransliterationRule.java,v $
* Revision 1.23 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.22 2000/05/18 21:37:19 alan
* Update docs
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java,v $
* $Date: 2000/03/10 04:07:24 $
* $Revision: 1.9 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.10 $
*
*****************************************************************************************
*/
@ -27,9 +27,12 @@ import java.util.*;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.9 $ $Date: 2000/03/10 04:07:24 $
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.10 $ $Date: 2000/06/29 21:59:23 $
*
* $Log: TransliterationRuleSet.java,v $
* Revision 1.10 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.9 2000/03/10 04:07:24 johnf
* Copyright update
*
@ -226,16 +229,16 @@ class TransliterationRuleSet {
* <tt>null</tt> then no filtering is applied.
* @return the matching rule, or null if none found.
*/
public TransliterationRule findMatch(Replaceable text, int start, int limit,
int cursor,
public TransliterationRule findMatch(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
/* We only need to check our indexed bin of the rule table,
* based on the low byte of the first key character.
*/
int x = text.charAt(cursor) & 0xFF;
int x = text.charAt(pos.start) & 0xFF;
for (int i=index[x]; i<index[x+1]; ++i) {
if (rules[i].matches(text, start, limit, cursor, variables, filter)) {
if (rules[i].matches(text, pos, variables, filter)) {
return rules[i];
}
}
@ -269,8 +272,8 @@ class TransliterationRuleSet {
* @return the matching rule, or null if none found, or if the text buffer
* does not have enough text yet to unambiguously match a rule.
*/
public TransliterationRule findIncrementalMatch(Replaceable text, int start,
int limit, int cursor,
public TransliterationRule findIncrementalMatch(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
boolean partial[],
UnicodeFilter filter) {
@ -278,9 +281,9 @@ class TransliterationRuleSet {
* based on the low byte of the first key character.
*/
partial[0] = false;
int x = text.charAt(cursor) & 0xFF;
int x = text.charAt(pos.start) & 0xFF;
for (int i=index[x]; i<index[x+1]; ++i) {
int match = rules[i].getMatchDegree(text, start, limit, cursor,
int match = rules[i].getMatchDegree(text, pos,
variables, filter);
switch (match) {
case TransliterationRule.FULL_MATCH:

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
* $Date: 2000/06/28 20:49:54 $
* $Revision: 1.35 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.36 $
*
*****************************************************************************************
*/
@ -252,14 +252,12 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.35 $ $Date: 2000/06/28 20:49:54 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.36 $ $Date: 2000/06/29 21:59:23 $
*/
public class RuleBasedTransliterator extends Transliterator {
private Data data;
static final boolean DEBUG = false;
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@ -324,15 +322,6 @@ public class RuleBasedTransliterator extends Transliterator {
* exz|d no match, advance cursor
* exzd| done
*/
int start = index.contextStart;
int limit = index.limit;
int cursor = index.start;
if (DEBUG) {
System.out.print("\"" +
Utility.escape(rsubstring(text, start, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, limit)) + "\"");
}
/* A rule like
* a>b|a
@ -344,7 +333,7 @@ public class RuleBasedTransliterator extends Transliterator {
* uint32_t.
*/
int loopCount = 0;
int loopLimit = limit - cursor;
int loopLimit = index.limit - index.start;
if (loopLimit >= 0x08000000) {
loopLimit = 0x7FFFFFFF;
} else {
@ -354,12 +343,12 @@ public class RuleBasedTransliterator extends Transliterator {
boolean partial[] = new boolean[1];
partial[0] = false;
while (cursor < limit && loopCount <= loopLimit) {
while (index.start < index.limit && loopCount <= loopLimit) {
TransliterationRule r = incremental ?
data.ruleSet.findIncrementalMatch(text, index.contextStart, limit, cursor,
data.ruleSet.findIncrementalMatch(text, index,
data, partial, getFilter()) :
data.ruleSet.findMatch(text, index.contextStart, limit,
cursor, data, getFilter());
data.ruleSet.findMatch(text, index,
data, getFilter());
/* If we match a rule then apply it by replacing the key
* with the rule output and repositioning the cursor
* appropriately. If we get a partial match, then we
@ -372,46 +361,20 @@ public class RuleBasedTransliterator extends Transliterator {
if (partial[0]) {
break;
} else {
++cursor;
++index.start;
}
} else {
// Delegate replacement to TransliterationRule object
limit += r.replace(text, cursor, data);
// text.replace(cursor, cursor + r.getKeyLength(), r.getOutput());
// limit += r.getOutput().length() - r.getKeyLength();
cursor += r.getCursorPos();
int lenDelta = r.replace(text, index.start, data);
index.limit += lenDelta;
index.contextLimit += lenDelta;
index.start += r.getCursorPos();
++loopCount;
}
}
if (DEBUG) {
System.out.println(" -> \"" +
Utility.escape(rsubstring(text, start, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, cursor)) + '|' +
Utility.escape(rsubstring(text, cursor, limit)) + "\"");
}
index.contextLimit += limit - index.limit;
index.limit = limit;
index.start = cursor;
}
/**
 * FOR DEBUGGING: Collect the characters of a Replaceable that lie in
 * the half-open range [start, limit) into a String.
 *
 * @param r the text to read characters from
 * @param start index of the first character to copy
 * @param limit index one past the last character to copy; if it is
 * not greater than start, the result is the empty string
 * @return the characters r.charAt(start) .. r.charAt(limit-1), in order
 */
private static String rsubstring(Replaceable r, int start, int limit) {
    StringBuffer chars = new StringBuffer();
    // Replaceable is read one character at a time via charAt().
    for (int pos = start; pos < limit; ++pos) {
        chars.append(r.charAt(pos));
    }
    return chars.toString();
}
static class Data {
public Data() {
variableNames = new Hashtable();
@ -1329,6 +1292,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.36 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.35 2000/06/28 20:49:54 alan4j
* Fix handling of Positions fields
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
* $Date: 2000/05/18 21:37:19 $
* $Revision: 1.22 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.23 $
*
*****************************************************************************************
*/
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.22 $ $Date: 2000/05/18 21:37:19 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.23 $ $Date: 2000/06/29 21:59:23 $
*/
class TransliterationRule {
/**
@ -402,13 +402,14 @@ class TransliterationRule {
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
public final boolean matches(Replaceable text, int start, int limit,
int cursor, RuleBasedTransliterator.Data variables,
public final boolean matches(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
// Match anteContext, key, and postContext
cursor -= anteContextLength;
if (cursor < start
|| (cursor + pattern.length()) > limit) {
int cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart
|| (cursor + pattern.length()) > pos.contextLimit) {
return false;
}
for (int i=0; i<pattern.length(); ++i, ++cursor) {
@ -445,10 +446,11 @@ class TransliterationRule {
* @see #PARTIAL_MATCH
* @see #FULL_MATCH
*/
public int getMatchDegree(Replaceable text, int start, int limit,
int cursor, RuleBasedTransliterator.Data variables,
public int getMatchDegree(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
int len = getRegionMatchLength(text, pos,
pattern, variables, filter);
return len < anteContextLength ? MISMATCH :
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
@ -477,16 +479,17 @@ class TransliterationRule {
* match any characters, otherwise the number of characters of text that
* match this rule.
*/
protected static int getRegionMatchLength(Replaceable text, int start,
int limit, int cursor,
String template,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
if (cursor < start) {
protected int getRegionMatchLength(Replaceable text,
Transliterator.Position pos,
String template,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
int cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart) {
return -1;
}
int i;
for (i=0; i<template.length() && cursor<limit; ++i, ++cursor) {
for (i=0; i<template.length() && cursor<pos.contextLimit; ++i, ++cursor) {
if (!charMatches(template.charAt(i), text.charAt(cursor),
variables, filter)) {
return -1;
@ -521,6 +524,9 @@ class TransliterationRule {
/**
* $Log: TransliterationRule.java,v $
* Revision 1.23 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.22 2000/05/18 21:37:19 alan
* Update docs
*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRuleSet.java,v $
* $Date: 2000/03/10 04:07:24 $
* $Revision: 1.9 $
* $Date: 2000/06/29 21:59:23 $
* $Revision: 1.10 $
*
*****************************************************************************************
*/
@ -27,9 +27,12 @@ import java.util.*;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.9 $ $Date: 2000/03/10 04:07:24 $
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.10 $ $Date: 2000/06/29 21:59:23 $
*
* $Log: TransliterationRuleSet.java,v $
* Revision 1.10 2000/06/29 21:59:23 alan4j
* Fix handling of Transliterator.Position fields
*
* Revision 1.9 2000/03/10 04:07:24 johnf
* Copyright update
*
@ -226,16 +229,16 @@ class TransliterationRuleSet {
* <tt>null</tt> then no filtering is applied.
* @return the matching rule, or null if none found.
*/
public TransliterationRule findMatch(Replaceable text, int start, int limit,
int cursor,
public TransliterationRule findMatch(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
UnicodeFilter filter) {
/* We only need to check our indexed bin of the rule table,
* based on the low byte of the first key character.
*/
int x = text.charAt(cursor) & 0xFF;
int x = text.charAt(pos.start) & 0xFF;
for (int i=index[x]; i<index[x+1]; ++i) {
if (rules[i].matches(text, start, limit, cursor, variables, filter)) {
if (rules[i].matches(text, pos, variables, filter)) {
return rules[i];
}
}
@ -269,8 +272,8 @@ class TransliterationRuleSet {
* @return the matching rule, or null if none found, or if the text buffer
* does not have enough text yet to unambiguously match a rule.
*/
public TransliterationRule findIncrementalMatch(Replaceable text, int start,
int limit, int cursor,
public TransliterationRule findIncrementalMatch(Replaceable text,
Transliterator.Position pos,
RuleBasedTransliterator.Data variables,
boolean partial[],
UnicodeFilter filter) {
@ -278,9 +281,9 @@ class TransliterationRuleSet {
* based on the low byte of the first key character.
*/
partial[0] = false;
int x = text.charAt(cursor) & 0xFF;
int x = text.charAt(pos.start) & 0xFF;
for (int i=index[x]; i<index[x+1]; ++i) {
int match = rules[i].getMatchDegree(text, start, limit, cursor,
int match = rules[i].getMatchDegree(text, pos,
variables, filter);
switch (match) {
case TransliterationRule.FULL_MATCH: