Fix handling of Transliterator.Position fields

X-SVN-Rev: 1711
2000-06-29 21:59:23 +00:00 · 2000-06-29 21:59:23 +00:00 · 5ad646673b
commit 5ad646673b
parent 1f32d4dd9f
6 changed files with 108 additions and 158 deletions
--- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $ 
- * $Date: 2000/06/28 20:49:54 $ 
- * $Revision: 1.35 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.36 $
 *
 *****************************************************************************************
 */
@ -252,14 +252,12 @@ import com.ibm.util.Utility;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 * 
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.35 $ $Date: 2000/06/28 20:49:54 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.36 $ $Date: 2000/06/29 21:59:23 $
 */
 public class RuleBasedTransliterator extends Transliterator {

    private Data data;

-    static final boolean DEBUG = false;
-
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -324,15 +322,6 @@ public class RuleBasedTransliterator extends Transliterator {
         * exz|d    no match, advance cursor
         * exzd|    done
         */
-        int start = index.contextStart;
-        int limit = index.limit;
-        int cursor = index.start;
-
-        if (DEBUG) {
-            System.out.print("\"" +
-                Utility.escape(rsubstring(text, start, cursor)) + '|' +
-                Utility.escape(rsubstring(text, cursor, limit)) + "\"");
-        }

        /* A rule like
         *   a>b|a
@ -344,7 +333,7 @@ public class RuleBasedTransliterator extends Transliterator {
         * uint32_t.
         */
        int loopCount = 0;
-        int loopLimit = limit - cursor;
+        int loopLimit = index.limit - index.start;
        if (loopLimit >= 0x08000000) {
            loopLimit = 0x7FFFFFFF;
        } else {
@ -354,12 +343,12 @@ public class RuleBasedTransliterator extends Transliterator {
        boolean partial[] = new boolean[1];
        partial[0] = false;

-        while (cursor < limit && loopCount <= loopLimit) {
+        while (index.start < index.limit && loopCount <= loopLimit) {
            TransliterationRule r = incremental ?
-                data.ruleSet.findIncrementalMatch(text, index.contextStart, limit, cursor,
+                data.ruleSet.findIncrementalMatch(text, index,
                                                  data, partial, getFilter()) :
-                data.ruleSet.findMatch(text, index.contextStart, limit,
-                                       cursor, data, getFilter());
+                data.ruleSet.findMatch(text, index,
+                                       data, getFilter());
            /* If we match a rule then apply it by replacing the key
             * with the rule output and repositioning the cursor
             * appropriately.  If we get a partial match, then we
@ -372,46 +361,20 @@ public class RuleBasedTransliterator extends Transliterator {
                if (partial[0]) {
                    break;
                } else {
-                    ++cursor;
+                    ++index.start;
                }
            } else {
                // Delegate replacement to TransliterationRule object
-                limit += r.replace(text, cursor, data);
-                // text.replace(cursor, cursor + r.getKeyLength(), r.getOutput());
-                // limit += r.getOutput().length() - r.getKeyLength();
-                cursor += r.getCursorPos();
+                int lenDelta = r.replace(text, index.start, data);
+                index.limit += lenDelta;
+                index.contextLimit += lenDelta;
+                index.start += r.getCursorPos();
                ++loopCount;
            }
        }
-
-        if (DEBUG) {
-            System.out.println(" -> \"" +
-                Utility.escape(rsubstring(text, start, cursor)) + '|' + 
-                Utility.escape(rsubstring(text, cursor, cursor)) + '|' + 
-                Utility.escape(rsubstring(text, cursor, limit)) + "\"");
-        }
-
-        index.contextLimit += limit - index.limit;
-        index.limit = limit;
-        index.start = cursor;
    }


-    /**
-     * FOR DEBUGGING: Return a substring of a Replaceable.
-     */
-    private static String rsubstring(Replaceable r, int start, int limit) {
-        StringBuffer buf = new StringBuffer();
-        while (start < limit) {
-            buf.append(r.charAt(start++));
-        }
-        return buf.toString();
-    }
-
-
-
-
-
    static class Data {
        public Data() {
            variableNames = new Hashtable();
@ -1329,6 +1292,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.36  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.35  2000/06/28 20:49:54  alan4j
 * Fix handling of Positions fields
 *
--- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $ 
- * $Date: 2000/05/18 21:37:19 $ 
- * $Revision: 1.22 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.23 $
 *
 *****************************************************************************************
 */
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.22 $ $Date: 2000/05/18 21:37:19 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.23 $ $Date: 2000/06/29 21:59:23 $
 */
 class TransliterationRule {
    /**
@ -402,13 +402,14 @@ class TransliterationRule {
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     */
-    public final boolean matches(Replaceable text, int start, int limit,
-                                 int cursor, RuleBasedTransliterator.Data variables,
+    public final boolean matches(Replaceable text,
+                                 Transliterator.Position pos,
+                                 RuleBasedTransliterator.Data variables,
                                 UnicodeFilter filter) {
        // Match anteContext, key, and postContext
-        cursor -= anteContextLength;
-        if (cursor < start
-            || (cursor + pattern.length()) > limit) {
+        int cursor = pos.start - anteContextLength;
+        if (cursor < pos.contextStart
+            || (cursor + pattern.length()) > pos.contextLimit) {
            return false;
        }
        for (int i=0; i<pattern.length(); ++i, ++cursor) {
@ -445,10 +446,11 @@ class TransliterationRule {
     * @see #PARTIAL_MATCH
     * @see #FULL_MATCH
     */
-    public int getMatchDegree(Replaceable text, int start, int limit,
-                              int cursor, RuleBasedTransliterator.Data variables,
+    public int getMatchDegree(Replaceable text,
+                              Transliterator.Position pos,
+                              RuleBasedTransliterator.Data variables,
                              UnicodeFilter filter) {
-        int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
+        int len = getRegionMatchLength(text, pos,
                                       pattern, variables, filter);
        return len < anteContextLength ? MISMATCH :
            (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
@ -477,16 +479,17 @@ class TransliterationRule {
     * match any characters, otherwise the number of characters of text that
     * match this rule.
     */
-    protected static int getRegionMatchLength(Replaceable text, int start,
-                                              int limit, int cursor,
-                                              String template,
-                                              RuleBasedTransliterator.Data variables,
-                                              UnicodeFilter filter) {
-        if (cursor < start) {
+    protected int getRegionMatchLength(Replaceable text,
+                                       Transliterator.Position pos,
+                                       String template,
+                                       RuleBasedTransliterator.Data variables,
+                                       UnicodeFilter filter) {
+        int cursor = pos.start - anteContextLength;
+        if (cursor < pos.contextStart) {
            return -1;
        }
        int i;
-        for (i=0; i<template.length() && cursor<limit; ++i, ++cursor) {
+        for (i=0; i<template.length() && cursor<pos.contextLimit; ++i, ++cursor) {
            if (!charMatches(template.charAt(i), text.charAt(cursor),
                             variables, filter)) {
                return -1;
@ -521,6 +524,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.23  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.22  2000/05/18 21:37:19  alan
 * Update docs
 *
--- a/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java,v $ 
- * $Date: 2000/03/10 04:07:24 $ 
- * $Revision: 1.9 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.10 $
 *
 *****************************************************************************************
 */
@ -27,9 +27,12 @@ import java.util.*;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.9 $ $Date: 2000/03/10 04:07:24 $
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.10 $ $Date: 2000/06/29 21:59:23 $
 *
 * $Log: TransliterationRuleSet.java,v $
+ * Revision 1.10  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.9  2000/03/10 04:07:24  johnf
 * Copyright update
 *
@ -226,16 +229,16 @@ class TransliterationRuleSet {
     * <tt>null</tt> then no filtering is applied.
     * @return the matching rule, or null if none found.
     */
-    public TransliterationRule findMatch(Replaceable text, int start, int limit,
-                                         int cursor,
+    public TransliterationRule findMatch(Replaceable text,
+                                         Transliterator.Position pos,
                                         RuleBasedTransliterator.Data variables,
                                         UnicodeFilter filter) {
        /* We only need to check our indexed bin of the rule table,
         * based on the low byte of the first key character.
         */
-        int x = text.charAt(cursor) & 0xFF;
+        int x = text.charAt(pos.start) & 0xFF;
        for (int i=index[x]; i<index[x+1]; ++i) {
-            if (rules[i].matches(text, start, limit, cursor, variables, filter)) {
+            if (rules[i].matches(text, pos, variables, filter)) {
                return rules[i];
            }
        }
@ -269,8 +272,8 @@ class TransliterationRuleSet {
     * @return the matching rule, or null if none found, or if the text buffer
     * does not have enough text yet to unambiguously match a rule.
     */
-    public TransliterationRule findIncrementalMatch(Replaceable text, int start,
-                                                    int limit, int cursor,
+    public TransliterationRule findIncrementalMatch(Replaceable text,
+                                                    Transliterator.Position pos,
                                                    RuleBasedTransliterator.Data variables,
                                                    boolean partial[],
                                                    UnicodeFilter filter) {
@ -278,9 +281,9 @@ class TransliterationRuleSet {
         * based on the low byte of the first key character.
         */
        partial[0] = false;
-        int x = text.charAt(cursor) & 0xFF;
+        int x = text.charAt(pos.start) & 0xFF;
        for (int i=index[x]; i<index[x+1]; ++i) {
-            int match = rules[i].getMatchDegree(text, start, limit, cursor,
+            int match = rules[i].getMatchDegree(text, pos,
                                                variables, filter);
            switch (match) {
            case TransliterationRule.FULL_MATCH:
--- a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $ 
- * $Date: 2000/06/28 20:49:54 $ 
- * $Revision: 1.35 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.36 $
 *
 *****************************************************************************************
 */
@ -252,14 +252,12 @@ import com.ibm.util.Utility;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 * 
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.35 $ $Date: 2000/06/28 20:49:54 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.36 $ $Date: 2000/06/29 21:59:23 $
 */
 public class RuleBasedTransliterator extends Transliterator {

    private Data data;

-    static final boolean DEBUG = false;
-
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -324,15 +322,6 @@ public class RuleBasedTransliterator extends Transliterator {
         * exz|d    no match, advance cursor
         * exzd|    done
         */
-        int start = index.contextStart;
-        int limit = index.limit;
-        int cursor = index.start;
-
-        if (DEBUG) {
-            System.out.print("\"" +
-                Utility.escape(rsubstring(text, start, cursor)) + '|' +
-                Utility.escape(rsubstring(text, cursor, limit)) + "\"");
-        }

        /* A rule like
         *   a>b|a
@ -344,7 +333,7 @@ public class RuleBasedTransliterator extends Transliterator {
         * uint32_t.
         */
        int loopCount = 0;
-        int loopLimit = limit - cursor;
+        int loopLimit = index.limit - index.start;
        if (loopLimit >= 0x08000000) {
            loopLimit = 0x7FFFFFFF;
        } else {
@ -354,12 +343,12 @@ public class RuleBasedTransliterator extends Transliterator {
        boolean partial[] = new boolean[1];
        partial[0] = false;

-        while (cursor < limit && loopCount <= loopLimit) {
+        while (index.start < index.limit && loopCount <= loopLimit) {
            TransliterationRule r = incremental ?
-                data.ruleSet.findIncrementalMatch(text, index.contextStart, limit, cursor,
+                data.ruleSet.findIncrementalMatch(text, index,
                                                  data, partial, getFilter()) :
-                data.ruleSet.findMatch(text, index.contextStart, limit,
-                                       cursor, data, getFilter());
+                data.ruleSet.findMatch(text, index,
+                                       data, getFilter());
            /* If we match a rule then apply it by replacing the key
             * with the rule output and repositioning the cursor
             * appropriately.  If we get a partial match, then we
@ -372,46 +361,20 @@ public class RuleBasedTransliterator extends Transliterator {
                if (partial[0]) {
                    break;
                } else {
-                    ++cursor;
+                    ++index.start;
                }
            } else {
                // Delegate replacement to TransliterationRule object
-                limit += r.replace(text, cursor, data);
-                // text.replace(cursor, cursor + r.getKeyLength(), r.getOutput());
-                // limit += r.getOutput().length() - r.getKeyLength();
-                cursor += r.getCursorPos();
+                int lenDelta = r.replace(text, index.start, data);
+                index.limit += lenDelta;
+                index.contextLimit += lenDelta;
+                index.start += r.getCursorPos();
                ++loopCount;
            }
        }
-
-        if (DEBUG) {
-            System.out.println(" -> \"" +
-                Utility.escape(rsubstring(text, start, cursor)) + '|' + 
-                Utility.escape(rsubstring(text, cursor, cursor)) + '|' + 
-                Utility.escape(rsubstring(text, cursor, limit)) + "\"");
-        }
-
-        index.contextLimit += limit - index.limit;
-        index.limit = limit;
-        index.start = cursor;
    }


-    /**
-     * FOR DEBUGGING: Return a substring of a Replaceable.
-     */
-    private static String rsubstring(Replaceable r, int start, int limit) {
-        StringBuffer buf = new StringBuffer();
-        while (start < limit) {
-            buf.append(r.charAt(start++));
-        }
-        return buf.toString();
-    }
-
-
-
-
-
    static class Data {
        public Data() {
            variableNames = new Hashtable();
@ -1329,6 +1292,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.36  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.35  2000/06/28 20:49:54  alan4j
 * Fix handling of Positions fields
 *
--- a/icu4j/src/com/ibm/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $ 
- * $Date: 2000/05/18 21:37:19 $ 
- * $Revision: 1.22 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.23 $
 *
 *****************************************************************************************
 */
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.22 $ $Date: 2000/05/18 21:37:19 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.23 $ $Date: 2000/06/29 21:59:23 $
 */
 class TransliterationRule {
    /**
@ -402,13 +402,14 @@ class TransliterationRule {
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     */
-    public final boolean matches(Replaceable text, int start, int limit,
-                                 int cursor, RuleBasedTransliterator.Data variables,
+    public final boolean matches(Replaceable text,
+                                 Transliterator.Position pos,
+                                 RuleBasedTransliterator.Data variables,
                                 UnicodeFilter filter) {
        // Match anteContext, key, and postContext
-        cursor -= anteContextLength;
-        if (cursor < start
-            || (cursor + pattern.length()) > limit) {
+        int cursor = pos.start - anteContextLength;
+        if (cursor < pos.contextStart
+            || (cursor + pattern.length()) > pos.contextLimit) {
            return false;
        }
        for (int i=0; i<pattern.length(); ++i, ++cursor) {
@ -445,10 +446,11 @@ class TransliterationRule {
     * @see #PARTIAL_MATCH
     * @see #FULL_MATCH
     */
-    public int getMatchDegree(Replaceable text, int start, int limit,
-                              int cursor, RuleBasedTransliterator.Data variables,
+    public int getMatchDegree(Replaceable text,
+                              Transliterator.Position pos,
+                              RuleBasedTransliterator.Data variables,
                              UnicodeFilter filter) {
-        int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
+        int len = getRegionMatchLength(text, pos,
                                       pattern, variables, filter);
        return len < anteContextLength ? MISMATCH :
            (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
@ -477,16 +479,17 @@ class TransliterationRule {
     * match any characters, otherwise the number of characters of text that
     * match this rule.
     */
-    protected static int getRegionMatchLength(Replaceable text, int start,
-                                              int limit, int cursor,
-                                              String template,
-                                              RuleBasedTransliterator.Data variables,
-                                              UnicodeFilter filter) {
-        if (cursor < start) {
+    protected int getRegionMatchLength(Replaceable text,
+                                       Transliterator.Position pos,
+                                       String template,
+                                       RuleBasedTransliterator.Data variables,
+                                       UnicodeFilter filter) {
+        int cursor = pos.start - anteContextLength;
+        if (cursor < pos.contextStart) {
            return -1;
        }
        int i;
-        for (i=0; i<template.length() && cursor<limit; ++i, ++cursor) {
+        for (i=0; i<template.length() && cursor<pos.contextLimit; ++i, ++cursor) {
            if (!charMatches(template.charAt(i), text.charAt(cursor),
                             variables, filter)) {
                return -1;
@ -521,6 +524,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.23  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.22  2000/05/18 21:37:19  alan
 * Update docs
 *
--- a/icu4j/src/com/ibm/text/TransliterationRuleSet.java
+++ b/icu4j/src/com/ibm/text/TransliterationRuleSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRuleSet.java,v $ 
- * $Date: 2000/03/10 04:07:24 $ 
- * $Revision: 1.9 $
+ * $Date: 2000/06/29 21:59:23 $ 
+ * $Revision: 1.10 $
 *
 *****************************************************************************************
 */
@ -27,9 +27,12 @@ import java.util.*;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.9 $ $Date: 2000/03/10 04:07:24 $
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.10 $ $Date: 2000/06/29 21:59:23 $
 *
 * $Log: TransliterationRuleSet.java,v $
+ * Revision 1.10  2000/06/29 21:59:23  alan4j
+ * Fix handling of Transliterator.Position fields
+ *
 * Revision 1.9  2000/03/10 04:07:24  johnf
 * Copyright update
 *
@ -226,16 +229,16 @@ class TransliterationRuleSet {
     * <tt>null</tt> then no filtering is applied.
     * @return the matching rule, or null if none found.
     */
-    public TransliterationRule findMatch(Replaceable text, int start, int limit,
-                                         int cursor,
+    public TransliterationRule findMatch(Replaceable text,
+                                         Transliterator.Position pos,
                                         RuleBasedTransliterator.Data variables,
                                         UnicodeFilter filter) {
        /* We only need to check our indexed bin of the rule table,
         * based on the low byte of the first key character.
         */
-        int x = text.charAt(cursor) & 0xFF;
+        int x = text.charAt(pos.start) & 0xFF;
        for (int i=index[x]; i<index[x+1]; ++i) {
-            if (rules[i].matches(text, start, limit, cursor, variables, filter)) {
+            if (rules[i].matches(text, pos, variables, filter)) {
                return rules[i];
            }
        }
@ -269,8 +272,8 @@ class TransliterationRuleSet {
     * @return the matching rule, or null if none found, or if the text buffer
     * does not have enough text yet to unambiguously match a rule.
     */
-    public TransliterationRule findIncrementalMatch(Replaceable text, int start,
-                                                    int limit, int cursor,
+    public TransliterationRule findIncrementalMatch(Replaceable text,
+                                                    Transliterator.Position pos,
                                                    RuleBasedTransliterator.Data variables,
                                                    boolean partial[],
                                                    UnicodeFilter filter) {
@ -278,9 +281,9 @@ class TransliterationRuleSet {
         * based on the low byte of the first key character.
         */
        partial[0] = false;
-        int x = text.charAt(cursor) & 0xFF;
+        int x = text.charAt(pos.start) & 0xFF;
        for (int i=index[x]; i<index[x+1]; ++i) {
-            int match = rules[i].getMatchDegree(text, start, limit, cursor,
+            int match = rules[i].getMatchDegree(text, pos,
                                                variables, filter);
            switch (match) {
            case TransliterationRule.FULL_MATCH: