ICU-1560 implement smart rollback

X-SVN-Rev: 7155
2001-11-29 01:01:31 +00:00 · 2001-11-29 01:01:31 +00:00 · adeb7ac274
commit adeb7ac274
parent 8eff706d84
7 changed files with 700 additions and 418 deletions
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -178,7 +178,7 @@ int32_t Transliterator::transliterate(Replaceable& text,
    offsets.contextLimit = limit;
    offsets.start = start;
    offsets.limit = limit;
-    filteredTransliterate(text, offsets, FALSE);
+    filteredTransliterate(text, offsets, FALSE, TRUE);
    return offsets.limit;
 }

@ -317,7 +317,7 @@ void Transliterator::finishTransliteration(Replaceable& text,
        return;
    }

-    filteredTransliterate(text, index, FALSE);
+    filteredTransliterate(text, index, FALSE, TRUE);
 }

 /**
@ -356,7 +356,7 @@ void Transliterator::_transliterate(Replaceable& text,
        return;
    }

-    filteredTransliterate(text, index, TRUE);
+    filteredTransliterate(text, index, TRUE, TRUE);

 #if 0
    // TODO
@ -393,18 +393,6 @@ void Transliterator::_transliterate(Replaceable& text,
 #endif
 }

-/**
- * Rollback makes global filters and compound transliterators very
- * bulletproof, but it also makes some transliterators completely
- * non-incremental -- that is, for some transliterators, rollback
- * is always triggered, until finishTransliteration() is called.
- * Since this eliminates most of the usefulness of incremental
- * mode, rollback should usually be disabled.
- *
- * This is used by Transliterator and CompoundTransliterator.
- */
-// #define TRANSLIT_ROLLBACK
-
 /**
 * This method breaks up the input text into runs of unfiltered
 * characters.  It passes each such run to
@ -415,13 +403,33 @@ void Transliterator::_transliterate(Replaceable& text,
 */
 void Transliterator::filteredTransliterate(Replaceable& text,
                                           UTransPosition& index,
-                                           UBool incremental) const {
-    if (filter == 0) {
-        // Short circuit path for transliterators with no filter
+                                           UBool incremental,
+                                           UBool rollback) const {
+    // Short circuit path for transliterators with no filter in
+    // non-incremental mode.
+    if (filter == 0 && !rollback) {
        handleTransliterate(text, index, incremental);
        return;
    }

+    //----------------------------------------------------------------------
+    // This method processes text in two groupings:
+    //
+    // RUNS -- A run is a contiguous group of characters which are contained
+    // in the filter for this transliterator (filter.contains(ch) == true).
+    // Text outside of runs may appear as context but it is not modified.
+    // The start and limit Position values are narrowed to each run.
+    //
+    // PASSES (incremental only) -- To make incremental mode work correctly,
+    // each run is broken up into n passes, where n is the length (in code
+    // points) of the run.  Each pass contains the first n characters.  If a
+    // pass is completely transliterated, it is committed, and further passes
+    // include characters after the committed text.  If a pass is blocked,
+    // and does not transliterate completely, then this method rolls back
+    // the changes made during the pass, extends the pass by one code point,
+    // and tries again.
+    //----------------------------------------------------------------------
+    
    // globalLimit is the limit value for the entire operation.  We
    // set index.limit to the end of each unfiltered run before
    // calling handleTransliterate(), so we need to maintain the real
@ -429,33 +437,36 @@ void Transliterator::filteredTransliterate(Replaceable& text,
    // update globalLimit for insertions or deletions that have
    // happened.
    int32_t globalLimit = index.limit;
-
-    // Break the input text up.  Say the input text has the form:
+    
+    // If there is a non-null filter, then break the input text up.  Say the
+    // input text has the form:
    //   xxxabcxxdefxx
-    // where 'x' represents a filtered character.  Then we break this
-    // up into:
+    // where 'x' represents a filtered character (filter.contains('x') ==
+    // false).  Then we break this up into:
    //   xxxabc xxdef xx
    // Each pass through the loop consumes a run of filtered
    // characters (which are ignored) and a subsequent run of
-    // unfiltered characters (which are transliterated).  If, at any
-    // point, we fail to consume our entire segment, we stop.
+    // unfiltered characters (which are transliterated).
+    
    for (;;) {
-        // Narrow the range to be transliterated to the first segment
-        // of unfiltered characters at or after index.start.

-        UChar32 c;
+        if (filter != NULL) {
+            // Narrow the range to be transliterated to the first segment
+            // of unfiltered characters at or after index.start.

-        // Advance compoundStart past filtered chars
-        while (index.start < globalLimit &&
-               !filter->contains(c=text.char32At(index.start))) {
-            index.start += UTF_CHAR_LENGTH(c);
-        }
+            // Advance past filtered chars
+            UChar32 c;
+            while (index.start < globalLimit &&
+                   !filter->contains(c=text.char32At(index.start))) {
+                index.start += UTF_CHAR_LENGTH(c);
+            }

-        // Find the end of this run of unfiltered chars
-        index.limit = index.start;
-        while (index.limit < globalLimit &&
-               filter->contains(c=text.char32At(index.limit))) {
-            index.limit += UTF_CHAR_LENGTH(c);
+            // Find the end of this run of unfiltered chars
+            index.limit = index.start;
+            while (index.limit < globalLimit &&
+                   filter->contains(c=text.char32At(index.limit))) {
+                index.limit += UTF_CHAR_LENGTH(c);
+            }
        }

        // Check to see if the unfiltered run is empty.  This only
@ -466,15 +477,15 @@ void Transliterator::filteredTransliterate(Replaceable& text,
            break;
        }

-        int32_t limit = index.limit;
-
-        // Is this segment incremental?  If there is additional
+        // Is this run incremental?  If there is additional
        // filtered text (if limit < globalLimit) then we pass in
        // an incremental value of FALSE to force the subclass to
-        // complete the transliteration for this segment.
-        UBool isIncrementalSegment =
-            (limit < globalLimit ? FALSE : incremental);
+        // complete the transliteration for this run.
+        UBool isIncrementalRun =
+            (index.limit < globalLimit ? FALSE : incremental);
        
+        int32_t delta;
+
        // Implement rollback.  To understand the need for rollback,
        // consider the following transliterator:
        //
@ -495,88 +506,147 @@ void Transliterator::filteredTransliterate(Replaceable& text,
        // transformation in incremental mode into characters outside its
        // filter.
        //
-        // There are two solutions.  The first is to add two new index
-        // values to the position structure, a filteredStart and a
-        // filteredLimit.  Then filteredTransliterate() can set and read
-        // these, and avoid filtering partially transliterated results.  A
-        // variant of this solution is to retain an internal state object
-        // with the filtered range that is indexed by the text pointer and
-        // the position object pointer, in analogy to strtok().  The third
-        // solution involves no change to the API and no internal state
-        // cache.  It is to roll back any partially transliterated results
-        // if (a) there is a filter, and (b) the transliteration is
-        // incremental.  This is the solution implemented here.
-        int32_t rollbackStart = 0;
-        int32_t rollbackCopy = 0;
-#ifdef TRANSLIT_ROLLBACK
-        if (isIncrementalSegment) {
+        // To handle this, when in incremental mode we supply characters to
+        // handleTransliterate() in several passes.  Each pass adds one more
+        // input character to the input text.  That is, for input "ABCD", we
+        // first try "A", then "AB", then "ABC", and finally "ABCD".  If at
+        // any point we block (upon return, start < limit) then we roll
+        // back.  If at any point we complete the run (upon return start ==
+        // limit) then we commit that run.
+
+        if (rollback && isIncrementalRun) {
+
+            int32_t runStart = index.start;
+            int32_t runLimit = index.limit;
+            int32_t runLength =  runLimit - runStart;
+
            // Make a rollback copy at the end of the string
-            rollbackStart = index.start;
-            rollbackCopy = text.length();
-            text.copy(rollbackStart, limit, rollbackCopy);
-        }
-#endif
-        
-        // Delegate to subclass for actual transliteration.
-        handleTransliterate(text, index, isIncrementalSegment);
-        
-        int32_t delta = index.limit - limit; // change in length
-            
-        // Adjust overall limit for insertions/deletions.  Don't need
-        // to worry about contextLimit because handleTransliterate()
-        // maintains that.
-        globalLimit += delta;
+            int32_t rollbackOrigin = text.length();
+            text.copy(runStart, runLimit, rollbackOrigin);

-#ifdef TRANSLIT_ROLLBACK
-        // If we failed to complete transliterate this segment,
-        // then we are done.  If rollback is required, then do so.
-        if (index.start != index.limit) {
-            if (isIncrementalSegment) {
-                // Replace [rollbackStart, limit) -- this is the
-                // original filtered segment -- with
-                // [rollbackCopy, text.length()), the rollback
-                // copy, then delete the rollback copy.
-                rollbackCopy += delta;
-                int32_t rollbackLen = text.length() - rollbackCopy;
+            // Variables reflecting the commitment of completely
+            // transliterated text.  passStart is the runStart, advanced
+            // past committed text.  rollbackStart is the rollbackOrigin,
+            // advanced past rollback text that corresponds to committed
+            // text.
+            int32_t passStart = runStart;
+            int32_t rollbackStart = rollbackOrigin;

-                // Delete the partially transliterated segment
-                rollbackCopy -= index.limit - rollbackStart;
-				text.handleReplaceBetween(rollbackStart, index.limit, EMPTY);
+            // The limit for each pass; we advance by one code point with
+            // each iteration.
+            int32_t passLimit = index.start;

-                // Copy the rollback copy back
-                text.copy(rollbackCopy, text.length(), rollbackStart);
-                
-                // Delete the rollback copy
-                rollbackCopy += rollbackLen;
-                text.handleReplaceBetween(rollbackCopy, text.length(), EMPTY);
-                
-                // Restore indices
-                index.start = rollbackStart;
-                index.limit = limit;
-                index.contextLimit -= delta;
-                globalLimit -= delta;
+            // Total length, in 16-bit code units, of uncommitted text.
+            // This is the length to be rolled back.
+            int32_t uncommittedLength = 0;
+
+            // Total delta (change in length) for all passes
+            int32_t totalDelta = 0;
+
+            // PASS MAIN LOOP -- Start with a single character, and extend
+            // the text by one character at a time.  Roll back partial
+            // transliterations and commit complete transliterations.
+            for (;;) {
+                // Length of additional code point, either one or two
+                int32_t charLength =
+                    UTF_CHAR_LENGTH(text.char32At(passLimit));
+                passLimit += charLength;
+                if (passLimit > runLimit) {
+                    break;
+                }
+                uncommittedLength += charLength;
+
+                index.limit = passLimit;
+
+                // Delegate to subclass for actual transliteration.  Upon
+                // return, start will be updated to point after the
+                // transliterated text, and limit and contextLimit will be
+                // adjusted for length changes.
+                handleTransliterate(text, index, true);
+
+                delta = index.limit - passLimit; // change in length
+
+                // We failed to completely transliterate this pass.
+                // Roll back the text.  Indices remain unchanged; reset
+                // them where necessary.
+                if (index.start != index.limit) {
+                    // Find the rollbackStart, adjusted for length changes
+                    // and the deletion of partially transliterated text.
+                    int32_t rs = rollbackStart + delta - (index.limit - passStart);
+
+                    // Delete the partially transliterated text
+                    text.handleReplaceBetween(passStart, index.limit, EMPTY);
+
+                    // Copy the rollback text back
+                    text.copy(rs, rs + uncommittedLength, passStart);
+
+                    // Restore indices to their original values
+                    index.start = passStart;
+                    index.limit = passLimit;
+                    index.contextLimit -= delta;
+                }
+
+                // We did completely transliterate this pass.  Update the
+                // commit indices to record how far we got.  Adjust indices
+                // for length change.
+                else {
+                    // Move the pass indices past the committed text.
+                    passStart = passLimit = index.start;
+
+                    // Adjust the rollbackStart for length changes and move
+                    // it past the committed text.  All characters we've
+                    // processed to this point are committed now, so zero
+                    // out the uncommittedLength.
+                    rollbackStart += delta + uncommittedLength;
+                    uncommittedLength = 0;
+
+                    // Adjust indices for length changes.
+                    runLimit += delta;
+                    totalDelta += delta;
+                }
            }
-            break;
-        } else if (isIncrementalSegment) {
-            // We finished this segment; delete the rollback copy
-            rollbackCopy += delta;
-            text.handleReplaceBetween(rollbackCopy, text.length(), EMPTY);
+
+            // Adjust overall limit and rollbackOrigin for insertions and
+            // deletions.  Don't need to worry about contextLimit because
+            // handleTransliterate() maintains that.
+            rollbackOrigin += totalDelta;
+            globalLimit += totalDelta;
+
+            // Delete the rollback copy
+            text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY);
        }
-#else
-        // If we failed to complete transliterate this segment,
+
+        else {
+            // Delegate to subclass for actual transliteration.
+            int32_t limit = index.limit;
+            handleTransliterate(text, index, isIncrementalRun);
+            delta = index.limit - limit; // change in length
+
+            // Adjust overall limit for insertions/deletions.  Don't need
+            // to worry about contextLimit because handleTransliterate()
+            // maintains that.
+            globalLimit += delta;
+        }
+
+        // If we failed to complete transliterate this run,
        // then we are done.
        if (index.start != index.limit) {
            break;
        }
-#endif
-        
+
        // If we did completely transliterate this
-        // segment, then repeat with the next unfiltered segment.
+        // run, then repeat with the next unfiltered run.
    }

    // Start is valid where it is.  Limit needs to be put back where
    // it was, modulo adjustments for deletions/insertions.
-    index.limit = globalLimit;    
+    index.limit = globalLimit;
+}
+
+void Transliterator::filteredTransliterate(Replaceable& text,
+                                           UTransPosition& index,
+                                           UBool incremental) const {
+    filteredTransliterate(text, index, incremental, FALSE);
 }

 /**
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@ -573,22 +573,54 @@ protected:
                                     UBool incremental) const = 0;

    /**
-     * Core transliteration method called by all other methods in
-     * Tranliterator.  This method splits up the input text into
-     * segments of unfiltered text and passes those to
-     * handleTransliterate().  For most subclasses this is convenient
-     * and efficient.  Subclasses that can more efficiently handle the
-     * filter logic on their own (rare) can override
-     * filteredTransliterate().  Such subclasses must still implement
-     * handleTransliterate() but they can do so with an empty body,
-     * since filteredTransliterate() is the only method that calls
-     * handleTransliterate().
+     * Transliterate a substring of text, as specified by index, taking filters
+     * into account.  This method is for subclasses that need to delegate to
+     * another transliterator, such as CompoundTransliterator.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
     */
    virtual void filteredTransliterate(Replaceable& text,
                                       UTransPosition& index,
                                       UBool incremental) const;

-    friend class CompoundTransliterator; // for filteredTransliterate
+    friend class CompoundTransliterator; // for filteredTransliterate()
+
+private:
+
+    /**
+     * Top-level transliteration method, handling filtering, incremental and
+     * non-incremental transliteration, and rollback.  All transliteration
+     * public API methods eventually call this method with a rollback argument
+     * of TRUE.  Other entities may call this method but rollback should be
+     * FALSE.
+     * 
+     * <p>If this transliterator has a filter, break up the input text into runs
+     * of unfiltered characters.  Pass each run to
+     * <subclass>.handleTransliterate().
+     *
+     * <p>In incremental mode, if rollback is TRUE, perform a special
+     * incremental procedure in which several passes are made over the input
+     * text, adding one character at a time, and committing successful
+     * transliterations as they occur.  Unsuccessful transliterations are rolled
+     * back and retried with additional characters to give correct results.
+     *
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     * @param rollback if TRUE and if incremental is TRUE, then perform special
+     * incremental processing, as described above, and undo partial
+     * transliterations where necessary.  If incremental is FALSE then this
+     * parameter is ignored.
+     */
+    virtual void filteredTransliterate(Replaceable& text,
+                                       UTransPosition& index,
+                                       UBool incremental,
+                                       UBool rollback) const;

 public:

--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -394,12 +394,12 @@ void TransliteratorTest::TestKeyboard2(void) {
        // insertion, buffer
        "a", "A",
        "p", "Ap",
-        "s", "Ay",
-        "c", "Ayc",
+        "s", "Aps", // modified for rollback - "Ay",
+        "c", "Apsc", // modified for rollback - "Ayc",
        "a", "AycA",
        "p", "AycAp",
-        "s", "AycAy",
-        "c", "AycAyc",
+        "s", "AycAps", // modified for rollback - "AycAy",
+        "c", "AycApsc", // modified for rollback - "AycAyc",
        "h", "AycAY",
        0, "AycAY", // null means finishKeyboardTransliteration
    };
@ -423,9 +423,9 @@ void TransliteratorTest::TestKeyboard3(void) {
        //           keyboard xliteration.
        "a", "a",
        "b", "ab",
-        "t", "aby",
+        "t", "abt", // modified for rollback - "aby",
        "c", "abyc",
-        "t", "abycy",
+        "t", "abyct", // modified for rollback - "abycy",
        "h", "abycz",
        0, "abycz", // null means finishKeyboardTransliteration
    };
@ -2110,7 +2110,7 @@ void TransliteratorTest::TestNewEngine() {

    delete t;

-#if 0
+#if 1
    // This test will only work if Transliterator.ROLLBACK is
    // true.  Otherwise, this test will fail, revealing a
    // limitation of global filters in incremental mode.
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2001/11/28 17:40:40 $
- * $Revision: 1.82 $
+ * $Date: 2001/11/29 01:00:29 $
+ * $Revision: 1.83 $
 *
 *****************************************************************************************
 */
@ -267,12 +267,12 @@ public class TransliteratorTest extends TestFmwk {
            // insertion, buffer
            "a", "A",
            "p", "Ap",
-            "s", "Ay",
-            "c", "Ayc",
+            "s", "Aps", // modified for rollback - "Ay",
+            "c", "Apsc", // modified for rollback - "Ayc",
            "a", "AycA",
            "p", "AycAp",
-            "s", "AycAy",
-            "c", "AycAyc",
+            "s", "AycAps", // modified for rollback - "AycAy",
+            "c", "AycApsc", // modified for rollback - "AycAyc",
            "h", "AycAY",
            null, "AycAY", // null means finishKeyboardTransliteration
        };
@ -298,9 +298,9 @@ public class TransliteratorTest extends TestFmwk {
            //           keyboard xliteration.
            "a", "a",
            "b", "ab",
-            "t", "aby",
+            "t", "abt", // modified for rollback - "aby",
            "c", "abyc",
-            "t", "abycy",
+            "t", "abyct", // modified for rollback - "abycy",
            "h", "abycz",
            null, "abycz", // null means finishKeyboardTransliteration
        };
@ -323,15 +323,8 @@ public class TransliteratorTest extends TestFmwk {
                log = new StringBuffer(s.toString() + " => ");
                t.finishTransliteration(s, index);
            }
-            String str = s.toString();
-            // Show the start index '{' and the cursor '|'
-            log.append(str.substring(0, index.contextStart)).
-                append('{').
-                append(str.substring(index.contextStart,
-                                     index.start)).
-                append('|').
-                append(str.substring(index.start));
-            if (str.equals(DATA[i+1])) {
+            formatInput(log, s, index);
+            if (s.toString().equals(DATA[i+1])) {
                logln(log.toString());
            } else {
                errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
@ -1547,7 +1540,7 @@ public class TransliteratorTest extends TestFmwk {
        // Katakana should be untouched
        expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");

-        if (false) {
+        if (true) {
            // This test will only work if Transliterator.ROLLBACK is
            // true.  Otherwise, this test will fail, revealing a
            // limitation of global filters in incremental mode.
--- a/icu4j/src/com/ibm/icu/text/Transliterator.java
+++ b/icu4j/src/com/ibm/icu/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
- * $Date: 2001/11/21 22:21:45 $
- * $Revision: 1.65 $
+ * $Date: 2001/11/29 01:00:10 $
+ * $Revision: 1.66 $
 *
 *****************************************************************************************
 */
@ -242,7 +242,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.65 $ $Date: 2001/11/21 22:21:45 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.66 $ $Date: 2001/11/29 01:00:10 $
 */
 public abstract class Transliterator {
    /**
@ -464,7 +464,7 @@ public abstract class Transliterator {
        }

        Position pos = new Position(start, limit, start);
-        filteredTransliterate(text, pos, false);
+        filteredTransliterate(text, pos, false, true);
        return pos.limit;
    }

@ -555,7 +555,7 @@ public abstract class Transliterator {
            return;
        }

-        filteredTransliterate(text, index, true);
+        filteredTransliterate(text, index, true, true);

 // TODO
 // This doesn't work once we add quantifier support.  Need to rewrite
@ -613,7 +613,7 @@ public abstract class Transliterator {
    public final void finishTransliteration(Replaceable text,
                                            Position index) {
        index.validate(text.length());
-        filteredTransliterate(text, index, false);
+        filteredTransliterate(text, index, false, true);
    }

    /**
@ -657,34 +657,61 @@ public abstract class Transliterator {
                                                Position pos, boolean incremental);

    /**
-     * Rollback makes global filters and compound transliterators very
-     * bulletproof, but it also makes some transliterators completely
-     * non-incremental -- that is, for some transliterators, rollback
-     * is always triggered, until finishTransliteration() is called.
-     * Since this eliminates most of the usefulness of incremental
-     * mode, rollback should usually be disabled.
+     * Top-level transliteration method, handling filtering, incremental and
+     * non-incremental transliteration, and rollback.  All transliteration
+     * public API methods eventually call this method with a rollback argument
+     * of TRUE.  Other entities may call this method but rollback should be
+     * FALSE.
+     * 
+     * <p>If this transliterator has a filter, break up the input text into runs
+     * of unfiltered characters.  Pass each run to
+     * <subclass>.handleTransliterate().
     *
-     * This is used by Transliterator and CompoundTransliterator.
-     */
-    static final boolean ROLLBACK = false;
-
-    /**
-     * This method breaks up the input text into runs of unfiltered
-     * characters.  It passes each such run to
-     * <subclass>.handleTransliterate().  Subclasses that can handle the
-     * filter logic more efficiently themselves may override this method.
+     * <p>In incremental mode, if rollback is TRUE, perform a special
+     * incremental procedure in which several passes are made over the input
+     * text, adding one character at a time, and committing successful
+     * transliterations as they occur.  Unsuccessful transliterations are rolled
+     * back and retried with additional characters to give correct results.
     *
-     * All transliteration calls in this class go through this method.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     * @param rollback if TRUE and if incremental is TRUE, then perform special
+     * incremental processing, as described above, and undo partial
+     * transliterations where necessary.  If incremental is FALSE then this
+     * parameter is ignored.
     */
-    protected void filteredTransliterate(Replaceable text,
-                                         Position index,
-                                         boolean incremental) {
-        if (filter == null) {
-            // Short circuit path for transliterators with no filter
+    private void filteredTransliterate(Replaceable text,
+                                       Position index,
+                                       boolean incremental,
+                                       boolean rollback) {
+        // Short circuit path for transliterators with no filter in
+        // non-incremental mode.
+        if (filter == null && !rollback) {
            handleTransliterate(text, index, incremental);
            return;
        }

+        //----------------------------------------------------------------------
+        // This method processes text in two groupings:
+        //
+        // RUNS -- A run is a contiguous group of characters which are contained
+        // in the filter for this transliterator (filter.contains(ch) == true).
+        // Text outside of runs may appear as context but it is not modified.
+        // The start and limit Position values are narrowed to each run.
+        //
+        // PASSES (incremental only) -- To make incremental mode work correctly,
+        // each run is broken up into n passes, where n is the length (in code
+        // points) of the run.  Each pass contains the first n characters.  If a
+        // pass is completely transliterated, it is committed, and further passes
+        // include characters after the committed text.  If a pass is blocked,
+        // and does not transliterate completely, then this method rolls back
+        // the changes made during the pass, extends the pass by one code point,
+        // and tries again.
+        //----------------------------------------------------------------------
+
        // globalLimit is the limit value for the entire operation.  We
        // set index.limit to the end of each unfiltered run before
        // calling handleTransliterate(), so we need to maintain the real
@ -693,32 +720,35 @@ public abstract class Transliterator {
        // happened.
        int globalLimit = index.limit;

-        // Break the input text up.  Say the input text has the form:
+        // If there is a non-null filter, then break the input text up.  Say the
+        // input text has the form:
        //   xxxabcxxdefxx
-        // where 'x' represents a filtered character.  Then we break this
-        // up into:
+        // where 'x' represents a filtered character (filter.contains('x') ==
+        // false).  Then we break this up into:
        //   xxxabc xxdef xx
        // Each pass through the loop consumes a run of filtered
        // characters (which are ignored) and a subsequent run of
-        // unfiltered characters (which are transliterated).  If, at any
-        // point, we fail to consume our entire segment, we stop.
+        // unfiltered characters (which are transliterated).
+
        for (;;) {
-            // Narrow the range to be transliterated to the first segment
-            // of unfiltered characters at or after index.start.

-            int c;
-
-            // Advance compoundStart past filtered chars
-            while (index.start < globalLimit &&
-                   !filter.contains(c=UTF16.charAt(text, index.start))) {
-                index.start += UTF16.getCharCount(c);
-            }
-
-            // Find the end of this run of unfiltered chars
-            index.limit = index.start;
-            while (index.limit < globalLimit &&
-                   filter.contains(c=UTF16.charAt(text, index.limit))) {
-                index.limit += UTF16.getCharCount(c);
+            if (filter != null) {
+                // Narrow the range to be transliterated to the first run
+                // of unfiltered characters at or after index.start.
+                
+                // Advance past filtered chars
+                int c;
+                while (index.start < globalLimit &&
+                       !filter.contains(c=UTF16.charAt(text, index.start))) {
+                    index.start += UTF16.getCharCount(c);
+                }
+                
+                // Find the end of this run of unfiltered chars
+                index.limit = index.start;
+                while (index.limit < globalLimit &&
+                       filter.contains(c=UTF16.charAt(text, index.limit))) {
+                    index.limit += UTF16.getCharCount(c);
+                }
            }

            // Check to see if the unfiltered run is empty.  This only
@ -729,14 +759,14 @@ public abstract class Transliterator {
                break;
            }

-            int limit = index.limit;
-
-            // Is this segment incremental?  If there is additional
+            // Is this run incremental?  If there is additional
            // filtered text (if limit < globalLimit) then we pass in
            // an incremental value of FALSE to force the subclass to
-            // complete the transliteration for this segment.
-            boolean isIncrementalSegment =
-                (limit < globalLimit ? false : incremental);
+            // complete the transliteration for this run.
+            boolean isIncrementalRun =
+                (index.limit < globalLimit ? false : incremental);
+
+            int delta;

            // Implement rollback.  To understand the need for rollback,
            // consider the following transliterator:
@ -745,96 +775,149 @@ public abstract class Transliterator {
            //  "u" is "A > b;"
            //  "v" is a compound of "t; NFD; u" with a filter [:Ll:]
            //
-            // Now apply "c" to the input text "a".  The result is "b".  But if
+            // Now apply "v" to the input text "a".  The result is "b".  But if
            // the transliteration is done incrementally, then the NFD holds
            // things up after "t" has already transformed "a" to "A".  When
            // finishTransliterate() is called, "A" is _not_ processed because
            // it gets excluded by the [:Ll:] filter, and the end result is "A"
            // -- incorrect.  The problem is that the filter is applied to a
            // partially-transliterated result, when we only want it to apply to
-            // input text.  Although this example hinges on a compound
+            // input text.  Although this example describes a compound
            // transliterator containing NFD and a specific filter, it can
-            // actually happen with any transliterator which may do a partial
+            // happen with any transliterator which does a partial
            // transformation in incremental mode into characters outside its
            // filter.
            //
-            // There are two solutions.  The first is to add two new index
-            // values to the position structure, a filteredStart and a
-            // filteredLimit.  Then filteredTransliterate() can set and read
-            // these, and avoid filtering partially transliterated results.  A
-            // variant of this solution is to retain an internal state object
-            // with the filtered range that is indexed by the text pointer and
-            // the position object pointer, in analogy to strtok().  The third
-            // solution involves no change to the API and no internal state
-            // cache.  It is to roll back any partially transliterated results
-            // if (a) there is a filter, and (b) the transliteration is
-            // incremental.  This is the solution implemented here.
-            int rollbackStart = 0;
-            int rollbackCopy = 0;
-            if (ROLLBACK) {
-                if (isIncrementalSegment) {
-                    // Make a rollback copy at the end of the string
-                    rollbackStart = index.start;
-                    rollbackCopy = text.length();
-                    text.copy(rollbackStart, limit, rollbackCopy);
-                }
-            }
+            // To handle this, when in incremental mode we supply characters to
+            // handleTransliterate() in several passes.  Each pass adds one more
+            // input character to the input text.  That is, for input "ABCD", we
+            // first try "A", then "AB", then "ABC", and finally "ABCD".  If at
+            // any point we block (upon return, start < limit) then we roll
+            // back.  If at any point we complete the run (upon return start ==
+            // limit) then we commit that run.

-            // Delegate to subclass for actual transliteration.
-            handleTransliterate(text, index, isIncrementalSegment);
+            if (rollback && isIncrementalRun) {

-            int delta = index.limit - limit; // change in length
+                int runStart = index.start;
+                int runLimit = index.limit;
+                int runLength =  runLimit - runStart;

-            // Adjust overall limit for insertions/deletions.  Don't need
-            // to worry about contextLimit because handleTransliterate()
-            // maintains that.
-            globalLimit += delta;
+                // Make a rollback copy at the end of the string
+                int rollbackOrigin = text.length();
+                text.copy(runStart, runLimit, rollbackOrigin);

-            if (ROLLBACK) {
-                // If we failed to complete transliterate this segment,
-                // then we are done.  If rollback is required, then do so.
-                if (index.start != index.limit) {
-                    if (isIncrementalSegment) {
-                        // Replace [rollbackStart, limit) -- this is the
-                        // original filtered segment -- with
-                        // [rollbackCopy, text.length()), the rollback
-                        // copy, then delete the rollback copy.
-                        rollbackCopy += delta;
-                        int rollbackLen = text.length() - rollbackCopy;
-                    
-                        // Delete the partially transliterated segment
-                        rollbackCopy -= index.limit - rollbackStart;
-                        text.replace(rollbackStart, index.limit, "");
-                    
-                        // Copy the rollback copy back
-                        text.copy(rollbackCopy, text.length(), rollbackStart);
-                    
-                        // Delete the rollback copy
-                        rollbackCopy += rollbackLen;
-                        text.replace(rollbackCopy, text.length(), "");
-                    
-                        // Restore indices
-                        index.start = rollbackStart;
-                        index.limit = limit;
-                        index.contextLimit -= delta;
-                        globalLimit -= delta;
+                // Variables reflecting the commitment of completely
+                // transliterated text.  passStart is the runStart, advanced
+                // past committed text.  rollbackStart is the rollbackOrigin,
+                // advanced past rollback text that corresponds to committed
+                // text.
+                int passStart = runStart;
+                int rollbackStart = rollbackOrigin;
+
+                // The limit for each pass; we advance by one code point with
+                // each iteration.
+                int passLimit = index.start;
+
+                // Total length, in 16-bit code units, of uncommitted text.
+                // This is the length to be rolled back.
+                int uncommittedLength = 0;
+
+                // Total delta (change in length) for all passes
+                int totalDelta = 0;
+
+                // PASS MAIN LOOP -- Start with a single character, and extend
+                // the text by one character at a time.  Roll back partial
+                // transliterations and commit complete transliterations.
+                for (;;) {
+                    // Length of additional code point, either one or two
+                    int charLength =
+                        UTF16.getCharCount(UTF16.charAt(text, passLimit));
+                    passLimit += charLength;
+                    if (passLimit > runLimit) {
+                        break;
+                    }
+                    uncommittedLength += charLength;
+
+                    index.limit = passLimit;
+
+                    // Delegate to subclass for actual transliteration.  Upon
+                    // return, start will be updated to point after the
+                    // transliterated text, and limit and contextLimit will be
+                    // adjusted for length changes.
+                    handleTransliterate(text, index, true);
+
+                    delta = index.limit - passLimit; // change in length
+            
+                    // We failed to completely transliterate this pass.
+                    // Roll back the text.  Indices remain unchanged; reset
+                    // them where necessary.
+                    if (index.start != index.limit) {
+                        // Find the rollbackStart, adjusted for length changes
+                        // and the deletion of partially transliterated text.
+                        int rs = rollbackStart + delta - (index.limit - passStart);
+                    
+                        // Delete the partially transliterated text
+                        text.replace(passStart, index.limit, "");
+                    
+                        // Copy the rollback text back
+                        text.copy(rs, rs + uncommittedLength, passStart);
+                    
+                        // Restore indices to their original values
+                        index.start = passStart;
+                        index.limit = passLimit;
+                        index.contextLimit -= delta;
+                    }
+
+                    // We did completely transliterate this pass.  Update the
+                    // commit indices to record how far we got.  Adjust indices
+                    // for length change.
+                    else {
+                        // Move the pass indices past the committed text.
+                        passStart = passLimit = index.start;
+
+                        // Adjust the rollbackStart for length changes and move
+                        // it past the committed text.  All characters we've
+                        // processed to this point are committed now, so zero
+                        // out the uncommittedLength.
+                        rollbackStart += delta + uncommittedLength;
+                        uncommittedLength = 0;
+
+                        // Adjust indices for length changes.
+                        runLimit += delta;
+                        totalDelta += delta;
                    }
-                    break;
-                } else if (isIncrementalSegment) {
-                    // We finished this segment; delete the rollback copy
-                    rollbackCopy += delta;
-                    text.replace(rollbackCopy, text.length(), "");
-                }
-            } else {
-                // If we failed to complete transliterate this segment,
-                // then we are done.
-                if (index.start != index.limit) {
-                    break;
                }
+
+                // Adjust overall limit and rollbackOrigin for insertions and
+                // deletions.  Don't need to worry about contextLimit because
+                // handleTransliterate() maintains that.
+                rollbackOrigin += totalDelta;
+                globalLimit += totalDelta;
+
+                // Delete the rollback copy
+                text.replace(rollbackOrigin, rollbackOrigin + runLength, "");
            }

+            else {
+                // Delegate to subclass for actual transliteration.
+                int limit = index.limit;
+                handleTransliterate(text, index, isIncrementalRun);
+                delta = index.limit - limit; // change in length
+
+                // Adjust overall limit for insertions/deletions.  Don't need
+                // to worry about contextLimit because handleTransliterate()
+                // maintains that.
+                globalLimit += delta;
+            }
+
+            // If we failed to complete transliterate this run,
+            // then we are done.
+            if (index.start != index.limit) {
+                break;
+            }
+            
            // If we did completely transliterate this
-            // segment, then repeat with the next unfiltered segment.
+            // run, then repeat with the next unfiltered run.
        }

        // Start is valid where it is.  Limit needs to be put back where
@ -842,6 +925,22 @@ public abstract class Transliterator {
        index.limit = globalLimit;
    }

+    /**
+     * Transliterate a substring of text, as specified by index, taking filters
+     * into account.  This method is for subclasses that need to delegate to
+     * another transliterator, such as CompoundTransliterator.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     */
+    protected void filteredTransliterate(Replaceable text,
+                                         Position index,
+                                         boolean incremental) {
+        filteredTransliterate(text, index, incremental, false);
+    }
+
    /**
     * Returns the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.  The default value is zero, but
@ -1726,9 +1825,7 @@ public abstract class Transliterator {
     *
     * <p><b>Note:</b> Most subclasses that implement
     * handleTransliterator() will <em>not</em> want to use this
-     * method, since characters they see are already filtered.  Only
-     * subclasses with special requirements, such as those overriding
-     * filteredTransliterate(), should need this method.
+     * method, since characters they see are already filtered.
     *
     * @deprecated the new architecture provides filtering at the top
     * level.  This method will be removed Dec 31 2001.
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
- * $Date: 2001/11/28 17:40:40 $
- * $Revision: 1.82 $
+ * $Date: 2001/11/29 01:00:29 $
+ * $Revision: 1.83 $
 *
 *****************************************************************************************
 */
@ -267,12 +267,12 @@ public class TransliteratorTest extends TestFmwk {
            // insertion, buffer
            "a", "A",
            "p", "Ap",
-            "s", "Ay",
-            "c", "Ayc",
+            "s", "Aps", // modified for rollback - "Ay",
+            "c", "Apsc", // modified for rollback - "Ayc",
            "a", "AycA",
            "p", "AycAp",
-            "s", "AycAy",
-            "c", "AycAyc",
+            "s", "AycAps", // modified for rollback - "AycAy",
+            "c", "AycApsc", // modified for rollback - "AycAyc",
            "h", "AycAY",
            null, "AycAY", // null means finishKeyboardTransliteration
        };
@ -298,9 +298,9 @@ public class TransliteratorTest extends TestFmwk {
            //           keyboard xliteration.
            "a", "a",
            "b", "ab",
-            "t", "aby",
+            "t", "abt", // modified for rollback - "aby",
            "c", "abyc",
-            "t", "abycy",
+            "t", "abyct", // modified for rollback - "abycy",
            "h", "abycz",
            null, "abycz", // null means finishKeyboardTransliteration
        };
@ -323,15 +323,8 @@ public class TransliteratorTest extends TestFmwk {
                log = new StringBuffer(s.toString() + " => ");
                t.finishTransliteration(s, index);
            }
-            String str = s.toString();
-            // Show the start index '{' and the cursor '|'
-            log.append(str.substring(0, index.contextStart)).
-                append('{').
-                append(str.substring(index.contextStart,
-                                     index.start)).
-                append('|').
-                append(str.substring(index.start));
-            if (str.equals(DATA[i+1])) {
+            formatInput(log, s, index);
+            if (s.toString().equals(DATA[i+1])) {
                logln(log.toString());
            } else {
                errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
@ -1547,7 +1540,7 @@ public class TransliteratorTest extends TestFmwk {
        // Katakana should be untouched
        expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");

-        if (false) {
+        if (true) {
            // This test will only work if Transliterator.ROLLBACK is
            // true.  Otherwise, this test will fail, revealing a
            // limitation of global filters in incremental mode.
--- a/icu4j/src/com/ibm/text/Transliterator.java
+++ b/icu4j/src/com/ibm/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
- * $Date: 2001/11/21 22:21:45 $
- * $Revision: 1.65 $
+ * $Date: 2001/11/29 01:00:10 $
+ * $Revision: 1.66 $
 *
 *****************************************************************************************
 */
@ -242,7 +242,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.65 $ $Date: 2001/11/21 22:21:45 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.66 $ $Date: 2001/11/29 01:00:10 $
 */
 public abstract class Transliterator {
    /**
@ -464,7 +464,7 @@ public abstract class Transliterator {
        }

        Position pos = new Position(start, limit, start);
-        filteredTransliterate(text, pos, false);
+        filteredTransliterate(text, pos, false, true);
        return pos.limit;
    }

@ -555,7 +555,7 @@ public abstract class Transliterator {
            return;
        }

-        filteredTransliterate(text, index, true);
+        filteredTransliterate(text, index, true, true);

 // TODO
 // This doesn't work once we add quantifier support.  Need to rewrite
@ -613,7 +613,7 @@ public abstract class Transliterator {
    public final void finishTransliteration(Replaceable text,
                                            Position index) {
        index.validate(text.length());
-        filteredTransliterate(text, index, false);
+        filteredTransliterate(text, index, false, true);
    }

    /**
@ -657,34 +657,61 @@ public abstract class Transliterator {
                                                Position pos, boolean incremental);

    /**
-     * Rollback makes global filters and compound transliterators very
-     * bulletproof, but it also makes some transliterators completely
-     * non-incremental -- that is, for some transliterators, rollback
-     * is always triggered, until finishTransliteration() is called.
-     * Since this eliminates most of the usefulness of incremental
-     * mode, rollback should usually be disabled.
+     * Top-level transliteration method, handling filtering, incremental and
+     * non-incremental transliteration, and rollback.  All transliteration
+     * public API methods eventually call this method with a rollback argument
+     * of TRUE.  Other entities may call this method but rollback should be
+     * FALSE.
+     * 
+     * <p>If this transliterator has a filter, break up the input text into runs
+     * of unfiltered characters.  Pass each run to
+     * <subclass>.handleTransliterate().
     *
-     * This is used by Transliterator and CompoundTransliterator.
-     */
-    static final boolean ROLLBACK = false;
-
-    /**
-     * This method breaks up the input text into runs of unfiltered
-     * characters.  It passes each such run to
-     * <subclass>.handleTransliterate().  Subclasses that can handle the
-     * filter logic more efficiently themselves may override this method.
+     * <p>In incremental mode, if rollback is TRUE, perform a special
+     * incremental procedure in which several passes are made over the input
+     * text, adding one character at a time, and committing successful
+     * transliterations as they occur.  Unsuccessful transliterations are rolled
+     * back and retried with additional characters to give correct results.
     *
-     * All transliteration calls in this class go through this method.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     * @param rollback if TRUE and if incremental is TRUE, then perform special
+     * incremental processing, as described above, and undo partial
+     * transliterations where necessary.  If incremental is FALSE then this
+     * parameter is ignored.
     */
-    protected void filteredTransliterate(Replaceable text,
-                                         Position index,
-                                         boolean incremental) {
-        if (filter == null) {
-            // Short circuit path for transliterators with no filter
+    private void filteredTransliterate(Replaceable text,
+                                       Position index,
+                                       boolean incremental,
+                                       boolean rollback) {
+        // Short circuit path for transliterators with no filter in
+        // non-incremental mode.
+        if (filter == null && !rollback) {
            handleTransliterate(text, index, incremental);
            return;
        }

+        //----------------------------------------------------------------------
+        // This method processes text in two groupings:
+        //
+        // RUNS -- A run is a contiguous group of characters which are contained
+        // in the filter for this transliterator (filter.contains(ch) == true).
+        // Text outside of runs may appear as context but it is not modified.
+        // The start and limit Position values are narrowed to each run.
+        //
+        // PASSES (incremental only) -- To make incremental mode work correctly,
+        // each run is broken up into n passes, where n is the length (in code
+        // points) of the run.  Each pass contains the first n characters.  If a
+        // pass is completely transliterated, it is committed, and further passes
+        // include characters after the committed text.  If a pass is blocked,
+        // and does not transliterate completely, then this method rolls back
+        // the changes made during the pass, extends the pass by one code point,
+        // and tries again.
+        //----------------------------------------------------------------------
+
        // globalLimit is the limit value for the entire operation.  We
        // set index.limit to the end of each unfiltered run before
        // calling handleTransliterate(), so we need to maintain the real
@ -693,32 +720,35 @@ public abstract class Transliterator {
        // happened.
        int globalLimit = index.limit;

-        // Break the input text up.  Say the input text has the form:
+        // If there is a non-null filter, then break the input text up.  Say the
+        // input text has the form:
        //   xxxabcxxdefxx
-        // where 'x' represents a filtered character.  Then we break this
-        // up into:
+        // where 'x' represents a filtered character (filter.contains('x') ==
+        // false).  Then we break this up into:
        //   xxxabc xxdef xx
        // Each pass through the loop consumes a run of filtered
        // characters (which are ignored) and a subsequent run of
-        // unfiltered characters (which are transliterated).  If, at any
-        // point, we fail to consume our entire segment, we stop.
+        // unfiltered characters (which are transliterated).
+
        for (;;) {
-            // Narrow the range to be transliterated to the first segment
-            // of unfiltered characters at or after index.start.

-            int c;
-
-            // Advance compoundStart past filtered chars
-            while (index.start < globalLimit &&
-                   !filter.contains(c=UTF16.charAt(text, index.start))) {
-                index.start += UTF16.getCharCount(c);
-            }
-
-            // Find the end of this run of unfiltered chars
-            index.limit = index.start;
-            while (index.limit < globalLimit &&
-                   filter.contains(c=UTF16.charAt(text, index.limit))) {
-                index.limit += UTF16.getCharCount(c);
+            if (filter != null) {
+                // Narrow the range to be transliterated to the first run
+                // of unfiltered characters at or after index.start.
+                
+                // Advance past filtered chars
+                int c;
+                while (index.start < globalLimit &&
+                       !filter.contains(c=UTF16.charAt(text, index.start))) {
+                    index.start += UTF16.getCharCount(c);
+                }
+                
+                // Find the end of this run of unfiltered chars
+                index.limit = index.start;
+                while (index.limit < globalLimit &&
+                       filter.contains(c=UTF16.charAt(text, index.limit))) {
+                    index.limit += UTF16.getCharCount(c);
+                }
            }

            // Check to see if the unfiltered run is empty.  This only
@ -729,14 +759,14 @@ public abstract class Transliterator {
                break;
            }

-            int limit = index.limit;
-
-            // Is this segment incremental?  If there is additional
+            // Is this run incremental?  If there is additional
            // filtered text (if limit < globalLimit) then we pass in
            // an incremental value of FALSE to force the subclass to
-            // complete the transliteration for this segment.
-            boolean isIncrementalSegment =
-                (limit < globalLimit ? false : incremental);
+            // complete the transliteration for this run.
+            boolean isIncrementalRun =
+                (index.limit < globalLimit ? false : incremental);
+
+            int delta;

            // Implement rollback.  To understand the need for rollback,
            // consider the following transliterator:
@ -745,96 +775,149 @@ public abstract class Transliterator {
            //  "u" is "A > b;"
            //  "v" is a compound of "t; NFD; u" with a filter [:Ll:]
            //
-            // Now apply "c" to the input text "a".  The result is "b".  But if
+            // Now apply "v" to the input text "a".  The result is "b".  But if
            // the transliteration is done incrementally, then the NFD holds
            // things up after "t" has already transformed "a" to "A".  When
            // finishTransliterate() is called, "A" is _not_ processed because
            // it gets excluded by the [:Ll:] filter, and the end result is "A"
            // -- incorrect.  The problem is that the filter is applied to a
            // partially-transliterated result, when we only want it to apply to
-            // input text.  Although this example hinges on a compound
+            // input text.  Although this example describes a compound
            // transliterator containing NFD and a specific filter, it can
-            // actually happen with any transliterator which may do a partial
+            // happen with any transliterator which does a partial
            // transformation in incremental mode into characters outside its
            // filter.
            //
-            // There are two solutions.  The first is to add two new index
-            // values to the position structure, a filteredStart and a
-            // filteredLimit.  Then filteredTransliterate() can set and read
-            // these, and avoid filtering partially transliterated results.  A
-            // variant of this solution is to retain an internal state object
-            // with the filtered range that is indexed by the text pointer and
-            // the position object pointer, in analogy to strtok().  The third
-            // solution involves no change to the API and no internal state
-            // cache.  It is to roll back any partially transliterated results
-            // if (a) there is a filter, and (b) the transliteration is
-            // incremental.  This is the solution implemented here.
-            int rollbackStart = 0;
-            int rollbackCopy = 0;
-            if (ROLLBACK) {
-                if (isIncrementalSegment) {
-                    // Make a rollback copy at the end of the string
-                    rollbackStart = index.start;
-                    rollbackCopy = text.length();
-                    text.copy(rollbackStart, limit, rollbackCopy);
-                }
-            }
+            // To handle this, when in incremental mode we supply characters to
+            // handleTransliterate() in several passes.  Each pass adds one more
+            // input character to the input text.  That is, for input "ABCD", we
+            // first try "A", then "AB", then "ABC", and finally "ABCD".  If at
+            // any point we block (upon return, start < limit) then we roll
+            // back.  If at any point we complete the run (upon return start ==
+            // limit) then we commit that run.

-            // Delegate to subclass for actual transliteration.
-            handleTransliterate(text, index, isIncrementalSegment);
+            if (rollback && isIncrementalRun) {

-            int delta = index.limit - limit; // change in length
+                int runStart = index.start;
+                int runLimit = index.limit;
+                int runLength =  runLimit - runStart;

-            // Adjust overall limit for insertions/deletions.  Don't need
-            // to worry about contextLimit because handleTransliterate()
-            // maintains that.
-            globalLimit += delta;
+                // Make a rollback copy at the end of the string
+                int rollbackOrigin = text.length();
+                text.copy(runStart, runLimit, rollbackOrigin);

-            if (ROLLBACK) {
-                // If we failed to complete transliterate this segment,
-                // then we are done.  If rollback is required, then do so.
-                if (index.start != index.limit) {
-                    if (isIncrementalSegment) {
-                        // Replace [rollbackStart, limit) -- this is the
-                        // original filtered segment -- with
-                        // [rollbackCopy, text.length()), the rollback
-                        // copy, then delete the rollback copy.
-                        rollbackCopy += delta;
-                        int rollbackLen = text.length() - rollbackCopy;
-                    
-                        // Delete the partially transliterated segment
-                        rollbackCopy -= index.limit - rollbackStart;
-                        text.replace(rollbackStart, index.limit, "");
-                    
-                        // Copy the rollback copy back
-                        text.copy(rollbackCopy, text.length(), rollbackStart);
-                    
-                        // Delete the rollback copy
-                        rollbackCopy += rollbackLen;
-                        text.replace(rollbackCopy, text.length(), "");
-                    
-                        // Restore indices
-                        index.start = rollbackStart;
-                        index.limit = limit;
-                        index.contextLimit -= delta;
-                        globalLimit -= delta;
+                // Variables reflecting the commitment of completely
+                // transliterated text.  passStart is the runStart, advanced
+                // past committed text.  rollbackStart is the rollbackOrigin,
+                // advanced past rollback text that corresponds to committed
+                // text.
+                int passStart = runStart;
+                int rollbackStart = rollbackOrigin;
+
+                // The limit for each pass; we advance by one code point with
+                // each iteration.
+                int passLimit = index.start;
+
+                // Total length, in 16-bit code units, of uncommitted text.
+                // This is the length to be rolled back.
+                int uncommittedLength = 0;
+
+                // Total delta (change in length) for all passes
+                int totalDelta = 0;
+
+                // PASS MAIN LOOP -- Start with a single character, and extend
+                // the text by one character at a time.  Roll back partial
+                // transliterations and commit complete transliterations.
+                for (;;) {
+                    // Length of additional code point, either one or two
+                    int charLength =
+                        UTF16.getCharCount(UTF16.charAt(text, passLimit));
+                    passLimit += charLength;
+                    if (passLimit > runLimit) {
+                        break;
+                    }
+                    uncommittedLength += charLength;
+
+                    index.limit = passLimit;
+
+                    // Delegate to subclass for actual transliteration.  Upon
+                    // return, start will be updated to point after the
+                    // transliterated text, and limit and contextLimit will be
+                    // adjusted for length changes.
+                    handleTransliterate(text, index, true);
+
+                    delta = index.limit - passLimit; // change in length
+            
+                    // We failed to completely transliterate this pass.
+                    // Roll back the text.  Indices remain unchanged; reset
+                    // them where necessary.
+                    if (index.start != index.limit) {
+                        // Find the rollbackStart, adjusted for length changes
+                        // and the deletion of partially transliterated text.
+                        int rs = rollbackStart + delta - (index.limit - passStart);
+                    
+                        // Delete the partially transliterated text
+                        text.replace(passStart, index.limit, "");
+                    
+                        // Copy the rollback text back
+                        text.copy(rs, rs + uncommittedLength, passStart);
+                    
+                        // Restore indices to their original values
+                        index.start = passStart;
+                        index.limit = passLimit;
+                        index.contextLimit -= delta;
+                    }
+
+                    // We did completely transliterate this pass.  Update the
+                    // commit indices to record how far we got.  Adjust indices
+                    // for length change.
+                    else {
+                        // Move the pass indices past the committed text.
+                        passStart = passLimit = index.start;
+
+                        // Adjust the rollbackStart for length changes and move
+                        // it past the committed text.  All characters we've
+                        // processed to this point are committed now, so zero
+                        // out the uncommittedLength.
+                        rollbackStart += delta + uncommittedLength;
+                        uncommittedLength = 0;
+
+                        // Adjust indices for length changes.
+                        runLimit += delta;
+                        totalDelta += delta;
                    }
-                    break;
-                } else if (isIncrementalSegment) {
-                    // We finished this segment; delete the rollback copy
-                    rollbackCopy += delta;
-                    text.replace(rollbackCopy, text.length(), "");
-                }
-            } else {
-                // If we failed to complete transliterate this segment,
-                // then we are done.
-                if (index.start != index.limit) {
-                    break;
                }
+
+                // Adjust overall limit and rollbackOrigin for insertions and
+                // deletions.  Don't need to worry about contextLimit because
+                // handleTransliterate() maintains that.
+                rollbackOrigin += totalDelta;
+                globalLimit += totalDelta;
+
+                // Delete the rollback copy
+                text.replace(rollbackOrigin, rollbackOrigin + runLength, "");
            }

+            else {
+                // Delegate to subclass for actual transliteration.
+                int limit = index.limit;
+                handleTransliterate(text, index, isIncrementalRun);
+                delta = index.limit - limit; // change in length
+
+                // Adjust overall limit for insertions/deletions.  Don't need
+                // to worry about contextLimit because handleTransliterate()
+                // maintains that.
+                globalLimit += delta;
+            }
+
+            // If we failed to complete transliterate this run,
+            // then we are done.
+            if (index.start != index.limit) {
+                break;
+            }
+            
            // If we did completely transliterate this
-            // segment, then repeat with the next unfiltered segment.
+            // run, then repeat with the next unfiltered run.
        }

        // Start is valid where it is.  Limit needs to be put back where
@ -842,6 +925,22 @@ public abstract class Transliterator {
        index.limit = globalLimit;
    }

+    /**
+     * Transliterate a substring of text, as specified by index, taking filters
+     * into account.  This method is for subclasses that need to delegate to
+     * another transliterator, such as CompoundTransliterator.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     */
+    protected void filteredTransliterate(Replaceable text,
+                                         Position index,
+                                         boolean incremental) {
+        filteredTransliterate(text, index, incremental, false);
+    }
+
    /**
     * Returns the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.  The default value is zero, but
@ -1726,9 +1825,7 @@ public abstract class Transliterator {
     *
     * <p><b>Note:</b> Most subclasses that implement
     * handleTransliterator() will <em>not</em> want to use this
-     * method, since characters they see are already filtered.  Only
-     * subclasses with special requirements, such as those overriding
-     * filteredTransliterate(), should need this method.
+     * method, since characters they see are already filtered.
     *
     * @deprecated the new architecture provides filtering at the top
     * level.  This method will be removed Dec 31 2001.