From 1a8abc5b66b04ec41f5848da76b7455725d4b44e Mon Sep 17 00:00:00 2001
From: Syn Wee Quek <swquek@svn.icu-project.org>
Date: Wed, 8 Oct 2003 21:51:44 +0000
Subject: [PATCH] ICU-2212 Utilized UCharacterIterator in Collation

X-SVN-Rev: 13361
---
 .../dev/test/collator/CollationAPITest.java   |   3 +-
 .../test/collator/CollationIteratorTest.java  |  13 +-
 .../icu/impl/StringUCharacterIterator.java    | 263 ++++++++++++++++++
 .../icu/text/CollationElementIterator.java    | 247 +++++++++-------
 .../com/ibm/icu/text/RuleBasedCollator.java   |  32 ++-
 icu4j/src/com/ibm/icu/text/StringSearch.java  |   8 +-
 6 files changed, 458 insertions(+), 108 deletions(-)
 create mode 100644 icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java

diff --git a/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java b/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java
index 923daf5c77..dcfbc5e39a 100644
--- a/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java
@@ -353,7 +353,8 @@ public class CollationAPITest extends TestFmwk {
         CharacterIterator chariter=new StringCharacterIterator(testString1);
         // copy ctor
         CollationElementIterator iterator2 = ((RuleBasedCollator)col).getCollationElementIterator(chariter);
-        CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(testString2);
+        UCharacterIterator uchariter=UCharacterIterator.getInstance(testString2);
+        CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(uchariter);
     
         int offset = 0;
         offset = iterator1.getOffset();
diff --git a/icu4j/src/com/ibm/icu/dev/test/collator/CollationIteratorTest.java b/icu4j/src/com/ibm/icu/dev/test/collator/CollationIteratorTest.java
index 619e415770..210a1d3128 100644
--- a/icu4j/src/com/ibm/icu/dev/test/collator/CollationIteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/collator/CollationIteratorTest.java
@@ -426,12 +426,23 @@ public class CollationIteratorTest extends TestFmwk {
         //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
         CharacterIterator chariter = new StringCharacterIterator(test1);
         try {
-        iter2.setText(chariter);
+            iter2.setText(chariter);
         } catch (Exception e ) {
             errln("call to iter2->setText(chariter(test1)) failed.");
             return;
         }
         assertEqual(iter1, iter2);
+        
+        iter1.reset();
+        //now use the overloaded setText(UCharacterIterator) function to set the text
+        UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
+        try {
+            iter2.setText(uchariter);
+        } catch (Exception e ) {
+            errln("call to iter2->setText(uchariter(test1)) failed.");
+            return;
+        }
+        assertEqual(iter1, iter2);
     }
 
     /**
diff --git a/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java b/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java
new file mode 100644
index 0000000000..8c9501985a
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java
@@ -0,0 +1,263 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java,v $ 
+ * $Date: 2003/10/08 21:51:43 $ 
+ * $Revision: 1.1 $
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.UCharacterIterator;
+import com.ibm.icu.text.UTF16;    
+
+/**
+ * Used by Collation. UCharacterIterator on Strings. Can't use 
+ * ReplaceableUCharacterIterator because it is not easy to do a fast setText. 
+ * @author synwee
+ */
+public final class StringUCharacterIterator extends UCharacterIterator 
+{
+
+    // public constructor ------------------------------------------------------
+    
+    /**
+     * Public constructor, the new iterator is positioned at index 0.
+     * @param str text to iterate, IllegalArgumentException is thrown if null
+     */
+    public StringUCharacterIterator(String str)
+    {
+        if (str == null) {
+            throw new IllegalArgumentException();
+        }
+        m_text_ = str;
+        m_currentIndex_ = 0;
+    }
+    
+    /**
+     * Public default constructor
+     */
+    public StringUCharacterIterator()
+    {
+        m_text_ = "";
+        m_currentIndex_ = 0;
+    }
+    
+    // public methods ----------------------------------------------------------
+    
+    /**
+     * Creates a copy of this iterator, does not clone the underlying 
+     * <code>String</code>object
+     * @return copy of this iterator
+     */
+    public Object clone()
+    {
+		try {
+		    return super.clone();
+		} catch (CloneNotSupportedException e) {
+		    return null; // never invoked
+		}
+    }
+    
+    /**
+     * Returns the current UTF16 character.
+     * @return current UTF16 character
+     */
+    public int current()
+    {
+        if (m_currentIndex_ < m_text_.length()) {
+            return m_text_.charAt(m_currentIndex_);
+        }
+        return DONE;
+    }
+    
+    /**
+     * Returns the code point at the current index. The current index is 
+     * never modified by this method.
+     * @return the supplementary code point if the current index is at the 
+     *         lead surrogate of a well-formed surrogate pair, otherwise the 
+     *         UTF16 code unit at the current index, or DONE if the current 
+     *         index is at the end of the text
+     */
+    public int currentCodePoint()
+    {
+        // a single code unit is not enough when the index is on a lead 
+        // surrogate, so surrogate pairs are detected and combined by hand 
+        // from the two code units
+         
+        int length = m_text_.length();
+        if (m_currentIndex_ >= length) {
+            return DONE;
+        }
+        char ch = m_text_.charAt(m_currentIndex_);
+        // peek at the following code unit through a local offset instead of 
+        // moving m_currentIndex_, so that the iterator position is left 
+        // untouched on every return path, including the surrogate pair one
+        int trailIndex = m_currentIndex_ + 1;
+        if (UTF16.isLeadSurrogate(ch) && trailIndex < length) {
+            char ch2 = m_text_.charAt(trailIndex);
+            if (UTF16.isTrailSurrogate(ch2)) {
+                // we found a surrogate pair
+                return UCharacterProperty.getRawSupplementary(ch, ch2);
+            }
+        }
+        return ch;
+    }
+    
+    /**
+     * Returns the length of the text
+     * @return length of the text
+     */
+    public int getLength()
+    {
+        return m_text_.length();
+    }
+    
+    /**
+     * Gets the current currentIndex in text.
+     * @return current currentIndex in text.
+     */
+    public int getIndex()
+    {
+        return m_currentIndex_;
+    }
+        
+    /**
+     * Returns the UTF16 character at the currentIndex and then increments 
+     * the iterator's currentIndex by 1 (post-increment semantics). 
+     * If the currentIndex is already at the text length, the currentIndex 
+     * is left unchanged and a value of DONE is 
+     * returned. 
+     * @return UTF16 character at the currentIndex before the increment, or 
+     *         DONE if the currentIndex was already at the end of the text.
+     */
+    public int next()
+    {
+        if (m_currentIndex_ < m_text_.length()) 
+        {
+            return m_text_.charAt(m_currentIndex_ ++);
+        }
+        return DONE;
+    }
+    
+                
+    /**
+     * Decrements the iterator's currentIndex by 1 and returns the UTF16 
+     * character at the new currentIndex (pre-decrement semantics). 
+     * If the currentIndex is already 0, the currentIndex is left unchanged 
+     * and a value of DONE is returned. 
+     * @return previous UTF16 character in text or DONE if the currentIndex 
+     *         was already at the start of the text.
+     */
+    public int previous()
+    {
+        if (m_currentIndex_ > 0) {
+            return m_text_.charAt(-- m_currentIndex_);
+        }
+        return DONE;
+    }
+
+    /**
+     * <p>Sets the currentIndex to the specified currentIndex in the text. 
+     * No character is returned. 
+     * This assumes the text is stored as 16-bit code units.</p>
+     * @param currentIndex the currentIndex within the text; valid values are 
+     *        0 up to and including the length of the text, where the length 
+     *        itself positions the iterator at the end of the text.
+     * @exception IndexOutOfBoundsException is thrown if an invalid 
+     *            currentIndex is supplied. i.e. currentIndex is out of bounds.
+     */
+    public void setIndex(int currentIndex) throws IndexOutOfBoundsException
+    {
+        if (currentIndex < 0 || currentIndex > m_text_.length()) {
+            throw new IndexOutOfBoundsException();
+        }
+        m_currentIndex_ = currentIndex;
+    }
+    
+    /**
+     * Fills the buffer with the underlying text storage of the iterator.
+     * If the buffer capacity is not enough an exception is thrown. The capacity
+     * of the fill in buffer should at least be equal to length of text in the 
+     * iterator obtained by calling <code>getLength()</code>.
+     * <b>Usage:</b>
+     * 
+     * <code>
+     * <pre>
+     *         UCharacterIterator iter = UCharacterIterator.getInstance(text);
+     *         char[] buf = new char[iter.getLength()];
+     *         iter.getText(buf);
+     *         
+     *         OR
+     *         char[] buf= new char[1];
+     *         int len = 0;
+     *         for(;;){
+     *             try{
+     *                 len = iter.getText(buf);
+     *                 break;
+     *             }catch(IndexOutOfBoundsException e){
+     *                 buf = new char[iter.getLength()];
+     *             }
+     *         }
+     * </pre>
+     * </code>
+     *             
+     * @param fillIn array of chars to fill with the underlying UTF-16 units.
+     * @param offset the position within the array to start putting the data.
+     * @return the number of code units added to fillIn, as a convenience
+     * @exception IndexOutOfBoundsException if there is not enough
+     *            room after offset in the array, or if offset &lt; 0.
+     */
+    public int getText(char[] fillIn, int offset)
+    {
+        int length = m_text_.length();
+        // subtract rather than add so that a huge offset cannot overflow int
+        if (offset < 0 || offset > fillIn.length - length) {
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+        m_text_.getChars(0, length, fillIn, offset);
+        return length;
+    }
+    
+    /**
+     * Convenience method for returning the underlying text storage as as 
+     * string
+     * @return the underlying text storage in the iterator as a string
+     */
+    public String getText() 
+    {
+        return m_text_;
+    }       
+    
+    /**
+     * Reset this iterator to point to a new string and move the current 
+     * index back to 0, so that callers can reuse one iterator instead of 
+     * allocating a new one every time their own setText method is called.
+     * @param text The String to be iterated over
+     * @exception NullPointerException is thrown if text is null
+     */
+    public void setText(String text) 
+    {
+        if (text == null) {
+            throw new NullPointerException();
+        }
+        m_text_ = text;
+        m_currentIndex_ = 0;
+    }
+        
+    // private data members ----------------------------------------------------
+    
+    /**
+     * Text string object
+     */
+    private String m_text_;
+    /**
+     * Current currentIndex
+     */
+    private int m_currentIndex_;
+
+}
diff --git a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
index 2a3dda8973..f088d277fc 100755
--- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
@@ -9,12 +9,17 @@
 */
 package com.ibm.icu.text;
 
-import java.text.StringCharacterIterator;
-import java.text.CharacterIterator;
+/***
+ * import java.text.StringCharacterIterator;
+ * import java.text.CharacterIterator;
+ */
 import com.ibm.icu.impl.NormalizerImpl;
 import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.impl.StringUCharacterIterator;
+import com.ibm.icu.impl.CharacterIteratorWrapper;
 import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.lang.UCharacter;
+import java.text.CharacterIterator;
 
 /**
  * <p><code>CollationElementIterator</code> is an iterator created by
@@ -218,7 +223,7 @@ public final class CollationElementIterator
      */
     public void reset()
     {
-        m_source_.setIndex(m_source_.getBeginIndex());
+        m_source_.setToStart();
         updateInternalState();
     }
 
@@ -255,13 +260,13 @@ public final class CollationElementIterator
             m_CEBufferSize_ = 0;
             m_CEBufferOffset_ = 0;
         }
-
-        char ch = nextChar();
-        /* System.out.println("ch " + Integer.toHexString(ch) + " " +
-           Integer.toHexString(m_source_.current()));*/
-        if (ch == CharacterIterator.DONE) {
+ 
+        int ch_int = nextChar();
+        
+        if (ch_int == UCharacterIterator.DONE) {
             return NULLORDER;
         }
+        char ch = (char)ch_int;
         if (m_collator_.m_isHiragana4_) {
             m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
                                      && !(ch > 0x3094 && ch < 0x309d);
@@ -325,7 +330,7 @@ public final class CollationElementIterator
         if (m_source_.getIndex() <= 0 && m_isForwards_) {
             // if iterator is new or reset, we can immediate perform  backwards
             // iteration even when the offset is not right.
-            m_source_.setIndex(m_source_.getEndIndex());
+            m_source_.setToLimit();
             updateInternalState();
         }
         m_isForwards_ = false;
@@ -337,10 +342,11 @@ public final class CollationElementIterator
             m_CEBufferSize_ = 0;
             m_CEBufferOffset_ = 0;
         }
-        char ch = previousChar();
-        if (ch == CharacterIterator.DONE) {
+        int ch_int = previousChar();
+        if (ch_int == UCharacterIterator.DONE) {
             return NULLORDER;
         }
+        char ch = (char)ch_int;
         if (m_collator_.m_isHiragana4_) {
             m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
         }
@@ -477,13 +483,14 @@ public final class CollationElementIterator
     public void setOffset(int offset)
     {
         m_source_.setIndex(offset);
-        char ch = m_source_.current();
-        if (ch != CharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
+        int ch_int = m_source_.current();
+        char ch = (char)ch_int;
+        if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
             // if it is unsafe we need to check if it is part of a contraction
             // or a surrogate character
             if (UTF16.isTrailSurrogate(ch)) {
                 // if it is a surrogate pair we move up one character
-                char prevch = m_source_.previous();
+                char prevch = (char)m_source_.previous();
                 if (!UTF16.isLeadSurrogate(prevch)) {
                     m_source_.setIndex(offset); // go back to the same index
                 }
@@ -495,7 +502,7 @@ public final class CollationElementIterator
                     if (!m_collator_.isUnsafe(ch)) {
                         break;
                     }
-                    ch = m_source_.previous();
+                    ch = (char)m_source_.previous();
                 }
                 updateInternalState();
                 int prevoffset = 0;
@@ -510,12 +517,12 @@ public final class CollationElementIterator
         // direction code to prevent next and previous from returning a 
         // character if we are already at the ends
         offset = m_source_.getIndex();
-        if (offset == m_source_.getBeginIndex()) {
+        if (offset == 0/* m_source_.getBeginIndex() */) {
             // preventing previous() from returning characters from the end of 
             // the string again if we are at the beginning
             m_isForwards_ = false; 
         }
-        else if (offset == m_source_.getEndIndex()) {
+        else if (offset == m_source_.getLength()) {
             // preventing next() from returning characters from the start of 
             // the string again if we are at the end
             m_isForwards_ = true;
@@ -535,6 +542,22 @@ public final class CollationElementIterator
         m_source_ = m_srcUtilIter_;
         updateInternalState();
     }
+    
+    /**
+     * <p>Set a new source string iterator for iteration, and reset the
+     * offset to the beginning of the text.
+     * </p>
+     * <p>The source iterator's integrity will be preserved since a new copy
+     * will be created for use.</p>
+     * @param source the new source string iterator for iteration.
+     * @draft ICU 2.8
+     */
+    public void setText(UCharacterIterator source)
+    {
+        m_srcUtilIter_.setText(source.getText());
+        m_source_ = m_srcUtilIter_;
+        updateInternalState(); 
+    }
 
     /**
      * <p>Set a new source string iterator for iteration, and reset the
@@ -545,8 +568,8 @@ public final class CollationElementIterator
      */
     public void setText(CharacterIterator source)
     {
-        m_source_ = source;
-        m_source_.setIndex(m_source_.getBeginIndex());
+        m_source_ = new CharacterIteratorWrapper(source);
+        m_source_.setToStart();
         updateInternalState();
     }
 
@@ -568,10 +591,13 @@ public final class CollationElementIterator
         if (that instanceof CollationElementIterator) {
             CollationElementIterator thatceiter
                                               = (CollationElementIterator)that;
-            if (m_collator_.equals(thatceiter.m_collator_)
-                && m_source_.equals(thatceiter.m_source_)) {
-                return true;
+            if (!m_collator_.equals(thatceiter.m_collator_)) {
+                return false;
             }
+            // checks the text 
+            return m_source_.getIndex() == thatceiter.m_source_.getIndex()
+                   && m_source_.getText().equals(
+                                            thatceiter.m_source_.getText());
         }
         return false;
     }
@@ -591,7 +617,7 @@ public final class CollationElementIterator
      */
     CollationElementIterator(String source, RuleBasedCollator collator)
     {
-        m_srcUtilIter_ = new StringCharacterIterator(source);
+        m_srcUtilIter_ = new StringUCharacterIterator(source);
         m_utilStringBuffer_ = new StringBuffer();
         m_source_ = m_srcUtilIter_;
         m_collator_ = collator;
@@ -615,9 +641,34 @@ public final class CollationElementIterator
     CollationElementIterator(CharacterIterator source,
                              RuleBasedCollator collator)
     {
-        m_srcUtilIter_ = new StringCharacterIterator("");
+        m_srcUtilIter_ = new StringUCharacterIterator();
         m_utilStringBuffer_ = new StringBuffer();
-        m_source_ = source;
+        m_source_ = new CharacterIteratorWrapper(source);
+        m_collator_ = collator;
+        m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
+        m_buffer_ = new StringBuffer();
+        m_utilSpecialBackUp_ = new Backup();
+        updateInternalState();
+    }
+    
+    /**
+     * <p>CollationElementIterator constructor. This takes a source
+     * character iterator and a RuleBasedCollator. The iterator will
+     * walk through the source string based on the rules defined by
+     * the collator. If the source string is empty, NULLORDER will be
+     * returned on the first call to next().</p>
+     *
+     * @param source the source string iterator.
+     * @param collator the RuleBasedCollator
+     * @draft ICU 2.2
+     */
+    CollationElementIterator(UCharacterIterator source,
+                             RuleBasedCollator collator)
+    {
+        m_srcUtilIter_ = new StringUCharacterIterator();
+        m_utilStringBuffer_ = new StringBuffer();
+        m_srcUtilIter_.setText(source.getText());
+        m_source_ = m_srcUtilIter_;
         m_collator_ = collator;
         m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
         m_buffer_ = new StringBuffer();
@@ -717,11 +768,11 @@ public final class CollationElementIterator
      * @param ch character to test
      * @return true if ch is a Thai prevowel, false otherwise
      */
-    static final boolean isThaiPreVowel(char ch)
+    static final boolean isThaiPreVowel(int ch)
     {
         return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
     }
-
+    
     /**
      * <p>Sets the iterator to point to the collation element corresponding to
      * the specified character (the parameter is a CHARACTER offset in the
@@ -736,9 +787,10 @@ public final class CollationElementIterator
      * @param source the new source string iterator for iteration.
      * @param offset to the source
      */
-    void setText(CharacterIterator source, int offset)
+    void setText(UCharacterIterator source, int offset)
     {
-        m_source_ = source;
+        m_srcUtilIter_.setText(source.getText());
+        m_source_ = m_srcUtilIter_;
         m_source_.setIndex(offset);
         updateInternalState();
     }
@@ -796,7 +848,7 @@ public final class CollationElementIterator
     /**
      * Source string iterator
      */
-    private CharacterIterator m_source_;
+    private UCharacterIterator m_source_;
     /**
      * This is position to the m_buffer_, -1 if iterator is not in m_buffer_
      */
@@ -846,7 +898,7 @@ public final class CollationElementIterator
     /**
      * Utility
      */
-    private StringCharacterIterator m_srcUtilIter_;
+    private StringUCharacterIterator m_srcUtilIter_;
     private StringBuffer m_utilStringBuffer_;
     private StringBuffer m_utilSkippedBuffer_;
     private CollationElementIterator m_utilColEIter_;
@@ -950,7 +1002,7 @@ public final class CollationElementIterator
         m_CEBufferOffset_ = 0;
         m_CEBufferSize_ = 0;
         m_FCDLimit_ = -1;
-        m_FCDStart_ = m_source_.getEndIndex();
+        m_FCDStart_ = m_source_.getLength();
         m_isHiragana4_ = m_collator_.m_isHiragana4_;
         m_isForwards_ = true;
     }
@@ -1022,8 +1074,7 @@ public final class CollationElementIterator
         m_buffer_.setLength(0);
         m_source_.setIndex(m_FCDStart_);
         for (int i = 0; i < size; i ++) {
-            m_buffer_.append(m_source_.current());
-            m_source_.next();
+            m_buffer_.append((char)m_source_.next());
         }
         String decomp = Normalizer.decompose(m_buffer_.toString(), false);
         m_buffer_.setLength(0);
@@ -1059,7 +1110,9 @@ public final class CollationElementIterator
         // trie access
         char fcd = NormalizerImpl.getFCD16(ch);
         if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
-            ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd
+            m_source_.next();
+            ch = (char)m_source_.current(); 
+            // UCharacterIterator.DONE has 0 fcd
             if (UTF16.isTrailSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
             } else {
@@ -1073,14 +1126,17 @@ public final class CollationElementIterator
             // The current char has a non-zero trailing CC. Scan forward until
             // we find a char with a leading cc of zero.
             while (true) {
-                ch = m_source_.next();
-                if (ch == CharacterIterator.DONE) {
+                m_source_.next();
+                int ch_int = m_source_.current();
+                if (ch_int == UCharacterIterator.DONE) {
                     break;
                 }
+                ch = (char)ch_int;
                 // trie access
                 fcd = NormalizerImpl.getFCD16(ch);
                 if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
-                    ch = m_source_.next();
+                    m_source_.next();
+                    ch = (char)m_source_.current();
                     if (UTF16.isTrailSurrogate(ch)) {
                         fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
                     } else {
@@ -1112,9 +1168,9 @@ public final class CollationElementIterator
      * <p>Offsets are returned at the next character.</p>
      * @return next fcd character
      */
-    private char nextChar()
+    private int nextChar()
     {
-        char result;
+        int result;
 
         // loop handles the next character whether it is in the buffer or not.
         if (m_bufferOffset_ < 0) {
@@ -1147,8 +1203,9 @@ public final class CollationElementIterator
         if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
             // We need to peek at the next character in order to tell if we are
             // FCD
-            char next = m_source_.next();
-            if (next == CharacterIterator.DONE
+            m_source_.next();
+            int next = m_source_.current();
+            if (next == UCharacterIterator.DONE
                 || next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
                 return result; // end of source string and if next character
                 // starts with a base character is always fcd.
@@ -1156,7 +1213,7 @@ public final class CollationElementIterator
         }
 
         // Need a more complete FCD check and possible normalization.
-        if (!FCDCheck(result, startoffset)) {
+        if (!FCDCheck((char)result, startoffset)) {
             normalize();
             result = m_buffer_.charAt(0);
             m_bufferOffset_ = 1;
@@ -1206,7 +1263,7 @@ public final class CollationElementIterator
         else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) {
             // note trail surrogate characters gets 0 fcd
             char trailch = ch;
-            ch = m_source_.previous();
+            ch = (char)m_source_.previous();
             if (UTF16.isLeadSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16(ch);
                 if (fcd != 0) {
@@ -1228,13 +1285,13 @@ public final class CollationElementIterator
             if (offset == 0) {
                 break;
             }
-            ch = m_source_.previous();
+            ch = (char)m_source_.previous();
             if (!UTF16.isSurrogate(ch)) {
                 fcd = NormalizerImpl.getFCD16(ch);
             }
             else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
                 char trail = ch;
-                ch = m_source_.previous();
+                ch = (char)m_source_.previous();
                 if (UTF16.isLeadSurrogate(ch)) {
                     fcd = NormalizerImpl.getFCD16(ch);
                 }
@@ -1270,7 +1327,7 @@ public final class CollationElementIterator
      * <p>Offsets are returned at the current character.</p>
      * @return previous fcd character
      */
-    private char previousChar()
+    private int previousChar()
     {
         if (m_bufferOffset_ >= 0) {
             m_bufferOffset_ --;
@@ -1280,10 +1337,10 @@ public final class CollationElementIterator
             else {
                 // At the start of buffer, route back to string.
                 m_buffer_.setLength(0);
-                if (m_FCDStart_ == m_source_.getBeginIndex()) {
+                if (m_FCDStart_ == 0) {
                     m_FCDStart_ = -1;
-                    m_source_.setIndex(m_source_.getBeginIndex());
-                    return CharacterIterator.DONE;
+                    m_source_.setIndex(0);
+                    return UCharacterIterator.DONE;
                 }
                 else {
                     m_FCDLimit_ = m_FCDStart_;
@@ -1292,21 +1349,21 @@ public final class CollationElementIterator
                 }
             }
         }
-        char result = m_source_.previous();
+        int result = m_source_.previous();
         int startoffset = m_source_.getIndex();
         if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
             || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
             || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
             return result;
         }
-        char ch = m_source_.previous();
+        int ch = m_source_.previous();
         if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
             // if previous character is FCD
             m_source_.next();
             return result;
         }
         // Need a more complete FCD check and possible normalization.
-        if (!FCDCheckBackwards(result, startoffset)) {
+        if (!FCDCheckBackwards((char)result, startoffset)) {
             normalizeBackwards();
             m_bufferOffset_ --;
             result = m_buffer_.charAt(m_bufferOffset_);
@@ -1340,10 +1397,10 @@ public final class CollationElementIterator
             }
             else {
                 // at end of buffer. check if fcd is at the end
-                return m_FCDLimit_ == m_source_.getEndIndex();
+                return m_FCDLimit_ == m_source_.getLength();
             }
         }
-        return m_source_.getEndIndex() == m_source_.getIndex();
+        return m_source_.getLength() == m_source_.getIndex();
     }
 
     /**
@@ -1408,12 +1465,12 @@ public final class CollationElementIterator
                 // Note: this operation might activate the normalization buffer. We have to check for 
                 // that and act accordingly.
                 m_FCDStart_ = m_source_.getIndex() - 1;
-                char thCh = nextChar(); 
+                char thCh = (char)nextChar(); 
                 int cp = thCh;
                 if (UTF16.isLeadSurrogate(thCh)) {
                     if (!isEnd()) {
                         backupInternalState(m_utilSpecialBackUp_);
-                        char trailCh = nextChar(); 
+                        char trailCh = (char)nextChar(); 
                         if (UTF16.isTrailSurrogate(trailCh)) {
                             cp = UCharacterProperty.getRawSupplementary(
                                                                 thCh, trailCh);                  
@@ -1582,7 +1639,7 @@ public final class CollationElementIterator
                 ce = collator.m_contractionCE_[offset];
                 break;
             }
-            char previous = previousChar();
+            char previous = (char)previousChar();
             while (previous > collator.m_contractionIndex_[offset]) {
                 // contraction characters are ordered, skip smaller characters
                 offset ++;
@@ -1613,7 +1670,7 @@ public final class CollationElementIterator
                      // 3. schar is a trail surrogate in a valid surrogate
                      // sequence that is explicitly set to zero.
                      if (!isBackwardsStart()) {
-                         char lead = previousChar();
+                         char lead = (char)previousChar();
                          if (UTF16.isLeadSurrogate(lead)) {
                              isZeroCE = collator.m_trie_.getLeadValue(lead);
                              if (RuleBasedCollator.getTag(isZeroCE)
@@ -1706,12 +1763,11 @@ public final class CollationElementIterator
      * Returns the current character for forward iteration
      * @return current character
      */
-    private char currentChar()
+    private int currentChar()
     {
         if (m_bufferOffset_ < 0) {
-            char result = m_source_.previous();
-            m_source_.next();
-            return result;
+            m_source_.previous();
+            return m_source_.next();
         }
 
         // m_bufferOffset_ is never 0 in normal circumstances except after a
@@ -1740,8 +1796,8 @@ public final class CollationElementIterator
         else {
             m_utilSkippedBuffer_.setLength(0);
         }
-        char ch = currentChar();
-        m_utilSkippedBuffer_.append(currentChar());
+        char ch = (char)currentChar();
+        m_utilSkippedBuffer_.append((char)currentChar());
         // accent after the first character
         if (m_utilSpecialDiscontiguousBackUp_ == null) {
             m_utilSpecialDiscontiguousBackUp_ = new Backup();
@@ -1750,14 +1806,15 @@ public final class CollationElementIterator
         char nextch = ch;
         while (true) {
             ch = nextch;
-            nextch = nextChar();
-            if (nextch == CharacterIterator.DONE
+            int ch_int = nextChar();
+            nextch = (char)ch_int;
+            if (ch_int == UCharacterIterator.DONE
                 || getCombiningClass(nextch) == 0) {
                 // if there are no more accents to move around
                 // we don't have to shift previousChar, since we are resetting
                 // the offset later
                 if (multicontraction) {
-                    if (nextch != CharacterIterator.DONE) {
+                    if (ch_int != UCharacterIterator.DONE) {
                         previousChar(); // backtrack
                     }
                     setDiscontiguous(m_utilSkippedBuffer_);
@@ -1836,7 +1893,7 @@ public final class CollationElementIterator
             byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF);
             // checks if all characters have the same combining class
             byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
-            char ch = nextChar();
+            char ch = (char)nextChar();
             offset ++;
             while (ch > collator.m_contractionIndex_[offset]) {
                 // contraction characters are ordered, skip all smaller
@@ -1859,7 +1916,7 @@ public final class CollationElementIterator
                 else if (UTF16.isLeadSurrogate(ch)) {
                     if (!isEnd()) {
                         backupInternalState(m_utilSpecialBackUp_);
-                        char trail = nextChar();
+                        char trail = (char)nextChar();
                         if (UTF16.isTrailSurrogate(trail)) {
                             // do stuff with trail
                             if (RuleBasedCollator.getTag(isZeroCE)
@@ -1901,10 +1958,11 @@ public final class CollationElementIterator
                 else {
                     // Contraction is possibly discontiguous.
                     // find the next character if ch is not a base character
-                    char nextch = nextChar();
-                    if (nextch != CharacterIterator.DONE) {
+                    int ch_int = nextChar();
+                    if (ch_int != UCharacterIterator.DONE) {
                         previousChar();
                     }
+                    char nextch = (char)ch_int;
                     if (getCombiningClass(nextch) == 0) {
                         previousChar();
                         // base character not part of discontiguous contraction
@@ -2098,11 +2156,11 @@ public final class CollationElementIterator
                 // Get next character.
                 if (!isEnd()){
                     backupInternalState(m_utilSpecialBackUp_);
-                    char ch = nextChar();
-                    int char32 = ch;
+                    int char32 = nextChar();
+                    char ch = (char)char32;
                     if (UTF16.isLeadSurrogate(ch)){
                         if (!isEnd()) {
-                            char trail = nextChar();
+                            char trail = (char)nextChar();
                             if (UTF16.isTrailSurrogate(trail)) {
                                char32 = UCharacterProperty.getRawSupplementary(
                                                                    ch, trail);
@@ -2227,8 +2285,9 @@ public final class CollationElementIterator
      */
     private int nextSurrogate(char ch)
     {
-        char nextch = nextChar();
-        if (nextch != CharacterIterator.DONE &&
+        int ch_int = nextChar(); // int result keeps the DONE sentinel intact
+        char nextch = (char)ch_int;
+        if (ch_int != UCharacterIterator.DONE &&
             UTF16.isTrailSurrogate(nextch)) {
             int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
             return nextImplicit(codepoint);
@@ -2330,7 +2389,7 @@ public final class CollationElementIterator
                         return IGNORABLE;
                     }
                     backupInternalState(m_utilSpecialBackUp_);
-                    char trail = nextChar();
+                    char trail = (char)nextChar();
                     ce = nextSurrogate(collator, ce, trail);
                     // calculate the supplementary code point value,
                     // if surrogate was not tailored we go one more round
@@ -2403,10 +2462,10 @@ public final class CollationElementIterator
         
         // check that ch is from the normalization buffer or not
         boolean innorm = m_bufferOffset_ >= 0;
-        char prevch = previousChar();
+        int prevch = previousChar();
         if (!isThaiPreVowel(prevch)) {
             // we now rearrange unconditionally do not check for base consonant
-            if (prevch != CharacterIterator.DONE) {
+            if (prevch != UCharacterIterator.DONE) {
                 nextChar();
             }
             // Treat Thai as a length one expansion
@@ -2445,10 +2504,10 @@ public final class CollationElementIterator
             m_FCDLimit_ = m_FCDStart_ + 2;
         }
         if (reorder) {
-            m_buffer_.insert(1, prevch);
+            m_buffer_.insert(1, (char)prevch);
         } 
         else {
-            m_buffer_.insert(0, prevch);
+            m_buffer_.insert(0, (char)prevch);
         }
         return IGNORABLE;
     }
@@ -2475,7 +2534,7 @@ public final class CollationElementIterator
                 ce = collator.m_contractionCE_[offset];
                 break;
             }
-            char prevch = previousChar();
+            char prevch = (char)previousChar();
             while (prevch > collator.m_contractionIndex_[offset]) {
                 // since contraction codepoints are ordered, we skip all that
                 // are smaller
@@ -2505,7 +2564,7 @@ public final class CollationElementIterator
                     // 3. schar is a trail surrogate in a valid surrogate
                     //    sequence that is explicitly set to zero.
                     if (!isBackwardsStart()) {
-                        char lead = previousChar();
+                        char lead = (char)previousChar();
                         if (UTF16.isLeadSurrogate(lead)) {
                             isZeroCE = collator.m_trie_.getLeadValue(lead);
                             if (RuleBasedCollator.getTag(isZeroCE)
@@ -2563,7 +2622,7 @@ public final class CollationElementIterator
         m_utilStringBuffer_.setLength(0);
         // since we might encounter normalized characters (from the thai
         // processing) we can't use peekCharacter() here.
-        char prevch = previousChar();
+        char prevch = (char)previousChar();
         boolean atStart = false;
         while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
             m_utilStringBuffer_.insert(0, ch);
@@ -2572,7 +2631,7 @@ public final class CollationElementIterator
                 atStart = true;
                 break;
             }
-            prevch = previousChar();
+            prevch = (char)previousChar();
         }
         if (!atStart) {
             // undo the previousChar() if we didn't reach the beginning 
@@ -2692,7 +2751,7 @@ public final class CollationElementIterator
             int char32 = ch;
             if (UTF16.isTrailSurrogate(ch)) {
                 if (!isBackwardsStart()){
-                    char lead = previousChar();
+                    char lead = (char)previousChar();
                     if (UTF16.isLeadSurrogate(lead)) {
                         char32 = UCharacterProperty.getRawSupplementary(lead,
                                                                         ch);
@@ -2753,11 +2812,11 @@ public final class CollationElementIterator
             
                 if (!isBackwardsStart()){
                     backupInternalState(m_utilSpecialBackUp_);
-                    ch = previousChar();
-                    char32 = ch;
+                    char32 = previousChar();
+                    ch = (char)char32; // narrow the code unit just read
                     if (UTF16.isTrailSurrogate(ch)){
                         if (!isBackwardsStart()) {
-                            char lead = previousChar();
+                            char lead = (char)previousChar();
                             if (UTF16.isLeadSurrogate(lead)) {
                                 char32 
                                     = UCharacterProperty.getRawSupplementary(
@@ -2926,7 +2985,7 @@ public final class CollationElementIterator
             // we are at the start of the string, wrong place to be at
             return IGNORABLE;
         }
-        char prevch = previousChar();
+        char prevch = (char)previousChar();
         // Handles Han and Supplementary characters here.
         if (UTF16.isLeadSurrogate(prevch)) {
             return previousImplicit(
@@ -3099,12 +3158,12 @@ public final class CollationElementIterator
         if (offset != 0) {
             int currentoffset = m_source_.getIndex();
             m_source_.setIndex(currentoffset + offset);
-            char result = m_source_.current();
+            char result = (char)m_source_.current();
             m_source_.setIndex(currentoffset);
             return result;
         } 
         else {
-            return m_source_.current();
+            return (char)m_source_.current();
         }
     }
     
diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
index d867f5b9d3..8cad6e0621 100755
--- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
-* $Date: 2003/09/22 06:24:20 $
-* $Revision: 1.47 $
+* $Date: 2003/10/08 21:51:44 $
+* $Revision: 1.48 $
 *
 *******************************************************************************
 */
@@ -19,7 +19,6 @@ import java.util.Locale;
 import java.util.ResourceBundle;
 import java.util.Arrays;
 import java.text.CharacterIterator;
-import java.text.StringCharacterIterator;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.util.VersionInfo;
 import com.ibm.icu.impl.IntTrie;
@@ -28,6 +27,7 @@ import com.ibm.icu.impl.ICULocaleData;
 import com.ibm.icu.impl.BOCU;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.StringUCharacterIterator;
 
 /**
  * <p>RuleBasedCollator is a concrete subclass of Collator. It allows
@@ -255,6 +255,19 @@ public final class RuleBasedCollator extends Collator
         CharacterIterator newsource = (CharacterIterator)source.clone();
         return new CollationElementIterator(newsource, this);
     }
+    
+    /**
+     * Return a CollationElementIterator for the given UCharacterIterator.
+     * NOTE(review): source is handed to the constructor without a clone()
+     * here, unlike the CharacterIterator overload; confirm it is copied.
+     * @see CollationElementIterator
+     * @draft ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(
+                                                UCharacterIterator source)
+    {
+        return new CollationElementIterator(source, this);
+    }
 
     // public setters --------------------------------------------------------
 
@@ -1731,9 +1744,10 @@ public final class RuleBasedCollator extends Collator
         if (ch < m_minUnsafe_) {
             return false;
         }
-
+        
         if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
-            if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) {
+            if (UTF16.isLeadSurrogate(ch) 
+                || UTF16.isTrailSurrogate(ch)) {
                 //  Trail surrogate are always considered unsafe.
                 return true;
             }
@@ -1966,9 +1980,9 @@ public final class RuleBasedCollator extends Collator
     /**
      * Bunch of utility iterators
      */
-    private StringCharacterIterator m_srcUtilIter_;
+    private StringUCharacterIterator m_srcUtilIter_;
     private CollationElementIterator m_srcUtilColEIter_;
-    private StringCharacterIterator m_tgtUtilIter_;
+    private StringUCharacterIterator m_tgtUtilIter_;
     private CollationElementIterator m_tgtUtilColEIter_;
     /**
      * Utility comparison flags
@@ -3787,9 +3801,9 @@ public final class RuleBasedCollator extends Collator
      *  Initializes utility iterators and byte buffer used by compare
      */
     private final void initUtility() {
-       m_srcUtilIter_ = new StringCharacterIterator(new String(""));
+       m_srcUtilIter_ = new StringUCharacterIterator();
        m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);
-       m_tgtUtilIter_ = new StringCharacterIterator(new String(""));
+       m_tgtUtilIter_ = new StringUCharacterIterator();
        m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);
        m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
        m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
diff --git a/icu4j/src/com/ibm/icu/text/StringSearch.java b/icu4j/src/com/ibm/icu/text/StringSearch.java
index 22aff1214b..3ac08b73c1 100755
--- a/icu4j/src/com/ibm/icu/text/StringSearch.java
+++ b/icu4j/src/com/ibm/icu/text/StringSearch.java
@@ -5,8 +5,8 @@
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $ 
- * $Date: 2003/07/31 19:51:12 $ 
- * $Revision: 1.25 $
+ * $Date: 2003/10/08 21:51:44 $ 
+ * $Revision: 1.26 $
  *
  *****************************************************************************************
  */
@@ -17,6 +17,7 @@ import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;
 import java.util.Locale;
 import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.impl.CharacterIteratorWrapper;
 import com.ibm.icu.impl.NormalizerImpl;
 
 /**
@@ -1088,7 +1089,8 @@ public final class StringSearch extends SearchIterator
 	        				 	 || breakIterator.following(end - 1) == end);
 	        if (result) {
 	            // iterates the individual ces
-	            m_utilColEIter_.setText(targetText, start);
+	            m_utilColEIter_.setText(
+                    new CharacterIteratorWrapper(targetText), start);
 	            for (int count = 0; count < m_pattern_.m_CELength_;
 	                 count ++) {
                     int ce = getCE(m_utilColEIter_.next());