ICU-1897

builder completed but not fully tested. and documentation updates. X-SVN-Rev: 8914
2002-06-21 23:57:56 +00:00 · 2002-06-21 23:57:56 +00:00 · 1cef5c4d34
commit 1cef5c4d34
parent 4a6e11bcba
9 changed files with 10637 additions and 2318 deletions
--- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
@ -6,53 +6,77 @@ import com.ibm.icu.impl.NormalizerImpl;
 import com.ibm.icu.impl.UCharacterProperty;

 /**
- * <p>The <code>CollationElementIterator</code> class is used as an iterator
- * to walk through each character of an international string. Use the iterator
- * to return the ordering priority of the positioned character. The ordering
- * priority of a character, which we refer to as a key, defines how a 
- * character is collated in the given collation object.</p>
- * <p>For example, consider the following in Spanish:
+ * <p>
+ * The <code>CollationElementIterator</code> object is an iterator created
+ * by a RuleBasedCollator to walk through an international string. The return
+ * result of each iteration is a 32 bit collation element that defines the 
+ * ordering priority of the next sequence of characters in the source string. 
+ * </p>
+ * <p>For better illustration, consider the following in Spanish:
 * <blockquote>
 * <pre>
- * "ca" -> the first key is key('c') and second key is key('a').
- * "cha" -> the first key is key('ch') and second key is key('a').
+ * "ca" -> the first collation element is collation_element('c') and second 
+ *         collation element is collation_element('a').
+ * 
+ * Since "ch" in Spanish sorts as one entity, the below example returns one
+ * collation element for the 2 characters 'c' and 'h'
+ * 
+ * "cha" -> the first collation element is collation_element('ch') and second 
+ *          collation element is collation_element('a').
 * </pre>
 * </blockquote>
 * And in German,
 * <blockquote>
 * <pre>
- * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and
- * the third key is key('b').
+ * Since the character '&#228;' sorts in German as if it were the two 
+ * characters 'a' and 'e', the below example returns 2 collation elements 
+ * for the single character '&#228;'
+ * 
+ * "&#228;b" -> the first collation element is collation_element('a'), the 
+ *              second collation element is collation_element('e'), and the 
+ *              third collation element is collation_element('b').
 * </pre>
 * </blockquote>
 * </p>
- * <p>The key of a character is an integer composed of primary order(short),
- * secondary order(byte), and tertiary order(byte). Java strictly defines
- * the size and signedness of its primitive data types. Therefore, the static
- * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
- * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
- * and <code>short</code> respectively to ensure the correctness of the key
- * value.</p>
 * <p>
- * Example of the iterator usage,
+ * For collation ordering comparison, the collation element results cannot be 
+ * compared simply by using basic arithmetic operators, e.g. &lt;, == or &gt;; 
+ * further processing has to be done. Details can be found in the ICU
+ * <a href="http://oss.software.ibm.com/icu/userguide/Collate_ServiceArchitecture.html">
+ * user guide</a>. An example of using the CollationElementIterator for
+ * collation ordering comparison is the class <a href="StringSearch.html">
+ * com.ibm.icu.text.StringSearch</a>.
+ * </p>
+ * <p>
+ * To construct a CollationElementIterator object, users would have to call the 
+ * factory method getCollationElementIterator() in a RuleBasedCollator object
+ * that defines the sorting order that is desired.
+ * </p>
+ * <p>
+ * Example:
 * <blockquote>
 * <pre>
 *  String testString = "This is a test";
- *  RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)Collator.getInstance();
- *  CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
- *  int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
+ *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
+ *  CollationElementIterator collationElementIterator = rbc.getCollationElementIterator(testString);
+ *  int order = collationElementIterator.next();
+ *  while (order != CollationElementIterator.NULLORDER) {
+ *      if (order != CollationElementIterator.IGNORABLE) {
+ *          // order is valid, not ignorable and we have not passed the end
+ *          // of the iteration, we do something
+ *          int primaryOrder = CollationElementIterator.primaryOrder(order);
+ *          System.out.println("Next primary order 0x" + Integer.toHexString(primaryOrder));
+ *      }
+ *      order = collationElementIterator.next();
+ *  }
 * </pre>
 * </blockquote>
 * </p>
 * <p>
- * <code>CollationElementIterator.next</code> returns the collation order
- * of the next character. A collation order consists of primary order,
- * secondary order and tertiary order. The data type of the collation
- * order is <strong>int</strong>. The first 16 bits of a collation order
- * is its primary order; the next 8 bits is the secondary order and the
- * last 8 bits is the tertiary order.</p>
- * @see                Collator
- * @see                RuleBasedCollator
+ * @see Collator
+ * @see RuleBasedCollator
+ * @see StringSearch
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 * @draft 2.2
@ -62,12 +86,22 @@ public final class CollationElementIterator
 	// public data members --------------------------------------------------
 	
    /**
-     * Null order which indicates the end of string is reached
+     * <p>This constant is returned by the iterator in the methods next() and
+     * previous() when the end or the beginning of the source string has been
+     * reached, and there are no more valid collation elements to return.</p>
+     * <p>See class documentation for an example of use.</p>
     * @draft 2.2
+     * @see #next
+     * @see #previous
     */
    public final static int NULLORDER = 0xffffffff;
    /**
-     * Ignorable collation element order.
+     * <p>This constant is returned by the iterator in the methods next() and
+     * previous() when a collation element result is to be ignored.</p>
+     * <p>See class documentation for an example of use.</p>
+     * @draft 2.2
+     * @see #next
+     * @see #previous
     */
    public static final int IGNORABLE = 0;

@ -76,24 +110,25 @@ public final class CollationElementIterator
 	// public getters -------------------------------------------------------
 	
 	/**
-     * <p>Returns the character offset in the original text corresponding to 
-     * the next collation element. (That is, getOffset() returns the position 
-     * in the text corresponding to the collation element that will be 
-     * returned by the next call to next().) This value could be either
+     * <p>Returns the character offset in the source string corresponding to 
+     * the next collation element. i.e. getOffset() returns the position 
+     * in source string corresponding to the collation element that will be 
+     * returned by the next call to next(). This value could be either
     * <ul>
-     * <li>index of the <b>first</b> character corresponding to the next
+     * <li> Index of the <b>first</b> character corresponding to the next
     * collation element. This means that if <code>setOffset(offset)</code> 
     * sets the index in the middle of a contraction, <code>getOffset()</code>
     * returns the index of the first character in the contraction, which
-     * may not be equals to offset.
-     * <li>if normalization is on, <code>getOffset()</code> may return the 
+     * may not be equal to the original offset that was set. Hence calling
+     * getOffset() immediately after setOffset(offset) does not guarantee that
+     * the originally set offset will be returned.
+     * <li> If normalization is on, <code>getOffset()</code> may return the 
     * index of the <b>immediate</b> subsequent character, or composite 
     * character with the first character, having a combining class of 0.
+     * <li> the length of the source string if iteration has reached the end.
     * </ul>
     * </p>
-     * <p>Note calling getOffset() immediately after setOffset(offset) may not
-     * return the value offset.</p>
-     * @return The character offset in the original text corresponding to the 
+     * @return The character offset in the source string corresponding to the 
     *         collation element that will be returned by the next call to 
     *         next().
     * @draft 2.2
@ -111,8 +146,11 @@ public final class CollationElementIterator


    /**
-     * Return the maximum length of any expansion sequences that end with the 
-     * specified collation element.
+     * <p>
+     * Returns the maximum length of any expansion sequence that ends with 
+     * the argument collation element ce. If there is no expansion with the 
+     * argument ce as the last element, 1 is returned.
+     * </p>
     * @param ce a collation element returned by previous() or next().
     * @return the maximum length of any expansion sequences ending
     *         with the specified collation element.
@ -122,9 +160,11 @@ public final class CollationElementIterator
    {
        int start = 0;                                  
  		int limit = m_collator_.m_expansionEndCE_.length;
+  		long unsignedce = ce & 0xFFFFFFFFl;
  		while (start < limit - 1) {
    		int mid = start + ((limit - start) >> 1);              
-    		if (ce <= m_collator_.m_expansionEndCE_[mid]) {              
+    		long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl;
+    		if (unsignedce <= midce) {              
      			limit = mid;                                              
    		}                                                             
    		else {                                                        
@ -135,7 +175,8 @@ public final class CollationElementIterator
  		if (m_collator_.m_expansionEndCE_[start] == ce) {
    		result = m_collator_.m_expansionEndCEMaxSize_[start];
  		}                                                                
-  		else if (m_collator_.m_expansionEndCE_[limit] == ce) {           
+  		else if (limit < m_collator_.m_expansionEndCE_.length &&
+  		         m_collator_.m_expansionEndCE_[limit] == ce) {           
         	result = m_collator_.m_expansionEndCEMaxSize_[limit]; 
       	}                                  
       	else if ((ce & 0xFFFF) == 0x00C0) {
@ -147,34 +188,49 @@ public final class CollationElementIterator
 	// public other methods -------------------------------------------------
 	
 	/**
-     * <p>Resets the cursor to the beginning of the string. The next call
-     * to next() will return the first collation element in the string.</p>
+     * <p>
+     * Resets the cursor to the beginning of the string. The next call
+     * to next() or previous() will return the first or last collation 
+     * element in the string, respectively. 
+     * </p>
+     * <p> 
+     * If the RuleBasedCollator used in this iterator has its attributes 
+     * changed, calling reset() will reinitialize the iterator to use the new
+     * RuleBasedCollator attributes.
+     * </p>
     * @draft 2.2
     */
-    public synchronized void reset()
+    public void reset()
    {
-    	m_source_.setIndex(0);
+    	m_source_.setIndex(m_source_.getBeginIndex());
    	updateInternalState();
    }

    /**
-     * <p>Get the next collation element in the string.</p>  
-     * <p>This iterator iterates over a sequence of collation elements that 
-     * were built from the string. Because there isn't necessarily a 
-     * one-to-one mapping from characters to collation elements, this doesn't 
-     * mean the same thing as "return the collation element [or ordering 
-     * priority] of the next character in the string".</p>
-     * <p>This function returns the collation element that the iterator is 
+     * <p>
+     * Get the next collation element in the source string.
+     * </p>  
+     * <p>
+     * This iterator iterates over a sequence of collation elements that were 
+     * built from the string. Because there isn't necessarily a one-to-one 
+     * mapping from characters to collation elements, this doesn't mean the 
+     * same thing as "return the collation element [or ordering priority] of 
+     * the next character in the string".
+     * </p>
+     * <p>
+     * This function returns the collation element that the iterator is 
     * currently pointing to and then updates the internal pointer to point to 
     * the next element. previous() updates the pointer first and then 
     * returns the element. This means that when you change direction while 
     * iterating (i.e., call next() and then call previous(), or call 
     * previous() and then call next()), you'll get back the same element 
-     * twice.</p>
-     * @return the next collation element 
+     * twice.
+     * </p>
+     * @return the next collation element or NULLORDER if the end of the 
+     *         iteration has been reached.
     * @draft 2.2
     */
-    public synchronized int next()
+    public int next()
    {
    	m_isForwards_ = true;
        if (m_CEBufferSize_ > 0) { 
@ -230,24 +286,30 @@ public final class CollationElementIterator
    }

    /**
-     * <p>Get the previous collation element in the string.</p>  
-     * <p>This iterator iterates over a sequence of collation elements that 
+     * <p>
+     * Get the previous collation element in the source string.
+     * </p>  
+     * <p>
+     * This iterator iterates over a sequence of collation elements that 
     * were built from the string. Because there isn't necessarily a 
     * one-to-one mapping from characters to collation elements, this doesn't 
     * mean the same thing as "return the collation element [or ordering 
-     * priority] of the previous character in the string".</p>
-     * <p>This function updates the iterator's internal pointer to point to 
+     * priority] of the previous character in the string".
+     * </p>
+     * <p>
+     * This function updates the iterator's internal pointer to point to 
     * the collation element preceding the one it's currently pointing to and 
     * then returns that element, while next() returns the current element and 
     * then updates the pointer. This means that when you change direction 
     * while iterating (i.e., call next() and then call previous(), or call 
     * previous() and then call next()), you'll get back the same element 
-     * twice.</p>
+     * twice.
+     * </p>
     * @return the previous collation element, or NULLORDER when the start of 
-     * 			the iteration has been reached.
+     * 		   the iteration has been reached.
     * @draft 2.2
     */
-    public synchronized int previous()
+    public int previous()
    {
    	if (m_source_.getIndex() <= 0 && m_isForwards_) {
    		// if iterator is new or reset, we can immediate perform  backwards
@ -317,50 +379,66 @@ public final class CollationElementIterator
    }

    /**
-     * Return the primary strength of a collation element.
+     * Return the primary order of a collation element ce.
+     * i.e. the first 16 bits of the argument ce.
     * @param ce the collation element
-     * @return the element's primary strength
+     * @return the element's 16 bits primary order.
     * @draft 2.2
     */
    public final static int primaryOrder(int ce)
    {
-        return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) >> CE_PRIMARY_SHIFT_;
+        return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) 
+                >>> RuleBasedCollator.CE_PRIMARY_SHIFT_;
    }
    /**
-     * Return the secondary strength of a collation element.
+     * Return the secondary order of a collation element ce.
+     * i.e. the 8 bits that follow the 16 bit primary order in the argument ce.
     * @param ce the collation element
-     * @return the element's secondary strength
+     * @return the element's 8 bits secondary order
     * @draft 2.2
     */
-    public final static short secondaryOrder(int ce)
+    public final static int secondaryOrder(int ce)
    {
-        return (short)((ce & RuleBasedCollator.CE_SECONDARY_MASK_) 
-        											>> CE_SECONDARY_SHIFT_);
+        return (ce & RuleBasedCollator.CE_SECONDARY_MASK_) 
+               >> RuleBasedCollator.CE_SECONDARY_SHIFT_;
    }
    
    /**
-     * Return the tertiary strength of a collation element.
-     * @param colelem the collation element
-     * @return the element's tertiary strength
+     * Return the tertiary order of a collation element ce. i.e. the last
+     * 8 bits in the argument ce.
+     * @param ce the collation element
+     * @return the element's 8 bits tertiary order
     * @draft 2.2
     */
-    public final static short tertiaryOrder(int ce)
+    public final static int tertiaryOrder(int ce)
    {
-        return (short)(ce & RuleBasedCollator.CE_TERTIARY_MASK_);
+        return ce & RuleBasedCollator.CE_TERTIARY_MASK_;
    }

    /**
-     * <p>Sets the iterator to point to the collation element corresponding to
-     * the specified character (the parameter is a CHARACTER offset in the
-     * original string, not an offset into its corresponding sequence of
-     * collation elements). The value returned by the next call to next()
-     * will be the collation element corresponding to the specified position
-     * in the text. If that position is in the middle of a contracting
-     * character sequence, the result of the next call to next() is the
-     * collation element for that sequence. This means that getOffset()
-     * is not guaranteed to return the same value as was passed to a preceding
-     * call to setOffset().</p>
-     * @param offset new character offset into the original text to set. 
+     * <p>
+     * Sets the iterator to point to the collation element corresponding to
+     * the specified character argument offset. The value returned by the next 
+     * call to next() will be the collation element corresponding to the 
+     * characters at argument offset. 
+     * </p>
+     * <p>
+     * If argument offset is in the middle of a contracting character sequence, 
+     * the iterator is adjusted to the start of the contracting sequence. This 
+     * means that getOffset() is not guaranteed to return the same value as 
+     * the argument offset.
+     * </p>
+     * <p>
+     * If the decomposition mode is on and argument offset is in the middle of 
+     * a decomposable range of source text, the iterator may not return a 
+     * correct result for 
+     * the next forwards or backwards iteration. The user has to ensure that 
+     * the argument offset does not fall in the middle of a decomposable range 
+     * in the source text.
+     * </p>
+     * @param offset character offset into the original source string to 
+     *        set. Note this argument is not an offset into the corresponding 
+     *        sequence of collation elements
     * @draft 2.2
     */
    public void setOffset(int offset)
@ -388,7 +466,7 @@ public final class CollationElementIterator
    			}
    			updateInternalState();
    			int prevoffset = 0;
-    			while (m_source_.getIndex() < offset) {
+    			while (m_source_.getIndex() <= offset) {
    				prevoffset = m_source_.getIndex();
    				next();
    			}	
@ -399,59 +477,36 @@ public final class CollationElementIterator
    }

    /**
-     * <p>Set a new string over which to iterate.</p>
-     * <p>Iteration will start from the start of source.</p>
-     * @param source the new source text.
+     * <p>
+     * Sets a new source string for iteration and restarts the iteration to
+     * start from the beginning of the argument source.
+     * </p>
+     * @param source the new source string for iteration.
     * @draft 2.2
     */
-    public synchronized void setText(String source)
+    public void setText(String source)
    {
    	m_source_ = new StringCharacterIterator(source);
    	updateInternalState();
    }

    /**
-     * <p>Set a new string iterator over which to iterate.</p>
-     * <p>Iteration will start from the start of source.</p>
-     * @param source the new source text.
+     * <p>
+     * Sets a new source string iterator for iteration and restarts the 
+     * iteration to start from the beginning of the argument source.
+     * </p>
+     * @param source the new source string iterator for iteration.
     * @draft 2.2
     */
-    public synchronized void setText(CharacterIterator source)
+    public void setText(CharacterIterator source)
    {
 		m_source_ = source;    	
-		m_source_.setIndex(0);
+		m_source_.setIndex(m_source_.getBeginIndex());
 		updateInternalState();
    }
    
    // public miscellaneous methods -----------------------------------------
    
-	// protected data members -----------------------------------------------
-	
-	/**
-  	 * true if current codepoint was Hiragana
-  	 */
-  	protected boolean m_isCodePointHiragana_;
-  	/**
-  	 * Position in the original string that starts with a non-FCD sequence
-  	 */
-  	protected int m_FCDStart_;
-  	/** 
-	 * This is the CE from CEs buffer that should be returned. 
-	 * Initial value is 0.
-	 * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
-	 * backwards will end with m_CEBufferOffset_ == 0.
-	 * The next/previous after we reach the end/beginning of the m_CEBuffer_
-	 * will cause this value to be reset to 0.
-	 */
-  	protected int m_CEBufferOffset_;
-  	/** 
-  	 * This is the position to which we have stored processed CEs.
-  	 * Initial value is 0.
-  	 * The next/previous after we reach the end/beginning of the m_CEBuffer_
-	 * will cause this value to be reset to 0.
-  	 */
-  	protected int m_CEBufferSize_; 
-  	
 	// protected constructors -----------------------------------------------
 	
 	/**
@ -493,29 +548,95 @@ public final class CollationElementIterator
    	updateInternalState();
    }
    
-    // protected methods ----------------------------------------------------
+    // package private data members -----------------------------------------
+	
+	/**
+  	 * true if current codepoint was Hiragana
+  	 */
+  	boolean m_isCodePointHiragana_;
+  	/**
+  	 * Position in the original string that starts with a non-FCD sequence
+  	 */
+  	int m_FCDStart_;
+  	/** 
+	 * This is the CE from CEs buffer that should be returned. 
+	 * Initial value is 0.
+	 * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
+	 * backwards will end with m_CEBufferOffset_ == 0.
+	 * The next/previous after we reach the end/beginning of the m_CEBuffer_
+	 * will cause this value to be reset to 0.
+	 */
+  	int m_CEBufferOffset_;
+  	/** 
+  	 * This is the position to which we have stored processed CEs.
+  	 * Initial value is 0.
+  	 * The next/previous after we reach the end/beginning of the m_CEBuffer_
+	 * will cause this value to be reset to 0.
+  	 */
+  	int m_CEBufferSize_; 
    
-    /**
-     * Checks if iterator is in the buffer zone
-     * @return true if iterator is in buffer zone, false otherwise
-     */
-    protected boolean isInBuffer()
-    {
-    	return m_bufferOffset_ != -1;
-    }
+    // package private methods ----------------------------------------------
    
    /**
     * Sets the collator used.
     * Internal use, all data members will be reset to the default values
     * @param collator to set
     */
-    protected void setCollator(RuleBasedCollator collator) 
+    void setCollator(RuleBasedCollator collator) 
    {
    	m_collator_ = collator;
    	updateInternalState();
    }
    
-    // private data members -------------------------------------------------
+    /**
+     * <p>Sets the iterator to point to the collation element corresponding to
+     * the specified character (the parameter is a CHARACTER offset in the
+     * original string, not an offset into its corresponding sequence of
+     * collation elements). The value returned by the next call to next()
+     * will be the collation element corresponding to the specified position
+     * in the text. Unlike the public method setOffset(int), this method does 
+     * not try to readjust the offset to the start of a contracting sequence.
+     * getOffset() is guaranteed to return the same value as was passed to a 
+     * preceding call to setExactOffset().</p>
+     * @param offset new character offset into the original text to set. 
+     * @draft 2.2
+     */
+    void setExactOffset(int offset)
+    {  
+	    m_source_.setIndex(offset);
+	    updateInternalState();
+    }
+    
+    /**
+     * Checks if iterator is in the buffer zone
+     * @return true if iterator is in buffer zone, false otherwise
+     */
+    boolean isInBuffer()
+    {
+    	return m_bufferOffset_ != -1;
+    }
+    
+    /**
+ 	 * Determine if a character is a Thai or Lao prevowel (U+0E40..U+0E44 or 
+ 	 * U+0EC0..U+0EC4), i.e. a vowel which sorts after its base consonant.
+ 	 * @param ch character to test
+ 	 * @return true if ch is a Thai or Lao prevowel, false otherwise
+ 	 */
+	static final boolean isThaiPreVowel(char ch)
+	{ 
+		return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
+	}
+
+	/**
+ 	 * Determine if a character is a Thai base consonant, which sorts before 
+ 	 * its prevowel
+ 	 * @param ch character to test
+ 	 * @return true if ch is a Thai base consonant, false otherwise
+ 	 */
+	static final boolean isThaiBaseConsonant(char ch)
+	{
+		return ch >= 0xe01 && ch <= 0xe2e;
+	}
    
    // private inner class --------------------------------------------------
    
@ -675,8 +796,6 @@ public final class CollationElementIterator
    private static final int CE_LONG_PRIMARY_TAG_ = 12; 
    private static final int CE_CE_TAGS_COUNT = 13;
   	private static final int CE_BYTE_COMMON_ = 0x05;
-   	private static final int CE_PRIMARY_SHIFT_ = 16;
-   	private static final int CE_SECONDARY_SHIFT_ = 8;
   	
 	// end special ce values and tags ---------------------------------------
 	
@ -773,21 +892,19 @@ public final class CollationElementIterator
 	 * Source offsets points to the current processing character.
 	 * </p>
 	 */
-	private void normalize()
+	private void normalize() 
 	{
-		/* synwee todo normalize to 1 before fcd
-		try {
-			decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_,
-    	          	  m_collator_.m_decomposition_);
-		} 
-		catch (ArrayOutOfBoundsException e) {
-			// increase the size of the buffer
-			m_buffer_ = new char[m_buffer_.length << 1];
-        	decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_,
-    	          	  m_collator_.m_decomposition_);
-    	}
-		*/
-    	m_bufferOffset_ = 0;
+		int size = m_FCDLimit_ - m_FCDStart_;
+		m_buffer_.delete(0, m_buffer_.length());
+		m_source_.setIndex(m_FCDStart_);
+		for (int i = 0; i < size; i ++) {
+			m_buffer_.append(m_source_.current());
+			m_source_.next();
+		}
+		String decomp = Normalizer.decompose(m_buffer_.toString(), false);
+		m_buffer_.delete(0, m_buffer_.length());
+		m_buffer_.append(decomp);
+		m_bufferOffset_ = 0;
 	}
 	
 	/** 
@ -811,24 +928,22 @@ public final class CollationElementIterator
 	{
    	boolean result = true;

-    	// srcP = collationSource->pos-1;
-    	
-		// Get the trailing combining class of the current character.  
+    	// Get the trailing combining class of the current character.  
 		// If it's zero, we are OK.
    	m_FCDStart_ = offset;
    	m_source_.setIndex(offset);
    	// trie access
-    	char fcd = 0; // synwee todo: unorm_getFCD16(ch);
+    	char fcd = NormalizerImpl.getFCD16(ch);
    	if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
    		ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd
            if (UTF16.isTrailSurrogate(ch)) {
-               	fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch);
+               	fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
            } else {
               	fcd = 0;
            }
        }

-        byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_);
+        int prevTrailCC = fcd & LAST_BYTE_MASK_;

        if (prevTrailCC != 0) {
        	// The current char has a non-zero trailing CC. Scan forward until 
@ -839,16 +954,16 @@ public final class CollationElementIterator
            		break;
            	}
                // trie access
-                fcd = 0; // unorm_getFCD16(ch);
+                fcd = NormalizerImpl.getFCD16(ch);
                if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
                	ch = m_source_.next();
                    if (UTF16.isTrailSurrogate(ch)) {
-                        fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch);
+                        fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
                    } else {
                        fcd = 0;
                    }
                }
-                byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_);
+                int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
                if (leadCC == 0) {
                	// this is a base character, we stop the FCD checks
                    break;
@ -858,12 +973,12 @@ public final class CollationElementIterator
                    result = false;
                }

-                prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_);
+                prevTrailCC = fcd & LAST_BYTE_MASK_;
            }
        }
+        m_FCDLimit_ = m_source_.getIndex();
        m_source_.setIndex(m_FCDStart_);
        m_source_.next();
-        m_FCDLimit_ = m_source_.getIndex();
    	return result;
 	}
 	
@ -885,8 +1000,7 @@ public final class CollationElementIterator
 	    }
 		else {
 	        // we are in the buffer, buffer offset will never be 0 here
-	        result = m_buffer_.charAt(m_bufferOffset_ ++);
-	        if (result == 0) {
+	        if (m_bufferOffset_ >= m_buffer_.length()) {
 	            // Null marked end of buffer, revert to the source string and
 	            // loop back to top to try again to get a character.
 	            m_source_.setIndex(m_FCDLimit_);
@ -894,10 +1008,10 @@ public final class CollationElementIterator
 	            m_buffer_.delete(0, m_buffer_.length());
 	            return nextChar();
 	        }
-	        return result;
+	        return m_buffer_.charAt(m_bufferOffset_ ++);
 		}
 	
-	    if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION 
+	    if (m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION 
 	        || m_bufferOffset_ != -1 || m_FCDLimit_ > startoffset
 	        // skip the fcd checks
 	  		|| result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_  
@ -934,20 +1048,10 @@ public final class CollationElementIterator
 	* the buffer.
 	* Source offsets points to the current processing character.</p>
 	*/
-	public void normalizeBackwards()
+	private void normalizeBackwards()
 	{
-	    int start = m_FCDStart_;
-		int size = 0;
-	    /* synwee todo normalize including fcd
-	    try {
-	    	size = decompose(m_buffer_, m_source_, start, m_FCDLimit_);
-		}
-		catch (ArrayOutOfBoundsException .) {
-	    	m_buffer_ = new char[m_buffer_.length << 1];
-	    	size = decompose(m_buffer_, m_source_, start, m_FCDLimit);
-	    }
-	    */
-	    m_bufferOffset_ = size - 1;
+	    normalize();
+	    m_bufferOffset_ = m_buffer_.length();
 	}

 	/**
@ -972,18 +1076,20 @@ public final class CollationElementIterator
 	{
 	    boolean result = true;    
 	    char fcd = 0; 
-	    m_FCDLimit_ = offset;
+	    m_FCDLimit_ = offset + 1;
 	    m_source_.setIndex(offset);
 	    if (!UTF16.isSurrogate(ch)) {
-	        fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c);
+	        fcd = NormalizerImpl.getFCD16(ch);
 	    } 
 	    else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) { 
 	    	// note trail surrogate characters gets 0 fcd
+	    	char trailch = ch;
 	    	ch = m_source_.previous();  
 	       	if (UTF16.isLeadSurrogate(ch)) {
-	        	fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2);
+	        	fcd = NormalizerImpl.getFCD16(ch);
 	        	if (fcd != 0) {
-	            	fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
+	            	fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, 
+	            													trailch);
 	        	}
 	    	} 
 	    	else {
@ -991,44 +1097,47 @@ public final class CollationElementIterator
 	    	}
 	    }
 	
-	    byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_);
-	    if (leadCC != 0) {
-	        // The current char has a non-zero leading combining class.
-	        // Scan backward until we find a char with a trailing cc of zero.
-	        while (true) {
-	            if (m_source_.getIndex() == 0) {
-	                break;
-	            }
+	    int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
+	    // The current char has a non-zero leading combining class.
+	    // Scan backward until we find a char with a trailing cc of zero.
+        
+	    while (leadCC != 0) {
+            offset = m_source_.getIndex();
+            if (offset == 0) {
+	            break;
+	        }
+	        ch = m_source_.previous();
+	        if (!UTF16.isSurrogate(ch)) {
+	            fcd = NormalizerImpl.getFCD16(ch);
+	        } 
+	        else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
+	            char trail = ch;
 	            ch = m_source_.previous();
-	            if (!UTF16.isSurrogate(ch)) {
-	                fcd = 0; //unorm_getFCD16(fcdTrieIndex, c);
-	            } 
-	            else {
-	            	if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) 
-	            	{
-	            		ch = m_source_.previous();
-	            	    if (UTF16.isLeadSurrogate(ch)) {
-	                		fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2);
-	            	    }
-	            		if (fcd != 0) {
-	                   		fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
-	                	}
-	            	} else {
-	                	fcd = 0; // unpaired surrogate
-	            	}
-	            	byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_);
-	            	if (prevTrailCC == 0) {
-	                	break;
-	            	}
-	
-	            	if (leadCC < prevTrailCC) {
-	                	result = false;
-	            	}
-	            	leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_);
-	        	}
-	    	}
+	            if (UTF16.isLeadSurrogate(ch)) {
+	                fcd = NormalizerImpl.getFCD16(ch);
+	            }
+	            if (fcd != 0) {
+	                fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, trail);
+	            }
+            }
+            else {
+	            fcd = 0; // unpaired surrogate
+	        }
+	        int prevTrailCC = fcd & LAST_BYTE_MASK_;
+	        if (leadCC < prevTrailCC) {
+	            result = false;
+	        }
+	        leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_;
 	    }
-	    m_FCDStart_ = m_source_.getIndex(); // character with 0 lead/trail fcd
+	
+        // storing character with 0 lead fcd or the 1st accent with a base 
+        // character before it   
+        if (fcd == 0) {
+            m_FCDStart_ = offset;
+        }
+        else {
+            m_FCDStart_ = m_source_.getIndex();
+        } 
 	    m_source_.setIndex(m_FCDLimit_);
 	    return result;
 	}
@ -1062,7 +1171,7 @@ public final class CollationElementIterator
 		char result = m_source_.previous();
 		int startoffset = m_source_.getIndex();
 	    if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ 
-	        || m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION 
+	        || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION 
 	        || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
 	        return result;
 	    }
@ -1073,7 +1182,7 @@ public final class CollationElementIterator
 	        return result;
 	    }
 	    // Need a more complete FCD check and possible normalization.
-	    if (!FCDCheckBackwards(ch, startoffset)) {
+	    if (!FCDCheckBackwards(result, startoffset)) {
 	        normalizeBackwards();
 	        m_bufferOffset_ --;
 	        result = m_buffer_.charAt(m_bufferOffset_);
@ -1085,52 +1194,17 @@ public final class CollationElementIterator
 	 * Determines if it is at the start of source iteration
 	 * @return true if iterator at the start, false otherwise
 	 */
-	private boolean isBackwardsStart() 
+	private final boolean isBackwardsStart() 
 	{
    	return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0)
    	        || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0);
 	}
 	
-	/**
- 	 * Determine if a character is a Thai vowel, which sorts after its base 
- 	 * consonant.
- 	 * @param ch character to test
- 	 * @return true if ch is a Thai prevowel, false otherwise
- 	 */
-	private boolean isThaiPreVowel(char ch)
-	{ 
-		return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
-	}
-
-	/**
- 	 * Determine if a character is a Thai base consonant, which sorts before 
- 	 * its prevowel
- 	 * @param ch character to test
- 	 * @return true if ch is a Thai base consonant, false otherwise
- 	 */
-	private boolean isThaiBaseConsonant(char ch)
-	{
-		return ch >= 0xe01 && ch <= 0xe2e;
-	}
-	
-	
-	/**
- 	 * Determine if a character is a Jamo
- 	 * @param ch character to test
- 	 * @return true if ch is a Jamo, false otherwise
- 	 */
-	private boolean isJamo(char ch)
-	{ 
-		return (ch - 0x1100 <= 0x1112 - 0x1100) 
-		       || (ch - 0x1161 <= 0x1175 - 0x1161) 
-		       || (ch - 0x11A8 <= 0x11C2 - 0x11A8);
-	}
-	
 	/**
 	 * Checks if iterator is at the end of its source string.
 	 * @return true if it is at the end, false otherwise
 	 */
-	private boolean isEnd() 
+	private final boolean isEnd() 
 	{
    	if (m_bufferOffset_ >= 0) {
    		if (m_bufferOffset_ != m_buffer_.length()) {
@ -1155,7 +1229,8 @@ public final class CollationElementIterator
 	 * @param trail character
 	 * @return next CE for the surrogate characters
 	 */
-	private int nextSurrogate(RuleBasedCollator collator, int ce, char trail)
+	private final int nextSurrogate(RuleBasedCollator collator, int ce, 
+	                                char trail)
 	{
 		if (!UTF16.isTrailSurrogate(trail)) {
 	        updateInternalState(m_backup_);
@ -1188,7 +1263,7 @@ public final class CollationElementIterator
 	 * @param ch current character
 	 * @return next CE for Thai characters
 	 */
-	private int nextThai(RuleBasedCollator collator, int ce, char ch) 
+	private int nextThai(RuleBasedCollator collator, int ce, char ch)
 	{
 		if (m_bufferOffset_ != -1 // already swapped
 		    || isEnd() || !isThaiBaseConsonant(m_source_.current())) {
@ -1430,6 +1505,7 @@ public final class CollationElementIterator
 	 * @param collator collator to use
 	 * @param ce current ce
 	 * @param entrybackup entry backup iterator status
+	 * @return ce of the next contraction
 	 */
 	private int nextContraction(RuleBasedCollator collator, int ce)
 	{
@ -1895,7 +1971,7 @@ public final class CollationElementIterator
 	        return collator.m_contractionCE_[entryoffset];
 	    }
 	    StringBuffer buffer = new StringBuffer();
-	    while (collator.isUnsafe(ch)) {
+	    while (collator.isUnsafe(ch) || isThaiBaseConsonant(ch)) {
 	        buffer.insert(0, ch);
 	        ch = previousChar();
 	        if (isBackwardsStart()) {
--- a/icu4j/src/com/ibm/icu/text/CollationKey.java
+++ b/icu4j/src/com/ibm/icu/text/CollationKey.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $ 
-* $Date: 2002/05/16 20:04:49 $ 
-* $Revision: 1.5 $
+* $Date: 2002/06/21 23:56:44 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -15,28 +15,42 @@ package com.ibm.icu.text;
 import java.util.Arrays;

 /**
- * <p>A <code>CollationKey</code> represents a <code>String</code> under the
+ * <p>
+ * A <code>CollationKey</code> represents a <code>String</code> under the
 * rules of a specific <code>Collator</code> object. Comparing two
 * <code>CollationKey</code>s returns the relative order of the
- * <code>String</code>s they represent. Using <code>CollationKey</code>s to 
- * compare <code>String</code>s is generally faster than using 
- * <code>Collator.compare</code>. Thus, when the <code>String</code>s must be 
- * compared multiple times, for example when sorting a list of 
- * <code>String</code>s. It's more efficient to use <code>CollationKey</code>s.
+ * <code>String</code>s they represent.
+ * </p>
+ * <p>
+ * <code>CollationKey</code> instances cannot be created directly. Rather, 
+ * they are generated by calling <code>Collator.getCollationKey(String)</code>. 
+ * Since the rule set of each <code>Collator</code> differs, the sort orders of 
+ * the same string under two different <code>Collator</code>s may not be the same. 
+ * Hence comparing <code>CollationKey</code>s generated from different 
+ * <code>Collator</code> objects may not give the right results.
+ * </p>
+ * <p>
+ * Similar to <code>CollationKey.compareTo(CollationKey)</code>, 
+ * the method <code>RuleBasedCollator.compare(String, String)</code> compares
+ * two strings and returns the relative order. During the construction
+ * of a <code>CollationKey</code> object, the entire source string is examined
+ * and processed into a series of bits that are stored in the 
+ * <code>CollationKey</code> object. Bitwise comparison on the bit sequences 
+ * is then performed during <code>CollationKey.compareTo(CollationKey)</code>. 
+ * This comparison could incur expensive startup costs while creating 
+ * the <code>CollationKey</code> object, but once the objects are created, 
+ * binary comparisons are fast, so this approach is recommended when the same 
+ * strings are to be compared over and over again. 
+ * On the other hand, <code>Collator.compare(String, String)</code> examines 
+ * and processes the strings only until the first characters differing in order,
+ * and is recommended for use if the <code>String</code>s are to be compared only
+ * once.
+ * </p>
+ * <p>
+ * Details of the composition of the bit sequence are located in the
+ * <a href="http://oss.software.ibm.com/icu/userguide/Collate_ServiceArchitecture.html">
+ * user guide</a>.
 * </p>
- * <p>You can not create <code>CollationKey</code>s directly. Rather, generate 
- * them by calling <code>Collator.getCollationKey(String)</code>. You can only 
- * compare <code>CollationKey</code>s generated from the same 
- * <code>Collator</code> object.</p>
- * <p>Generating a <code>CollationKey</code> for a <code>String</code>
- * involves examining the entire <code>String</code> and converting it to 
- * series of bits that can be compared bitwise. This allows fast comparisons 
- * once the keys are generated. The cost of generating keys is recouped in 
- * faster comparisons when <code>String</code>s need to be compared many 
- * times. On the other hand, the result of a comparison is often determined by 
- * the first couple of characters of each <code>String</code>.
- * <code>Collator.compare(String, String)</code> examines only as many characters as it needs 
- * which allows it to be faster when doing single comparisons.</p>
 * <p>The following example shows how <code>CollationKey</code>s might be used
 * to sort a list of <code>String</code>s.</p>
 * <blockquote>
@ -63,7 +77,7 @@ import java.util.Arrays;
 * System.out.println( keys[2].getSourceString() );
 * </pre>
 * </blockquote>
- *
+ * </p>
 * @see Collator
 * @see RuleBasedCollator
 * @author Syn Wee Quek
@ -77,7 +91,7 @@ public final class CollationKey implements Comparable
 	// public getters -------------------------------------------------------
 	
    /**
-     * Returns the String that this CollationKey represents.
+     * Returns the source string that this CollationKey represents.
     * @return source string that this CollationKey represents
     * @draft 2.2
     */
@ -87,11 +101,44 @@ public final class CollationKey implements Comparable
    }

    /**
-     * <p>Duplicates and returns the value of this CollationKey as a sequence 
-     * of big-endian bytes.</p> 
-     * <p>If two CollationKeys could be legitimately compared, then one could 
-     * compare the byte arrays of each to obtain the same result.</p>  
-     * @return CollationKey value in a sequence of big-endian byte bytes.
+     * <p>
+     * Duplicates and returns the value of this CollationKey as a sequence 
+     * of big-endian bytes terminated by a null.
+     * </p> 
+     * <p>
+     * If two CollationKeys could be legitimately compared, then one could 
+     * compare the byte arrays of each to obtain the same result.
+     * <pre>
+     * byte key1[] = collationkey1.toByteArray();
+     * byte key2[] = collationkey2.toByteArray();
+     * int i = 0;
+     * for (; key1[i] != 0 && key2[i] != 0; i ++) {
+     *     int key = key1[i] & 0xFF;
+     *     int targetkey = key2[i] & 0xFF;
+     *     if (key &lt; targetkey) {
+     *         System.out.println("String 1 is less than string 2");
+     *         return;
+     *     }
+     *     if (targetkey &lt; key) {
+     *         System.out.println("String 1 is more than string 2");
+     *         return;
+     *     }
+     * }
+     * int key = key1[i] & 0xFF;
+     * int targetkey = key2[i] & 0xFF;
+     * if (key &lt; targetkey) {
+     *     System.out.println("String 1 is less than string 2");
+     *     return;
+     * }
+     * if (targetkey &lt; key) {
+     *     System.out.println("String 1 is more than string 2");
+     *     return;
+     * }
+     * System.out.println("String 1 is equal to string 2");
+     * </pre>
+     * </p>  
+     * @return CollationKey value in a sequence of big-endian bytes 
+     *         terminated by a null.
     * @draft 2.2
     */
    public byte[] toByteArray() 
@ -112,15 +159,22 @@ public final class CollationKey implements Comparable
 	// public other methods -------------------------------------------------	
 	
    /**
-     * <p>Compare this CollationKey to the target CollationKey. The collation 
-     * rules of the Collator object which created these keys are applied.</p>
-     * <p><strong>Note:</strong> CollationKeys created by different Collators 
-     * can not be compared.</p>
+     * <p>
+     * Compare this CollationKey to the argument target CollationKey. 
+     * The collation 
+     * rules of the Collator object which created these keys are applied.
+     * </p>
+     * <p>
+     * <strong>Note:</strong> Comparison between CollationKeys created by 
+     * different Collators may not return the correct result. See class 
+     * documentation.
+     * </p>
     * @param target target CollationKey
     * @return an integer value, if value is less than zero this CollationKey
     *         is less than than target, if value is zero if they are equal 
     *         and value is greater than zero if this CollationKey is greater 
     *         than target.
+     * @exception NullPointerException thrown when argument is null.
     * @see Collator#compare(String, String)
     * @draft 2.2
     */
@ -151,13 +205,21 @@ public final class CollationKey implements Comparable
    }

    /**
-     * <p>Compares this CollationKey with the specified Object.</p>
+     * <p>
+     * Compares this CollationKey with the specified Object.
+     * The collation 
+     * rules of the Collator object which created these objects are applied.
+     * </p>
+     * <p>
+     * See note in compareTo(CollationKey) for warnings of incorrect results
+     * </p>
     * @param obj the Object to be compared.
     * @return Returns a negative integer, zero, or a positive integer 
     *         respectively if this CollationKey is less than, equal to, or 
     *         greater than the given Object.
-     * @exception ClassCastException thrown when the specified Object is not a
-     *		      CollationKey.
+     * @exception ClassCastException thrown when the specified argument is not 
+     *            a CollationKey.
+     * @exception NullPointerException thrown when argument is null.
     * @see #compareTo(CollationKey)
     * @draft 2.2
     */
@ -167,22 +229,52 @@ public final class CollationKey implements Comparable
    }

    /**
-     * <p>Compare this CollationKey and the target CollationKey for equality.
+     * <p>
+     * Compare this CollationKey and the argument target object for equality.
+     * The collation 
+     * rules of the Collator object which created these objects are applied.
     * </p>
-     * <p>The collation rules of the Collator object which created these keys 
-     * are applied.</p>
-     * <p><strong>Note:</strong> CollationKeys created by different Collators 
-     * can not be compared.</p>
-     * @param target the CollationKey to compare to.
+     * <p>
+     * See note in compareTo(CollationKey) for warnings of incorrect results
+     * </p>
+     * @param target the object to compare to.
     * @return true if two objects are equal, false otherwise.
+     *         False is returned, rather than an exception thrown, when the 
+     *         argument is null or is not a CollationKey.
+     * @see #compareTo(CollationKey)
+     * @see #equals(CollationKey)
     * @draft 2.2
     */
    public boolean equals(Object target) 
+    {
+        if (!(target instanceof CollationKey)) {
+            return false;
+        }
+        
+        return equals((CollationKey)target);
+    }
+    
+    /**
+     * <p>
+     * Compare this CollationKey and the argument target CollationKey for 
+     * equality.
+     * The collation 
+     * rules of the Collator object which created these objects are applied.
+     * </p>
+     * <p>
+     * See note in compareTo(CollationKey) for warnings of incorrect results
+     * </p>
+     * @param target the CollationKey to compare to.
+     * @return true if two objects are equal, false otherwise. A null target
+     *         yields false rather than a NullPointerException.
+     * @draft 2.2
+     */
+    public boolean equals(CollationKey target) 
    {
        if (this == target) {
        	return true;
        }
-        if (target == null || !(target instanceof CollationKey)) {
+        if (target == null) {
            return false;
        }
        CollationKey other = (CollationKey)target;
@ -200,12 +292,13 @@ public final class CollationKey implements Comparable
    }

    /**
-     * <p>Creates a hash code for this CollationKey. The hash value is 
-     * calculated on the key itself, not the String from which the key was 
-     * created. Thus if x and y are CollationKeys, then 
-     * x.hashCode(x) == y.hashCode() if x.equals(y) is true. This allows 
-     * language-sensitive comparison in a hash table.</p>
-     * <p>See the CollatinKey class description for an example.</p>
+     * <p>
+     * Creates a hash code for this CollationKey. The hash value is calculated 
+     * on the key itself, not the String from which the key was created. Thus 
+     * if x and y are CollationKeys, then x.hashCode() == y.hashCode() 
+     * if x.equals(y) is true. This allows language-sensitive comparison in a 
+     * hash table.
+     * </p>
     * @return the hash value.
     * @draft 2.2
     */
--- a/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
+++ b/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
--- a/icu4j/src/com/ibm/icu/text/CollationRuleParser.java
+++ b/icu4j/src/com/ibm/icu/text/CollationRuleParser.java
--- a/icu4j/src/com/ibm/icu/text/Collator.java
+++ b/icu4j/src/com/ibm/icu/text/Collator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $ 
-* $Date: 2002/05/20 23:43:01 $ 
-* $Revision: 1.6 $
+* $Date: 2002/06/21 23:56:44 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -15,57 +15,103 @@ package com.ibm.icu.text;
 import java.util.Locale;

 /**
-* <p>The Collator class performs locale-sensitive String comparison. 
-* You use this class to build searching and sorting routines for natural 
-* language text.</p> 
-* <p>Collator is an abstract base class. Subclasses implement specific 
-* collation strategies. One subclass, RuleBasedCollator, is currently 
-* provided and is applicable to a wide set of languages. Other subclasses 
-* may be created to handle more specialized needs.</p>
-* <p>Like other locale-sensitive classes, you can use the static factory 
-* method, getInstance, to obtain the appropriate Collator object for a given 
-* locale. You will only need to look at the subclasses of Collator if you need 
-* to understand the details of a particular collation strategy or if you need 
-* to modify that strategy. </p>
-* <p>The following example shows how to compare two strings using the Collator 
-* for the default locale. 
+* <p>
+* Collator is an abstract base class; its subclasses perform 
+* locale-sensitive String comparison. A concrete subclass, RuleBasedCollator, 
+* is provided and it allows customization of the collation ordering by the use 
+* of rule sets.
+* </p>
+* <p>
+* Following the 
+* <a href=http://www.unicode.org>Unicode Consortium</a>'s specifications for
+* the <a href=http://www.unicode.org/unicode/reports/tr10/>
+* Unicode Collation Algorithm (UCA)</a>, there are
+* 5 different levels of strength used in comparisons.
+* <ul>
+* <li>PRIMARY strength: Typically, this is used to denote differences between 
+*     base characters (for example, "a" &lt; "b"). 
+*     It is the strongest difference. For example, dictionaries are divided 
+*     into different sections by base character. 
+* <li>SECONDARY strength: Accents in the characters are considered secondary 
+*     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other 
+*     differences 
+*     between letters can also be considered secondary differences, depending 
+*     on the language. A secondary difference is ignored when there is a 
+*     primary difference anywhere in the strings.
+* <li>TERTIARY strength: Upper and lower case differences in characters are 
+*     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt; 
+*     "a&ograve;"). In addition, a variant of a letter differs from the base 
+*     form on the tertiary strength (such as "A" and "&#9398;"). Another 
+*     example is the 
+*     difference between large and small Kana. A tertiary difference is ignored 
+*     when there is a primary or secondary difference anywhere in the strings. 
+* <li>QUATERNARY strength: When punctuation is ignored 
+*     <a href=http://www-124.ibm.com/icu/userguide/Collate_Concepts.html#Ignoring_Punctuation>
+*     (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY 
+*     strength, an additional strength level can 
+*     be used to distinguish words with and without punctuation (for example, 
+*     "ab" &lt; "a-b" &lt; "aB"). 
+*     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY 
+*     difference. The QUATERNARY strength should only be used if ignoring 
+*     punctuation is required. 
+* <li>IDENTICAL strength:
+*     When all other strengths are equal, the IDENTICAL strength is used as a 
+*     tiebreaker. The Unicode code point values of the NFD form of each string 
+*     are compared, just in case there is no difference. 
+*     For example, Hebrew cantillation marks are only distinguished at this 
+*     strength. This strength should be used sparingly, as two strings that 
+*     differ only in their code point values are an extremely rare occurrence. 
+*     Using this strength substantially decreases the performance for both 
+*     comparison and collation key generation APIs. This strength also 
+*     increases the size of the collation key.
+* </ul>
+* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes, 
+* the canonical decomposition mode and one that does not use any decomposition.
+* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
+* is not supported here. If the canonical
+* decomposition mode is set, the Collator handles un-normalized text properly, 
+* producing the same results as if the text were normalized in NFD. If 
+* canonical decomposition is turned off, it is the user's responsibility to 
+* ensure that all text is already in the appropriate form before performing
+* a comparison or before getting a CollationKey.
+* </p>
+* <p>
+* For more information about the collation service see the 
+* <a href="http://oss.software.ibm.com/icu/userguide/Collate_Intro.html">users 
+* guide</a>.
+* </p>
+* <p>
+* Examples of use
 * <pre>
-* // Compare two strings in the default locale
-* Collator myCollator = Collator.getInstance();
-* if (myCollator.compare("abc", "ABC") < 0) {
-*     System.out.println("abc is less than ABC");
-* }
-* else {
-*     System.out.println("abc is greater than or equal to ABC");
-* }
-* </pre>
-* <p>You can set a <code>Collator</code>'s <em>strength</em> property to 
-* determine the level of difference considered significant in comparisons. 
-* Four strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, 
-* <code>TERTIARY</code>, and <code>IDENTICAL</code>. The exact assignment of 
-* strengths to language features is locale dependant. For example, in Czech, 
-* "e" and "f" are considered primary differences, while "e" and "\u00EA" are 
-* secondary differences, "e" and "E" are tertiary differences and "e" and "e" 
-* are identical. The following shows how both case and accents could be 
-* ignored for US English.</p>
-* <pre>
-* //Get the Collator for US English and set its strength to PRIMARY
+* // Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if (usCollator.compare("abc", "ABC") == 0) {
 *     System.out.println("Strings are equivalent");
 * }
+* 
+* The following example shows how to compare two strings using the Collator 
+* for the default locale. 
+* // Compare two strings in the default locale
+* Collator myCollator = Collator.getInstance();
+* myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
+* if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
+*     System.out.println("&agrave;&#92;u0325 is not equal to a&#92;u0325&#768; without decomposition");
+*     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
+*     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
+*         System.out.println("Error: &agrave;&#92;u0325 should be equal to a&#92;u0325&#768; with decomposition");
+*     }
+*     else {
+*         System.out.println("&agrave;&#92;u0325 is equal to a&#92;u0325&#768; with decomposition");
+*     }
+* }
+* else {
+*     System.out.println("Error: &agrave;&#92;u0325 should not be equal to a&#92;u0325&#768; without decomposition");
+* }
 * </pre>
-* <p>For comparing Strings exactly once, the compare method provides the best 
-* performance. When sorting a list of Strings however, it is generally 
-* necessary to compare each String multiple times. In this case, 
-* CollationKeys provide better performance. The CollationKey class converts a 
-* String to a series of bits that can be compared bitwise against other 
-* CollationKeys. A CollationKey is created by a Collator object for a given 
-* String.</p> 
-* <p>Note: CollationKeys from different Collators can not be compared. See the 
-* class description for CollationKey for an example using CollationKeys. 
 * </p>
+* @see RuleBasedCollator
+* @see CollationKey
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 * @draft 2.2
@ -76,92 +122,92 @@ public abstract class Collator
 	// public data members ---------------------------------------------------
 	
 	/**
-     * Collator strength value. When set, only PRIMARY differences are
-     * considered significant during comparison. The assignment of strengths
-     * to language features is locale dependant. A common example is for
-     * different base letters ("a" vs "b") to be considered a PRIMARY 
-     * difference.
+     * Strongest collator strength value. Typically, used to denote differences 
+     * between base characters.
+     * See class documentation for more explanation.
     * @see #setStrength
     * @see #getStrength
     * @draft 2.2
     */
-    public final static int PRIMARY 
-    							= RuleBasedCollator.AttributeValue.PRIMARY_;
+    public final static int PRIMARY = 0;
    /**
-     * Collator strength value. When set, only SECONDARY and above 
-     * differences are considered significant during comparison. The 
-     * assignment of strengths to language features is locale dependant. A 
-     * common example is for different accented forms of the same base letter 
-     * ("a" vs "\u00E4") to be considered a SECONDARY difference.
+     * Second level collator strength value. 
+     * Accents in the characters are considered secondary differences.
+     * Other differences between letters can also be considered secondary 
+     * differences, depending on the language. 
+     * See class documentation for more explanation.
     * @see #setStrength
     * @see #getStrength
     * @draft 2.2
     */
-    public final static int SECONDARY 
-    							= RuleBasedCollator.AttributeValue.SECONDARY_;
+    public final static int SECONDARY = 1;
    /**
-     * Collator strength value. When set, only TERTIARY and above differences 
-     * are considered significant during comparison. The assignment of 
-     * strengths to language features is locale dependant. A common example is 
-     * for case differences ("a" vs "A") to be considered a TERTIARY 
-     * difference.
+     * Third level collator strength value. 
+     * Upper and lower case differences in characters are distinguished at this
+     * strength level. In addition, a variant of a letter differs from the base 
+     * form on the tertiary level.
+     * See class documentation for more explanation.
     * @see #setStrength
     * @see #getStrength
     * @draft 2.2
     */
-    public final static int TERTIARY 
-    							= RuleBasedCollator.AttributeValue.TERTIARY_;
-                                   
+    public final static int TERTIARY = 2;                            
    /**
-     * Collator strength value. When set, only QUARTENARY and above differences 
-     * are considered significant during comparison. The assignment of 
-     * strengths to language features is locale dependant.
-     * difference.
+     * Fourth level collator strength value. 
+     * When punctuation is ignored 
+     * <a href=http://www-124.ibm.com/icu/userguide/Collate_Concepts.html#Ignoring_Punctuation>
+     * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY 
+     * strength, an additional strength level can 
+     * be used to distinguish words with and without punctuation
+     * See class documentation for more explanation.
     * @see #setStrength
     * @see #getStrength
     * @draft 2.2
     */
-    public final static int QUATERNARY 
-    							= RuleBasedCollator.AttributeValue.QUATERNARY_;
-
+    public final static int QUATERNARY = 3;
    /**
-     * <p>Collator strength value. When set, all differences are considered 
-     * significant during comparison. The assignment of strengths to language 
-     * features is locale dependant. A common example is for control 
-     * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at 
-     * the PRIMARY, SECONDARY, and TERTIARY levels but different at the 
-     * IDENTICAL level.  Additionally, differences between pre-composed 
-     * accents such as "&#092;u00C0" (A-grave) and combining accents such as 
-     * "A&#092;u0300" (A, combining-grave) will be considered significant at 
-     * the tertiary level if decomposition is set to NO_DECOMPOSITION.
+     * <p>
+     * Smallest Collator strength value. When all other strengths are equal, 
+     * the IDENTICAL strength is used as a tiebreaker. The Unicode code point 
+     * values of the NFD form of each string are compared, just in case there 
+     * is no difference. 
+     * See class documentation for more explanation.
+     * </p>
+     * <p>
+     * Note this value is different from JDK's
     * </p>
-     * <p>Note this value is different from JDK's</p>
     * @draft 2.2
     */
-    public final static int IDENTICAL 
-    							= RuleBasedCollator.AttributeValue.IDENTICAL_;
+    public final static int IDENTICAL = 15;

    /**
-     * <p>Decomposition mode value. With NO_DECOMPOSITION set, accented 
-     * characters will not be decomposed for collation. This is the default 
-     * setting and provides the fastest collation but will only produce 
-     * correct results for languages that do not use accents.</p>
-     * <p>Note this value is different from JDK's</p>
+     * <p>
+     * Decomposition mode value. With NO_DECOMPOSITION set, Strings will not be 
+     * decomposed for collation. This is the default 
+     * decomposition setting unless otherwise specified by the locale used
+     * to create the Collator.
+     * </p>
+     * <p>
+     * Note this value is different from JDK's
+     * </p>
+     * @see #CANONICAL_DECOMPOSITION
     * @see #getDecomposition
     * @see #setDecomposition
     * @draft 2.2
     */
-    public final static int NO_DECOMPOSITION 
-    							= RuleBasedCollator.AttributeValue.OFF_;
-
+    public final static int NO_DECOMPOSITION = 16;
    /**
-     * <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set, 
+     * <p>
+     * Decomposition mode value. With CANONICAL_DECOMPOSITION set, 
     * characters that are canonical variants according to Unicode 2.0 will be 
-     * decomposed for collation. This should be used to get correct collation 
-     * of accented characters.</p>
-     * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
+     * decomposed for collation.
+     * </p>
+     * <p>
+     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
     * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
-     * Unicode Technical Report #15</a>.</p>
+     * Unicode Technical Report #15</a>.
+     * </p>
+     * @see #NO_DECOMPOSITION
     * @see #getDecomposition
     * @see #setDecomposition
     * @draft 2.2
@ -173,9 +219,15 @@ public abstract class Collator
    // public setters --------------------------------------------------------
    
    /**
-     * <p>Sets this Collator's strength property. The strength property 
+     * <p>
+     * Sets this Collator's strength property. The strength property 
     * determines the minimum level of difference considered significant 
-     * during comparison.</p>
+     * during comparison.
+     * </p>
+     * <p> 
+     * The default strength for the Collator is TERTIARY, unless specified 
+     * otherwise by the locale used to create the Collator.
+     * </p>
     * <p>See the Collator class description for an example of use.</p>
     * @param the new strength value.
     * @see #getStrength
@ -185,10 +237,11 @@ public abstract class Collator
     * @see #QUATERNARY
     * @see #IDENTICAL
     * @exception IllegalArgumentException If the new strength value is not one 
-     * 				of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+     * 		      of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
     * @draft 2.2
     */
-    public void setStrength(int newStrength) {
+    public void setStrength(int newStrength) 
+    {
        if ((newStrength != PRIMARY) &&
            (newStrength != SECONDARY) &&
            (newStrength != TERTIARY) &&
@ -200,18 +253,38 @@ public abstract class Collator
    }
    
    /**
-     * Set the decomposition mode of this Collator. See getDecomposition
-     * for a description of decomposition mode.
+     * <p>
+     * Set the decomposition mode of this Collator. 
+     * Setting this decomposition property with CANONICAL_DECOMPOSITION allows 
+     * the Collator to handle 
+     * un-normalized text properly, producing the same results as if the text 
+     * were normalized. If NO_DECOMPOSITION is set, it is the user's 
+     * responsibility to ensure that all text is already in the appropriate 
+     * form before a comparison or before getting a CollationKey. Adjusting
+     * decomposition mode allows the user to select between faster and more
+     * complete collation behavior.
+     * </p>
+     * <p>
+     * Since the great majority of the world's languages do not require text
+     * normalization, most locales have NO_DECOMPOSITION as the default 
+     * decomposition mode.</p>
+     * <p>
+     * The default decomposition mode for the Collator is NO_DECOMPOSITION, 
+     * unless specified otherwise by the locale used to create the Collator.
+     * </p>
+     * <p>
+     * See getDecomposition for a description of decomposition mode.
+     * </p>
     * @param decomposition the new decomposition mode
     * @see #getDecomposition
     * @see #NO_DECOMPOSITION
     * @see #CANONICAL_DECOMPOSITION
-     * @see #FULL_DECOMPOSITION
-     * @exception IllegalArgumentException If the given value is not a valid decomposition
-     * mode.
+     * @exception IllegalArgumentException If the given value is not a valid 
+     *            decomposition mode.
     * @draft 2.2
     */
-    public void setDecomposition(int decomposition) {
+    public void setDecomposition(int decomposition) 
+    {
        if ((decomposition != NO_DECOMPOSITION) &&
            (decomposition != CANONICAL_DECOMPOSITION)) {
            throw new IllegalArgumentException("Wrong decomposition mode.");
@ -225,9 +298,11 @@ public abstract class Collator
     * Gets the Collator for the current default locale.
     * The default locale is determined by java.util.Locale.getDefault().
     * @return the Collator for the default locale (for example, en_US) if it
-     *         is created successfully, otherwise if there is a failure,
-     *         null will be returned.
+     *         is created successfully. Otherwise if there is no Collator
+     *         associated with the current locale, the default UCA collator 
+     *         will be returned.
     * @see java.util.Locale#getDefault
+     * @see #getInstance(Locale)
     * @draft 2.2
     */
    public static final Collator getInstance() 
@ -238,11 +313,13 @@ public abstract class Collator
    /**
     * Gets the Collator for the desired locale.
     * @param locale the desired locale.
-     * @return Collator for the desired locale if it is created successfully,
-     *         otherwise if there is a failure, the default UCA collator will 
-     * 		   be returned.
+     * @return Collator for the desired locale if it is created successfully.
+     *         Otherwise if there is no Collator
+     *         associated with the desired locale, the default UCA collator 
+     *         will be returned.
     * @see java.util.Locale
     * @see java.util.ResourceBundle
+     * @see #getInstance()
     * @draft 2.2
     */
    public static final Collator getInstance(Locale locale)
@ -256,15 +333,19 @@ public abstract class Collator
    }
    
    /**
-     * <p>Returns this Collator's strength property. The strength property 
-     * determines the minimum level of difference considered significant 
-     * during comparison.</p>
-     * <p>See the Collator class description for an example of use.</p>
+     * <p>
+     * Returns this Collator's strength property. The strength property 
+     * determines the minimum level of difference considered significant.
+     * </p>
+     * <p>
+     * See the Collator class description for more details.
+     * </p>
     * @return this Collator's current strength property.
     * @see #setStrength
     * @see #PRIMARY
     * @see #SECONDARY
     * @see #TERTIARY
+     * @see #QUATERNARY
     * @see #IDENTICAL
     * @draft 2.2
     */
@ -274,24 +355,17 @@ public abstract class Collator
    }
    
    /**
-     * <p>Get the decomposition mode of this Collator. Decomposition mode
-     * determines how Unicode composed characters are handled. Adjusting
-     * decomposition mode allows the user to select between faster and more
-     * complete collation behavior.
-     * <p>The three values for decomposition mode are:
-     * <UL>
-     * <LI>NO_DECOMPOSITION,
-     * <LI>CANONICAL_DECOMPOSITION
-     * <LI>FULL_DECOMPOSITION.
-     * </UL>
-     * See the documentation for these three constants for a description
-     * of their meaning.
+     * <p>
+     * Get the decomposition mode of this Collator. Decomposition mode
+     * determines how Unicode composed characters are handled. 
+     * </p>
+     * <p>
+     * See the Collator class description for more details.
     * </p>
     * @return the decomposition mode
     * @see #setDecomposition
     * @see #NO_DECOMPOSITION
     * @see #CANONICAL_DECOMPOSITION
-     * @see #FULL_DECOMPOSITION
     * @draft 2.2
     */
    public int getDecomposition()
@ -302,91 +376,68 @@ public abstract class Collator
    // public other methods -------------------------------------------------

    /**
-     * Convenience method for comparing the equality of two strings based on
-     * this Collator's collation rules.
+     * Convenience method for comparing the equality of two text Strings based 
+     * on this Collator's collation rules, strength and decomposition mode.
     * @param source the source string to be compared with.
     * @param target the target string to be compared with.
     * @return true if the strings are equal according to the collation
     *         rules. false, otherwise.
     * @see #compare
+     * @exception NullPointerException thrown if either argument is null.
     * @draft 2.2
     */
-    public boolean equals(String source, String target)
+    public boolean equals(String source, String target) 
    {
        return (compare(source, target) == 0);
    }
-	    
-    /**
-     * Cloning this Collator.
-     * @return a cloned Collator of this object
-     * @draft 2.2
-     */
-    public Object clone()
-    {
-        try {
-            return (Collator)super.clone();
-        } catch (CloneNotSupportedException e) {
-            throw new InternalError();
-        }
-    }

    /**
     * Compares the equality of two Collators.
     * @param that the Collator to be compared with this.
     * @return true if this Collator is the same as that Collator;
-     * false otherwise.
+     *         false otherwise.
     * @draft 2.2
     */
-    public boolean equals(Object that)
-    {
-        if (this == that) {
-        	return true;
-        }
-        if (that == null || getClass() != that.getClass()) {
-        	return false;
-        }
-        Collator other = (Collator) that;
-        return ((m_strength_ == other.m_strength_) &&
-                (m_decomposition_ == other.m_decomposition_));
-    }
+    public abstract boolean equals(Object that);
    
    // public abstract methods -----------------------------------------------

    /**
-     * Generates the hash code for this Collator.
+     * Generates a hash code for this Collator.
     * @draft 2.2
+     * @return 32 bit hash code
     */
    public abstract int hashCode();
    
    /**
-     * <p>Compares the source string to the target string according to the
-     * collation rules for this Collator. Returns an integer less than, equal 
-     * to or greater than zero depending on whether the source String is less 
-     * than, equal to or greater than the target string. See the Collator
-     * class description for an example of use.</p>
-     * <p>For a one time comparison, this method has the best performance. If 
-     * a given String will be involved in multiple comparisons, 
-     * CollationKey.compareTo() has the best performance. See the Collator 
-     * class description for an example using CollationKeys.</p>
-     * @param source the source string.
-     * @param target the target string.
+     * <p>
+     * Compares the source text String to the target text String according to 
+     * the collation rules, strength and decomposition mode for this Collator. 
+     * Returns an integer less than, 
+     * equal to or greater than zero depending on whether the source String is 
+     * less than, equal to or greater than the target String. See the Collator
+     * class description for an example of use.
+     * </p>
+     * @param source the source String.
+     * @param target the target String.
     * @return Returns an integer value. Value is less than zero if source is 
     *         less than target, value is zero if source and target are equal, 
     *         value is greater than zero if source is greater than target.
     * @see CollationKey
     * @see #getCollationKey
+     * @exception NullPointerException thrown if either argument is null.
     * @draft 2.2
     */
    public abstract int compare(String source, String target);

    /**
-     * <p>Transforms the String into a series of bits that can be compared 
-     * bitwise to other CollationKeys. CollationKeys provide better 
-     * performance than Collator.compare() when Strings are involved in 
-     * multiple comparisons.</p> 
-     * <p>See the Collator class description for an example using 
-     * CollationKeys.</p>
-     * @param source the string to be transformed into a collation key.
+     * <p>
+     * Transforms the String into a series of bits that can be compared 
+     * bitwise to other CollationKeys. The bits generated depend on the collation
+     * rules, strength and decomposition mode.
+     * </p> 
+     * <p>See the CollationKey class documentation for more information.</p>
+     * @param source the string to be transformed into a CollationKey.
     * @return the CollationKey for the given String based on this Collator's 
     *         collation rules. If the source String is null, a null 
     *         CollationKey is returned.
@ -396,35 +447,18 @@ public abstract class Collator
     */
    public abstract CollationKey getCollationKey(String source);
    
-    // protected data members ------------------------------------------------
+    // protected constructor -------------------------------------------------
+
+  
+    // private data members --------------------------------------------------
    
    /**
     * Collation strength
     */
-    protected int m_strength_;
+    private int m_strength_ = TERTIARY;
    /**
     * Decomposition mode
     */ 
-    protected int m_decomposition_;
-    
-    // protected constructor -------------------------------------------------
-    
-    /**
-    * <p>Protected constructor for use by subclasses. 
-    * Public access to creating Collators is handled by the API getInstance().
-    * </p>
-    * @draft 2.2
-    */
-    protected Collator() throws Exception
-    {
-    	m_strength_ = TERTIARY;
-    	m_decomposition_ = CANONICAL_DECOMPOSITION;
-    }
-  
-    // protected methods -----------------------------------------------------
-    
-    // private variables -----------------------------------------------------
-
-    // private methods -------------------------------------------------------
+    private int m_decomposition_ = CANONICAL_DECOMPOSITION;
 }

--- a/icu4j/src/com/ibm/icu/text/CollatorReader.java
+++ b/icu4j/src/com/ibm/icu/text/CollatorReader.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $ 
-* $Date: 2002/05/16 20:04:49 $ 
-* $Revision: 1.2 $
+* $Date: 2002/06/21 23:56:47 $ 
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -140,26 +140,28 @@ final class CollatorReader
     * @exception IOException thrown when there's a data error.
     * @draft 2.2
     */
-    public void readOptions(RuleBasedCollator rbc) throws IOException
+    protected void readOptions(RuleBasedCollator rbc) throws IOException
    {
    	rbc.m_variableTopValue_ = m_dataInputStream_.readInt();
-    	rbc.setAttributeDefault(RuleBasedCollator.Attribute.FRENCH_COLLATION_,
-    	                 m_dataInputStream_.readInt());
-    	rbc.setAttributeDefault(
-    	                 RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
-    	                 m_dataInputStream_.readInt());
-    	rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_FIRST_,
-    	                 m_dataInputStream_.readInt());
-      	rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_LEVEL_,
-    	                 m_dataInputStream_.readInt());
-      	rbc.setAttributeDefault(
-      	                 RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
-    	                 m_dataInputStream_.readInt());
-      	rbc.setAttributeDefault(RuleBasedCollator.Attribute.STRENGTH_,
-    	                 m_dataInputStream_.readInt());
-		rbc.setAttributeDefault(
-		                 RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
-    	                 m_dataInputStream_.readInt());
+    	rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() 
+    	                                == RuleBasedCollator.AttributeValue.ON_);
+        rbc.m_defaultIsAlternateHandlingShifted_ 
+                                   = (m_dataInputStream_.readInt() == 
+                                    RuleBasedCollator.AttributeValue.SHIFTED_);
+        rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
+        rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() 
+                                     == RuleBasedCollator.AttributeValue.ON_);
+        int value = m_dataInputStream_.readInt();
+    	if (value == RuleBasedCollator.AttributeValue.ON_) {
+    		value = Collator.CANONICAL_DECOMPOSITION;
+    	}
+    	else {
+    		value = Collator.NO_DECOMPOSITION;
+    	}
+    	rbc.m_defaultDecomposition_ = value;
+    	rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
+    	rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() 
+    	                             == RuleBasedCollator.AttributeValue.ON_);
    }
    
    /**
@ -169,7 +171,7 @@ final class CollatorReader
    * @exception IOException thrown when there's a data error.
    * @draft 2.2
    */
-    public void read(RuleBasedCollator rbc) throws IOException
+    protected void read(RuleBasedCollator rbc) throws IOException
    {
    	readHeader(rbc);
    	readOptions(rbc);
@ -188,7 +190,8 @@ final class CollatorReader
    	for (int i = 0; i < m_contractionCESize_; i ++) {
    		rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
    	}
-    	rbc.m_trie_ = new IntTrie(m_dataInputStream_, rbc);
+    	rbc.m_trie_ = new IntTrie(m_dataInputStream_, 
+                           	  RuleBasedCollator.DataManipulate.getInstance());
    	if (!rbc.m_trie_.isLatin1Linear()) {
    		throw new IOException("Data corrupted, " 
    		                      + "Collator Tries expected to have linear "
@ -213,6 +216,43 @@ final class CollatorReader
    	}
    }
    
+    /**
+     * Reads in the inverse uca data
+     * @param inputStream input stream with the inverse uca data
+     * @return an object containing the inverse uca data
+     * @exception IOException thrown when error occurs while reading the 
+     *            inverse uca
+     */
+    protected static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
+                                                      InputStream inputStream)
+                                                      throws IOException
+    {
+        ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, 
+                             DATA_FORMAT_VERSION_, UNICODE_VERSION_);
+        CollationParsedRuleBuilder.InverseUCA result = 
+                                  new CollationParsedRuleBuilder.InverseUCA();
+        DataInputStream input = new DataInputStream(inputStream);        
+        int bytesize = input.readInt();
+        int tablesize = input.readInt(); // in int size
+        int contsize = input.readInt();  // in char size
+        int table = input.readInt(); // in bytes
+        int conts = input.readInt(); // in bytes
+        int size = tablesize * 3; // one column for each strength
+        result.m_table_ = new int[size];
+        result.m_continuations_ = new char[contsize];
+        
+        for (int i = 0; i < size; i ++) {
+            result.m_table_[i] = input.readInt();
+        }
+        for (int i = 0; i < contsize; i ++) {
+            result.m_continuations_[i] = input.readChar();
+        }
+        input.close();
+        return result;
+    }
+    
+    // private inner class -----------------------------------------------
+    
    // private variables -------------------------------------------------
  
    /**
@ -231,6 +271,14 @@ final class CollatorReader
    private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0, 
                                                    (byte)0x0, (byte)0x0};
    /**
+    * Inverse UCA file format version and id that this class understands.
+    * No guarantees are made if an older version is used.
+    */
+    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, 
+                                                               (byte)0x6e,  
+                                                               (byte)0x76, 
+                                                               (byte)0x43};
+    /**
    * Corrupted error string
    */
    private static final String CORRUPTED_DATA_ERROR_ =
--- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
--- a/icu4j/src/com/ibm/icu/text/SearchIterator.java
+++ b/icu4j/src/com/ibm/icu/text/SearchIterator.java
--- a/icu4j/src/com/ibm/icu/text/StringSearch.java
+++ b/icu4j/src/com/ibm/icu/text/StringSearch.java