ICU-1897

Bugs solved: * Japanese case level * more stringent checks on CE buffer size. X-SVN-Rev: 8665
2002-05-22 01:14:38 +00:00 · 2002-05-22 01:14:38 +00:00 · 076095ae7e
commit 076095ae7e
parent 6b4f81d44d
2 changed files with 75 additions and 49 deletions
--- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
@ -800,12 +800,14 @@ public final class CollationElementIterator
 	 * <li>The leading combining class from the current character is 0 or the 
 	 *     trailing combining class of the previous char was zero.
 	 * </ul>
-	 * Incoming source offsets points to the next processing character.
+	 * Incoming source offsets points to the current processing character.
 	 * Return source offsets points to the current processing character.
 	 * </p>
+	 * @param ch current character
+	 * @param offset current character offset
 	 * @return true if FCDCheck passes, false otherwise
 	 */
-	private boolean FCDCheck() 
+	private boolean FCDCheck(char ch, int offset) 
 	{
    	boolean result = true;

@ -813,8 +815,8 @@ public final class CollationElementIterator
    	
 		// Get the trailing combining class of the current character.  
 		// If it's zero, we are OK.
-    	char ch = m_source_.previous();
-    	m_FCDStart_ = m_source_.getIndex();
+    	m_FCDStart_ = offset;
+    	m_source_.setIndex(offset);
    	// trie access
    	char fcd = 0; // synwee todo: unorm_getFCD16(ch);
    	if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
@ -860,6 +862,7 @@ public final class CollationElementIterator
            }
        }
        m_source_.setIndex(m_FCDStart_);
+        m_source_.next();
        m_FCDLimit_ = m_source_.getIndex();
    	return result;
 	}
@ -873,7 +876,8 @@ public final class CollationElementIterator
 	private char nextChar()
 	{
 		char result;
-    	// loop handles the next character whether it is in the buffer or not.
+		int startoffset = m_source_.getIndex();
+		// loop handles the next character whether it is in the buffer or not.
 	    if (m_bufferOffset_ == -1) {
 	        // we're working on the source and not normalizing. fast path.
 	        // note Thai pre-vowel reordering uses buffer too
@ -890,22 +894,24 @@ public final class CollationElementIterator
 	            m_buffer_.delete(0, m_buffer_.length());
 	            return nextChar();
 	        }
+	        return result;
 		}
 	
 	    if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION 
-	        || m_bufferOffset_ != -1 || m_FCDLimit_ > m_source_.getIndex()
+	        || m_bufferOffset_ != -1 || m_FCDLimit_ > startoffset
 	        // skip the fcd checks
 	  		|| result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_  
 	   		// Fast fcd safe path. trail combining class == 0.
 	   		) {
-	   		m_source_.next();
+	   		
+	        m_source_.next();
 	   		return result;
 	    }
 		
-	    if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
+		if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
 	        // We need to peek at the next character in order to tell if we are 
 	        // FCD
-	        char next = m_source_.next(); 
+	        char next = m_source_.next();
 	        if (next == CharacterIterator.DONE 
 	            || next == LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
 	            return result; // end of source string and if next character 
@ -914,18 +920,17 @@ public final class CollationElementIterator
 	    }
 	
 	    // Need a more complete FCD check and possible normalization.
-	    if (!FCDCheck()) {
+	    if (!FCDCheck(result, startoffset)) {
 	        normalize();
 	        result = m_buffer_.charAt(0);
 	        m_bufferOffset_ = 1;	  
 	    }	
-	    m_source_.next();
 	    return result;
 	}
 	
 	/**
 	* <p>Incremental normalization, this is an essential optimization.
-	*7 Assuming FCD checks has been done, normalize the non-FCD characters into 
+	* Assuming FCD checks have been done, normalize the non-FCD characters into 
 	* the buffer.
 	* Source offsets points to the current processing character.</p>
 	*/
@ -959,14 +964,16 @@ public final class CollationElementIterator
 	 * Input source offsets points to the previous character.
 	 * Return source offsets points to the current processing character.
 	 * </p>
+	 * @param ch current character
+	 * @param offset current character offset
 	 * @return true if FCDCheck passes, false otherwise
-	*/
-	private boolean FCDCheckBackwards()
+	 */
+	private boolean FCDCheckBackwards(char ch, int offset)
 	{
 	    boolean result = true;    
-	    char ch = m_source_.next();
 	    char fcd = 0; 
-	    m_FCDLimit_ = m_source_.getIndex();
+	    m_FCDLimit_ = offset;
+	    m_source_.setIndex(offset);
 	    if (!UTF16.isSurrogate(ch)) {
 	        fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c);
 	    } 
@ -1053,10 +1060,10 @@ public final class CollationElementIterator
 	        }
 		}    
 		char result = m_source_.previous();
+		int startoffset = m_source_.getIndex();
 	    if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ 
 	        || m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION 
-	        || m_FCDStart_ <= m_source_.getIndex()
-	        || m_source_.getIndex() == 0) {
+	        || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
 	        return result;
 	    }
 	    char ch = m_source_.previous();
@ -1066,7 +1073,7 @@ public final class CollationElementIterator
 	        return result;
 	    }
 	    // Need a more complete FCD check and possible normalization.
-	    if (!FCDCheckBackwards()) {
+	    if (!FCDCheckBackwards(ch, startoffset)) {
 	        normalizeBackwards();
 	        m_bufferOffset_ --;
 	        result = m_buffer_.charAt(m_bufferOffset_);
--- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $ 
-* $Date: 2002/05/20 23:43:01 $ 
-* $Revision: 1.6 $
+* $Date: 2002/05/22 01:14:38 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -809,7 +809,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	      	}
 	    }
 	
-		if (compare[4]) {  // checkQuad
+		if (doShift4) {  // checkQuad
 	      	result = doQuaternaryCompare(cebuffer, lowestpvalue);
 	      	if (result != 0) {
 	      		return result;
@ -1320,7 +1320,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	protected void updateInternalState() 
 	{
      	if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
-        	m_caseSwitch_ = CASE_SWITCH_;
+        	m_caseSwitch_ = (byte)CASE_SWITCH_;
      	} 
      	else {
        	m_caseSwitch_ = NO_CASE_SWITCH_;
@ -1334,7 +1334,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
        	m_bottom3_ = COMMON_BOTTOM_3_;
      	} 
      	else {
-        	m_mask3_ = CE_KEEP_CASE_;
+        	m_mask3_ = (byte)CE_KEEP_CASE_;
        	m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
        	if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
          		m_common3_ = COMMON_UPPER_FIRST_3_;
@ -1518,18 +1518,18 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	/**
 	 * Case first constants
 	 */
-	private static final byte CASE_SWITCH_ = (byte)0xC0;
-	private static final byte NO_CASE_SWITCH_ = 0;
+	private static final int CASE_SWITCH_ = 0xC0;
+	private static final int NO_CASE_SWITCH_ = 0;
 	/**
 	 * Case level constants
 	 */
-	private static final byte CE_REMOVE_CASE_ = (byte)0x3F;
-	private static final byte CE_KEEP_CASE_ = (byte)0xFF;
+	private static final int CE_REMOVE_CASE_ = 0x3F;
+	private static final int CE_KEEP_CASE_ = 0xFF;
 	/**
 	 * Case strength mask
 	 */
-	private static final byte CE_CASE_BIT_MASK_ = (byte)0xC0;
-	private static final byte CE_CASE_MASK_3_ = (byte)0xFF;
+	private static final int CE_CASE_BIT_MASK_ = 0xC0;
+	private static final int CE_CASE_MASK_3_ = 0xFF;
 	/** 
 	 * Sortkey size factor. Values can be changed.
 	 */
@ -2320,7 +2320,8 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 		if (bytes[1].length <= bytescount[1] + isize) {
        	bytes[1] = increase(bytes[1], bytescount[1], 1 + isize);
        }
-        BOSCU.writeIdenticalLevelRun(source, bytes[1], bytescount[1]); 
+        bytescount[1] = BOSCU.writeIdenticalLevelRun(source, bytes[1], 
+        											 bytescount[1]); 
 	}
 	
 	/**
@ -2533,16 +2534,20 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	{
 		// if we reach here, the ce offset accessed is the last ce
 		// appended to the buffer
-		boolean isNullOrder = (cebuffer[0][cebuffersize[0] - 1] 
-			 					== CollationElementIterator.NULLORDER);
-			 					
+		boolean isSourceNullOrder = (cebuffer[0][cebuffersize[0] - 1] 
+			 							== CollationElementIterator.NULLORDER);
+		boolean isTargetNullOrder = (cebuffer[1][cebuffersize[1] - 1] 
+			 							== CollationElementIterator.NULLORDER);	 					
 		cebuffer[0] = null;
 	    cebuffer[1] = null;
 	    cebuffersize[0] = 0;
 	    cebuffersize[1] = 0;
-	    if (isNullOrder) {
+	    if (isSourceNullOrder) {
 	    	return -1;
 	    }
+	    if (isTargetNullOrder) {
+	    	return 1;
+	    }
 	    // getting rid of the sign
 	    sorder >>>= CE_PRIMARY_SHIFT_;
 	    torder >>>= CE_PRIMARY_SHIFT_;
@ -2685,6 +2690,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	          				CollationElementIterator.NULLORDER) {
 	          			return -1;
 	          		}
+	          		if (cebuffer[1][toffset - 1] == 
+	          				CollationElementIterator.NULLORDER) {
+	          			return 1;
+	          		}
 	               	return (sorder < torder) ? -1 : 1;
 	          	}
 	        }
@ -2696,11 +2705,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	        	int sorder = getSecondaryFrenchCE(cebuffer, offset, 
 	        										continuationoffset, 0);
 	        	int torder = getSecondaryFrenchCE(cebuffer, offset, 
-	        										continuationoffset,1);
+	        										continuationoffset, 1);
 	          	if (sorder == torder) {
-	            	if (cebuffer[0][offset[0] - 1] 
-	            						== CollationElementIterator.NULLORDER	            					 
-	            		|| (offset[0] < 0 && offset[1] < 0)) {
+	            	if ((offset[0] < 0 && offset[1] < 0) 
+	            		|| cebuffer[0][offset[0]] 
+	            					== CollationElementIterator.NULLORDER) {
 	              		break;
 	            	} 
 	          	} 
@ -2729,17 +2738,19 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	    while (result == CollationElementIterator.IGNORABLE 
 	    		&& offset[index] >= 0) {
 	        if (continuationoffset[index] == 0) {
-	        	while (isContinuation(cebuffer[0][offset[index] --]));
-	            // after this, sorder is at the start of continuation, 
-	            // and offset points before that 
-	            if (isContinuation(cebuffer[0][offset[index] + 1])) {
-	            	// save offset for later
-	            	continuationoffset[index] = offset[index]; 
-	            	offset[index] += 2;  
-	           	}
+	        	result = cebuffer[index][offset[index]];
+		        while (isContinuation(cebuffer[index][offset[index] --]));
+		            // after this, sorder is at the start of continuation, 
+		            // and offset points before that 
+		            if (isContinuation(cebuffer[index][offset[index] + 1])) {
+		            	// save offset for later
+		            	continuationoffset[index] = offset[index]; 
+		            	offset[index] += 2;  
+		           	}
+	        	//}
 	        }
 	        else {
-	        	result = cebuffer[0][offset[index] ++];
+	        	result = cebuffer[index][offset[index] ++];
 	        	if (!isContinuation(result)) { 
 	        		// we have finished with this continuation
 	           		offset[index] = continuationoffset[index];
@ -2780,7 +2791,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	        while ((torder & CE_REMOVE_CASE_) 
 	        						== CollationElementIterator.IGNORABLE) {
 	        	torder = cebuffer[1][toffset ++];
-	          	if (!isContinuation(sorder)) {
+	          	if (!isContinuation(torder)) {
 	            	torder &= CE_CASE_MASK_3_;
 	            	torder ^= m_caseSwitch_;
 	          	} 
@ -2853,6 +2864,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	          							CollationElementIterator.NULLORDER) {
 	          		return -1;
 	          	}
+	          	if (cebuffer[1][toffset - 1] == 
+	          				CollationElementIterator.NULLORDER) {
+	          		return 1;
+	          	}
 	            return (sorder < torder) ? -1 : 1;
 	        }
 	    }
@ -2927,6 +2942,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
 	          		CollationElementIterator.NULLORDER) {
 	          		return -1;
 	          	}
+	          	if (cebuffer[1][toffset - 1] == 
+	          		CollationElementIterator.NULLORDER) {
+	          		return 1;
+	          	}
 	            return (sorder < torder) ? -1 : 1;
 	        }
 	    }