ICU-2212 Utilized UCharacterIterator in Collation
X-SVN-Rev: 13361
This commit is contained in:
parent
1fde66b031
commit
1a8abc5b66
@ -353,7 +353,8 @@ public class CollationAPITest extends TestFmwk {
|
|||||||
CharacterIterator chariter=new StringCharacterIterator(testString1);
|
CharacterIterator chariter=new StringCharacterIterator(testString1);
|
||||||
// copy ctor
|
// copy ctor
|
||||||
CollationElementIterator iterator2 = ((RuleBasedCollator)col).getCollationElementIterator(chariter);
|
CollationElementIterator iterator2 = ((RuleBasedCollator)col).getCollationElementIterator(chariter);
|
||||||
CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(testString2);
|
UCharacterIterator uchariter=UCharacterIterator.getInstance(testString2);
|
||||||
|
CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(uchariter);
|
||||||
|
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = iterator1.getOffset();
|
offset = iterator1.getOffset();
|
||||||
|
@ -426,12 +426,23 @@ public class CollationIteratorTest extends TestFmwk {
|
|||||||
//now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
|
//now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
|
||||||
CharacterIterator chariter = new StringCharacterIterator(test1);
|
CharacterIterator chariter = new StringCharacterIterator(test1);
|
||||||
try {
|
try {
|
||||||
iter2.setText(chariter);
|
iter2.setText(chariter);
|
||||||
} catch (Exception e ) {
|
} catch (Exception e ) {
|
||||||
errln("call to iter2->setText(chariter(test1)) failed.");
|
errln("call to iter2->setText(chariter(test1)) failed.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
assertEqual(iter1, iter2);
|
assertEqual(iter1, iter2);
|
||||||
|
|
||||||
|
iter1.reset();
|
||||||
|
//now use the overloaded setText(UCharacterIterator) function to set the text
|
||||||
|
UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
|
||||||
|
try {
|
||||||
|
iter2.setText(uchariter);
|
||||||
|
} catch (Exception e ) {
|
||||||
|
errln("call to iter2->setText(uchariter(test1)) failed.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assertEqual(iter1, iter2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
263
icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java
Normal file
263
icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
/*
|
||||||
|
*******************************************************************************
|
||||||
|
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||||
|
* others. All Rights Reserved. *
|
||||||
|
*******************************************************************************
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java,v $
|
||||||
|
* $Date: 2003/10/08 21:51:43 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*
|
||||||
|
*******************************************************************************
|
||||||
|
*/
|
||||||
|
package com.ibm.icu.impl;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.UCharacterIterator;
|
||||||
|
import com.ibm.icu.text.UTF16;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used by Collation. UCharacterIterator on Strings. Can't use
|
||||||
|
* ReplaceableUCharacterIterator because it is not easy to do a fast setText.
|
||||||
|
* @author synwee
|
||||||
|
*/
|
||||||
|
public final class StringUCharacterIterator extends UCharacterIterator
|
||||||
|
{
|
||||||
|
|
||||||
|
// public constructor ------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public constructor
|
||||||
|
* @param str text which the iterator will be based on
|
||||||
|
*/
|
||||||
|
public StringUCharacterIterator(String str)
|
||||||
|
{
|
||||||
|
if (str == null) {
|
||||||
|
throw new IllegalArgumentException();
|
||||||
|
}
|
||||||
|
m_text_ = str;
|
||||||
|
m_currentIndex_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public default constructor
|
||||||
|
*/
|
||||||
|
public StringUCharacterIterator()
|
||||||
|
{
|
||||||
|
m_text_ = "";
|
||||||
|
m_currentIndex_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public methods ----------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a copy of this iterator, does not clone the underlying
|
||||||
|
* <code>String</code> object
|
||||||
|
* @return copy of this iterator
|
||||||
|
*/
|
||||||
|
public Object clone()
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
return super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
return null; // never invoked
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current UTF16 character.
|
||||||
|
* @return current UTF16 character
|
||||||
|
*/
|
||||||
|
public int current()
|
||||||
|
{
|
||||||
|
if (m_currentIndex_ < m_text_.length()) {
|
||||||
|
return m_text_.charAt(m_currentIndex_);
|
||||||
|
}
|
||||||
|
return DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current codepoint
|
||||||
|
* @return current codepoint
|
||||||
|
*/
|
||||||
|
public int currentCodePoint()
|
||||||
|
{
|
||||||
|
// cannot use charAt due to its different
|
||||||
|
// behaviour when index is pointing at a
|
||||||
|
// trail surrogate, check for surrogates
|
||||||
|
|
||||||
|
if (m_currentIndex_ >= m_text_.length()) {
|
||||||
|
return DONE;
|
||||||
|
}
|
||||||
|
char ch = m_text_.charAt(m_currentIndex_);
|
||||||
|
if (UTF16.isLeadSurrogate(ch)) {
|
||||||
|
// advance the index to get the next code point
|
||||||
|
m_currentIndex_ ++;
|
||||||
|
if (m_currentIndex_ < m_text_.length()) {
|
||||||
|
// due to post increment semantics current() after next()
|
||||||
|
// actually returns the next char which is what we want
|
||||||
|
char ch2 = m_text_.charAt(m_currentIndex_);
|
||||||
|
|
||||||
|
if (UTF16.isTrailSurrogate(ch2)) {
|
||||||
|
// we found a surrogate pair
|
||||||
|
return UCharacterProperty.getRawSupplementary(ch, ch2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// current should never change the current index so back off
|
||||||
|
m_currentIndex_ --;
|
||||||
|
}
|
||||||
|
return ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length of the text
|
||||||
|
* @return length of the text
|
||||||
|
*/
|
||||||
|
public int getLength()
|
||||||
|
{
|
||||||
|
return m_text_.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current currentIndex in text.
|
||||||
|
* @return current currentIndex in text.
|
||||||
|
*/
|
||||||
|
public int getIndex()
|
||||||
|
{
|
||||||
|
return m_currentIndex_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns next UTF16 character and increments the iterator's currentIndex
|
||||||
|
* by 1.
|
||||||
|
* If the resulting currentIndex is greater or equal to the text length,
|
||||||
|
* the currentIndex is reset to the text length and a value of DONE is
|
||||||
|
* returned.
|
||||||
|
* @return next UTF16 character in text or DONE if the new currentIndex is
|
||||||
|
* off the end of the text range.
|
||||||
|
*/
|
||||||
|
public int next()
|
||||||
|
{
|
||||||
|
if (m_currentIndex_ < m_text_.length())
|
||||||
|
{
|
||||||
|
return m_text_.charAt(m_currentIndex_ ++);
|
||||||
|
}
|
||||||
|
return DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns previous UTF16 character and decrements the iterator's
|
||||||
|
* currentIndex by 1.
|
||||||
|
* If the resulting currentIndex is less than 0, the currentIndex is reset
|
||||||
|
* to 0 and a value of DONE is returned.
|
||||||
|
* @return previous UTF16 character in text or DONE if the new currentIndex is
|
||||||
|
* off the start of the text range.
|
||||||
|
*/
|
||||||
|
public int previous()
|
||||||
|
{
|
||||||
|
if (m_currentIndex_ > 0) {
|
||||||
|
return m_text_.charAt(-- m_currentIndex_);
|
||||||
|
}
|
||||||
|
return DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Sets the currentIndex to the specified currentIndex in the text and
|
||||||
|
* returns that single UTF16 character at currentIndex.
|
||||||
|
* This assumes the text is stored as 16-bit code units.</p>
|
||||||
|
* @param currentIndex the currentIndex within the text.
|
||||||
|
* @exception IndexOutOfBoundsException is thrown if an invalid currentIndex
|
||||||
|
* is supplied. i.e. currentIndex is out of bounds.
|
||||||
|
* @return the character at the specified currentIndex or DONE if the
|
||||||
|
* specified currentIndex is equal to the end of the text.
|
||||||
|
*/
|
||||||
|
public void setIndex(int currentIndex) throws IndexOutOfBoundsException
|
||||||
|
{
|
||||||
|
if (currentIndex < 0 || currentIndex > m_text_.length()) {
|
||||||
|
throw new IndexOutOfBoundsException();
|
||||||
|
}
|
||||||
|
m_currentIndex_ = currentIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fills the buffer with the underlying text storage of the iterator
|
||||||
|
* If the buffer capacity is not enough an exception is thrown. The capacity
|
||||||
|
* of the fill in buffer should at least be equal to length of text in the
|
||||||
|
* iterator obtained by calling <code>getLength()</code>.
|
||||||
|
* <b>Usage:</b>
|
||||||
|
*
|
||||||
|
* <code>
|
||||||
|
* <pre>
|
||||||
|
* UCharacterIterator iter = UCharacterIterator.getInstance(text);
|
||||||
|
* char[] buf = new char[iter.getLength()];
|
||||||
|
* iter.getText(buf);
|
||||||
|
*
|
||||||
|
* OR
|
||||||
|
* char[] buf= new char[1];
|
||||||
|
* int len = 0;
|
||||||
|
* for(;;){
|
||||||
|
* try{
|
||||||
|
* len = iter.getText(buf);
|
||||||
|
* break;
|
||||||
|
* }catch(IndexOutOfBoundsException e){
|
||||||
|
* buf = new char[iter.getLength()];
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* </pre>
|
||||||
|
* </code>
|
||||||
|
*
|
||||||
|
* @param fillIn an array of chars to fill with the underlying UTF-16 code
|
||||||
|
* units.
|
||||||
|
* @param offset the position within the array to start putting the data.
|
||||||
|
* @return the number of code units added to fillIn, as a convenience
|
||||||
|
* @exception IndexOutOfBoundsException if there is not enough
|
||||||
|
* room after offset in the array, or if offset < 0.
|
||||||
|
*/
|
||||||
|
public int getText(char[] fillIn, int offset)
|
||||||
|
{
|
||||||
|
int length = m_text_.length();
|
||||||
|
if (offset < 0 || offset + length > fillIn.length) {
|
||||||
|
throw new IndexOutOfBoundsException(Integer.toString(length));
|
||||||
|
}
|
||||||
|
m_text_.getChars(0, length, fillIn, offset);
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience method for returning the underlying text storage as a
|
||||||
|
* string
|
||||||
|
* @return the underlying text storage in the iterator as a string
|
||||||
|
*/
|
||||||
|
public String getText()
|
||||||
|
{
|
||||||
|
return m_text_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset this iterator to point to a new string. This method is used by
|
||||||
|
* other classes that want to avoid allocating new
|
||||||
|
* ReplaceableCharacterIterator objects every time their setText method
|
||||||
|
* is called.
|
||||||
|
* @param text The String to be iterated over
|
||||||
|
*/
|
||||||
|
public void setText(String text)
|
||||||
|
{
|
||||||
|
if (text == null) {
|
||||||
|
throw new NullPointerException();
|
||||||
|
}
|
||||||
|
m_text_ = text;
|
||||||
|
m_currentIndex_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// private data members ----------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Text string object
|
||||||
|
*/
|
||||||
|
private String m_text_;
|
||||||
|
/**
|
||||||
|
* Current currentIndex
|
||||||
|
*/
|
||||||
|
private int m_currentIndex_;
|
||||||
|
|
||||||
|
}
|
@ -9,12 +9,17 @@
|
|||||||
*/
|
*/
|
||||||
package com.ibm.icu.text;
|
package com.ibm.icu.text;
|
||||||
|
|
||||||
import java.text.StringCharacterIterator;
|
/***
|
||||||
import java.text.CharacterIterator;
|
* import java.text.StringCharacterIterator;
|
||||||
|
* import java.text.CharacterIterator;
|
||||||
|
*/
|
||||||
import com.ibm.icu.impl.NormalizerImpl;
|
import com.ibm.icu.impl.NormalizerImpl;
|
||||||
import com.ibm.icu.impl.UCharacterProperty;
|
import com.ibm.icu.impl.UCharacterProperty;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.impl.StringUCharacterIterator;
|
||||||
|
import com.ibm.icu.impl.CharacterIteratorWrapper;
|
||||||
import com.ibm.icu.impl.ICUDebug;
|
import com.ibm.icu.impl.ICUDebug;
|
||||||
|
import com.ibm.icu.lang.UCharacter;
|
||||||
|
import java.text.CharacterIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p><code>CollationElementIterator</code> is an iterator created by
|
* <p><code>CollationElementIterator</code> is an iterator created by
|
||||||
@ -218,7 +223,7 @@ public final class CollationElementIterator
|
|||||||
*/
|
*/
|
||||||
public void reset()
|
public void reset()
|
||||||
{
|
{
|
||||||
m_source_.setIndex(m_source_.getBeginIndex());
|
m_source_.setToStart();
|
||||||
updateInternalState();
|
updateInternalState();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,12 +261,12 @@ public final class CollationElementIterator
|
|||||||
m_CEBufferOffset_ = 0;
|
m_CEBufferOffset_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
char ch = nextChar();
|
int ch_int = nextChar();
|
||||||
/* System.out.println("ch " + Integer.toHexString(ch) + " " +
|
|
||||||
Integer.toHexString(m_source_.current()));*/
|
if (ch_int == UCharacterIterator.DONE) {
|
||||||
if (ch == CharacterIterator.DONE) {
|
|
||||||
return NULLORDER;
|
return NULLORDER;
|
||||||
}
|
}
|
||||||
|
char ch = (char)ch_int;
|
||||||
if (m_collator_.m_isHiragana4_) {
|
if (m_collator_.m_isHiragana4_) {
|
||||||
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
|
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
|
||||||
&& !(ch > 0x3094 && ch < 0x309d);
|
&& !(ch > 0x3094 && ch < 0x309d);
|
||||||
@ -325,7 +330,7 @@ public final class CollationElementIterator
|
|||||||
if (m_source_.getIndex() <= 0 && m_isForwards_) {
|
if (m_source_.getIndex() <= 0 && m_isForwards_) {
|
||||||
// if iterator is new or reset, we can immediate perform backwards
|
// if iterator is new or reset, we can immediate perform backwards
|
||||||
// iteration even when the offset is not right.
|
// iteration even when the offset is not right.
|
||||||
m_source_.setIndex(m_source_.getEndIndex());
|
m_source_.setToLimit();
|
||||||
updateInternalState();
|
updateInternalState();
|
||||||
}
|
}
|
||||||
m_isForwards_ = false;
|
m_isForwards_ = false;
|
||||||
@ -337,10 +342,11 @@ public final class CollationElementIterator
|
|||||||
m_CEBufferSize_ = 0;
|
m_CEBufferSize_ = 0;
|
||||||
m_CEBufferOffset_ = 0;
|
m_CEBufferOffset_ = 0;
|
||||||
}
|
}
|
||||||
char ch = previousChar();
|
int ch_int = previousChar();
|
||||||
if (ch == CharacterIterator.DONE) {
|
if (ch_int == UCharacterIterator.DONE) {
|
||||||
return NULLORDER;
|
return NULLORDER;
|
||||||
}
|
}
|
||||||
|
char ch = (char)ch_int;
|
||||||
if (m_collator_.m_isHiragana4_) {
|
if (m_collator_.m_isHiragana4_) {
|
||||||
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
|
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
|
||||||
}
|
}
|
||||||
@ -477,13 +483,14 @@ public final class CollationElementIterator
|
|||||||
public void setOffset(int offset)
|
public void setOffset(int offset)
|
||||||
{
|
{
|
||||||
m_source_.setIndex(offset);
|
m_source_.setIndex(offset);
|
||||||
char ch = m_source_.current();
|
int ch_int = m_source_.current();
|
||||||
if (ch != CharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
|
char ch = (char)ch_int;
|
||||||
|
if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
|
||||||
// if it is unsafe we need to check if it is part of a contraction
|
// if it is unsafe we need to check if it is part of a contraction
|
||||||
// or a surrogate character
|
// or a surrogate character
|
||||||
if (UTF16.isTrailSurrogate(ch)) {
|
if (UTF16.isTrailSurrogate(ch)) {
|
||||||
// if it is a surrogate pair we move up one character
|
// if it is a surrogate pair we move up one character
|
||||||
char prevch = m_source_.previous();
|
char prevch = (char)m_source_.previous();
|
||||||
if (!UTF16.isLeadSurrogate(prevch)) {
|
if (!UTF16.isLeadSurrogate(prevch)) {
|
||||||
m_source_.setIndex(offset); // go back to the same index
|
m_source_.setIndex(offset); // go back to the same index
|
||||||
}
|
}
|
||||||
@ -495,7 +502,7 @@ public final class CollationElementIterator
|
|||||||
if (!m_collator_.isUnsafe(ch)) {
|
if (!m_collator_.isUnsafe(ch)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ch = m_source_.previous();
|
ch = (char)m_source_.previous();
|
||||||
}
|
}
|
||||||
updateInternalState();
|
updateInternalState();
|
||||||
int prevoffset = 0;
|
int prevoffset = 0;
|
||||||
@ -510,12 +517,12 @@ public final class CollationElementIterator
|
|||||||
// direction code to prevent next and previous from returning a
|
// direction code to prevent next and previous from returning a
|
||||||
// character if we are already at the ends
|
// character if we are already at the ends
|
||||||
offset = m_source_.getIndex();
|
offset = m_source_.getIndex();
|
||||||
if (offset == m_source_.getBeginIndex()) {
|
if (offset == 0/* m_source_.getBeginIndex() */) {
|
||||||
// preventing previous() from returning characters from the end of
|
// preventing previous() from returning characters from the end of
|
||||||
// the string again if we are at the beginning
|
// the string again if we are at the beginning
|
||||||
m_isForwards_ = false;
|
m_isForwards_ = false;
|
||||||
}
|
}
|
||||||
else if (offset == m_source_.getEndIndex()) {
|
else if (offset == m_source_.getLength()) {
|
||||||
// preventing next() from returning characters from the start of
|
// preventing next() from returning characters from the start of
|
||||||
// the string again if we are at the end
|
// the string again if we are at the end
|
||||||
m_isForwards_ = true;
|
m_isForwards_ = true;
|
||||||
@ -536,6 +543,22 @@ public final class CollationElementIterator
|
|||||||
updateInternalState();
|
updateInternalState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Set a new source string iterator for iteration, and reset the
|
||||||
|
* offset to the beginning of the text.
|
||||||
|
* </p>
|
||||||
|
* <p>The source iterator's integrity will be preserved since a new copy
|
||||||
|
* will be created for use.</p>
|
||||||
|
* @param source the new source string iterator for iteration.
|
||||||
|
* @draft ICU 2.8
|
||||||
|
*/
|
||||||
|
public void setText(UCharacterIterator source)
|
||||||
|
{
|
||||||
|
m_srcUtilIter_.setText(source.getText());
|
||||||
|
m_source_ = m_srcUtilIter_;
|
||||||
|
updateInternalState();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Set a new source string iterator for iteration, and reset the
|
* <p>Set a new source string iterator for iteration, and reset the
|
||||||
* offset to the beginning of the text.
|
* offset to the beginning of the text.
|
||||||
@ -545,8 +568,8 @@ public final class CollationElementIterator
|
|||||||
*/
|
*/
|
||||||
public void setText(CharacterIterator source)
|
public void setText(CharacterIterator source)
|
||||||
{
|
{
|
||||||
m_source_ = source;
|
m_source_ = new CharacterIteratorWrapper(source);
|
||||||
m_source_.setIndex(m_source_.getBeginIndex());
|
m_source_.setToStart();
|
||||||
updateInternalState();
|
updateInternalState();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -568,10 +591,13 @@ public final class CollationElementIterator
|
|||||||
if (that instanceof CollationElementIterator) {
|
if (that instanceof CollationElementIterator) {
|
||||||
CollationElementIterator thatceiter
|
CollationElementIterator thatceiter
|
||||||
= (CollationElementIterator)that;
|
= (CollationElementIterator)that;
|
||||||
if (m_collator_.equals(thatceiter.m_collator_)
|
if (!m_collator_.equals(thatceiter.m_collator_)) {
|
||||||
&& m_source_.equals(thatceiter.m_source_)) {
|
return false;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
// checks the text
|
||||||
|
return m_source_.getIndex() == thatceiter.m_source_.getIndex()
|
||||||
|
&& m_source_.getText().equals(
|
||||||
|
thatceiter.m_source_.getText());
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -591,7 +617,7 @@ public final class CollationElementIterator
|
|||||||
*/
|
*/
|
||||||
CollationElementIterator(String source, RuleBasedCollator collator)
|
CollationElementIterator(String source, RuleBasedCollator collator)
|
||||||
{
|
{
|
||||||
m_srcUtilIter_ = new StringCharacterIterator(source);
|
m_srcUtilIter_ = new StringUCharacterIterator(source);
|
||||||
m_utilStringBuffer_ = new StringBuffer();
|
m_utilStringBuffer_ = new StringBuffer();
|
||||||
m_source_ = m_srcUtilIter_;
|
m_source_ = m_srcUtilIter_;
|
||||||
m_collator_ = collator;
|
m_collator_ = collator;
|
||||||
@ -615,9 +641,34 @@ public final class CollationElementIterator
|
|||||||
CollationElementIterator(CharacterIterator source,
|
CollationElementIterator(CharacterIterator source,
|
||||||
RuleBasedCollator collator)
|
RuleBasedCollator collator)
|
||||||
{
|
{
|
||||||
m_srcUtilIter_ = new StringCharacterIterator("");
|
m_srcUtilIter_ = new StringUCharacterIterator();
|
||||||
m_utilStringBuffer_ = new StringBuffer();
|
m_utilStringBuffer_ = new StringBuffer();
|
||||||
m_source_ = source;
|
m_source_ = new CharacterIteratorWrapper(source);
|
||||||
|
m_collator_ = collator;
|
||||||
|
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
|
||||||
|
m_buffer_ = new StringBuffer();
|
||||||
|
m_utilSpecialBackUp_ = new Backup();
|
||||||
|
updateInternalState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>CollationElementIterator constructor. This takes a source
|
||||||
|
* character iterator and a RuleBasedCollator. The iterator will
|
||||||
|
* walk through the source string based on the rules defined by
|
||||||
|
* the collator. If the source string is empty, NULLORDER will be
|
||||||
|
* returned on the first call to next().</p>
|
||||||
|
*
|
||||||
|
* @param source the source string iterator.
|
||||||
|
* @param collator the RuleBasedCollator
|
||||||
|
* @draft ICU 2.2
|
||||||
|
*/
|
||||||
|
CollationElementIterator(UCharacterIterator source,
|
||||||
|
RuleBasedCollator collator)
|
||||||
|
{
|
||||||
|
m_srcUtilIter_ = new StringUCharacterIterator();
|
||||||
|
m_utilStringBuffer_ = new StringBuffer();
|
||||||
|
m_srcUtilIter_.setText(source.getText());
|
||||||
|
m_source_ = m_srcUtilIter_;
|
||||||
m_collator_ = collator;
|
m_collator_ = collator;
|
||||||
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
|
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
|
||||||
m_buffer_ = new StringBuffer();
|
m_buffer_ = new StringBuffer();
|
||||||
@ -717,7 +768,7 @@ public final class CollationElementIterator
|
|||||||
* @param ch character to test
|
* @param ch character to test
|
||||||
* @return true if ch is a Thai prevowel, false otherwise
|
* @return true if ch is a Thai prevowel, false otherwise
|
||||||
*/
|
*/
|
||||||
static final boolean isThaiPreVowel(char ch)
|
static final boolean isThaiPreVowel(int ch)
|
||||||
{
|
{
|
||||||
return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
|
return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
|
||||||
}
|
}
|
||||||
@ -736,9 +787,10 @@ public final class CollationElementIterator
|
|||||||
* @param source the new source string iterator for iteration.
|
* @param source the new source string iterator for iteration.
|
||||||
* @param offset to the source
|
* @param offset to the source
|
||||||
*/
|
*/
|
||||||
void setText(CharacterIterator source, int offset)
|
void setText(UCharacterIterator source, int offset)
|
||||||
{
|
{
|
||||||
m_source_ = source;
|
m_srcUtilIter_.setText(source.getText());
|
||||||
|
m_source_ = m_srcUtilIter_;
|
||||||
m_source_.setIndex(offset);
|
m_source_.setIndex(offset);
|
||||||
updateInternalState();
|
updateInternalState();
|
||||||
}
|
}
|
||||||
@ -796,7 +848,7 @@ public final class CollationElementIterator
|
|||||||
/**
|
/**
|
||||||
* Source string iterator
|
* Source string iterator
|
||||||
*/
|
*/
|
||||||
private CharacterIterator m_source_;
|
private UCharacterIterator m_source_;
|
||||||
/**
|
/**
|
||||||
* This is position to the m_buffer_, -1 if iterator is not in m_buffer_
|
* This is position to the m_buffer_, -1 if iterator is not in m_buffer_
|
||||||
*/
|
*/
|
||||||
@ -846,7 +898,7 @@ public final class CollationElementIterator
|
|||||||
/**
|
/**
|
||||||
* Utility
|
* Utility
|
||||||
*/
|
*/
|
||||||
private StringCharacterIterator m_srcUtilIter_;
|
private StringUCharacterIterator m_srcUtilIter_;
|
||||||
private StringBuffer m_utilStringBuffer_;
|
private StringBuffer m_utilStringBuffer_;
|
||||||
private StringBuffer m_utilSkippedBuffer_;
|
private StringBuffer m_utilSkippedBuffer_;
|
||||||
private CollationElementIterator m_utilColEIter_;
|
private CollationElementIterator m_utilColEIter_;
|
||||||
@ -950,7 +1002,7 @@ public final class CollationElementIterator
|
|||||||
m_CEBufferOffset_ = 0;
|
m_CEBufferOffset_ = 0;
|
||||||
m_CEBufferSize_ = 0;
|
m_CEBufferSize_ = 0;
|
||||||
m_FCDLimit_ = -1;
|
m_FCDLimit_ = -1;
|
||||||
m_FCDStart_ = m_source_.getEndIndex();
|
m_FCDStart_ = m_source_.getLength();
|
||||||
m_isHiragana4_ = m_collator_.m_isHiragana4_;
|
m_isHiragana4_ = m_collator_.m_isHiragana4_;
|
||||||
m_isForwards_ = true;
|
m_isForwards_ = true;
|
||||||
}
|
}
|
||||||
@ -1022,8 +1074,7 @@ public final class CollationElementIterator
|
|||||||
m_buffer_.setLength(0);
|
m_buffer_.setLength(0);
|
||||||
m_source_.setIndex(m_FCDStart_);
|
m_source_.setIndex(m_FCDStart_);
|
||||||
for (int i = 0; i < size; i ++) {
|
for (int i = 0; i < size; i ++) {
|
||||||
m_buffer_.append(m_source_.current());
|
m_buffer_.append((char)m_source_.next());
|
||||||
m_source_.next();
|
|
||||||
}
|
}
|
||||||
String decomp = Normalizer.decompose(m_buffer_.toString(), false);
|
String decomp = Normalizer.decompose(m_buffer_.toString(), false);
|
||||||
m_buffer_.setLength(0);
|
m_buffer_.setLength(0);
|
||||||
@ -1059,7 +1110,9 @@ public final class CollationElementIterator
|
|||||||
// trie access
|
// trie access
|
||||||
char fcd = NormalizerImpl.getFCD16(ch);
|
char fcd = NormalizerImpl.getFCD16(ch);
|
||||||
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
|
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
|
||||||
ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd
|
m_source_.next();
|
||||||
|
ch = (char)m_source_.current();
|
||||||
|
// UCharacterIterator.DONE has 0 fcd
|
||||||
if (UTF16.isTrailSurrogate(ch)) {
|
if (UTF16.isTrailSurrogate(ch)) {
|
||||||
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
|
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
|
||||||
} else {
|
} else {
|
||||||
@ -1073,14 +1126,17 @@ public final class CollationElementIterator
|
|||||||
// The current char has a non-zero trailing CC. Scan forward until
|
// The current char has a non-zero trailing CC. Scan forward until
|
||||||
// we find a char with a leading cc of zero.
|
// we find a char with a leading cc of zero.
|
||||||
while (true) {
|
while (true) {
|
||||||
ch = m_source_.next();
|
m_source_.next();
|
||||||
if (ch == CharacterIterator.DONE) {
|
int ch_int = m_source_.current();
|
||||||
|
if (ch_int == UCharacterIterator.DONE) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
ch = (char)ch_int;
|
||||||
// trie access
|
// trie access
|
||||||
fcd = NormalizerImpl.getFCD16(ch);
|
fcd = NormalizerImpl.getFCD16(ch);
|
||||||
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
|
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
|
||||||
ch = m_source_.next();
|
m_source_.next();
|
||||||
|
ch = (char)m_source_.current();
|
||||||
if (UTF16.isTrailSurrogate(ch)) {
|
if (UTF16.isTrailSurrogate(ch)) {
|
||||||
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
|
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
|
||||||
} else {
|
} else {
|
||||||
@ -1112,9 +1168,9 @@ public final class CollationElementIterator
|
|||||||
* <p>Offsets are returned at the next character.</p>
|
* <p>Offsets are returned at the next character.</p>
|
||||||
* @return next fcd character
|
* @return next fcd character
|
||||||
*/
|
*/
|
||||||
private char nextChar()
|
private int nextChar()
|
||||||
{
|
{
|
||||||
char result;
|
int result;
|
||||||
|
|
||||||
// loop handles the next character whether it is in the buffer or not.
|
// loop handles the next character whether it is in the buffer or not.
|
||||||
if (m_bufferOffset_ < 0) {
|
if (m_bufferOffset_ < 0) {
|
||||||
@ -1147,8 +1203,9 @@ public final class CollationElementIterator
|
|||||||
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||||
// We need to peek at the next character in order to tell if we are
|
// We need to peek at the next character in order to tell if we are
|
||||||
// FCD
|
// FCD
|
||||||
char next = m_source_.next();
|
m_source_.next();
|
||||||
if (next == CharacterIterator.DONE
|
int next = m_source_.current();
|
||||||
|
if (next == UCharacterIterator.DONE
|
||||||
|| next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
|| next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||||
return result; // end of source string and if next character
|
return result; // end of source string and if next character
|
||||||
// starts with a base character is always fcd.
|
// starts with a base character is always fcd.
|
||||||
@ -1156,7 +1213,7 @@ public final class CollationElementIterator
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Need a more complete FCD check and possible normalization.
|
// Need a more complete FCD check and possible normalization.
|
||||||
if (!FCDCheck(result, startoffset)) {
|
if (!FCDCheck((char)result, startoffset)) {
|
||||||
normalize();
|
normalize();
|
||||||
result = m_buffer_.charAt(0);
|
result = m_buffer_.charAt(0);
|
||||||
m_bufferOffset_ = 1;
|
m_bufferOffset_ = 1;
|
||||||
@ -1206,7 +1263,7 @@ public final class CollationElementIterator
|
|||||||
else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) {
|
else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) {
|
||||||
// note trail surrogate characters gets 0 fcd
|
// note trail surrogate characters gets 0 fcd
|
||||||
char trailch = ch;
|
char trailch = ch;
|
||||||
ch = m_source_.previous();
|
ch = (char)m_source_.previous();
|
||||||
if (UTF16.isLeadSurrogate(ch)) {
|
if (UTF16.isLeadSurrogate(ch)) {
|
||||||
fcd = NormalizerImpl.getFCD16(ch);
|
fcd = NormalizerImpl.getFCD16(ch);
|
||||||
if (fcd != 0) {
|
if (fcd != 0) {
|
||||||
@ -1228,13 +1285,13 @@ public final class CollationElementIterator
|
|||||||
if (offset == 0) {
|
if (offset == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ch = m_source_.previous();
|
ch = (char)m_source_.previous();
|
||||||
if (!UTF16.isSurrogate(ch)) {
|
if (!UTF16.isSurrogate(ch)) {
|
||||||
fcd = NormalizerImpl.getFCD16(ch);
|
fcd = NormalizerImpl.getFCD16(ch);
|
||||||
}
|
}
|
||||||
else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
|
else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
|
||||||
char trail = ch;
|
char trail = ch;
|
||||||
ch = m_source_.previous();
|
ch = (char)m_source_.previous();
|
||||||
if (UTF16.isLeadSurrogate(ch)) {
|
if (UTF16.isLeadSurrogate(ch)) {
|
||||||
fcd = NormalizerImpl.getFCD16(ch);
|
fcd = NormalizerImpl.getFCD16(ch);
|
||||||
}
|
}
|
||||||
@ -1270,7 +1327,7 @@ public final class CollationElementIterator
|
|||||||
* <p>Offsets are returned at the current character.</p>
|
* <p>Offsets are returned at the current character.</p>
|
||||||
* @return previous fcd character
|
* @return previous fcd character
|
||||||
*/
|
*/
|
||||||
private char previousChar()
|
private int previousChar()
|
||||||
{
|
{
|
||||||
if (m_bufferOffset_ >= 0) {
|
if (m_bufferOffset_ >= 0) {
|
||||||
m_bufferOffset_ --;
|
m_bufferOffset_ --;
|
||||||
@ -1280,10 +1337,10 @@ public final class CollationElementIterator
|
|||||||
else {
|
else {
|
||||||
// At the start of buffer, route back to string.
|
// At the start of buffer, route back to string.
|
||||||
m_buffer_.setLength(0);
|
m_buffer_.setLength(0);
|
||||||
if (m_FCDStart_ == m_source_.getBeginIndex()) {
|
if (m_FCDStart_ == 0) {
|
||||||
m_FCDStart_ = -1;
|
m_FCDStart_ = -1;
|
||||||
m_source_.setIndex(m_source_.getBeginIndex());
|
m_source_.setIndex(0);
|
||||||
return CharacterIterator.DONE;
|
return UCharacterIterator.DONE;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
m_FCDLimit_ = m_FCDStart_;
|
m_FCDLimit_ = m_FCDStart_;
|
||||||
@ -1292,21 +1349,21 @@ public final class CollationElementIterator
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
char result = m_source_.previous();
|
int result = m_source_.previous();
|
||||||
int startoffset = m_source_.getIndex();
|
int startoffset = m_source_.getIndex();
|
||||||
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|
||||||
|| m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
|
|| m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
|
||||||
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
|
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
char ch = m_source_.previous();
|
int ch = m_source_.previous();
|
||||||
if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||||
// if previous character is FCD
|
// if previous character is FCD
|
||||||
m_source_.next();
|
m_source_.next();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
// Need a more complete FCD check and possible normalization.
|
// Need a more complete FCD check and possible normalization.
|
||||||
if (!FCDCheckBackwards(result, startoffset)) {
|
if (!FCDCheckBackwards((char)result, startoffset)) {
|
||||||
normalizeBackwards();
|
normalizeBackwards();
|
||||||
m_bufferOffset_ --;
|
m_bufferOffset_ --;
|
||||||
result = m_buffer_.charAt(m_bufferOffset_);
|
result = m_buffer_.charAt(m_bufferOffset_);
|
||||||
@ -1340,10 +1397,10 @@ public final class CollationElementIterator
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// at end of buffer. check if fcd is at the end
|
// at end of buffer. check if fcd is at the end
|
||||||
return m_FCDLimit_ == m_source_.getEndIndex();
|
return m_FCDLimit_ == m_source_.getLength();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return m_source_.getEndIndex() == m_source_.getIndex();
|
return m_source_.getLength() == m_source_.getIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1408,12 +1465,12 @@ public final class CollationElementIterator
|
|||||||
// Note: this operation might activate the normalization buffer. We have to check for
|
// Note: this operation might activate the normalization buffer. We have to check for
|
||||||
// that and act accordingly.
|
// that and act accordingly.
|
||||||
m_FCDStart_ = m_source_.getIndex() - 1;
|
m_FCDStart_ = m_source_.getIndex() - 1;
|
||||||
char thCh = nextChar();
|
char thCh = (char)nextChar();
|
||||||
int cp = thCh;
|
int cp = thCh;
|
||||||
if (UTF16.isLeadSurrogate(thCh)) {
|
if (UTF16.isLeadSurrogate(thCh)) {
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
backupInternalState(m_utilSpecialBackUp_);
|
backupInternalState(m_utilSpecialBackUp_);
|
||||||
char trailCh = nextChar();
|
char trailCh = (char)nextChar();
|
||||||
if (UTF16.isTrailSurrogate(trailCh)) {
|
if (UTF16.isTrailSurrogate(trailCh)) {
|
||||||
cp = UCharacterProperty.getRawSupplementary(
|
cp = UCharacterProperty.getRawSupplementary(
|
||||||
thCh, trailCh);
|
thCh, trailCh);
|
||||||
@ -1582,7 +1639,7 @@ public final class CollationElementIterator
|
|||||||
ce = collator.m_contractionCE_[offset];
|
ce = collator.m_contractionCE_[offset];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
char previous = previousChar();
|
char previous = (char)previousChar();
|
||||||
while (previous > collator.m_contractionIndex_[offset]) {
|
while (previous > collator.m_contractionIndex_[offset]) {
|
||||||
// contraction characters are ordered, skip smaller characters
|
// contraction characters are ordered, skip smaller characters
|
||||||
offset ++;
|
offset ++;
|
||||||
@ -1613,7 +1670,7 @@ public final class CollationElementIterator
|
|||||||
// 3. schar is a trail surrogate in a valid surrogate
|
// 3. schar is a trail surrogate in a valid surrogate
|
||||||
// sequence that is explicitly set to zero.
|
// sequence that is explicitly set to zero.
|
||||||
if (!isBackwardsStart()) {
|
if (!isBackwardsStart()) {
|
||||||
char lead = previousChar();
|
char lead = (char)previousChar();
|
||||||
if (UTF16.isLeadSurrogate(lead)) {
|
if (UTF16.isLeadSurrogate(lead)) {
|
||||||
isZeroCE = collator.m_trie_.getLeadValue(lead);
|
isZeroCE = collator.m_trie_.getLeadValue(lead);
|
||||||
if (RuleBasedCollator.getTag(isZeroCE)
|
if (RuleBasedCollator.getTag(isZeroCE)
|
||||||
@ -1706,12 +1763,11 @@ public final class CollationElementIterator
|
|||||||
* Returns the current character for forward iteration
|
* Returns the current character for forward iteration
|
||||||
* @return current character
|
* @return current character
|
||||||
*/
|
*/
|
||||||
private char currentChar()
|
private int currentChar()
|
||||||
{
|
{
|
||||||
if (m_bufferOffset_ < 0) {
|
if (m_bufferOffset_ < 0) {
|
||||||
char result = m_source_.previous();
|
m_source_.previous();
|
||||||
m_source_.next();
|
return m_source_.next();
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// m_bufferOffset_ is never 0 in normal circumstances except after a
|
// m_bufferOffset_ is never 0 in normal circumstances except after a
|
||||||
@ -1740,8 +1796,8 @@ public final class CollationElementIterator
|
|||||||
else {
|
else {
|
||||||
m_utilSkippedBuffer_.setLength(0);
|
m_utilSkippedBuffer_.setLength(0);
|
||||||
}
|
}
|
||||||
char ch = currentChar();
|
char ch = (char)currentChar();
|
||||||
m_utilSkippedBuffer_.append(currentChar());
|
m_utilSkippedBuffer_.append((char)currentChar());
|
||||||
// accent after the first character
|
// accent after the first character
|
||||||
if (m_utilSpecialDiscontiguousBackUp_ == null) {
|
if (m_utilSpecialDiscontiguousBackUp_ == null) {
|
||||||
m_utilSpecialDiscontiguousBackUp_ = new Backup();
|
m_utilSpecialDiscontiguousBackUp_ = new Backup();
|
||||||
@ -1750,14 +1806,15 @@ public final class CollationElementIterator
|
|||||||
char nextch = ch;
|
char nextch = ch;
|
||||||
while (true) {
|
while (true) {
|
||||||
ch = nextch;
|
ch = nextch;
|
||||||
nextch = nextChar();
|
int ch_int = nextChar();
|
||||||
if (nextch == CharacterIterator.DONE
|
nextch = (char)ch_int;
|
||||||
|
if (ch_int == UCharacterIterator.DONE
|
||||||
|| getCombiningClass(nextch) == 0) {
|
|| getCombiningClass(nextch) == 0) {
|
||||||
// if there are no more accents to move around
|
// if there are no more accents to move around
|
||||||
// we don't have to shift previousChar, since we are resetting
|
// we don't have to shift previousChar, since we are resetting
|
||||||
// the offset later
|
// the offset later
|
||||||
if (multicontraction) {
|
if (multicontraction) {
|
||||||
if (nextch != CharacterIterator.DONE) {
|
if (ch_int != UCharacterIterator.DONE) {
|
||||||
previousChar(); // backtrack
|
previousChar(); // backtrack
|
||||||
}
|
}
|
||||||
setDiscontiguous(m_utilSkippedBuffer_);
|
setDiscontiguous(m_utilSkippedBuffer_);
|
||||||
@ -1836,7 +1893,7 @@ public final class CollationElementIterator
|
|||||||
byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF);
|
byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF);
|
||||||
// checks if all characters have the same combining class
|
// checks if all characters have the same combining class
|
||||||
byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
|
byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
|
||||||
char ch = nextChar();
|
char ch = (char)nextChar();
|
||||||
offset ++;
|
offset ++;
|
||||||
while (ch > collator.m_contractionIndex_[offset]) {
|
while (ch > collator.m_contractionIndex_[offset]) {
|
||||||
// contraction characters are ordered, skip all smaller
|
// contraction characters are ordered, skip all smaller
|
||||||
@ -1859,7 +1916,7 @@ public final class CollationElementIterator
|
|||||||
else if (UTF16.isLeadSurrogate(ch)) {
|
else if (UTF16.isLeadSurrogate(ch)) {
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
backupInternalState(m_utilSpecialBackUp_);
|
backupInternalState(m_utilSpecialBackUp_);
|
||||||
char trail = nextChar();
|
char trail = (char)nextChar();
|
||||||
if (UTF16.isTrailSurrogate(trail)) {
|
if (UTF16.isTrailSurrogate(trail)) {
|
||||||
// do stuff with trail
|
// do stuff with trail
|
||||||
if (RuleBasedCollator.getTag(isZeroCE)
|
if (RuleBasedCollator.getTag(isZeroCE)
|
||||||
@ -1901,10 +1958,11 @@ public final class CollationElementIterator
|
|||||||
else {
|
else {
|
||||||
// Contraction is possibly discontiguous.
|
// Contraction is possibly discontiguous.
|
||||||
// find the next character if ch is not a base character
|
// find the next character if ch is not a base character
|
||||||
char nextch = nextChar();
|
int ch_int = nextChar();
|
||||||
if (nextch != CharacterIterator.DONE) {
|
if (ch_int != UCharacterIterator.DONE) {
|
||||||
previousChar();
|
previousChar();
|
||||||
}
|
}
|
||||||
|
char nextch = (char)ch_int;
|
||||||
if (getCombiningClass(nextch) == 0) {
|
if (getCombiningClass(nextch) == 0) {
|
||||||
previousChar();
|
previousChar();
|
||||||
// base character not part of discontiguous contraction
|
// base character not part of discontiguous contraction
|
||||||
@ -2098,11 +2156,11 @@ public final class CollationElementIterator
|
|||||||
// Get next character.
|
// Get next character.
|
||||||
if (!isEnd()){
|
if (!isEnd()){
|
||||||
backupInternalState(m_utilSpecialBackUp_);
|
backupInternalState(m_utilSpecialBackUp_);
|
||||||
char ch = nextChar();
|
int char32 = nextChar();
|
||||||
int char32 = ch;
|
char ch = (char)char32;
|
||||||
if (UTF16.isLeadSurrogate(ch)){
|
if (UTF16.isLeadSurrogate(ch)){
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
char trail = nextChar();
|
char trail = (char)nextChar();
|
||||||
if (UTF16.isTrailSurrogate(trail)) {
|
if (UTF16.isTrailSurrogate(trail)) {
|
||||||
char32 = UCharacterProperty.getRawSupplementary(
|
char32 = UCharacterProperty.getRawSupplementary(
|
||||||
ch, trail);
|
ch, trail);
|
||||||
@ -2227,8 +2285,9 @@ public final class CollationElementIterator
|
|||||||
*/
|
*/
|
||||||
private int nextSurrogate(char ch)
|
private int nextSurrogate(char ch)
|
||||||
{
|
{
|
||||||
char nextch = nextChar();
|
int ch_int = nextChar();
|
||||||
if (nextch != CharacterIterator.DONE &&
|
char nextch = (char)ch_int;
|
||||||
|
if (ch_int != CharacterIterator.DONE &&
|
||||||
UTF16.isTrailSurrogate(nextch)) {
|
UTF16.isTrailSurrogate(nextch)) {
|
||||||
int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
|
int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
|
||||||
return nextImplicit(codepoint);
|
return nextImplicit(codepoint);
|
||||||
@ -2330,7 +2389,7 @@ public final class CollationElementIterator
|
|||||||
return IGNORABLE;
|
return IGNORABLE;
|
||||||
}
|
}
|
||||||
backupInternalState(m_utilSpecialBackUp_);
|
backupInternalState(m_utilSpecialBackUp_);
|
||||||
char trail = nextChar();
|
char trail = (char)nextChar();
|
||||||
ce = nextSurrogate(collator, ce, trail);
|
ce = nextSurrogate(collator, ce, trail);
|
||||||
// calculate the supplementary code point value,
|
// calculate the supplementary code point value,
|
||||||
// if surrogate was not tailored we go one more round
|
// if surrogate was not tailored we go one more round
|
||||||
@ -2403,10 +2462,10 @@ public final class CollationElementIterator
|
|||||||
|
|
||||||
// check that ch is from the normalization buffer or not
|
// check that ch is from the normalization buffer or not
|
||||||
boolean innorm = m_bufferOffset_ >= 0;
|
boolean innorm = m_bufferOffset_ >= 0;
|
||||||
char prevch = previousChar();
|
int prevch = previousChar();
|
||||||
if (!isThaiPreVowel(prevch)) {
|
if (!isThaiPreVowel(prevch)) {
|
||||||
// we now rearrange unconditionally do not check for base consonant
|
// we now rearrange unconditionally do not check for base consonant
|
||||||
if (prevch != CharacterIterator.DONE) {
|
if (prevch != UCharacterIterator.DONE) {
|
||||||
nextChar();
|
nextChar();
|
||||||
}
|
}
|
||||||
// Treat Thai as a length one expansion
|
// Treat Thai as a length one expansion
|
||||||
@ -2445,10 +2504,10 @@ public final class CollationElementIterator
|
|||||||
m_FCDLimit_ = m_FCDStart_ + 2;
|
m_FCDLimit_ = m_FCDStart_ + 2;
|
||||||
}
|
}
|
||||||
if (reorder) {
|
if (reorder) {
|
||||||
m_buffer_.insert(1, prevch);
|
m_buffer_.insert(1, (char)prevch);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
m_buffer_.insert(0, prevch);
|
m_buffer_.insert(0, (char)prevch);
|
||||||
}
|
}
|
||||||
return IGNORABLE;
|
return IGNORABLE;
|
||||||
}
|
}
|
||||||
@ -2475,7 +2534,7 @@ public final class CollationElementIterator
|
|||||||
ce = collator.m_contractionCE_[offset];
|
ce = collator.m_contractionCE_[offset];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
char prevch = previousChar();
|
char prevch = (char)previousChar();
|
||||||
while (prevch > collator.m_contractionIndex_[offset]) {
|
while (prevch > collator.m_contractionIndex_[offset]) {
|
||||||
// since contraction codepoints are ordered, we skip all that
|
// since contraction codepoints are ordered, we skip all that
|
||||||
// are smaller
|
// are smaller
|
||||||
@ -2505,7 +2564,7 @@ public final class CollationElementIterator
|
|||||||
// 3. schar is a trail surrogate in a valid surrogate
|
// 3. schar is a trail surrogate in a valid surrogate
|
||||||
// sequence that is explicitly set to zero.
|
// sequence that is explicitly set to zero.
|
||||||
if (!isBackwardsStart()) {
|
if (!isBackwardsStart()) {
|
||||||
char lead = previousChar();
|
char lead = (char)previousChar();
|
||||||
if (UTF16.isLeadSurrogate(lead)) {
|
if (UTF16.isLeadSurrogate(lead)) {
|
||||||
isZeroCE = collator.m_trie_.getLeadValue(lead);
|
isZeroCE = collator.m_trie_.getLeadValue(lead);
|
||||||
if (RuleBasedCollator.getTag(isZeroCE)
|
if (RuleBasedCollator.getTag(isZeroCE)
|
||||||
@ -2563,7 +2622,7 @@ public final class CollationElementIterator
|
|||||||
m_utilStringBuffer_.setLength(0);
|
m_utilStringBuffer_.setLength(0);
|
||||||
// since we might encounter normalized characters (from the thai
|
// since we might encounter normalized characters (from the thai
|
||||||
// processing) we can't use peekCharacter() here.
|
// processing) we can't use peekCharacter() here.
|
||||||
char prevch = previousChar();
|
char prevch = (char)previousChar();
|
||||||
boolean atStart = false;
|
boolean atStart = false;
|
||||||
while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
|
while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
|
||||||
m_utilStringBuffer_.insert(0, ch);
|
m_utilStringBuffer_.insert(0, ch);
|
||||||
@ -2572,7 +2631,7 @@ public final class CollationElementIterator
|
|||||||
atStart = true;
|
atStart = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
prevch = previousChar();
|
prevch = (char)previousChar();
|
||||||
}
|
}
|
||||||
if (!atStart) {
|
if (!atStart) {
|
||||||
// undo the previousChar() if we didn't reach the beginning
|
// undo the previousChar() if we didn't reach the beginning
|
||||||
@ -2692,7 +2751,7 @@ public final class CollationElementIterator
|
|||||||
int char32 = ch;
|
int char32 = ch;
|
||||||
if (UTF16.isTrailSurrogate(ch)) {
|
if (UTF16.isTrailSurrogate(ch)) {
|
||||||
if (!isBackwardsStart()){
|
if (!isBackwardsStart()){
|
||||||
char lead = previousChar();
|
char lead = (char)previousChar();
|
||||||
if (UTF16.isLeadSurrogate(lead)) {
|
if (UTF16.isLeadSurrogate(lead)) {
|
||||||
char32 = UCharacterProperty.getRawSupplementary(lead,
|
char32 = UCharacterProperty.getRawSupplementary(lead,
|
||||||
ch);
|
ch);
|
||||||
@ -2753,11 +2812,11 @@ public final class CollationElementIterator
|
|||||||
|
|
||||||
if (!isBackwardsStart()){
|
if (!isBackwardsStart()){
|
||||||
backupInternalState(m_utilSpecialBackUp_);
|
backupInternalState(m_utilSpecialBackUp_);
|
||||||
ch = previousChar();
|
char32 = previousChar();
|
||||||
char32 = ch;
|
ch = (char)ch;
|
||||||
if (UTF16.isTrailSurrogate(ch)){
|
if (UTF16.isTrailSurrogate(ch)){
|
||||||
if (!isBackwardsStart()) {
|
if (!isBackwardsStart()) {
|
||||||
char lead = previousChar();
|
char lead = (char)previousChar();
|
||||||
if (UTF16.isLeadSurrogate(lead)) {
|
if (UTF16.isLeadSurrogate(lead)) {
|
||||||
char32
|
char32
|
||||||
= UCharacterProperty.getRawSupplementary(
|
= UCharacterProperty.getRawSupplementary(
|
||||||
@ -2926,7 +2985,7 @@ public final class CollationElementIterator
|
|||||||
// we are at the start of the string, wrong place to be at
|
// we are at the start of the string, wrong place to be at
|
||||||
return IGNORABLE;
|
return IGNORABLE;
|
||||||
}
|
}
|
||||||
char prevch = previousChar();
|
char prevch = (char)previousChar();
|
||||||
// Handles Han and Supplementary characters here.
|
// Handles Han and Supplementary characters here.
|
||||||
if (UTF16.isLeadSurrogate(prevch)) {
|
if (UTF16.isLeadSurrogate(prevch)) {
|
||||||
return previousImplicit(
|
return previousImplicit(
|
||||||
@ -3099,12 +3158,12 @@ public final class CollationElementIterator
|
|||||||
if (offset != 0) {
|
if (offset != 0) {
|
||||||
int currentoffset = m_source_.getIndex();
|
int currentoffset = m_source_.getIndex();
|
||||||
m_source_.setIndex(currentoffset + offset);
|
m_source_.setIndex(currentoffset + offset);
|
||||||
char result = m_source_.current();
|
char result = (char)m_source_.current();
|
||||||
m_source_.setIndex(currentoffset);
|
m_source_.setIndex(currentoffset);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return m_source_.current();
|
return (char)m_source_.current();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
||||||
* $Date: 2003/09/22 06:24:20 $
|
* $Date: 2003/10/08 21:51:44 $
|
||||||
* $Revision: 1.47 $
|
* $Revision: 1.48 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -19,7 +19,6 @@ import java.util.Locale;
|
|||||||
import java.util.ResourceBundle;
|
import java.util.ResourceBundle;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.text.CharacterIterator;
|
import java.text.CharacterIterator;
|
||||||
import java.text.StringCharacterIterator;
|
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
import com.ibm.icu.util.VersionInfo;
|
import com.ibm.icu.util.VersionInfo;
|
||||||
import com.ibm.icu.impl.IntTrie;
|
import com.ibm.icu.impl.IntTrie;
|
||||||
@ -28,6 +27,7 @@ import com.ibm.icu.impl.ICULocaleData;
|
|||||||
import com.ibm.icu.impl.BOCU;
|
import com.ibm.icu.impl.BOCU;
|
||||||
import com.ibm.icu.impl.Utility;
|
import com.ibm.icu.impl.Utility;
|
||||||
import com.ibm.icu.impl.ICUDebug;
|
import com.ibm.icu.impl.ICUDebug;
|
||||||
|
import com.ibm.icu.impl.StringUCharacterIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>RuleBasedCollator is a concrete subclass of Collator. It allows
|
* <p>RuleBasedCollator is a concrete subclass of Collator. It allows
|
||||||
@ -256,6 +256,19 @@ public final class RuleBasedCollator extends Collator
|
|||||||
return new CollationElementIterator(newsource, this);
|
return new CollationElementIterator(newsource, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a CollationElementIterator for the given UCharacterIterator.
|
||||||
|
* The source iterator's integrity will be preserved since a new copy
|
||||||
|
* will be created for use.
|
||||||
|
* @see CollationElementIterator
|
||||||
|
* @draft ICU 2.8
|
||||||
|
*/
|
||||||
|
public CollationElementIterator getCollationElementIterator(
|
||||||
|
UCharacterIterator source)
|
||||||
|
{
|
||||||
|
return new CollationElementIterator(source, this);
|
||||||
|
}
|
||||||
|
|
||||||
// public setters --------------------------------------------------------
|
// public setters --------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1733,7 +1746,8 @@ public final class RuleBasedCollator extends Collator
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
|
if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
|
||||||
if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) {
|
if (UTF16.isLeadSurrogate(ch)
|
||||||
|
|| UTF16.isTrailSurrogate(ch)) {
|
||||||
// Trail surrogate are always considered unsafe.
|
// Trail surrogate are always considered unsafe.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1966,9 +1980,9 @@ public final class RuleBasedCollator extends Collator
|
|||||||
/**
|
/**
|
||||||
* Bunch of utility iterators
|
* Bunch of utility iterators
|
||||||
*/
|
*/
|
||||||
private StringCharacterIterator m_srcUtilIter_;
|
private StringUCharacterIterator m_srcUtilIter_;
|
||||||
private CollationElementIterator m_srcUtilColEIter_;
|
private CollationElementIterator m_srcUtilColEIter_;
|
||||||
private StringCharacterIterator m_tgtUtilIter_;
|
private StringUCharacterIterator m_tgtUtilIter_;
|
||||||
private CollationElementIterator m_tgtUtilColEIter_;
|
private CollationElementIterator m_tgtUtilColEIter_;
|
||||||
/**
|
/**
|
||||||
* Utility comparison flags
|
* Utility comparison flags
|
||||||
@ -3787,9 +3801,9 @@ public final class RuleBasedCollator extends Collator
|
|||||||
* Initializes utility iterators and byte buffer used by compare
|
* Initializes utility iterators and byte buffer used by compare
|
||||||
*/
|
*/
|
||||||
private final void initUtility() {
|
private final void initUtility() {
|
||||||
m_srcUtilIter_ = new StringCharacterIterator(new String(""));
|
m_srcUtilIter_ = new StringUCharacterIterator();
|
||||||
m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);
|
m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);
|
||||||
m_tgtUtilIter_ = new StringCharacterIterator(new String(""));
|
m_tgtUtilIter_ = new StringUCharacterIterator();
|
||||||
m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);
|
m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);
|
||||||
m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
|
m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
|
||||||
m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
|
m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $
|
||||||
* $Date: 2003/07/31 19:51:12 $
|
* $Date: 2003/10/08 21:51:44 $
|
||||||
* $Revision: 1.25 $
|
* $Revision: 1.26 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,7 @@ import java.text.CharacterIterator;
|
|||||||
import java.text.StringCharacterIterator;
|
import java.text.StringCharacterIterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
|
import com.ibm.icu.impl.CharacterIteratorWrapper;
|
||||||
import com.ibm.icu.impl.NormalizerImpl;
|
import com.ibm.icu.impl.NormalizerImpl;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1088,7 +1089,8 @@ public final class StringSearch extends SearchIterator
|
|||||||
|| breakIterator.following(end - 1) == end);
|
|| breakIterator.following(end - 1) == end);
|
||||||
if (result) {
|
if (result) {
|
||||||
// iterates the individual ces
|
// iterates the individual ces
|
||||||
m_utilColEIter_.setText(targetText, start);
|
m_utilColEIter_.setText(
|
||||||
|
new CharacterIteratorWrapper(targetText), start);
|
||||||
for (int count = 0; count < m_pattern_.m_CELength_;
|
for (int count = 0; count < m_pattern_.m_CELength_;
|
||||||
count ++) {
|
count ++) {
|
||||||
int ce = getCE(m_utilColEIter_.next());
|
int ce = getCE(m_utilColEIter_.next());
|
||||||
|
Loading…
Reference in New Issue
Block a user