ICU-2212 Utilized UCharacterIterator in Collation

X-SVN-Rev: 13361
This commit is contained in:
Syn Wee Quek 2003-10-08 21:51:44 +00:00
parent 1fde66b031
commit 1a8abc5b66
6 changed files with 458 additions and 108 deletions

View File

@ -353,7 +353,8 @@ public class CollationAPITest extends TestFmwk {
CharacterIterator chariter=new StringCharacterIterator(testString1); CharacterIterator chariter=new StringCharacterIterator(testString1);
// copy ctor // copy ctor
CollationElementIterator iterator2 = ((RuleBasedCollator)col).getCollationElementIterator(chariter); CollationElementIterator iterator2 = ((RuleBasedCollator)col).getCollationElementIterator(chariter);
CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(testString2); UCharacterIterator uchariter=UCharacterIterator.getInstance(testString2);
CollationElementIterator iterator3 = ((RuleBasedCollator)col).getCollationElementIterator(uchariter);
int offset = 0; int offset = 0;
offset = iterator1.getOffset(); offset = iterator1.getOffset();

View File

@ -426,12 +426,23 @@ public class CollationIteratorTest extends TestFmwk {
//now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
CharacterIterator chariter = new StringCharacterIterator(test1); CharacterIterator chariter = new StringCharacterIterator(test1);
try { try {
iter2.setText(chariter); iter2.setText(chariter);
} catch (Exception e ) { } catch (Exception e ) {
errln("call to iter2->setText(chariter(test1)) failed."); errln("call to iter2->setText(chariter(test1)) failed.");
return; return;
} }
assertEqual(iter1, iter2); assertEqual(iter1, iter2);
iter1.reset();
//now use the overloaded setText(UCharacterIterator) function to set the text
UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
try {
iter2.setText(uchariter);
} catch (Exception e ) {
errln("call to iter2->setText(uchariter(test1)) failed.");
return;
}
assertEqual(iter1, iter2);
} }
/** /**

View File

@ -0,0 +1,263 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/StringUCharacterIterator.java,v $
* $Date: 2003/10/08 21:51:43 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.impl;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
/**
 * A <code>UCharacterIterator</code> that walks the UTF-16 code units of a
 * <code>String</code>. Used by Collation, which cannot use
 * ReplaceableUCharacterIterator because it is not easy to do a fast setText
 * on that class.
 * <p>The iterator keeps a reference to the string and a current index in the
 * range <code>[0, getLength()]</code>.</p>
 * @author synwee
 */
public final class StringUCharacterIterator extends UCharacterIterator
{
    // public constructor ------------------------------------------------------

    /**
     * Creates an iterator over the argument text, positioned at index 0.
     * @param str text which the iterator will be based on
     * @exception IllegalArgumentException thrown if str is null
     */
    public StringUCharacterIterator(String str)
    {
        if (str == null) {
            throw new IllegalArgumentException();
        }
        m_text_ = str;
        m_currentIndex_ = 0;
    }

    /**
     * Creates an iterator over the empty string, positioned at index 0.
     * Use {@link #setText(String)} to point it at real text.
     */
    public StringUCharacterIterator()
    {
        m_text_ = "";
        m_currentIndex_ = 0;
    }

    // public methods ----------------------------------------------------------

    /**
     * Creates a copy of this iterator. The underlying <code>String</code>
     * object is shared, not copied.
     * @return copy of this iterator
     */
    public Object clone()
    {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            return null; // never invoked
        }
    }

    /**
     * Returns the UTF-16 code unit at the current index without moving the
     * iterator.
     * @return code unit at the current index, or DONE if the current index
     *         is at the end of the text
     */
    public int current()
    {
        if (m_currentIndex_ < m_text_.length()) {
            return m_text_.charAt(m_currentIndex_);
        }
        return DONE;
    }

    /**
     * Returns the code point that starts at the current index without
     * moving the iterator. If the current code unit is a lead surrogate
     * that is immediately followed by a trail surrogate, the supplementary
     * code point formed by the pair is returned; otherwise the single code
     * unit at the current index is returned.
     * @return code point at the current index, or DONE if the current index
     *         is at the end of the text
     */
    public int currentCodePoint()
    {
        // cannot use charAt due to its different behaviour when the index
        // is pointing at a trail surrogate, so check for surrogates here
        int length = m_text_.length();
        if (m_currentIndex_ >= length) {
            return DONE;
        }
        char lead = m_text_.charAt(m_currentIndex_);
        // peek at the following code unit through a local index: this
        // method must never change the current index, not even when a
        // complete surrogate pair is found
        int trailIndex = m_currentIndex_ + 1;
        if (UTF16.isLeadSurrogate(lead) && trailIndex < length) {
            char trail = m_text_.charAt(trailIndex);
            if (UTF16.isTrailSurrogate(trail)) {
                // we found a surrogate pair
                return UCharacterProperty.getRawSupplementary(lead, trail);
            }
        }
        return lead;
    }

    /**
     * Returns the length of the text
     * @return length of the text in UTF-16 code units
     */
    public int getLength()
    {
        return m_text_.length();
    }

    /**
     * Gets the current index in the text.
     * @return current index in the text
     */
    public int getIndex()
    {
        return m_currentIndex_;
    }

    /**
     * Returns the UTF-16 code unit at the current index and then increments
     * the current index by 1.
     * If the current index is already at the end of the text, the index is
     * left unchanged and DONE is returned.
     * @return code unit at the index before the increment, or DONE if the
     *         iterator is at the end of the text
     */
    public int next()
    {
        if (m_currentIndex_ < m_text_.length()) {
            return m_text_.charAt(m_currentIndex_ ++);
        }
        return DONE;
    }

    /**
     * Decrements the current index by 1 and returns the UTF-16 code unit at
     * the new index.
     * If the current index is already 0, the index is left unchanged and
     * DONE is returned.
     * @return previous code unit in the text, or DONE if the iterator is at
     *         the start of the text
     */
    public int previous()
    {
        if (m_currentIndex_ > 0) {
            return m_text_.charAt(-- m_currentIndex_);
        }
        return DONE;
    }

    /**
     * <p>Sets the current index to the specified index in the text.
     * This assumes the text is stored as 16-bit code units.</p>
     * @param currentIndex the new index within the text; must be in the
     *        range <code>[0, getLength()]</code>
     * @exception IndexOutOfBoundsException is thrown if currentIndex is
     *            negative or greater than the length of the text
     */
    public void setIndex(int currentIndex) throws IndexOutOfBoundsException
    {
        if (currentIndex < 0 || currentIndex > m_text_.length()) {
            throw new IndexOutOfBoundsException();
        }
        m_currentIndex_ = currentIndex;
    }

    /**
     * Fills the buffer with the underlying text storage of the iterator.
     * If the buffer capacity is not enough an exception is thrown. The
     * room after <code>offset</code> in the fill in buffer should at least
     * be equal to the length of text in the iterator obtained by calling
     * <code>getLength()</code>. The current index is not changed.
     * <b>Usage:</b>
     *
     * <pre>
     *         StringUCharacterIterator iter
     *                                  = new StringUCharacterIterator(text);
     *         char[] buf = new char[iter.getLength()];
     *         iter.getText(buf, 0);
     *
     *         OR
     *         char[] buf = new char[1];
     *         int len = 0;
     *         for (;;) {
     *             try {
     *                 len = iter.getText(buf, 0);
     *                 break;
     *             } catch (IndexOutOfBoundsException e) {
     *                 buf = new char[iter.getLength()];
     *             }
     *         }
     * </pre>
     *
     * @param fillIn an array of chars to fill with the underlying UTF-16 code
     *         units.
     * @param offset the position within the array to start putting the data.
     * @return the number of code units added to fillIn, as a convenience
     * @exception IndexOutOfBoundsException thrown if there is not enough
     *            room after offset in the array, or if offset &lt; 0. The
     *            exception message is the required length.
     */
    public int getText(char[] fillIn, int offset)
    {
        int length = m_text_.length();
        // written as a subtraction so that a very large offset can not
        // overflow the sum and slip past the check
        if (offset < 0 || length > fillIn.length - offset) {
            throw new IndexOutOfBoundsException(Integer.toString(length));
        }
        m_text_.getChars(0, length, fillIn, offset);
        return length;
    }

    /**
     * Convenience method for returning the underlying text storage as a
     * string. No copy is made.
     * @return the underlying text storage in the iterator as a string
     */
    public String getText()
    {
        return m_text_;
    }

    /**
     * Reset this iterator to point to a new string and move the current
     * index back to 0. This method is used by other classes that want to
     * avoid allocating new iterator objects every time their setText method
     * is called.
     * @param text The String to be iterated over
     * @exception NullPointerException thrown if text is null
     */
    public void setText(String text)
    {
        if (text == null) {
            throw new NullPointerException();
        }
        m_text_ = text;
        m_currentIndex_ = 0;
    }

    // private data members ----------------------------------------------------

    /**
     * Text string object, never null
     */
    private String m_text_;
    /**
     * Current index into m_text_, always in the range [0, m_text_.length()]
     */
    private int m_currentIndex_;
}

View File

@ -9,12 +9,17 @@
*/ */
package com.ibm.icu.text; package com.ibm.icu.text;
import java.text.StringCharacterIterator; /***
import java.text.CharacterIterator; * import java.text.StringCharacterIterator;
* import java.text.CharacterIterator;
*/
import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.UCharacterProperty; import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.lang.UCharacter; import com.ibm.icu.impl.StringUCharacterIterator;
import com.ibm.icu.impl.CharacterIteratorWrapper;
import com.ibm.icu.impl.ICUDebug; import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.lang.UCharacter;
import java.text.CharacterIterator;
/** /**
* <p><code>CollationElementIterator</code> is an iterator created by * <p><code>CollationElementIterator</code> is an iterator created by
@ -218,7 +223,7 @@ public final class CollationElementIterator
*/ */
public void reset() public void reset()
{ {
m_source_.setIndex(m_source_.getBeginIndex()); m_source_.setToStart();
updateInternalState(); updateInternalState();
} }
@ -256,12 +261,12 @@ public final class CollationElementIterator
m_CEBufferOffset_ = 0; m_CEBufferOffset_ = 0;
} }
char ch = nextChar(); int ch_int = nextChar();
/* System.out.println("ch " + Integer.toHexString(ch) + " " +
Integer.toHexString(m_source_.current()));*/ if (ch_int == UCharacterIterator.DONE) {
if (ch == CharacterIterator.DONE) {
return NULLORDER; return NULLORDER;
} }
char ch = (char)ch_int;
if (m_collator_.m_isHiragana4_) { if (m_collator_.m_isHiragana4_) {
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e) m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e)
&& !(ch > 0x3094 && ch < 0x309d); && !(ch > 0x3094 && ch < 0x309d);
@ -325,7 +330,7 @@ public final class CollationElementIterator
if (m_source_.getIndex() <= 0 && m_isForwards_) { if (m_source_.getIndex() <= 0 && m_isForwards_) {
// if iterator is new or reset, we can immediate perform backwards // if iterator is new or reset, we can immediate perform backwards
// iteration even when the offset is not right. // iteration even when the offset is not right.
m_source_.setIndex(m_source_.getEndIndex()); m_source_.setToLimit();
updateInternalState(); updateInternalState();
} }
m_isForwards_ = false; m_isForwards_ = false;
@ -337,10 +342,11 @@ public final class CollationElementIterator
m_CEBufferSize_ = 0; m_CEBufferSize_ = 0;
m_CEBufferOffset_ = 0; m_CEBufferOffset_ = 0;
} }
char ch = previousChar(); int ch_int = previousChar();
if (ch == CharacterIterator.DONE) { if (ch_int == UCharacterIterator.DONE) {
return NULLORDER; return NULLORDER;
} }
char ch = (char)ch_int;
if (m_collator_.m_isHiragana4_) { if (m_collator_.m_isHiragana4_) {
m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f); m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
} }
@ -477,13 +483,14 @@ public final class CollationElementIterator
public void setOffset(int offset) public void setOffset(int offset)
{ {
m_source_.setIndex(offset); m_source_.setIndex(offset);
char ch = m_source_.current(); int ch_int = m_source_.current();
if (ch != CharacterIterator.DONE && m_collator_.isUnsafe(ch)) { char ch = (char)ch_int;
if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
// if it is unsafe we need to check if it is part of a contraction // if it is unsafe we need to check if it is part of a contraction
// or a surrogate character // or a surrogate character
if (UTF16.isTrailSurrogate(ch)) { if (UTF16.isTrailSurrogate(ch)) {
// if it is a surrogate pair we move up one character // if it is a surrogate pair we move up one character
char prevch = m_source_.previous(); char prevch = (char)m_source_.previous();
if (!UTF16.isLeadSurrogate(prevch)) { if (!UTF16.isLeadSurrogate(prevch)) {
m_source_.setIndex(offset); // go back to the same index m_source_.setIndex(offset); // go back to the same index
} }
@ -495,7 +502,7 @@ public final class CollationElementIterator
if (!m_collator_.isUnsafe(ch)) { if (!m_collator_.isUnsafe(ch)) {
break; break;
} }
ch = m_source_.previous(); ch = (char)m_source_.previous();
} }
updateInternalState(); updateInternalState();
int prevoffset = 0; int prevoffset = 0;
@ -510,12 +517,12 @@ public final class CollationElementIterator
// direction code to prevent next and previous from returning a // direction code to prevent next and previous from returning a
// character if we are already at the ends // character if we are already at the ends
offset = m_source_.getIndex(); offset = m_source_.getIndex();
if (offset == m_source_.getBeginIndex()) { if (offset == 0/* m_source_.getBeginIndex() */) {
// preventing previous() from returning characters from the end of // preventing previous() from returning characters from the end of
// the string again if we are at the beginning // the string again if we are at the beginning
m_isForwards_ = false; m_isForwards_ = false;
} }
else if (offset == m_source_.getEndIndex()) { else if (offset == m_source_.getLength()) {
// preventing next() from returning characters from the start of // preventing next() from returning characters from the start of
// the string again if we are at the end // the string again if we are at the end
m_isForwards_ = true; m_isForwards_ = true;
@ -536,6 +543,22 @@ public final class CollationElementIterator
updateInternalState(); updateInternalState();
} }
/**
 * <p>Set a new source text for iteration, taken from the argument
 * iterator, and reset the offset to the beginning of the text.</p>
 * <p>Only the text returned by <code>source.getText()</code> is used; it
 * is handed to this object's own internal string iterator, so the argument
 * iterator itself is not retained and its current index is not carried
 * over.</p>
 * @param source the new source string iterator for iteration.
 * @draft ICU 2.8
 */
public void setText(UCharacterIterator source)
{
    // pull the text out first, then re-point the reusable utility iterator
    // at it instead of allocating a new iterator
    String text = source.getText();
    m_srcUtilIter_.setText(text);
    m_source_ = m_srcUtilIter_;
    updateInternalState();
}
/** /**
* <p>Set a new source string iterator for iteration, and reset the * <p>Set a new source string iterator for iteration, and reset the
* offset to the beginning of the text. * offset to the beginning of the text.
@ -545,8 +568,8 @@ public final class CollationElementIterator
*/ */
public void setText(CharacterIterator source) public void setText(CharacterIterator source)
{ {
m_source_ = source; m_source_ = new CharacterIteratorWrapper(source);
m_source_.setIndex(m_source_.getBeginIndex()); m_source_.setToStart();
updateInternalState(); updateInternalState();
} }
@ -568,10 +591,13 @@ public final class CollationElementIterator
if (that instanceof CollationElementIterator) { if (that instanceof CollationElementIterator) {
CollationElementIterator thatceiter CollationElementIterator thatceiter
= (CollationElementIterator)that; = (CollationElementIterator)that;
if (m_collator_.equals(thatceiter.m_collator_) if (!m_collator_.equals(thatceiter.m_collator_)) {
&& m_source_.equals(thatceiter.m_source_)) { return false;
return true;
} }
// checks the text
return m_source_.getIndex() == thatceiter.m_source_.getIndex()
&& m_source_.getText().equals(
thatceiter.m_source_.getText());
} }
return false; return false;
} }
@ -591,7 +617,7 @@ public final class CollationElementIterator
*/ */
CollationElementIterator(String source, RuleBasedCollator collator) CollationElementIterator(String source, RuleBasedCollator collator)
{ {
m_srcUtilIter_ = new StringCharacterIterator(source); m_srcUtilIter_ = new StringUCharacterIterator(source);
m_utilStringBuffer_ = new StringBuffer(); m_utilStringBuffer_ = new StringBuffer();
m_source_ = m_srcUtilIter_; m_source_ = m_srcUtilIter_;
m_collator_ = collator; m_collator_ = collator;
@ -615,9 +641,34 @@ public final class CollationElementIterator
CollationElementIterator(CharacterIterator source, CollationElementIterator(CharacterIterator source,
RuleBasedCollator collator) RuleBasedCollator collator)
{ {
m_srcUtilIter_ = new StringCharacterIterator(""); m_srcUtilIter_ = new StringUCharacterIterator();
m_utilStringBuffer_ = new StringBuffer(); m_utilStringBuffer_ = new StringBuffer();
m_source_ = source; m_source_ = new CharacterIteratorWrapper(source);
m_collator_ = collator;
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
m_buffer_ = new StringBuffer();
m_utilSpecialBackUp_ = new Backup();
updateInternalState();
}
/**
* <p>CollationElementIterator constructor. This takes a source
* character iterator and a RuleBasedCollator. The iterator will
* walk through the source string based on the rules defined by
* the collator. If the source string is empty, NULLORDER will be
* returned on the first call to next().</p>
*
* @param source the source string iterator.
* @param collator the RuleBasedCollator
* @draft ICU 2.2
*/
CollationElementIterator(UCharacterIterator source,
RuleBasedCollator collator)
{
m_srcUtilIter_ = new StringUCharacterIterator();
m_utilStringBuffer_ = new StringBuffer();
m_srcUtilIter_.setText(source.getText());
m_source_ = m_srcUtilIter_;
m_collator_ = collator; m_collator_ = collator;
m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
m_buffer_ = new StringBuffer(); m_buffer_ = new StringBuffer();
@ -717,7 +768,7 @@ public final class CollationElementIterator
* @param ch character to test * @param ch character to test
* @return true if ch is a Thai prevowel, false otherwise * @return true if ch is a Thai prevowel, false otherwise
*/ */
static final boolean isThaiPreVowel(char ch) static final boolean isThaiPreVowel(int ch)
{ {
return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4); return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4);
} }
@ -736,9 +787,10 @@ public final class CollationElementIterator
* @param source the new source string iterator for iteration. * @param source the new source string iterator for iteration.
* @param offset to the source * @param offset to the source
*/ */
void setText(CharacterIterator source, int offset) void setText(UCharacterIterator source, int offset)
{ {
m_source_ = source; m_srcUtilIter_.setText(source.getText());
m_source_ = m_srcUtilIter_;
m_source_.setIndex(offset); m_source_.setIndex(offset);
updateInternalState(); updateInternalState();
} }
@ -796,7 +848,7 @@ public final class CollationElementIterator
/** /**
* Source string iterator * Source string iterator
*/ */
private CharacterIterator m_source_; private UCharacterIterator m_source_;
/** /**
* This is position to the m_buffer_, -1 if iterator is not in m_buffer_ * This is position to the m_buffer_, -1 if iterator is not in m_buffer_
*/ */
@ -846,7 +898,7 @@ public final class CollationElementIterator
/** /**
* Utility * Utility
*/ */
private StringCharacterIterator m_srcUtilIter_; private StringUCharacterIterator m_srcUtilIter_;
private StringBuffer m_utilStringBuffer_; private StringBuffer m_utilStringBuffer_;
private StringBuffer m_utilSkippedBuffer_; private StringBuffer m_utilSkippedBuffer_;
private CollationElementIterator m_utilColEIter_; private CollationElementIterator m_utilColEIter_;
@ -950,7 +1002,7 @@ public final class CollationElementIterator
m_CEBufferOffset_ = 0; m_CEBufferOffset_ = 0;
m_CEBufferSize_ = 0; m_CEBufferSize_ = 0;
m_FCDLimit_ = -1; m_FCDLimit_ = -1;
m_FCDStart_ = m_source_.getEndIndex(); m_FCDStart_ = m_source_.getLength();
m_isHiragana4_ = m_collator_.m_isHiragana4_; m_isHiragana4_ = m_collator_.m_isHiragana4_;
m_isForwards_ = true; m_isForwards_ = true;
} }
@ -1022,8 +1074,7 @@ public final class CollationElementIterator
m_buffer_.setLength(0); m_buffer_.setLength(0);
m_source_.setIndex(m_FCDStart_); m_source_.setIndex(m_FCDStart_);
for (int i = 0; i < size; i ++) { for (int i = 0; i < size; i ++) {
m_buffer_.append(m_source_.current()); m_buffer_.append((char)m_source_.next());
m_source_.next();
} }
String decomp = Normalizer.decompose(m_buffer_.toString(), false); String decomp = Normalizer.decompose(m_buffer_.toString(), false);
m_buffer_.setLength(0); m_buffer_.setLength(0);
@ -1059,7 +1110,9 @@ public final class CollationElementIterator
// trie access // trie access
char fcd = NormalizerImpl.getFCD16(ch); char fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd m_source_.next();
ch = (char)m_source_.current();
// UCharacterIterator.DONE has 0 fcd
if (UTF16.isTrailSurrogate(ch)) { if (UTF16.isTrailSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
} else { } else {
@ -1073,14 +1126,17 @@ public final class CollationElementIterator
// The current char has a non-zero trailing CC. Scan forward until // The current char has a non-zero trailing CC. Scan forward until
// we find a char with a leading cc of zero. // we find a char with a leading cc of zero.
while (true) { while (true) {
ch = m_source_.next(); m_source_.next();
if (ch == CharacterIterator.DONE) { int ch_int = m_source_.current();
if (ch_int == UCharacterIterator.DONE) {
break; break;
} }
ch = (char)ch_int;
// trie access // trie access
fcd = NormalizerImpl.getFCD16(ch); fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
ch = m_source_.next(); m_source_.next();
ch = (char)m_source_.current();
if (UTF16.isTrailSurrogate(ch)) { if (UTF16.isTrailSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch);
} else { } else {
@ -1112,9 +1168,9 @@ public final class CollationElementIterator
* <p>Offsets are returned at the next character.</p> * <p>Offsets are returned at the next character.</p>
* @return next fcd character * @return next fcd character
*/ */
private char nextChar() private int nextChar()
{ {
char result; int result;
// loop handles the next character whether it is in the buffer or not. // loop handles the next character whether it is in the buffer or not.
if (m_bufferOffset_ < 0) { if (m_bufferOffset_ < 0) {
@ -1147,8 +1203,9 @@ public final class CollationElementIterator
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
// We need to peek at the next character in order to tell if we are // We need to peek at the next character in order to tell if we are
// FCD // FCD
char next = m_source_.next(); m_source_.next();
if (next == CharacterIterator.DONE int next = m_source_.current();
if (next == UCharacterIterator.DONE
|| next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { || next <= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
return result; // end of source string and if next character return result; // end of source string and if next character
// starts with a base character is always fcd. // starts with a base character is always fcd.
@ -1156,7 +1213,7 @@ public final class CollationElementIterator
} }
// Need a more complete FCD check and possible normalization. // Need a more complete FCD check and possible normalization.
if (!FCDCheck(result, startoffset)) { if (!FCDCheck((char)result, startoffset)) {
normalize(); normalize();
result = m_buffer_.charAt(0); result = m_buffer_.charAt(0);
m_bufferOffset_ = 1; m_bufferOffset_ = 1;
@ -1206,7 +1263,7 @@ public final class CollationElementIterator
else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) { else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) {
// note trail surrogate characters gets 0 fcd // note trail surrogate characters gets 0 fcd
char trailch = ch; char trailch = ch;
ch = m_source_.previous(); ch = (char)m_source_.previous();
if (UTF16.isLeadSurrogate(ch)) { if (UTF16.isLeadSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch); fcd = NormalizerImpl.getFCD16(ch);
if (fcd != 0) { if (fcd != 0) {
@ -1228,13 +1285,13 @@ public final class CollationElementIterator
if (offset == 0) { if (offset == 0) {
break; break;
} }
ch = m_source_.previous(); ch = (char)m_source_.previous();
if (!UTF16.isSurrogate(ch)) { if (!UTF16.isSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch); fcd = NormalizerImpl.getFCD16(ch);
} }
else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) { else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) {
char trail = ch; char trail = ch;
ch = m_source_.previous(); ch = (char)m_source_.previous();
if (UTF16.isLeadSurrogate(ch)) { if (UTF16.isLeadSurrogate(ch)) {
fcd = NormalizerImpl.getFCD16(ch); fcd = NormalizerImpl.getFCD16(ch);
} }
@ -1270,7 +1327,7 @@ public final class CollationElementIterator
* <p>Offsets are returned at the current character.</p> * <p>Offsets are returned at the current character.</p>
* @return previous fcd character * @return previous fcd character
*/ */
private char previousChar() private int previousChar()
{ {
if (m_bufferOffset_ >= 0) { if (m_bufferOffset_ >= 0) {
m_bufferOffset_ --; m_bufferOffset_ --;
@ -1280,10 +1337,10 @@ public final class CollationElementIterator
else { else {
// At the start of buffer, route back to string. // At the start of buffer, route back to string.
m_buffer_.setLength(0); m_buffer_.setLength(0);
if (m_FCDStart_ == m_source_.getBeginIndex()) { if (m_FCDStart_ == 0) {
m_FCDStart_ = -1; m_FCDStart_ = -1;
m_source_.setIndex(m_source_.getBeginIndex()); m_source_.setIndex(0);
return CharacterIterator.DONE; return UCharacterIterator.DONE;
} }
else { else {
m_FCDLimit_ = m_FCDStart_; m_FCDLimit_ = m_FCDStart_;
@ -1292,21 +1349,21 @@ public final class CollationElementIterator
} }
} }
} }
char result = m_source_.previous(); int result = m_source_.previous();
int startoffset = m_source_.getIndex(); int startoffset = m_source_.getIndex();
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|| m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) { || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
return result; return result;
} }
char ch = m_source_.previous(); int ch = m_source_.previous();
if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) { if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
// if previous character is FCD // if previous character is FCD
m_source_.next(); m_source_.next();
return result; return result;
} }
// Need a more complete FCD check and possible normalization. // Need a more complete FCD check and possible normalization.
if (!FCDCheckBackwards(result, startoffset)) { if (!FCDCheckBackwards((char)result, startoffset)) {
normalizeBackwards(); normalizeBackwards();
m_bufferOffset_ --; m_bufferOffset_ --;
result = m_buffer_.charAt(m_bufferOffset_); result = m_buffer_.charAt(m_bufferOffset_);
@ -1340,10 +1397,10 @@ public final class CollationElementIterator
} }
else { else {
// at end of buffer. check if fcd is at the end // at end of buffer. check if fcd is at the end
return m_FCDLimit_ == m_source_.getEndIndex(); return m_FCDLimit_ == m_source_.getLength();
} }
} }
return m_source_.getEndIndex() == m_source_.getIndex(); return m_source_.getLength() == m_source_.getIndex();
} }
/** /**
@ -1408,12 +1465,12 @@ public final class CollationElementIterator
// Note: this operation might activate the normalization buffer. We have to check for // Note: this operation might activate the normalization buffer. We have to check for
// that and act accordingly. // that and act accordingly.
m_FCDStart_ = m_source_.getIndex() - 1; m_FCDStart_ = m_source_.getIndex() - 1;
char thCh = nextChar(); char thCh = (char)nextChar();
int cp = thCh; int cp = thCh;
if (UTF16.isLeadSurrogate(thCh)) { if (UTF16.isLeadSurrogate(thCh)) {
if (!isEnd()) { if (!isEnd()) {
backupInternalState(m_utilSpecialBackUp_); backupInternalState(m_utilSpecialBackUp_);
char trailCh = nextChar(); char trailCh = (char)nextChar();
if (UTF16.isTrailSurrogate(trailCh)) { if (UTF16.isTrailSurrogate(trailCh)) {
cp = UCharacterProperty.getRawSupplementary( cp = UCharacterProperty.getRawSupplementary(
thCh, trailCh); thCh, trailCh);
@ -1582,7 +1639,7 @@ public final class CollationElementIterator
ce = collator.m_contractionCE_[offset]; ce = collator.m_contractionCE_[offset];
break; break;
} }
char previous = previousChar(); char previous = (char)previousChar();
while (previous > collator.m_contractionIndex_[offset]) { while (previous > collator.m_contractionIndex_[offset]) {
// contraction characters are ordered, skip smaller characters // contraction characters are ordered, skip smaller characters
offset ++; offset ++;
@ -1613,7 +1670,7 @@ public final class CollationElementIterator
// 3. schar is a trail surrogate in a valid surrogate // 3. schar is a trail surrogate in a valid surrogate
// sequence that is explicitly set to zero. // sequence that is explicitly set to zero.
if (!isBackwardsStart()) { if (!isBackwardsStart()) {
char lead = previousChar(); char lead = (char)previousChar();
if (UTF16.isLeadSurrogate(lead)) { if (UTF16.isLeadSurrogate(lead)) {
isZeroCE = collator.m_trie_.getLeadValue(lead); isZeroCE = collator.m_trie_.getLeadValue(lead);
if (RuleBasedCollator.getTag(isZeroCE) if (RuleBasedCollator.getTag(isZeroCE)
@ -1706,12 +1763,11 @@ public final class CollationElementIterator
* Returns the current character for forward iteration * Returns the current character for forward iteration
* @return current character * @return current character
*/ */
private char currentChar() private int currentChar()
{ {
if (m_bufferOffset_ < 0) { if (m_bufferOffset_ < 0) {
char result = m_source_.previous(); m_source_.previous();
m_source_.next(); return m_source_.next();
return result;
} }
// m_bufferOffset_ is never 0 in normal circumstances except after a // m_bufferOffset_ is never 0 in normal circumstances except after a
@ -1740,8 +1796,8 @@ public final class CollationElementIterator
else { else {
m_utilSkippedBuffer_.setLength(0); m_utilSkippedBuffer_.setLength(0);
} }
char ch = currentChar(); char ch = (char)currentChar();
m_utilSkippedBuffer_.append(currentChar()); m_utilSkippedBuffer_.append((char)currentChar());
// accent after the first character // accent after the first character
if (m_utilSpecialDiscontiguousBackUp_ == null) { if (m_utilSpecialDiscontiguousBackUp_ == null) {
m_utilSpecialDiscontiguousBackUp_ = new Backup(); m_utilSpecialDiscontiguousBackUp_ = new Backup();
@ -1750,14 +1806,15 @@ public final class CollationElementIterator
char nextch = ch; char nextch = ch;
while (true) { while (true) {
ch = nextch; ch = nextch;
nextch = nextChar(); int ch_int = nextChar();
if (nextch == CharacterIterator.DONE nextch = (char)ch_int;
if (ch_int == UCharacterIterator.DONE
|| getCombiningClass(nextch) == 0) { || getCombiningClass(nextch) == 0) {
// if there are no more accents to move around // if there are no more accents to move around
// we don't have to shift previousChar, since we are resetting // we don't have to shift previousChar, since we are resetting
// the offset later // the offset later
if (multicontraction) { if (multicontraction) {
if (nextch != CharacterIterator.DONE) { if (ch_int != UCharacterIterator.DONE) {
previousChar(); // backtrack previousChar(); // backtrack
} }
setDiscontiguous(m_utilSkippedBuffer_); setDiscontiguous(m_utilSkippedBuffer_);
@ -1836,7 +1893,7 @@ public final class CollationElementIterator
byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF); byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF);
// checks if all characters have the same combining class // checks if all characters have the same combining class
byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8); byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
char ch = nextChar(); char ch = (char)nextChar();
offset ++; offset ++;
while (ch > collator.m_contractionIndex_[offset]) { while (ch > collator.m_contractionIndex_[offset]) {
// contraction characters are ordered, skip all smaller // contraction characters are ordered, skip all smaller
@ -1859,7 +1916,7 @@ public final class CollationElementIterator
else if (UTF16.isLeadSurrogate(ch)) { else if (UTF16.isLeadSurrogate(ch)) {
if (!isEnd()) { if (!isEnd()) {
backupInternalState(m_utilSpecialBackUp_); backupInternalState(m_utilSpecialBackUp_);
char trail = nextChar(); char trail = (char)nextChar();
if (UTF16.isTrailSurrogate(trail)) { if (UTF16.isTrailSurrogate(trail)) {
// do stuff with trail // do stuff with trail
if (RuleBasedCollator.getTag(isZeroCE) if (RuleBasedCollator.getTag(isZeroCE)
@ -1901,10 +1958,11 @@ public final class CollationElementIterator
else { else {
// Contraction is possibly discontiguous. // Contraction is possibly discontiguous.
// find the next character if ch is not a base character // find the next character if ch is not a base character
char nextch = nextChar(); int ch_int = nextChar();
if (nextch != CharacterIterator.DONE) { if (ch_int != UCharacterIterator.DONE) {
previousChar(); previousChar();
} }
char nextch = (char)ch_int;
if (getCombiningClass(nextch) == 0) { if (getCombiningClass(nextch) == 0) {
previousChar(); previousChar();
// base character not part of discontiguous contraction // base character not part of discontiguous contraction
@ -2098,11 +2156,11 @@ public final class CollationElementIterator
// Get next character. // Get next character.
if (!isEnd()){ if (!isEnd()){
backupInternalState(m_utilSpecialBackUp_); backupInternalState(m_utilSpecialBackUp_);
char ch = nextChar(); int char32 = nextChar();
int char32 = ch; char ch = (char)char32;
if (UTF16.isLeadSurrogate(ch)){ if (UTF16.isLeadSurrogate(ch)){
if (!isEnd()) { if (!isEnd()) {
char trail = nextChar(); char trail = (char)nextChar();
if (UTF16.isTrailSurrogate(trail)) { if (UTF16.isTrailSurrogate(trail)) {
char32 = UCharacterProperty.getRawSupplementary( char32 = UCharacterProperty.getRawSupplementary(
ch, trail); ch, trail);
@ -2227,8 +2285,9 @@ public final class CollationElementIterator
*/ */
private int nextSurrogate(char ch) private int nextSurrogate(char ch)
{ {
char nextch = nextChar(); int ch_int = nextChar();
if (nextch != CharacterIterator.DONE && char nextch = (char)ch_int;
if (ch_int != UCharacterIterator.DONE &&
UTF16.isTrailSurrogate(nextch)) { UTF16.isTrailSurrogate(nextch)) {
int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch); int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch);
return nextImplicit(codepoint); return nextImplicit(codepoint);
@ -2330,7 +2389,7 @@ public final class CollationElementIterator
return IGNORABLE; return IGNORABLE;
} }
backupInternalState(m_utilSpecialBackUp_); backupInternalState(m_utilSpecialBackUp_);
char trail = nextChar(); char trail = (char)nextChar();
ce = nextSurrogate(collator, ce, trail); ce = nextSurrogate(collator, ce, trail);
// calculate the supplementary code point value, // calculate the supplementary code point value,
// if surrogate was not tailored we go one more round // if surrogate was not tailored we go one more round
@ -2403,10 +2462,10 @@ public final class CollationElementIterator
// check that ch is from the normalization buffer or not // check that ch is from the normalization buffer or not
boolean innorm = m_bufferOffset_ >= 0; boolean innorm = m_bufferOffset_ >= 0;
char prevch = previousChar(); int prevch = previousChar();
if (!isThaiPreVowel(prevch)) { if (!isThaiPreVowel(prevch)) {
// we now rearrange unconditionally do not check for base consonant // we now rearrange unconditionally do not check for base consonant
if (prevch != CharacterIterator.DONE) { if (prevch != UCharacterIterator.DONE) {
nextChar(); nextChar();
} }
// Treat Thai as a length one expansion // Treat Thai as a length one expansion
@ -2445,10 +2504,10 @@ public final class CollationElementIterator
m_FCDLimit_ = m_FCDStart_ + 2; m_FCDLimit_ = m_FCDStart_ + 2;
} }
if (reorder) { if (reorder) {
m_buffer_.insert(1, prevch); m_buffer_.insert(1, (char)prevch);
} }
else { else {
m_buffer_.insert(0, prevch); m_buffer_.insert(0, (char)prevch);
} }
return IGNORABLE; return IGNORABLE;
} }
@ -2475,7 +2534,7 @@ public final class CollationElementIterator
ce = collator.m_contractionCE_[offset]; ce = collator.m_contractionCE_[offset];
break; break;
} }
char prevch = previousChar(); char prevch = (char)previousChar();
while (prevch > collator.m_contractionIndex_[offset]) { while (prevch > collator.m_contractionIndex_[offset]) {
// since contraction codepoints are ordered, we skip all that // since contraction codepoints are ordered, we skip all that
// are smaller // are smaller
@ -2505,7 +2564,7 @@ public final class CollationElementIterator
// 3. schar is a trail surrogate in a valid surrogate // 3. schar is a trail surrogate in a valid surrogate
// sequence that is explicitly set to zero. // sequence that is explicitly set to zero.
if (!isBackwardsStart()) { if (!isBackwardsStart()) {
char lead = previousChar(); char lead = (char)previousChar();
if (UTF16.isLeadSurrogate(lead)) { if (UTF16.isLeadSurrogate(lead)) {
isZeroCE = collator.m_trie_.getLeadValue(lead); isZeroCE = collator.m_trie_.getLeadValue(lead);
if (RuleBasedCollator.getTag(isZeroCE) if (RuleBasedCollator.getTag(isZeroCE)
@ -2563,7 +2622,7 @@ public final class CollationElementIterator
m_utilStringBuffer_.setLength(0); m_utilStringBuffer_.setLength(0);
// since we might encounter normalized characters (from the thai // since we might encounter normalized characters (from the thai
// processing) we can't use peekCharacter() here. // processing) we can't use peekCharacter() here.
char prevch = previousChar(); char prevch = (char)previousChar();
boolean atStart = false; boolean atStart = false;
while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) { while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
m_utilStringBuffer_.insert(0, ch); m_utilStringBuffer_.insert(0, ch);
@ -2572,7 +2631,7 @@ public final class CollationElementIterator
atStart = true; atStart = true;
break; break;
} }
prevch = previousChar(); prevch = (char)previousChar();
} }
if (!atStart) { if (!atStart) {
// undo the previousChar() if we didn't reach the beginning // undo the previousChar() if we didn't reach the beginning
@ -2692,7 +2751,7 @@ public final class CollationElementIterator
int char32 = ch; int char32 = ch;
if (UTF16.isTrailSurrogate(ch)) { if (UTF16.isTrailSurrogate(ch)) {
if (!isBackwardsStart()){ if (!isBackwardsStart()){
char lead = previousChar(); char lead = (char)previousChar();
if (UTF16.isLeadSurrogate(lead)) { if (UTF16.isLeadSurrogate(lead)) {
char32 = UCharacterProperty.getRawSupplementary(lead, char32 = UCharacterProperty.getRawSupplementary(lead,
ch); ch);
@ -2753,11 +2812,11 @@ public final class CollationElementIterator
if (!isBackwardsStart()){ if (!isBackwardsStart()){
backupInternalState(m_utilSpecialBackUp_); backupInternalState(m_utilSpecialBackUp_);
ch = previousChar(); char32 = previousChar();
char32 = ch; ch = (char)char32;
if (UTF16.isTrailSurrogate(ch)){ if (UTF16.isTrailSurrogate(ch)){
if (!isBackwardsStart()) { if (!isBackwardsStart()) {
char lead = previousChar(); char lead = (char)previousChar();
if (UTF16.isLeadSurrogate(lead)) { if (UTF16.isLeadSurrogate(lead)) {
char32 char32
= UCharacterProperty.getRawSupplementary( = UCharacterProperty.getRawSupplementary(
@ -2926,7 +2985,7 @@ public final class CollationElementIterator
// we are at the start of the string, wrong place to be at // we are at the start of the string, wrong place to be at
return IGNORABLE; return IGNORABLE;
} }
char prevch = previousChar(); char prevch = (char)previousChar();
// Handles Han and Supplementary characters here. // Handles Han and Supplementary characters here.
if (UTF16.isLeadSurrogate(prevch)) { if (UTF16.isLeadSurrogate(prevch)) {
return previousImplicit( return previousImplicit(
@ -3099,12 +3158,12 @@ public final class CollationElementIterator
if (offset != 0) { if (offset != 0) {
int currentoffset = m_source_.getIndex(); int currentoffset = m_source_.getIndex();
m_source_.setIndex(currentoffset + offset); m_source_.setIndex(currentoffset + offset);
char result = m_source_.current(); char result = (char)m_source_.current();
m_source_.setIndex(currentoffset); m_source_.setIndex(currentoffset);
return result; return result;
} }
else { else {
return m_source_.current(); return (char)m_source_.current();
} }
} }

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
* $Date: 2003/09/22 06:24:20 $ * $Date: 2003/10/08 21:51:44 $
* $Revision: 1.47 $ * $Revision: 1.48 $
* *
******************************************************************************* *******************************************************************************
*/ */
@ -19,7 +19,6 @@ import java.util.Locale;
import java.util.ResourceBundle; import java.util.ResourceBundle;
import java.util.Arrays; import java.util.Arrays;
import java.text.CharacterIterator; import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.VersionInfo; import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.impl.IntTrie; import com.ibm.icu.impl.IntTrie;
@ -28,6 +27,7 @@ import com.ibm.icu.impl.ICULocaleData;
import com.ibm.icu.impl.BOCU; import com.ibm.icu.impl.BOCU;
import com.ibm.icu.impl.Utility; import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.ICUDebug; import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.impl.StringUCharacterIterator;
/** /**
* <p>RuleBasedCollator is a concrete subclass of Collator. It allows * <p>RuleBasedCollator is a concrete subclass of Collator. It allows
@ -256,6 +256,19 @@ public final class RuleBasedCollator extends Collator
return new CollationElementIterator(newsource, this); return new CollationElementIterator(newsource, this);
} }
/**
 * Creates a CollationElementIterator that walks the text supplied by the
 * given UCharacterIterator, bound to this collator.
 * The caller's iterator is documented as keeping its integrity because a
 * new copy is made for internal use; the copying is not performed in this
 * method and is presumably done by the CollationElementIterator
 * constructor (confirm there).
 * @param source iterator over the text to be collated
 * @return a new CollationElementIterator constructed from source and this
 *         collator
 * @see CollationElementIterator
 * @draft ICU 2.8
 */
public CollationElementIterator getCollationElementIterator(
        UCharacterIterator source)
{
    CollationElementIterator elementIterator
        = new CollationElementIterator(source, this);
    return elementIterator;
}
// public setters -------------------------------------------------------- // public setters --------------------------------------------------------
/** /**
@ -1733,7 +1746,8 @@ public final class RuleBasedCollator extends Collator
} }
if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) { if (UTF16.isLeadSurrogate(ch)
|| UTF16.isTrailSurrogate(ch)) {
// Trail surrogate are always considered unsafe. // Trail surrogate are always considered unsafe.
return true; return true;
} }
@ -1966,9 +1980,9 @@ public final class RuleBasedCollator extends Collator
/** /**
* Bunch of utility iterators * Bunch of utility iterators
*/ */
private StringCharacterIterator m_srcUtilIter_; private StringUCharacterIterator m_srcUtilIter_;
private CollationElementIterator m_srcUtilColEIter_; private CollationElementIterator m_srcUtilColEIter_;
private StringCharacterIterator m_tgtUtilIter_; private StringUCharacterIterator m_tgtUtilIter_;
private CollationElementIterator m_tgtUtilColEIter_; private CollationElementIterator m_tgtUtilColEIter_;
/** /**
* Utility comparison flags * Utility comparison flags
@ -3787,9 +3801,9 @@ public final class RuleBasedCollator extends Collator
* Initializes utility iterators and byte buffer used by compare * Initializes utility iterators and byte buffer used by compare
*/ */
private final void initUtility() { private final void initUtility() {
m_srcUtilIter_ = new StringCharacterIterator(new String("")); m_srcUtilIter_ = new StringUCharacterIterator();
m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this); m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);
m_tgtUtilIter_ = new StringCharacterIterator(new String("")); m_tgtUtilIter_ = new StringUCharacterIterator();
m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this); m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);
m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary

View File

@ -5,8 +5,8 @@
******************************************************************************* *******************************************************************************
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $
* $Date: 2003/07/31 19:51:12 $ * $Date: 2003/10/08 21:51:44 $
* $Revision: 1.25 $ * $Revision: 1.26 $
* *
***************************************************************************************** *****************************************************************************************
*/ */
@ -17,6 +17,7 @@ import java.text.CharacterIterator;
import java.text.StringCharacterIterator; import java.text.StringCharacterIterator;
import java.util.Locale; import java.util.Locale;
import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.impl.CharacterIteratorWrapper;
import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.NormalizerImpl;
/** /**
@ -1088,7 +1089,8 @@ public final class StringSearch extends SearchIterator
|| breakIterator.following(end - 1) == end); || breakIterator.following(end - 1) == end);
if (result) { if (result) {
// iterates the individual ces // iterates the individual ces
m_utilColEIter_.setText(targetText, start); m_utilColEIter_.setText(
new CharacterIteratorWrapper(targetText), start);
for (int count = 0; count < m_pattern_.m_CELength_; for (int count = 0; count < m_pattern_.m_CELength_;
count ++) { count ++) {
int ce = getCE(m_utilColEIter_.next()); int ce = getCE(m_utilColEIter_.next());