ICU-1897
Bugs fixed: * Japanese case level * more stringent checks on the CE buffer size. X-SVN-Rev: 8665
This commit is contained in:
parent
6b4f81d44d
commit
076095ae7e
@ -800,12 +800,14 @@ public final class CollationElementIterator
|
||||
* <li>The leading combining class from the current character is 0 or the
|
||||
* trailing combining class of the previous char was zero.
|
||||
* </ul>
|
||||
* Incoming source offsets points to the next processing character.
|
||||
* Incoming source offsets points to the current processing character.
|
||||
* On return, the source offset points to the current processing character.
|
||||
* </p>
|
||||
* @param ch current character
|
||||
* @param offset current character offset
|
||||
* @return true if FCDCheck passes, false otherwise
|
||||
*/
|
||||
private boolean FCDCheck()
|
||||
private boolean FCDCheck(char ch, int offset)
|
||||
{
|
||||
boolean result = true;
|
||||
|
||||
@ -813,8 +815,8 @@ public final class CollationElementIterator
|
||||
|
||||
// Get the trailing combining class of the current character.
|
||||
// If it's zero, we are OK.
|
||||
char ch = m_source_.previous();
|
||||
m_FCDStart_ = m_source_.getIndex();
|
||||
m_FCDStart_ = offset;
|
||||
m_source_.setIndex(offset);
|
||||
// trie access
|
||||
char fcd = 0; // synwee todo: unorm_getFCD16(ch);
|
||||
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
|
||||
@ -860,6 +862,7 @@ public final class CollationElementIterator
|
||||
}
|
||||
}
|
||||
m_source_.setIndex(m_FCDStart_);
|
||||
m_source_.next();
|
||||
m_FCDLimit_ = m_source_.getIndex();
|
||||
return result;
|
||||
}
|
||||
@ -873,7 +876,8 @@ public final class CollationElementIterator
|
||||
private char nextChar()
|
||||
{
|
||||
char result;
|
||||
// loop handles the next character whether it is in the buffer or not.
|
||||
int startoffset = m_source_.getIndex();
|
||||
// loop handles the next character whether it is in the buffer or not.
|
||||
if (m_bufferOffset_ == -1) {
|
||||
// we're working on the source and not normalizing. fast path.
|
||||
// note Thai pre-vowel reordering uses buffer too
|
||||
@ -890,22 +894,24 @@ public final class CollationElementIterator
|
||||
m_buffer_.delete(0, m_buffer_.length());
|
||||
return nextChar();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION
|
||||
|| m_bufferOffset_ != -1 || m_FCDLimit_ > m_source_.getIndex()
|
||||
|| m_bufferOffset_ != -1 || m_FCDLimit_ > startoffset
|
||||
// skip the fcd checks
|
||||
|| result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
|
||||
// Fast fcd safe path. trail combining class == 0.
|
||||
) {
|
||||
m_source_.next();
|
||||
|
||||
m_source_.next();
|
||||
return result;
|
||||
}
|
||||
|
||||
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||
// We need to peek at the next character in order to tell if we are
|
||||
// FCD
|
||||
char next = m_source_.next();
|
||||
char next = m_source_.next();
|
||||
if (next == CharacterIterator.DONE
|
||||
|| next == LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
|
||||
return result; // end of source string and if next character
|
||||
@ -914,18 +920,17 @@ public final class CollationElementIterator
|
||||
}
|
||||
|
||||
// Need a more complete FCD check and possible normalization.
|
||||
if (!FCDCheck()) {
|
||||
if (!FCDCheck(result, startoffset)) {
|
||||
normalize();
|
||||
result = m_buffer_.charAt(0);
|
||||
m_bufferOffset_ = 1;
|
||||
}
|
||||
m_source_.next();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Incremental normalization, this is an essential optimization.
|
||||
*7 Assuming FCD checks has been done, normalize the non-FCD characters into
|
||||
* Assuming the FCD checks have been done, normalize the non-FCD characters into
|
||||
* the buffer.
|
||||
* The source offset points to the current processing character.</p>
|
||||
*/
|
||||
@ -959,14 +964,16 @@ public final class CollationElementIterator
|
||||
* On input, the source offset points to the previous character.
|
||||
* On return, the source offset points to the current processing character.
|
||||
* </p>
|
||||
* @param ch current character
|
||||
* @param offset current character offset
|
||||
* @return true if FCDCheck passes, false otherwise
|
||||
*/
|
||||
private boolean FCDCheckBackwards()
|
||||
*/
|
||||
private boolean FCDCheckBackwards(char ch, int offset)
|
||||
{
|
||||
boolean result = true;
|
||||
char ch = m_source_.next();
|
||||
char fcd = 0;
|
||||
m_FCDLimit_ = m_source_.getIndex();
|
||||
m_FCDLimit_ = offset;
|
||||
m_source_.setIndex(offset);
|
||||
if (!UTF16.isSurrogate(ch)) {
|
||||
fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c);
|
||||
}
|
||||
@ -1053,10 +1060,10 @@ public final class CollationElementIterator
|
||||
}
|
||||
}
|
||||
char result = m_source_.previous();
|
||||
int startoffset = m_source_.getIndex();
|
||||
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|
||||
|| m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION
|
||||
|| m_FCDStart_ <= m_source_.getIndex()
|
||||
|| m_source_.getIndex() == 0) {
|
||||
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
|
||||
return result;
|
||||
}
|
||||
char ch = m_source_.previous();
|
||||
@ -1066,7 +1073,7 @@ public final class CollationElementIterator
|
||||
return result;
|
||||
}
|
||||
// Need a more complete FCD check and possible normalization.
|
||||
if (!FCDCheckBackwards()) {
|
||||
if (!FCDCheckBackwards(ch, startoffset)) {
|
||||
normalizeBackwards();
|
||||
m_bufferOffset_ --;
|
||||
result = m_buffer_.charAt(m_bufferOffset_);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
||||
* $Date: 2002/05/20 23:43:01 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/05/22 01:14:38 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -809,7 +809,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
}
|
||||
}
|
||||
|
||||
if (compare[4]) { // checkQuad
|
||||
if (doShift4) { // checkQuad
|
||||
result = doQuaternaryCompare(cebuffer, lowestpvalue);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
@ -1320,7 +1320,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
protected void updateInternalState()
|
||||
{
|
||||
if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
|
||||
m_caseSwitch_ = CASE_SWITCH_;
|
||||
m_caseSwitch_ = (byte)CASE_SWITCH_;
|
||||
}
|
||||
else {
|
||||
m_caseSwitch_ = NO_CASE_SWITCH_;
|
||||
@ -1334,7 +1334,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
m_bottom3_ = COMMON_BOTTOM_3_;
|
||||
}
|
||||
else {
|
||||
m_mask3_ = CE_KEEP_CASE_;
|
||||
m_mask3_ = (byte)CE_KEEP_CASE_;
|
||||
m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
|
||||
if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
|
||||
m_common3_ = COMMON_UPPER_FIRST_3_;
|
||||
@ -1518,18 +1518,18 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
/**
|
||||
* Case first constants
|
||||
*/
|
||||
private static final byte CASE_SWITCH_ = (byte)0xC0;
|
||||
private static final byte NO_CASE_SWITCH_ = 0;
|
||||
private static final int CASE_SWITCH_ = 0xC0;
|
||||
private static final int NO_CASE_SWITCH_ = 0;
|
||||
/**
|
||||
* Case level constants
|
||||
*/
|
||||
private static final byte CE_REMOVE_CASE_ = (byte)0x3F;
|
||||
private static final byte CE_KEEP_CASE_ = (byte)0xFF;
|
||||
private static final int CE_REMOVE_CASE_ = 0x3F;
|
||||
private static final int CE_KEEP_CASE_ = 0xFF;
|
||||
/**
|
||||
* Case strength mask
|
||||
*/
|
||||
private static final byte CE_CASE_BIT_MASK_ = (byte)0xC0;
|
||||
private static final byte CE_CASE_MASK_3_ = (byte)0xFF;
|
||||
private static final int CE_CASE_BIT_MASK_ = 0xC0;
|
||||
private static final int CE_CASE_MASK_3_ = 0xFF;
|
||||
/**
|
||||
* Sortkey size factor. Values can be changed.
|
||||
*/
|
||||
@ -2320,7 +2320,8 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
if (bytes[1].length <= bytescount[1] + isize) {
|
||||
bytes[1] = increase(bytes[1], bytescount[1], 1 + isize);
|
||||
}
|
||||
BOSCU.writeIdenticalLevelRun(source, bytes[1], bytescount[1]);
|
||||
bytescount[1] = BOSCU.writeIdenticalLevelRun(source, bytes[1],
|
||||
bytescount[1]);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2533,16 +2534,20 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
{
|
||||
// if we reach here, the ce offset accessed is the last ce
|
||||
// appended to the buffer
|
||||
boolean isNullOrder = (cebuffer[0][cebuffersize[0] - 1]
|
||||
== CollationElementIterator.NULLORDER);
|
||||
|
||||
boolean isSourceNullOrder = (cebuffer[0][cebuffersize[0] - 1]
|
||||
== CollationElementIterator.NULLORDER);
|
||||
boolean isTargetNullOrder = (cebuffer[1][cebuffersize[1] - 1]
|
||||
== CollationElementIterator.NULLORDER);
|
||||
cebuffer[0] = null;
|
||||
cebuffer[1] = null;
|
||||
cebuffersize[0] = 0;
|
||||
cebuffersize[1] = 0;
|
||||
if (isNullOrder) {
|
||||
if (isSourceNullOrder) {
|
||||
return -1;
|
||||
}
|
||||
if (isTargetNullOrder) {
|
||||
return 1;
|
||||
}
|
||||
// getting rid of the sign
|
||||
sorder >>>= CE_PRIMARY_SHIFT_;
|
||||
torder >>>= CE_PRIMARY_SHIFT_;
|
||||
@ -2685,6 +2690,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return -1;
|
||||
}
|
||||
if (cebuffer[1][toffset - 1] ==
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return 1;
|
||||
}
|
||||
return (sorder < torder) ? -1 : 1;
|
||||
}
|
||||
}
|
||||
@ -2696,11 +2705,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
int sorder = getSecondaryFrenchCE(cebuffer, offset,
|
||||
continuationoffset, 0);
|
||||
int torder = getSecondaryFrenchCE(cebuffer, offset,
|
||||
continuationoffset,1);
|
||||
continuationoffset, 1);
|
||||
if (sorder == torder) {
|
||||
if (cebuffer[0][offset[0] - 1]
|
||||
== CollationElementIterator.NULLORDER
|
||||
|| (offset[0] < 0 && offset[1] < 0)) {
|
||||
if ((offset[0] < 0 && offset[1] < 0)
|
||||
|| cebuffer[0][offset[0]]
|
||||
== CollationElementIterator.NULLORDER) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -2729,17 +2738,19 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
while (result == CollationElementIterator.IGNORABLE
|
||||
&& offset[index] >= 0) {
|
||||
if (continuationoffset[index] == 0) {
|
||||
while (isContinuation(cebuffer[0][offset[index] --]));
|
||||
// after this, sorder is at the start of continuation,
|
||||
// and offset points before that
|
||||
if (isContinuation(cebuffer[0][offset[index] + 1])) {
|
||||
// save offset for later
|
||||
continuationoffset[index] = offset[index];
|
||||
offset[index] += 2;
|
||||
}
|
||||
result = cebuffer[index][offset[index]];
|
||||
while (isContinuation(cebuffer[index][offset[index] --]));
|
||||
// after this, sorder is at the start of continuation,
|
||||
// and offset points before that
|
||||
if (isContinuation(cebuffer[index][offset[index] + 1])) {
|
||||
// save offset for later
|
||||
continuationoffset[index] = offset[index];
|
||||
offset[index] += 2;
|
||||
}
|
||||
//}
|
||||
}
|
||||
else {
|
||||
result = cebuffer[0][offset[index] ++];
|
||||
result = cebuffer[index][offset[index] ++];
|
||||
if (!isContinuation(result)) {
|
||||
// we have finished with this continuation
|
||||
offset[index] = continuationoffset[index];
|
||||
@ -2780,7 +2791,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
while ((torder & CE_REMOVE_CASE_)
|
||||
== CollationElementIterator.IGNORABLE) {
|
||||
torder = cebuffer[1][toffset ++];
|
||||
if (!isContinuation(sorder)) {
|
||||
if (!isContinuation(torder)) {
|
||||
torder &= CE_CASE_MASK_3_;
|
||||
torder ^= m_caseSwitch_;
|
||||
}
|
||||
@ -2853,6 +2864,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return -1;
|
||||
}
|
||||
if (cebuffer[1][toffset - 1] ==
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return 1;
|
||||
}
|
||||
return (sorder < torder) ? -1 : 1;
|
||||
}
|
||||
}
|
||||
@ -2927,6 +2942,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return -1;
|
||||
}
|
||||
if (cebuffer[1][toffset - 1] ==
|
||||
CollationElementIterator.NULLORDER) {
|
||||
return 1;
|
||||
}
|
||||
return (sorder < torder) ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user