Bugs fixed:
* Japanese case level handling
* More stringent checks on the collation element (CE) buffer size

X-SVN-Rev: 8665
This commit is contained in:
Syn Wee Quek 2002-05-22 01:14:38 +00:00
parent 6b4f81d44d
commit 076095ae7e
2 changed files with 75 additions and 49 deletions

View File

@ -800,12 +800,14 @@ public final class CollationElementIterator
* <li>The leading combining class from the current character is 0 or the
* trailing combining class of the previous char was zero.
* </ul>
* Incoming source offsets points to the next processing character.
* Incoming source offsets points to the current processing character.
* Return source offsets points to the current processing character.
* </p>
* @param ch current character
* @param offset current character offset
* @return true if FCDCheck passes, false otherwise
*/
private boolean FCDCheck()
private boolean FCDCheck(char ch, int offset)
{
boolean result = true;
@ -813,8 +815,8 @@ public final class CollationElementIterator
// Get the trailing combining class of the current character.
// If it's zero, we are OK.
char ch = m_source_.previous();
m_FCDStart_ = m_source_.getIndex();
m_FCDStart_ = offset;
m_source_.setIndex(offset);
// trie access
char fcd = 0; // synwee todo: unorm_getFCD16(ch);
if (fcd != 0 && UTF16.isLeadSurrogate(ch)) {
@ -860,6 +862,7 @@ public final class CollationElementIterator
}
}
m_source_.setIndex(m_FCDStart_);
m_source_.next();
m_FCDLimit_ = m_source_.getIndex();
return result;
}
@ -873,7 +876,8 @@ public final class CollationElementIterator
private char nextChar()
{
char result;
// loop handles the next character whether it is in the buffer or not.
int startoffset = m_source_.getIndex();
// loop handles the next character whether it is in the buffer or not.
if (m_bufferOffset_ == -1) {
// we're working on the source and not normalizing. fast path.
// note Thai pre-vowel reordering uses buffer too
@ -890,22 +894,24 @@ public final class CollationElementIterator
m_buffer_.delete(0, m_buffer_.length());
return nextChar();
}
return result;
}
if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION
|| m_bufferOffset_ != -1 || m_FCDLimit_ > m_source_.getIndex()
|| m_bufferOffset_ != -1 || m_FCDLimit_ > startoffset
// skip the fcd checks
|| result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
// Fast fcd safe path. trail combining class == 0.
) {
m_source_.next();
m_source_.next();
return result;
}
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
// We need to peek at the next character in order to tell if we are
// FCD
char next = m_source_.next();
char next = m_source_.next();
if (next == CharacterIterator.DONE
|| next == LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
return result; // end of source string and if next character
@ -914,18 +920,17 @@ public final class CollationElementIterator
}
// Need a more complete FCD check and possible normalization.
if (!FCDCheck()) {
if (!FCDCheck(result, startoffset)) {
normalize();
result = m_buffer_.charAt(0);
m_bufferOffset_ = 1;
}
m_source_.next();
return result;
}
/**
* <p>Incremental normalization, this is an essential optimization.
*7 Assuming FCD checks has been done, normalize the non-FCD characters into
* Assuming FCD checks has been done, normalize the non-FCD characters into
* the buffer.
* Source offsets points to the current processing character.</p>
*/
@ -959,14 +964,16 @@ public final class CollationElementIterator
* Input source offsets points to the previous character.
* Return source offsets points to the current processing character.
* </p>
* @param ch current character
* @param offset current character offset
* @return true if FCDCheck passes, false otherwise
*/
private boolean FCDCheckBackwards()
*/
private boolean FCDCheckBackwards(char ch, int offset)
{
boolean result = true;
char ch = m_source_.next();
char fcd = 0;
m_FCDLimit_ = m_source_.getIndex();
m_FCDLimit_ = offset;
m_source_.setIndex(offset);
if (!UTF16.isSurrogate(ch)) {
fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c);
}
@ -1053,10 +1060,10 @@ public final class CollationElementIterator
}
}
char result = m_source_.previous();
int startoffset = m_source_.getIndex();
if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
|| m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION
|| m_FCDStart_ <= m_source_.getIndex()
|| m_source_.getIndex() == 0) {
|| m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
return result;
}
char ch = m_source_.previous();
@ -1066,7 +1073,7 @@ public final class CollationElementIterator
return result;
}
// Need a more complete FCD check and possible normalization.
if (!FCDCheckBackwards()) {
if (!FCDCheckBackwards(ch, startoffset)) {
normalizeBackwards();
m_bufferOffset_ --;
result = m_buffer_.charAt(m_bufferOffset_);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
* $Date: 2002/05/20 23:43:01 $
* $Revision: 1.6 $
* $Date: 2002/05/22 01:14:38 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -809,7 +809,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
}
}
if (compare[4]) { // checkQuad
if (doShift4) { // checkQuad
result = doQuaternaryCompare(cebuffer, lowestpvalue);
if (result != 0) {
return result;
@ -1320,7 +1320,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
protected void updateInternalState()
{
if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
m_caseSwitch_ = CASE_SWITCH_;
m_caseSwitch_ = (byte)CASE_SWITCH_;
}
else {
m_caseSwitch_ = NO_CASE_SWITCH_;
@ -1334,7 +1334,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
m_bottom3_ = COMMON_BOTTOM_3_;
}
else {
m_mask3_ = CE_KEEP_CASE_;
m_mask3_ = (byte)CE_KEEP_CASE_;
m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
m_common3_ = COMMON_UPPER_FIRST_3_;
@ -1518,18 +1518,18 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
/**
* Case first constants
*/
private static final byte CASE_SWITCH_ = (byte)0xC0;
private static final byte NO_CASE_SWITCH_ = 0;
private static final int CASE_SWITCH_ = 0xC0;
private static final int NO_CASE_SWITCH_ = 0;
/**
* Case level constants
*/
private static final byte CE_REMOVE_CASE_ = (byte)0x3F;
private static final byte CE_KEEP_CASE_ = (byte)0xFF;
private static final int CE_REMOVE_CASE_ = 0x3F;
private static final int CE_KEEP_CASE_ = 0xFF;
/**
* Case strength mask
*/
private static final byte CE_CASE_BIT_MASK_ = (byte)0xC0;
private static final byte CE_CASE_MASK_3_ = (byte)0xFF;
private static final int CE_CASE_BIT_MASK_ = 0xC0;
private static final int CE_CASE_MASK_3_ = 0xFF;
/**
* Sortkey size factor. Values can be changed.
*/
@ -2320,7 +2320,8 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
if (bytes[1].length <= bytescount[1] + isize) {
bytes[1] = increase(bytes[1], bytescount[1], 1 + isize);
}
BOSCU.writeIdenticalLevelRun(source, bytes[1], bytescount[1]);
bytescount[1] = BOSCU.writeIdenticalLevelRun(source, bytes[1],
bytescount[1]);
}
/**
@ -2533,16 +2534,20 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
{
// if we reach here, the ce offset accessed is the last ce
// appended to the buffer
boolean isNullOrder = (cebuffer[0][cebuffersize[0] - 1]
== CollationElementIterator.NULLORDER);
boolean isSourceNullOrder = (cebuffer[0][cebuffersize[0] - 1]
== CollationElementIterator.NULLORDER);
boolean isTargetNullOrder = (cebuffer[1][cebuffersize[1] - 1]
== CollationElementIterator.NULLORDER);
cebuffer[0] = null;
cebuffer[1] = null;
cebuffersize[0] = 0;
cebuffersize[1] = 0;
if (isNullOrder) {
if (isSourceNullOrder) {
return -1;
}
if (isTargetNullOrder) {
return 1;
}
// getting rid of the sign
sorder >>>= CE_PRIMARY_SHIFT_;
torder >>>= CE_PRIMARY_SHIFT_;
@ -2685,6 +2690,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
CollationElementIterator.NULLORDER) {
return -1;
}
if (cebuffer[1][toffset - 1] ==
CollationElementIterator.NULLORDER) {
return 1;
}
return (sorder < torder) ? -1 : 1;
}
}
@ -2696,11 +2705,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
int sorder = getSecondaryFrenchCE(cebuffer, offset,
continuationoffset, 0);
int torder = getSecondaryFrenchCE(cebuffer, offset,
continuationoffset,1);
continuationoffset, 1);
if (sorder == torder) {
if (cebuffer[0][offset[0] - 1]
== CollationElementIterator.NULLORDER
|| (offset[0] < 0 && offset[1] < 0)) {
if ((offset[0] < 0 && offset[1] < 0)
|| cebuffer[0][offset[0]]
== CollationElementIterator.NULLORDER) {
break;
}
}
@ -2729,17 +2738,19 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
while (result == CollationElementIterator.IGNORABLE
&& offset[index] >= 0) {
if (continuationoffset[index] == 0) {
while (isContinuation(cebuffer[0][offset[index] --]));
// after this, sorder is at the start of continuation,
// and offset points before that
if (isContinuation(cebuffer[0][offset[index] + 1])) {
// save offset for later
continuationoffset[index] = offset[index];
offset[index] += 2;
}
result = cebuffer[index][offset[index]];
while (isContinuation(cebuffer[index][offset[index] --]));
// after this, sorder is at the start of continuation,
// and offset points before that
if (isContinuation(cebuffer[index][offset[index] + 1])) {
// save offset for later
continuationoffset[index] = offset[index];
offset[index] += 2;
}
//}
}
else {
result = cebuffer[0][offset[index] ++];
result = cebuffer[index][offset[index] ++];
if (!isContinuation(result)) {
// we have finished with this continuation
offset[index] = continuationoffset[index];
@ -2780,7 +2791,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
while ((torder & CE_REMOVE_CASE_)
== CollationElementIterator.IGNORABLE) {
torder = cebuffer[1][toffset ++];
if (!isContinuation(sorder)) {
if (!isContinuation(torder)) {
torder &= CE_CASE_MASK_3_;
torder ^= m_caseSwitch_;
}
@ -2853,6 +2864,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
CollationElementIterator.NULLORDER) {
return -1;
}
if (cebuffer[1][toffset - 1] ==
CollationElementIterator.NULLORDER) {
return 1;
}
return (sorder < torder) ? -1 : 1;
}
}
@ -2927,6 +2942,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate
CollationElementIterator.NULLORDER) {
return -1;
}
if (cebuffer[1][toffset - 1] ==
CollationElementIterator.NULLORDER) {
return 1;
}
return (sorder < torder) ? -1 : 1;
}
}