ICU-10829 simplify U+FFFE collation: U+FFFE now uses common non-primary weights; adjust the code for that; test that the order with U+FFFE is equivalent to ucol_mergeSortkeys() but does not necessarily produce the same sort keys; omit the case level if lowerFirst is set and there are only common weights
X-SVN-Rev: 36856
This commit is contained in:
parent
7ce10f73cf
commit
030eff56d3
@ -146,7 +146,7 @@
|
|||||||
* This value may change in subsequent releases of ICU.
|
* This value may change in subsequent releases of ICU.
|
||||||
* @stable ICU 2.4
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
#define UCOL_RUNTIME_VERSION 8
|
#define UCOL_RUNTIME_VERSION 9
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collation builder code version.
|
* Collation builder code version.
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -47051,7 +47051,7 @@ FDD1 FDD0; [E4, 05, 05] # unassigned first primary
|
|||||||
|
|
||||||
# SPECIAL MAX/MIN COLLATION ELEMENTS
|
# SPECIAL MAX/MIN COLLATION ELEMENTS
|
||||||
|
|
||||||
FFFE; [02, 02, 02] # Special LOWEST primary, for merge/interleaving
|
FFFE; [02, 05, 05] # Special LOWEST primary, for merge/interleaving
|
||||||
FFFF; [EF FF, 05, 05] # Special HIGHEST primary, for ranges
|
FFFF; [EF FF, 05, 05] # Special HIGHEST primary, for ranges
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,17 +29,19 @@ public:
|
|||||||
// Special sort key bytes for all levels.
|
// Special sort key bytes for all levels.
|
||||||
static const uint8_t TERMINATOR_BYTE = 0;
|
static const uint8_t TERMINATOR_BYTE = 0;
|
||||||
static const uint8_t LEVEL_SEPARATOR_BYTE = 1;
|
static const uint8_t LEVEL_SEPARATOR_BYTE = 1;
|
||||||
|
|
||||||
|
/** The secondary/tertiary lower limit for tailoring before any root elements. */
|
||||||
|
static const uint32_t BEFORE_WEIGHT16 = 0x0100;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merge-sort-key separator.
|
* Merge-sort-key separator.
|
||||||
* Must not be used as the lead byte of any CE weight,
|
* Same as the unique primary and identical-level weights of U+FFFE.
|
||||||
* nor as primary compression low terminator.
|
* Must not be used as primary compression low terminator.
|
||||||
* Otherwise usable.
|
* Otherwise usable.
|
||||||
*/
|
*/
|
||||||
static const uint8_t MERGE_SEPARATOR_BYTE = 2;
|
static const uint8_t MERGE_SEPARATOR_BYTE = 2;
|
||||||
static const uint32_t MERGE_SEPARATOR_PRIMARY = 0x02000000; // U+FFFE
|
static const uint32_t MERGE_SEPARATOR_PRIMARY = 0x02000000; // U+FFFE
|
||||||
static const uint32_t MERGE_SEPARATOR_WEIGHT16 = 0x0200; // U+FFFE
|
static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000505; // U+FFFE
|
||||||
static const uint32_t MERGE_SEPARATOR_LOWER32 = 0x02000200; // U+FFFE
|
|
||||||
static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000202; // U+FFFE
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
|
* Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
|
||||||
|
@ -450,8 +450,8 @@ CollationBuilder::addReset(int32_t strength, const UnicodeString &str,
|
|||||||
}
|
}
|
||||||
nodes.setElementAt(node, index);
|
nodes.setElementAt(node, index);
|
||||||
int32_t nextIndex = nextIndexFromNode(node);
|
int32_t nextIndex = nextIndexFromNode(node);
|
||||||
// Insert default nodes with weights 02 and 05, reset to the 02 node.
|
// Insert default nodes with weights 01 and 05, reset to the 01 node.
|
||||||
node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength);
|
node = nodeFromWeight16(Collation::BEFORE_WEIGHT16) | nodeFromStrength(strength);
|
||||||
index = insertNodeBetween(index, nextIndex, node, errorCode);
|
index = insertNodeBetween(index, nextIndex, node, errorCode);
|
||||||
node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 |
|
node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 |
|
||||||
nodeFromStrength(strength);
|
nodeFromStrength(strength);
|
||||||
@ -961,7 +961,7 @@ CollationBuilder::findCommonNode(int32_t index, int32_t strength) const {
|
|||||||
index = nextIndexFromNode(node);
|
index = nextIndexFromNode(node);
|
||||||
node = nodes.elementAti(index);
|
node = nodes.elementAti(index);
|
||||||
U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength &&
|
U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength &&
|
||||||
weight16FromNode(node) == BEFORE_WEIGHT16);
|
weight16FromNode(node) == Collation::BEFORE_WEIGHT16);
|
||||||
// Skip to the explicit common node.
|
// Skip to the explicit common node.
|
||||||
do {
|
do {
|
||||||
index = nextIndexFromNode(node);
|
index = nextIndexFromNode(node);
|
||||||
@ -1398,7 +1398,7 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) {
|
|||||||
// Gap at the beginning of the tertiary CE range.
|
// Gap at the beginning of the tertiary CE range.
|
||||||
t = rootElements.getTertiaryBoundary() - 0x100;
|
t = rootElements.getTertiaryBoundary() - 0x100;
|
||||||
tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK;
|
tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK;
|
||||||
} else if(t == BEFORE_WEIGHT16) {
|
} else if(t == Collation::BEFORE_WEIGHT16) {
|
||||||
tLimit = Collation::COMMON_WEIGHT16;
|
tLimit = Collation::COMMON_WEIGHT16;
|
||||||
} else if(!pIsTailored && !sIsTailored) {
|
} else if(!pIsTailored && !sIsTailored) {
|
||||||
// p and s are root weights.
|
// p and s are root weights.
|
||||||
@ -1441,7 +1441,7 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) {
|
|||||||
// Gap at the beginning of the secondary CE range.
|
// Gap at the beginning of the secondary CE range.
|
||||||
s = rootElements.getSecondaryBoundary() - 0x100;
|
s = rootElements.getSecondaryBoundary() - 0x100;
|
||||||
sLimit = rootElements.getFirstSecondaryCE() >> 16;
|
sLimit = rootElements.getFirstSecondaryCE() >> 16;
|
||||||
} else if(s == BEFORE_WEIGHT16) {
|
} else if(s == Collation::BEFORE_WEIGHT16) {
|
||||||
sLimit = Collation::COMMON_WEIGHT16;
|
sLimit = Collation::COMMON_WEIGHT16;
|
||||||
} else if(!pIsTailored) {
|
} else if(!pIsTailored) {
|
||||||
// p is a root primary.
|
// p is a root primary.
|
||||||
|
@ -215,9 +215,6 @@ private:
|
|||||||
|
|
||||||
static int32_t ceStrength(int64_t ce);
|
static int32_t ceStrength(int64_t ce);
|
||||||
|
|
||||||
/** The secondary/tertiary lower limit for tailoring before the common weight. */
|
|
||||||
static const uint32_t BEFORE_WEIGHT16 = Collation::MERGE_SEPARATOR_WEIGHT16;
|
|
||||||
|
|
||||||
/** At most 1M nodes, limited by the 20 bits in node bit fields. */
|
/** At most 1M nodes, limited by the 20 bits in node bit fields. */
|
||||||
static const int32_t MAX_INDEX = 0xfffff;
|
static const int32_t MAX_INDEX = 0xfffff;
|
||||||
/**
|
/**
|
||||||
|
@ -136,18 +136,17 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
int32_t rightStart = 0;
|
int32_t rightStart = 0;
|
||||||
for(;;) {
|
for(;;) {
|
||||||
// Find the merge separator or the NO_CE terminator.
|
// Find the merge separator or the NO_CE terminator.
|
||||||
|
uint32_t p;
|
||||||
int32_t leftLimit = leftStart;
|
int32_t leftLimit = leftStart;
|
||||||
uint32_t leftLower32;
|
while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) >
|
||||||
while((leftLower32 = (uint32_t)left.getCE(leftLimit)) >
|
Collation::MERGE_SEPARATOR_PRIMARY ||
|
||||||
Collation::MERGE_SEPARATOR_LOWER32 ||
|
p == 0) {
|
||||||
leftLower32 == 0) {
|
|
||||||
++leftLimit;
|
++leftLimit;
|
||||||
}
|
}
|
||||||
int32_t rightLimit = rightStart;
|
int32_t rightLimit = rightStart;
|
||||||
uint32_t rightLower32;
|
while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) >
|
||||||
while((rightLower32 = (uint32_t)right.getCE(rightLimit)) >
|
Collation::MERGE_SEPARATOR_PRIMARY ||
|
||||||
Collation::MERGE_SEPARATOR_LOWER32 ||
|
p == 0) {
|
||||||
rightLower32 == 0) {
|
|
||||||
++rightLimit;
|
++rightLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,7 +174,7 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
// Both strings have the same number of merge separators,
|
// Both strings have the same number of merge separators,
|
||||||
// or else there would have been a primary-level difference.
|
// or else there would have been a primary-level difference.
|
||||||
U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
|
U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
|
||||||
if(left.getCE(leftLimit) == Collation::NO_CE) { break; }
|
if(p == Collation::NO_CE_PRIMARY) { break; }
|
||||||
// Skip both merge separators and continue.
|
// Skip both merge separators and continue.
|
||||||
leftStart = leftLimit + 1;
|
leftStart = leftLimit + 1;
|
||||||
rightStart = rightLimit + 1;
|
rightStart = rightLimit + 1;
|
||||||
@ -276,20 +275,19 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
|
|
||||||
if(leftTertiary != rightTertiary) {
|
if(leftTertiary != rightTertiary) {
|
||||||
if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
|
if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
|
||||||
// Pass through NO_CE and MERGE_SEPARATOR
|
// Pass through NO_CE and keep real tertiary weights larger than that.
|
||||||
// and keep real tertiary weights larger than the MERGE_SEPARATOR.
|
|
||||||
// Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
|
// Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
|
||||||
// to keep tertiary CEs well-formed.
|
// to keep tertiary CEs well-formed.
|
||||||
// Their case+tertiary weights must be greater than those of
|
// Their case+tertiary weights must be greater than those of
|
||||||
// primary and secondary CEs.
|
// primary and secondary CEs.
|
||||||
if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
|
if(leftTertiary > Collation::NO_CE_WEIGHT16) {
|
||||||
if(leftLower32 > 0xffff) {
|
if(leftLower32 > 0xffff) {
|
||||||
leftTertiary ^= 0xc000;
|
leftTertiary ^= 0xc000;
|
||||||
} else {
|
} else {
|
||||||
leftTertiary += 0x4000;
|
leftTertiary += 0x4000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
|
if(rightTertiary > Collation::NO_CE_WEIGHT16) {
|
||||||
if(rightLower32 > 0xffff) {
|
if(rightLower32 > 0xffff) {
|
||||||
rightTertiary ^= 0xc000;
|
rightTertiary ^= 0xc000;
|
||||||
} else {
|
} else {
|
||||||
@ -316,11 +314,9 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
do {
|
do {
|
||||||
int64_t ce = left.getCE(leftIndex++);
|
int64_t ce = left.getCE(leftIndex++);
|
||||||
leftQuaternary = (uint32_t)ce & 0xffff;
|
leftQuaternary = (uint32_t)ce & 0xffff;
|
||||||
if(leftQuaternary == 0) {
|
if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
|
||||||
// Variable primary or completely ignorable.
|
// Variable primary or completely ignorable or NO_CE.
|
||||||
leftQuaternary = (uint32_t)(ce >> 32);
|
leftQuaternary = (uint32_t)(ce >> 32);
|
||||||
} else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
|
||||||
// Leave NO_CE or MERGE_SEPARATOR as is.
|
|
||||||
} else {
|
} else {
|
||||||
// Regular CE, not tertiary ignorable.
|
// Regular CE, not tertiary ignorable.
|
||||||
// Preserve the quaternary weight in bits 7..6.
|
// Preserve the quaternary weight in bits 7..6.
|
||||||
@ -332,11 +328,9 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
do {
|
do {
|
||||||
int64_t ce = right.getCE(rightIndex++);
|
int64_t ce = right.getCE(rightIndex++);
|
||||||
rightQuaternary = (uint32_t)ce & 0xffff;
|
rightQuaternary = (uint32_t)ce & 0xffff;
|
||||||
if(rightQuaternary == 0) {
|
if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
|
||||||
// Variable primary or completely ignorable.
|
// Variable primary or completely ignorable or NO_CE.
|
||||||
rightQuaternary = (uint32_t)(ce >> 32);
|
rightQuaternary = (uint32_t)(ce >> 32);
|
||||||
} else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
|
||||||
// Leave NO_CE or MERGE_SEPARATOR as is.
|
|
||||||
} else {
|
} else {
|
||||||
// Regular CE, not tertiary ignorable.
|
// Regular CE, not tertiary ignorable.
|
||||||
// Preserve the quaternary weight in bits 7..6.
|
// Preserve the quaternary weight in bits 7..6.
|
||||||
@ -353,7 +347,7 @@ CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterat
|
|||||||
}
|
}
|
||||||
return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
|
return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
|
||||||
}
|
}
|
||||||
if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; }
|
if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
|
||||||
}
|
}
|
||||||
return UCOL_EQUAL;
|
return UCOL_EQUAL;
|
||||||
}
|
}
|
||||||
|
@ -262,7 +262,7 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
int32_t commonQuaternaries = 0;
|
int32_t commonQuaternaries = 0;
|
||||||
|
|
||||||
uint32_t prevSecondary = 0;
|
uint32_t prevSecondary = 0;
|
||||||
UBool anyMergeSeparators = FALSE;
|
int32_t secSegmentStart = 0;
|
||||||
|
|
||||||
for(;;) {
|
for(;;) {
|
||||||
// No need to keep all CEs in the buffer when we write a sort key.
|
// No need to keep all CEs in the buffer when we write a sort key.
|
||||||
@ -350,7 +350,11 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
uint32_t s = lower32 >> 16;
|
uint32_t s = lower32 >> 16;
|
||||||
if(s == 0) {
|
if(s == 0) {
|
||||||
// secondary ignorable
|
// secondary ignorable
|
||||||
} else if(s == Collation::COMMON_WEIGHT16) {
|
} else if(s == Collation::COMMON_WEIGHT16 &&
|
||||||
|
((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
|
||||||
|
p != Collation::MERGE_SEPARATOR_PRIMARY)) {
|
||||||
|
// s is a common secondary weight, and
|
||||||
|
// backwards-secondary is off or the ce is not the merge separator.
|
||||||
++commonSecondaries;
|
++commonSecondaries;
|
||||||
} else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
|
} else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
|
||||||
if(commonSecondaries != 0) {
|
if(commonSecondaries != 0) {
|
||||||
@ -389,16 +393,28 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
}
|
}
|
||||||
// commonSecondaries == 0
|
// commonSecondaries == 0
|
||||||
}
|
}
|
||||||
// Reduce separators so that we can look for byte<=1 later.
|
if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
|
||||||
if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
// The backwards secondary level compares secondary weights backwards
|
||||||
if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
|
// within segments separated by the merge separator (U+FFFE).
|
||||||
anyMergeSeparators = TRUE;
|
uint8_t *secs = secondaries.data();
|
||||||
|
int32_t last = secondaries.length() - 1;
|
||||||
|
if(secSegmentStart < last) {
|
||||||
|
uint8_t *p = secs + secSegmentStart;
|
||||||
|
uint8_t *q = secs + last;
|
||||||
|
do {
|
||||||
|
uint8_t b = *p;
|
||||||
|
*p++ = *q;
|
||||||
|
*q-- = b;
|
||||||
|
} while(p < q);
|
||||||
}
|
}
|
||||||
secondaries.appendByte((s >> 8) - 1);
|
secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
|
||||||
|
Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
|
||||||
|
prevSecondary = 0;
|
||||||
|
secSegmentStart = secondaries.length();
|
||||||
} else {
|
} else {
|
||||||
secondaries.appendReverseWeight16(s);
|
secondaries.appendReverseWeight16(s);
|
||||||
|
prevSecondary = s;
|
||||||
}
|
}
|
||||||
prevSecondary = s;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -411,19 +427,23 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
} else {
|
} else {
|
||||||
uint32_t c = (lower32 >> 8) & 0xff; // case bits & tertiary lead byte
|
uint32_t c = (lower32 >> 8) & 0xff; // case bits & tertiary lead byte
|
||||||
U_ASSERT((c & 0xc0) != 0xc0);
|
U_ASSERT((c & 0xc0) != 0xc0);
|
||||||
if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
|
if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
|
||||||
++commonCases;
|
++commonCases;
|
||||||
} else {
|
} else {
|
||||||
if((options & CollationSettings::UPPER_FIRST) == 0) {
|
if((options & CollationSettings::UPPER_FIRST) == 0) {
|
||||||
// lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
|
// lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
|
||||||
if(commonCases != 0) {
|
// If there are only common (=lowest) weights in the whole level,
|
||||||
|
// then we need not write anything.
|
||||||
|
// Level length differences are handled already on the next-higher level.
|
||||||
|
if(commonCases != 0 &&
|
||||||
|
(c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
|
||||||
--commonCases;
|
--commonCases;
|
||||||
while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
|
while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
|
||||||
cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
|
cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
|
||||||
commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
|
commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
|
||||||
}
|
}
|
||||||
uint32_t b;
|
uint32_t b;
|
||||||
if(c <= Collation::MERGE_SEPARATOR_BYTE) {
|
if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
|
||||||
b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
|
b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
|
||||||
} else {
|
} else {
|
||||||
b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
|
b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
|
||||||
@ -431,7 +451,7 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
cases.appendByte(b << 4);
|
cases.appendByte(b << 4);
|
||||||
commonCases = 0;
|
commonCases = 0;
|
||||||
}
|
}
|
||||||
if(c > Collation::MERGE_SEPARATOR_BYTE) {
|
if(c > Collation::LEVEL_SEPARATOR_BYTE) {
|
||||||
c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4; // 14 or 15
|
c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4; // 14 or 15
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -447,11 +467,11 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
|
cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
|
||||||
commonCases = 0;
|
commonCases = 0;
|
||||||
}
|
}
|
||||||
if(c > Collation::MERGE_SEPARATOR_BYTE) {
|
if(c > Collation::LEVEL_SEPARATOR_BYTE) {
|
||||||
c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4; // 2 or 1
|
c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4; // 2 or 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// c is a separator byte 01 or 02,
|
// c is a separator byte 01,
|
||||||
// or a left-shifted nibble 0x10, 0x20, ... 0xf0.
|
// or a left-shifted nibble 0x10, 0x20, ... 0xf0.
|
||||||
cases.appendByte(c);
|
cases.appendByte(c);
|
||||||
}
|
}
|
||||||
@ -510,14 +530,14 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
// Their case+tertiary weights must be greater than those of
|
// Their case+tertiary weights must be greater than those of
|
||||||
// primary and secondary CEs.
|
// primary and secondary CEs.
|
||||||
//
|
//
|
||||||
// Separators 01..02 -> 01..02 (unchanged)
|
// Separator 01 -> 01 (unchanged)
|
||||||
// Lowercase 03..04 -> 83..84 (includes uncased)
|
// Lowercase 02..04 -> 82..84 (includes uncased)
|
||||||
// Common weight 05 -> 85..C5 (common-weight compression range)
|
// Common weight 05 -> 85..C5 (common-weight compression range)
|
||||||
// Lowercase 06..3F -> C6..FF
|
// Lowercase 06..3F -> C6..FF
|
||||||
// Mixed case 43..7F -> 43..7F
|
// Mixed case 42..7F -> 42..7F
|
||||||
// Uppercase 83..BF -> 03..3F
|
// Uppercase 82..BF -> 02..3F
|
||||||
// Tertiary CE 86..BF -> C6..FF
|
// Tertiary CE 86..BF -> C6..FF
|
||||||
if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
if(t <= Collation::NO_CE_WEIGHT16) {
|
||||||
// Keep separators unchanged.
|
// Keep separators unchanged.
|
||||||
} else if(lower32 > 0xffff) {
|
} else if(lower32 > 0xffff) {
|
||||||
// Invert case bits of primary & secondary CEs.
|
// Invert case bits of primary & secondary CEs.
|
||||||
@ -551,24 +571,22 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
|
|
||||||
if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
|
if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
|
||||||
uint32_t q = lower32 & 0xffff;
|
uint32_t q = lower32 & 0xffff;
|
||||||
if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
|
if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
|
||||||
++commonQuaternaries;
|
++commonQuaternaries;
|
||||||
} else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
|
} else if(q == Collation::NO_CE_WEIGHT16 &&
|
||||||
(options & CollationSettings::ALTERNATE_MASK) == 0 &&
|
(options & CollationSettings::ALTERNATE_MASK) == 0 &&
|
||||||
(quaternaries.isEmpty() ||
|
quaternaries.isEmpty()) {
|
||||||
quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
|
// If alternate=non-ignorable and there are only common quaternary weights,
|
||||||
// If alternate=non-ignorable and there are only
|
// then we need not write anything.
|
||||||
// common quaternary weights between two separators,
|
|
||||||
// then we need not write anything between these separators.
|
|
||||||
// The only weights greater than the merge separator and less than the common weight
|
// The only weights greater than the merge separator and less than the common weight
|
||||||
// are shifted primary weights, which are not generated for alternate=non-ignorable.
|
// are shifted primary weights, which are not generated for alternate=non-ignorable.
|
||||||
// There are also exactly as many quaternary weights as tertiary weights,
|
// There are also exactly as many quaternary weights as tertiary weights,
|
||||||
// so level length differences are handled already on tertiary level.
|
// so level length differences are handled already on tertiary level.
|
||||||
// Any above-common quaternary weight will compare greater regardless.
|
// Any above-common quaternary weight will compare greater regardless.
|
||||||
quaternaries.appendByte(q >> 8);
|
quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
|
||||||
} else {
|
} else {
|
||||||
if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
if(q == Collation::NO_CE_WEIGHT16) {
|
||||||
q >>= 8;
|
q = Collation::LEVEL_SEPARATOR_BYTE;
|
||||||
} else {
|
} else {
|
||||||
q = 0xfc + ((q >> 6) & 3);
|
q = 0xfc + ((q >> 6) & 3);
|
||||||
}
|
}
|
||||||
@ -602,42 +620,7 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
|
if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
|
||||||
ok &= secondaries.isOk();
|
ok &= secondaries.isOk();
|
||||||
sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
|
sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
|
||||||
uint8_t *secs = secondaries.data();
|
secondaries.appendTo(sink);
|
||||||
int32_t length = secondaries.length() - 1; // Ignore the trailing NO_CE.
|
|
||||||
if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
|
|
||||||
// The backwards secondary level compares secondary weights backwards
|
|
||||||
// within segments separated by the merge separator (U+FFFE, weight 02).
|
|
||||||
// The separator weights 01 & 02 were reduced to 00 & 01 so that
|
|
||||||
// we do not accidentally separate at a _second_ weight byte of 02.
|
|
||||||
int32_t start = 0;
|
|
||||||
for(;;) {
|
|
||||||
// Find the merge separator or the NO_CE terminator.
|
|
||||||
int32_t limit;
|
|
||||||
if(anyMergeSeparators) {
|
|
||||||
limit = start;
|
|
||||||
while(secs[limit] > 1) { ++limit; }
|
|
||||||
} else {
|
|
||||||
limit = length;
|
|
||||||
}
|
|
||||||
// Reverse this segment.
|
|
||||||
if(start < limit) {
|
|
||||||
uint8_t *p = secs + start;
|
|
||||||
uint8_t *q = secs + limit - 1;
|
|
||||||
while(p < q) {
|
|
||||||
uint8_t s = *p;
|
|
||||||
*p++ = *q;
|
|
||||||
*q-- = s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Did we reach the end of the string?
|
|
||||||
if(secs[limit] == 0) { break; }
|
|
||||||
// Restore the merge separator.
|
|
||||||
secs[limit] = 2;
|
|
||||||
// Skip the merge separator and continue.
|
|
||||||
start = limit + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sink.Append(reinterpret_cast<char *>(secs), length);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
|
if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
|
||||||
@ -649,21 +632,12 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|||||||
uint8_t b = 0;
|
uint8_t b = 0;
|
||||||
for(int32_t i = 0; i < length; ++i) {
|
for(int32_t i = 0; i < length; ++i) {
|
||||||
uint8_t c = (uint8_t)cases[i];
|
uint8_t c = (uint8_t)cases[i];
|
||||||
if(c <= Collation::MERGE_SEPARATOR_BYTE) {
|
U_ASSERT((c & 0xf) == 0 && c != 0);
|
||||||
U_ASSERT(c != 0);
|
if(b == 0) {
|
||||||
if(b != 0) {
|
b = c;
|
||||||
sink.Append(b);
|
|
||||||
b = 0;
|
|
||||||
}
|
|
||||||
sink.Append(c);
|
|
||||||
} else {
|
} else {
|
||||||
U_ASSERT((c & 0xf) == 0);
|
sink.Append(b | (c >> 4));
|
||||||
if(b == 0) {
|
b = 0;
|
||||||
b = c;
|
|
||||||
} else {
|
|
||||||
sink.Append(b | (c >> 4));
|
|
||||||
b = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(b != 0) {
|
if(b != 0) {
|
||||||
|
@ -124,7 +124,7 @@ CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
|
|||||||
sec = elements[index] >> 16;
|
sec = elements[index] >> 16;
|
||||||
} else {
|
} else {
|
||||||
index = findPrimary(p) + 1;
|
index = findPrimary(p) + 1;
|
||||||
previousSec = Collation::MERGE_SEPARATOR_WEIGHT16;
|
previousSec = Collation::BEFORE_WEIGHT16;
|
||||||
sec = Collation::COMMON_WEIGHT16;
|
sec = Collation::COMMON_WEIGHT16;
|
||||||
}
|
}
|
||||||
U_ASSERT(s >= sec);
|
U_ASSERT(s >= sec);
|
||||||
@ -149,12 +149,12 @@ CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) con
|
|||||||
previousTer = 0;
|
previousTer = 0;
|
||||||
} else {
|
} else {
|
||||||
index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
|
index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
|
||||||
previousTer = Collation::MERGE_SEPARATOR_WEIGHT16;
|
previousTer = Collation::BEFORE_WEIGHT16;
|
||||||
}
|
}
|
||||||
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
|
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
|
||||||
} else {
|
} else {
|
||||||
index = findPrimary(p) + 1;
|
index = findPrimary(p) + 1;
|
||||||
previousTer = Collation::MERGE_SEPARATOR_WEIGHT16;
|
previousTer = Collation::BEFORE_WEIGHT16;
|
||||||
secTer = Collation::COMMON_SEC_AND_TER_CE;
|
secTer = Collation::COMMON_SEC_AND_TER_CE;
|
||||||
}
|
}
|
||||||
uint32_t st = (s << 16) | t;
|
uint32_t st = (s << 16) | t;
|
||||||
|
@ -126,7 +126,7 @@ CollationWeights::initForSecondary() {
|
|||||||
maxBytes[1] = 0;
|
maxBytes[1] = 0;
|
||||||
minBytes[2] = 0;
|
minBytes[2] = 0;
|
||||||
maxBytes[2] = 0;
|
maxBytes[2] = 0;
|
||||||
minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
|
minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
|
||||||
maxBytes[3] = 0xff;
|
maxBytes[3] = 0xff;
|
||||||
minBytes[4] = 2;
|
minBytes[4] = 2;
|
||||||
maxBytes[4] = 0xff;
|
maxBytes[4] = 0xff;
|
||||||
@ -142,7 +142,7 @@ CollationWeights::initForTertiary() {
|
|||||||
maxBytes[2] = 0;
|
maxBytes[2] = 0;
|
||||||
// We use only 6 bits per byte.
|
// We use only 6 bits per byte.
|
||||||
// The other bits are used for case & quaternary weights.
|
// The other bits are used for case & quaternary weights.
|
||||||
minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
|
minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
|
||||||
maxBytes[3] = 0x3f;
|
maxBytes[3] = 0x3f;
|
||||||
minBytes[4] = 2;
|
minBytes[4] = 2;
|
||||||
maxBytes[4] = 0x3f;
|
maxBytes[4] = 0x3f;
|
||||||
|
@ -114,6 +114,8 @@ private:
|
|||||||
UBool getCollationKey(const char *norm, const UnicodeString &line,
|
UBool getCollationKey(const char *norm, const UnicodeString &line,
|
||||||
const UChar *s, int32_t length,
|
const UChar *s, int32_t length,
|
||||||
CollationKey &key, IcuTestErrorCode &errorCode);
|
CollationKey &key, IcuTestErrorCode &errorCode);
|
||||||
|
UBool getMergedCollationKey(const UChar *s, int32_t length,
|
||||||
|
CollationKey &key, IcuTestErrorCode &errorCode);
|
||||||
UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
|
UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
|
||||||
const UnicodeString &prevString, const UnicodeString &s,
|
const UnicodeString &prevString, const UnicodeString &s,
|
||||||
UCollationResult expectedOrder, Collation::Level expectedLevel,
|
UCollationResult expectedOrder, Collation::Level expectedLevel,
|
||||||
@ -172,11 +174,9 @@ void CollationTest::TestMinMax() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int64_t ce = ces.elementAti(0);
|
int64_t ce = ces.elementAti(0);
|
||||||
int64_t expected =
|
int64_t expected = Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY);
|
||||||
((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) |
|
|
||||||
Collation::MERGE_SEPARATOR_LOWER32;
|
|
||||||
if(ce != expected) {
|
if(ce != expected) {
|
||||||
errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce);
|
errln("CE(U+fffe)=%04lx != 02..", (long)ce);
|
||||||
}
|
}
|
||||||
|
|
||||||
ce = ces.elementAti(1);
|
ce = ces.elementAti(1);
|
||||||
@ -617,11 +617,8 @@ UBool isValidCE(const CollationRootElements &re, const CollationData &data,
|
|||||||
}
|
}
|
||||||
// Minimum & maximum lead bytes.
|
// Minimum & maximum lead bytes.
|
||||||
if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) ||
|
if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) ||
|
||||||
(s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) ||
|
s1 == Collation::LEVEL_SEPARATOR_BYTE ||
|
||||||
(t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) {
|
t1 == Collation::LEVEL_SEPARATOR_BYTE || t1 > 0x3f) {
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
if(t1 != 0 && t1 > 0x3f) {
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
if(c > 2) {
|
if(c > 2) {
|
||||||
@ -1372,7 +1369,39 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If s contains U+FFFE, check that merged segments make the same key.
|
// Check that internalNextSortKeyPart() makes the same key, with several part sizes.
|
||||||
|
static const int32_t partSizes[] = { 32, 3, 1 };
|
||||||
|
for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
|
||||||
|
int32_t partSize = partSizes[psi];
|
||||||
|
CharString parts;
|
||||||
|
if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
|
||||||
|
infoln(fileTestName);
|
||||||
|
errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
|
||||||
|
norm, (int)partSize, errorCode.errorName());
|
||||||
|
infoln(line);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
|
||||||
|
infoln(fileTestName);
|
||||||
|
errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
|
||||||
|
norm, (int)partSize);
|
||||||
|
infoln(line);
|
||||||
|
infoln(printCollationKey(key));
|
||||||
|
infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Changes the key to the merged segments of the U+FFFE-separated substrings of s.
|
||||||
|
* Leaves key unchanged if s does not contain U+FFFE.
|
||||||
|
* @return TRUE if the key was successfully changed
|
||||||
|
*/
|
||||||
|
UBool CollationTest::getMergedCollationKey(const UChar *s, int32_t length,
|
||||||
|
CollationKey &key, IcuTestErrorCode &errorCode) {
|
||||||
|
if(errorCode.isFailure()) { return FALSE; }
|
||||||
LocalMemory<uint8_t> mergedKey;
|
LocalMemory<uint8_t> mergedKey;
|
||||||
int32_t mergedKeyLength = 0;
|
int32_t mergedKeyLength = 0;
|
||||||
int32_t mergedKeyCapacity = 0;
|
int32_t mergedKeyCapacity = 0;
|
||||||
@ -1382,7 +1411,7 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
|
|||||||
if(i == sLength) {
|
if(i == sLength) {
|
||||||
if(segmentStart == 0) {
|
if(segmentStart == 0) {
|
||||||
// s does not contain any U+FFFE.
|
// s does not contain any U+FFFE.
|
||||||
break;
|
return FALSE;
|
||||||
}
|
}
|
||||||
} else if(s[i] != 0xfffe) {
|
} else if(s[i] != 0xfffe) {
|
||||||
++i;
|
++i;
|
||||||
@ -1423,41 +1452,7 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
|
|||||||
if(i == sLength) { break; }
|
if(i == sLength) { break; }
|
||||||
segmentStart = ++i;
|
segmentStart = ++i;
|
||||||
}
|
}
|
||||||
if(segmentStart != 0 &&
|
key = CollationKey(mergedKey.getAlias(), mergedKeyLength);
|
||||||
(mergedKeyLength != keyLength ||
|
|
||||||
uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) {
|
|
||||||
infoln(fileTestName);
|
|
||||||
errln("Collator(%s).getCollationKey(with U+FFFE) != "
|
|
||||||
"ucol_mergeSortkeys(segments)",
|
|
||||||
norm);
|
|
||||||
infoln(line);
|
|
||||||
infoln(printCollationKey(key));
|
|
||||||
infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength));
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that internalNextSortKeyPart() makes the same key, with several part sizes.
|
|
||||||
static const int32_t partSizes[] = { 32, 3, 1 };
|
|
||||||
for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
|
|
||||||
int32_t partSize = partSizes[psi];
|
|
||||||
CharString parts;
|
|
||||||
if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
|
|
||||||
infoln(fileTestName);
|
|
||||||
errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
|
|
||||||
norm, (int)partSize, errorCode.errorName());
|
|
||||||
infoln(line);
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
|
|
||||||
infoln(fileTestName);
|
|
||||||
errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
|
|
||||||
norm, (int)partSize);
|
|
||||||
infoln(line);
|
|
||||||
infoln(printCollationKey(key));
|
|
||||||
infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1488,6 +1483,29 @@ const UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buf
|
|||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the collation level at which two sort keys first differ.
 *
 * Walks both keys byte by byte up to the first mismatch and counts the
 * Collation::LEVEL_SEPARATOR_BYTE values that were passed on the way.
 * When collHasCaseLevel is false, the counter steps over Collation::CASE_LEVEL.
 *
 * @param prevKey the first sort key (callers pass the key of the previous string)
 * @param key the second sort key (callers pass the key of the current string)
 * @param order the result of comparing the two keys;
 *              if it is UCOL_EQUAL then the key bytes are not inspected
 * @param collHasCaseLevel whether the collator's UCOL_CASE_LEVEL attribute is on
 * @return Collation::NO_LEVEL if order is UCOL_EQUAL,
 *         otherwise the level reached when the first differing byte
 *         (or the end of the shorter key) is found
 */
int32_t getDifferenceLevel(const CollationKey &prevKey, const CollationKey &key,
                           UCollationResult order, UBool collHasCaseLevel) {
    if(order == UCOL_EQUAL) {
        return Collation::NO_LEVEL;
    }
    int32_t prevKeyLength;
    const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
    int32_t keyLength;
    const uint8_t *bytes = key.getByteArray(keyLength);
    // Never read beyond the shorter key: the caller's "order" is trusted to mean
    // that the keys differ, but a mismatch between order and the actual bytes
    // must not turn into an out-of-bounds read.
    const int32_t limit = prevKeyLength < keyLength ? prevKeyLength : keyLength;
    int32_t level = Collation::PRIMARY_LEVEL;
    for(int32_t i = 0; i < limit; ++i) {
        uint8_t b = prevBytes[i];
        if(b != bytes[i]) { break; }
        if(b == Collation::LEVEL_SEPARATOR_BYTE) {
            // Both keys have finished the current level identically.
            ++level;
            if(level == Collation::CASE_LEVEL && !collHasCaseLevel) {
                // No case level in these keys: the next bytes belong to the level after it.
                ++level;
            }
        }
    }
    return level;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
|
UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
|
||||||
@ -1649,23 +1667,9 @@ UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
|
|||||||
infoln(printCollationKey(key));
|
infoln(printCollationKey(key));
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
UBool collHasCaseLevel = coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON;
|
||||||
|
int32_t level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
|
||||||
if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
|
if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
|
||||||
int32_t prevKeyLength;
|
|
||||||
const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
|
|
||||||
int32_t keyLength;
|
|
||||||
const uint8_t *bytes = key.getByteArray(keyLength);
|
|
||||||
int32_t level = Collation::PRIMARY_LEVEL;
|
|
||||||
for(int32_t i = 0;; ++i) {
|
|
||||||
uint8_t b = prevBytes[i];
|
|
||||||
if(b != bytes[i]) { break; }
|
|
||||||
if(b == Collation::LEVEL_SEPARATOR_BYTE) {
|
|
||||||
++level;
|
|
||||||
if(level == Collation::CASE_LEVEL &&
|
|
||||||
coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_OFF) {
|
|
||||||
++level;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(level != expectedLevel) {
|
if(level != expectedLevel) {
|
||||||
infoln(fileTestName);
|
infoln(fileTestName);
|
||||||
errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
|
errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
|
||||||
@ -1677,6 +1681,45 @@ UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If either string contains U+FFFE, then their sort keys must compare the same as
|
||||||
|
// the merged sort keys of each string's between-FFFE segments.
|
||||||
|
//
|
||||||
|
// It is not required that
|
||||||
|
// sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
|
||||||
|
// only that those two methods yield the same order.
|
||||||
|
//
|
||||||
|
// Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
|
||||||
|
if((getMergedCollationKey(prevString.getBuffer(), prevString.length(), prevKey, errorCode) |
|
||||||
|
getMergedCollationKey(s.getBuffer(), s.length(), key, errorCode)) ||
|
||||||
|
errorCode.isFailure()) {
|
||||||
|
order = prevKey.compareTo(key, errorCode);
|
||||||
|
if(order != expectedOrder || errorCode.isFailure()) {
|
||||||
|
infoln(fileTestName);
|
||||||
|
errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
|
||||||
|
"(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
|
||||||
|
(int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
|
||||||
|
infoln(prevFileLine);
|
||||||
|
infoln(fileLine);
|
||||||
|
infoln(printCollationKey(prevKey));
|
||||||
|
infoln(printCollationKey(key));
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
int32_t mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
|
||||||
|
if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
|
||||||
|
if(mergedLevel != level) {
|
||||||
|
infoln(fileTestName);
|
||||||
|
errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
|
||||||
|
"(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
|
||||||
|
(int)fileLineNumber, norm, order, mergedLevel, level);
|
||||||
|
infoln(prevFileLine);
|
||||||
|
infoln(fileLine);
|
||||||
|
infoln(printCollationKey(prevKey));
|
||||||
|
infoln(printCollationKey(key));
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user