ICU-4350 Upgrade ICU4C to UCA 4.1
X-SVN-Rev: 17622
This commit is contained in:
parent
5e1a113aba
commit
32354b1c86
@ -137,6 +137,7 @@ inline void IInit_collIterate(const UCollator *collator, const UChar *sourceStr
|
|||||||
}
|
}
|
||||||
(s)->iterator = NULL;
|
(s)->iterator = NULL;
|
||||||
//(s)->iteratorIndex = 0;
|
//(s)->iteratorIndex = 0;
|
||||||
|
(s)->consumedChars = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CAPI void U_EXPORT2
|
U_CAPI void U_EXPORT2
|
||||||
@ -174,6 +175,7 @@ inline void backupState(const collIterate *data, collIterateState *backup)
|
|||||||
data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
|
data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
backup->consumedChars = data->consumedChars;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -231,6 +233,7 @@ inline void loadState(collIterate *data, const collIterateState *backup,
|
|||||||
*/
|
*/
|
||||||
data->fcdPosition = backup->fcdPosition;
|
data->fcdPosition = backup->fcdPosition;
|
||||||
}
|
}
|
||||||
|
data->consumedChars = backup->consumedChars;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -528,24 +531,6 @@ void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
|
|||||||
result->options = opts;
|
result->options = opts;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
// doesn't look like anybody is using this
|
|
||||||
void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
|
|
||||||
if(U_FAILURE(*status)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
opts->caseFirst = result->caseFirst;
|
|
||||||
opts->caseLevel = result->caseLevel;
|
|
||||||
opts->frenchCollation = result->frenchCollation;
|
|
||||||
opts->normalizationMode = result->normalizationMode;
|
|
||||||
opts->strength = result->strength;
|
|
||||||
opts->variableTopValue = result->variableTopValue;
|
|
||||||
opts->alternateHandling = result->alternateHandling;
|
|
||||||
opts->hiraganaQ = result->hiraganaQ;
|
|
||||||
opts->numericCollation = result->numericCollation;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Approximate determination if a character is at a contraction end.
|
* Approximate determination if a character is at a contraction end.
|
||||||
@ -556,7 +541,7 @@ void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode
|
|||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
|
inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
|
||||||
if (UTF_IS_TRAIL(c)) {
|
if (U16_IS_TRAIL(c)) {
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -582,9 +567,9 @@ inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
|
|||||||
* in contraction processing.
|
* in contraction processing.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
inline uint8_t i_getCombiningClass(UChar c, const UCollator *coll) {
|
inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
|
||||||
uint8_t sCC = 0;
|
uint8_t sCC = 0;
|
||||||
if (c >= 0x300 && ucol_unsafeCP(c, coll)) {
|
if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
|
||||||
sCC = u_getCombiningClass(c);
|
sCC = u_getCombiningClass(c);
|
||||||
}
|
}
|
||||||
return sCC;
|
return sCC;
|
||||||
@ -1259,8 +1244,8 @@ inline UBool collIterFCD(collIterate *collationSource) {
|
|||||||
/* trie access */
|
/* trie access */
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||||
if (fcd != 0) {
|
if (fcd != 0) {
|
||||||
if (UTF_IS_FIRST_SURROGATE(c)) {
|
if (U16_IS_LEAD(c)) {
|
||||||
if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
|
if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
|
||||||
++srcP;
|
++srcP;
|
||||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
||||||
} else {
|
} else {
|
||||||
@ -1280,8 +1265,8 @@ inline UBool collIterFCD(collIterate *collationSource) {
|
|||||||
c = *srcP++;
|
c = *srcP++;
|
||||||
/* trie access */
|
/* trie access */
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||||
if (fcd != 0 && UTF_IS_FIRST_SURROGATE(c)) {
|
if (fcd != 0 && U16_IS_LEAD(c)) {
|
||||||
if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
|
if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
|
||||||
++srcP;
|
++srcP;
|
||||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
||||||
} else {
|
} else {
|
||||||
@ -1330,6 +1315,7 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
|
|||||||
}
|
}
|
||||||
|
|
||||||
UChar ch = 0;
|
UChar ch = 0;
|
||||||
|
collationSource->consumedChars = 0;
|
||||||
|
|
||||||
for (;;) /* Loop handles case when incremental normalize switches */
|
for (;;) /* Loop handles case when incremental normalize switches */
|
||||||
{ /* to or from the side buffer / original string, and we */
|
{ /* to or from the side buffer / original string, and we */
|
||||||
@ -1568,9 +1554,9 @@ inline UBool collPrevIterFCD(collIterate *data)
|
|||||||
|
|
||||||
/* Get the trailing combining class of the current character. */
|
/* Get the trailing combining class of the current character. */
|
||||||
c = *--src;
|
c = *--src;
|
||||||
if (!UTF_IS_SURROGATE(c)) {
|
if (!U16_IS_SURROGATE(c)) {
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||||
} else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
|
} else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
|
||||||
--src;
|
--src;
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
||||||
if (fcd != 0) {
|
if (fcd != 0) {
|
||||||
@ -1595,9 +1581,9 @@ inline UBool collPrevIterFCD(collIterate *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
c = *--src;
|
c = *--src;
|
||||||
if (!UTF_IS_SURROGATE(c)) {
|
if (!U16_IS_SURROGATE(c)) {
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||||
} else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
|
} else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
|
||||||
--src;
|
--src;
|
||||||
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
||||||
if (fcd != 0) {
|
if (fcd != 0) {
|
||||||
@ -1817,75 +1803,34 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
|
|||||||
contraction
|
contraction
|
||||||
*/
|
*/
|
||||||
if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
|
if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
|
||||||
result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
|
result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
|
||||||
}
|
} else {
|
||||||
else {
|
if (ch <= 0xFF) {
|
||||||
// TODO: fix me for THAI - I reference *(data->pos-1)
|
result = coll->latinOneMapping[ch];
|
||||||
if ((data->flags & UCOL_ITER_INNORMBUF) == 0 &&
|
}
|
||||||
/*UCOL_ISTHAIBASECONSONANT(ch) &&*/ // This is from the old specs - we now rearrange unconditionally
|
else {
|
||||||
// makes sure that we're not at the beggining of the string
|
result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
|
||||||
//data->pos > data->string &&
|
}
|
||||||
!collIter_bos(data) &&
|
if (result > UCOL_NOT_FOUND) {
|
||||||
UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1)))
|
result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
|
||||||
//UCOL_ISTHAIPREVOWEL(*(data->pos -1)))
|
}
|
||||||
{
|
if (result == UCOL_NOT_FOUND) {
|
||||||
collIterateState entryState;
|
if (!isAtStartPrevIterate(data) &&
|
||||||
backupState(data, &entryState);
|
ucol_contractionEndCP(ch, data->coll)) {
|
||||||
// we have to check if the previous character is also Thai
|
result = UCOL_CONTRACTION;
|
||||||
// if not, we can just set the result
|
}
|
||||||
goBackOne(data);
|
else {
|
||||||
if(collIter_bos(data) || !UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1))) {
|
if(coll->UCA) {
|
||||||
loadState(data, &entryState, FALSE);
|
result = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
|
||||||
result = UCOL_THAI;
|
}
|
||||||
} else { // previous is also reordered
|
|
||||||
// we need to go back as long as they are being reordered
|
|
||||||
// count over the range of reorderable characters and see
|
|
||||||
// if there is an even or odd number of them
|
|
||||||
// if even, we should not reorder. If odd we should reorder.
|
|
||||||
int32_t noReordered = 1; // the one we already detected
|
|
||||||
while(!collIter_bos(data) && UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1))) {
|
|
||||||
noReordered++;
|
|
||||||
goBackOne(data);
|
|
||||||
}
|
|
||||||
if(noReordered & 1) { // odd number of reorderables
|
|
||||||
result = UCOL_THAI;
|
|
||||||
} else {
|
|
||||||
result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
|
|
||||||
}
|
|
||||||
loadState(data, &entryState, FALSE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (ch <= 0xFF) {
|
|
||||||
result = coll->latinOneMapping[ch];
|
|
||||||
//if (result > UCOL_NOT_FOUND) {
|
|
||||||
//result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
/*result = ucmpe32_get(coll->mapping, ch);*/
|
|
||||||
result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
|
|
||||||
}
|
|
||||||
if (result > UCOL_NOT_FOUND) {
|
|
||||||
result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
|
|
||||||
}
|
|
||||||
if (result == UCOL_NOT_FOUND) {
|
|
||||||
if (!isAtStartPrevIterate(data) &&
|
|
||||||
ucol_contractionEndCP(ch, data->coll)) {
|
|
||||||
result = UCOL_CONTRACTION;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/*result = ucmpe32_get(UCA->mapping, ch);*/
|
|
||||||
if(coll->UCA) {
|
|
||||||
result = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result > UCOL_NOT_FOUND && coll->UCA) {
|
if (result > UCOL_NOT_FOUND && coll->UCA) {
|
||||||
result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
|
result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2613,7 +2558,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
UChar trail;
|
UChar trail;
|
||||||
collIterateState state;
|
collIterateState state;
|
||||||
backupState(source, &state);
|
backupState(source, &state);
|
||||||
if (collIter_eos(source) || !(UTF16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
|
if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
|
||||||
// we could have stepped one char forward and it might have turned out that it
|
// we could have stepped one char forward and it might have turned out that it
|
||||||
// was not a trail surrogate. In that case, we have to backup.
|
// was not a trail surrogate. In that case, we have to backup.
|
||||||
loadState(source, &state, TRUE);
|
loadState(source, &state, TRUE);
|
||||||
@ -2631,93 +2576,6 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case THAI_TAG:
|
|
||||||
/* Thai/Lao reordering */
|
|
||||||
if (((source->flags) & UCOL_ITER_INNORMBUF) /* Already Swapped || */
|
|
||||||
|| collIter_eos(source)) /* At end of string. No swap possible */
|
|
||||||
{
|
|
||||||
// Treat Thai as a length one expansion */
|
|
||||||
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
|
|
||||||
CE = *CEOffset++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Move the prevowel and the following base Consonant into the normalization buffer
|
|
||||||
// with their order swapped
|
|
||||||
// Note: this operation might activate the normalization buffer. We have to check for
|
|
||||||
// that and act accordingly.
|
|
||||||
UChar thCh = getNextNormalizedChar(source);
|
|
||||||
UChar32 cp = 0;
|
|
||||||
if(U16_IS_LEAD(thCh)) {
|
|
||||||
if(!collIter_eos(source)) {
|
|
||||||
collIterateState thaiState;
|
|
||||||
backupState(source, &thaiState);
|
|
||||||
UChar trailCh = getNextNormalizedChar(source);
|
|
||||||
if(U16_IS_TRAIL(trailCh)) {
|
|
||||||
cp = U16_GET_SUPPLEMENTARY(thCh, trailCh);
|
|
||||||
} else {
|
|
||||||
loadState(source, &thaiState, TRUE);
|
|
||||||
cp = (UChar32)thCh;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cp = (UChar32)thCh;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cp = (UChar32)thCh;
|
|
||||||
}
|
|
||||||
// Now we have the character that needs to be decomposed
|
|
||||||
// if the normalizing buffer was not used, we can just use our structure and be happy.
|
|
||||||
if((source->flags & UCOL_ITER_INNORMBUF) == 0) {
|
|
||||||
// decompose into writable buffer
|
|
||||||
int32_t decompLen = unorm_getDecomposition(cp, FALSE, &(source->writableBuffer[1]), UCOL_WRITABLE_BUFFER_SIZE-1);
|
|
||||||
if(decompLen < 0) {
|
|
||||||
decompLen = -decompLen;
|
|
||||||
}
|
|
||||||
// reorder Thai and the character after it
|
|
||||||
if(decompLen >= 2 && U16_IS_LEAD(source->writableBuffer[1]) && U16_IS_TRAIL(source->writableBuffer[2])) {
|
|
||||||
source->writableBuffer[0] = source->writableBuffer[1];
|
|
||||||
source->writableBuffer[1] = source->writableBuffer[2];
|
|
||||||
source->writableBuffer[2] = ch;
|
|
||||||
} else {
|
|
||||||
source->writableBuffer[0] = source->writableBuffer[1];
|
|
||||||
source->writableBuffer[1] = ch;
|
|
||||||
}
|
|
||||||
// zero terminate, since normalization buffer is always zero terminated
|
|
||||||
source->writableBuffer[decompLen+1] = 0; // we added the prevowel
|
|
||||||
if(source->pos) {
|
|
||||||
source->fcdPosition = source->pos; // Indicate where to continue in main input string
|
|
||||||
// after exhausting the writableBuffer
|
|
||||||
}
|
|
||||||
source->pos = source->writableBuffer;
|
|
||||||
source->origFlags = source->flags;
|
|
||||||
source->flags |= UCOL_ITER_INNORMBUF;
|
|
||||||
source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// stuff is already normalized... what to do here???
|
|
||||||
|
|
||||||
// if we are in the normalization buffer, thCh must be in it
|
|
||||||
// prove by contradiction
|
|
||||||
// if thCh is not in the normalization buffer,
|
|
||||||
// that means that trailCh is the normalization buffer
|
|
||||||
// that means that trailCh is a trail surrogate by the above
|
|
||||||
// bounding if block, this is a contradiction because there
|
|
||||||
// are no characters at the moment that decomposes to an
|
|
||||||
// unmatched surrogate. qed.
|
|
||||||
if (cp >= 0x10000) {
|
|
||||||
source->writableBuffer[0] = source->writableBuffer[1];
|
|
||||||
source->writableBuffer[1] = source->writableBuffer[2];
|
|
||||||
source->writableBuffer[2] = ch;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
source->writableBuffer[0] = source->writableBuffer[1];
|
|
||||||
source->writableBuffer[1] = ch;
|
|
||||||
}
|
|
||||||
source->pos = source->writableBuffer;
|
|
||||||
}
|
|
||||||
CE = UCOL_IGNORABLE;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case SPEC_PROC_TAG:
|
case SPEC_PROC_TAG:
|
||||||
{
|
{
|
||||||
// Special processing is getting a CE that is preceded by a certain prefix
|
// Special processing is getting a CE that is preceded by a certain prefix
|
||||||
@ -2759,42 +2617,6 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// if there is a completely ignorable code point in the middle of
|
|
||||||
// a prefix, we need to act as if it's not there
|
|
||||||
// assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
|
|
||||||
// lone surrogates cannot be set to zero as it would break other processing
|
|
||||||
uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
|
|
||||||
// it's easy for BMP code points
|
|
||||||
if(isZeroCE == 0) {
|
|
||||||
continue;
|
|
||||||
} else if(UTF_IS_TRAIL(schar) || UTF_IS_LEAD(schar)) {
|
|
||||||
// for supplementary code points, we have to check the next one
|
|
||||||
// situations where we are going to ignore
|
|
||||||
// 1. beginning of the string: schar is a lone surrogate
|
|
||||||
// 2. schar is a lone surrogate
|
|
||||||
// 3. schar is a trail surrogate in a valid surrogate sequence
|
|
||||||
// that is explicitly set to zero.
|
|
||||||
if (!collIter_bos(source)) {
|
|
||||||
UChar lead;
|
|
||||||
if(UTF_IS_LEAD(lead = getPrevNormalizedChar(source))) {
|
|
||||||
isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, lead);
|
|
||||||
if(getCETag(isZeroCE) == SURROGATE_TAG) {
|
|
||||||
uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, schar);
|
|
||||||
if(finalCE == 0) {
|
|
||||||
// this is a real, assigned completely ignorable code point
|
|
||||||
goBackOne(source);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// lone surrogate, completely ignorable
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// lone surrogate at the beggining, completely ignorable
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Source string char was not in the table.
|
// Source string char was not in the table.
|
||||||
// We have not found the prefix.
|
// We have not found the prefix.
|
||||||
CE = *(coll->contractionCEs +
|
CE = *(coll->contractionCEs +
|
||||||
@ -2864,45 +2686,23 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
// Pick up the corresponding CE from the table.
|
// Pick up the corresponding CE from the table.
|
||||||
CE = *(coll->contractionCEs +
|
CE = *(coll->contractionCEs +
|
||||||
(UCharOffset - coll->contractionIndex));
|
(UCharOffset - coll->contractionIndex));
|
||||||
|
source->consumedChars++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// if there is a completely ignorable code point in the middle of
|
|
||||||
// contraction, we need to act as if it's not there
|
|
||||||
uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
|
|
||||||
// it's easy for BMP code points
|
|
||||||
if(isZeroCE == 0) {
|
|
||||||
continue;
|
|
||||||
} else if(UTF_IS_LEAD(schar)) {
|
|
||||||
if(!collIter_eos(source)) {
|
|
||||||
backupState(source, &state);
|
|
||||||
UChar trail = getNextNormalizedChar(source);
|
|
||||||
if(UTF_IS_TRAIL(trail)) { // do stuff with trail
|
|
||||||
if(getCETag(isZeroCE) == SURROGATE_TAG) {
|
|
||||||
uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, trail);
|
|
||||||
if(finalCE == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// broken surrogate sequence, thus completely ignorable
|
|
||||||
loadState(source, &state, TRUE);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
loadState(source, &state, TRUE);
|
|
||||||
} else { // no more characters, so broken surrogate pair...
|
|
||||||
// this contraction will ultimately fail, but not because of us
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} // else if(UTF_IS_LEAD(schar))
|
|
||||||
|
|
||||||
// Source string char was not in contraction table.
|
// Source string char was not in contraction table.
|
||||||
// Unless we have a discontiguous contraction, we have finished
|
// Unless we have a discontiguous contraction, we have finished
|
||||||
// with this contraction.
|
// with this contraction.
|
||||||
|
UChar32 miss = schar;
|
||||||
|
if(U16_IS_LEAD(schar)) { // in order to do the proper detection, we
|
||||||
|
// need to see if we're dealing with a supplementary
|
||||||
|
miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
|
||||||
|
}
|
||||||
|
|
||||||
uint8_t sCC;
|
uint8_t sCC;
|
||||||
if (schar < 0x300 ||
|
if (miss < 0x300 ||
|
||||||
maxCC == 0 ||
|
maxCC == 0 ||
|
||||||
(sCC = i_getCombiningClass(schar, coll)) == 0 ||
|
(sCC = i_getCombiningClass(miss, coll)) == 0 ||
|
||||||
sCC>maxCC ||
|
sCC>maxCC ||
|
||||||
(allSame != 0 && sCC == maxCC) ||
|
(allSame != 0 && sCC == maxCC) ||
|
||||||
collIter_eos(source)) {
|
collIter_eos(source)) {
|
||||||
@ -2910,6 +2710,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
goBackOne(source); // back up the source string by one,
|
goBackOne(source); // back up the source string by one,
|
||||||
// because the character we just looked at was
|
// because the character we just looked at was
|
||||||
// not part of the contraction. */
|
// not part of the contraction. */
|
||||||
|
if(U_IS_SUPPLEMENTARY(miss)) {
|
||||||
|
goBackOne(source);
|
||||||
|
}
|
||||||
CE = *(coll->contractionCEs +
|
CE = *(coll->contractionCEs +
|
||||||
(ContractionStart - coll->contractionIndex));
|
(ContractionStart - coll->contractionIndex));
|
||||||
} else {
|
} else {
|
||||||
@ -2921,9 +2724,13 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
/* find the next character if schar is not a base character
|
/* find the next character if schar is not a base character
|
||||||
and we are not yet at the end of the string */
|
and we are not yet at the end of the string */
|
||||||
tempchar = getNextNormalizedChar(source);
|
tempchar = getNextNormalizedChar(source);
|
||||||
|
// probably need another supplementary thingie here
|
||||||
goBackOne(source);
|
goBackOne(source);
|
||||||
if (i_getCombiningClass(tempchar, coll) == 0) {
|
if (i_getCombiningClass(tempchar, coll) == 0) {
|
||||||
goBackOne(source);
|
goBackOne(source);
|
||||||
|
if(U_IS_SUPPLEMENTARY(miss)) {
|
||||||
|
goBackOne(source);
|
||||||
|
}
|
||||||
/* Spit out the last char of the string, wasn't tasty enough */
|
/* Spit out the last char of the string, wasn't tasty enough */
|
||||||
CE = *(coll->contractionCEs +
|
CE = *(coll->contractionCEs +
|
||||||
(ContractionStart - coll->contractionIndex));
|
(ContractionStart - coll->contractionIndex));
|
||||||
@ -3217,20 +3024,6 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||||||
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
|
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
|
||||||
CE = *CEOffset++;
|
CE = *CEOffset++;
|
||||||
break;
|
break;
|
||||||
#if 0
|
|
||||||
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
|
|
||||||
size = getExpansionCount(CE);
|
|
||||||
CE = *CEOffset++;
|
|
||||||
if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
|
|
||||||
for(i = 1; i<size; i++) {
|
|
||||||
*(source->CEpos++) = *CEOffset++;
|
|
||||||
}
|
|
||||||
} else { /* else, we do */
|
|
||||||
while(*CEOffset != 0) {
|
|
||||||
*(source->CEpos++) = *CEOffset++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return CE;
|
return CE;
|
||||||
}
|
}
|
||||||
@ -3395,78 +3188,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
/* if you have encountered it here, it means that a */
|
/* if you have encountered it here, it means that a */
|
||||||
/* broken sequence was encountered and this is an error */
|
/* broken sequence was encountered and this is an error */
|
||||||
return 0;
|
return 0;
|
||||||
case THAI_TAG:
|
|
||||||
if ((source->flags & UCOL_ITER_INNORMBUF) || /* Already Swapped || */
|
|
||||||
source->string == source->pos || /* At start of string.|| */
|
|
||||||
/* previous char not Thai prevowel */
|
|
||||||
/*UCOL_ISTHAIBASECONSONANT(*(source->pos)) == FALSE ||*/ // This is from the old specs - we now rearrange unconditionally
|
|
||||||
UCOL_ISTHAIPREVOWEL(peekCharacter(source, -1)) == FALSE)
|
|
||||||
//UCOL_ISTHAIPREVOWEL(*(source->pos - 1)) == FALSE)
|
|
||||||
{
|
|
||||||
/* Treat Thai as a length one expansion */
|
|
||||||
/* find the offset to expansion table */
|
|
||||||
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE);
|
|
||||||
CE = *CEOffset ++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Move the prevowel and the following base Consonant into the
|
|
||||||
normalization buffer with their order swapped
|
|
||||||
*/
|
|
||||||
UChar32 cp = (UChar32)peekCharacter(source, 0);
|
|
||||||
UBool reorder = TRUE;
|
|
||||||
|
|
||||||
int32_t decompLen = unorm_getDecomposition(cp, FALSE, source->writableBuffer, UCOL_WRITABLE_BUFFER_SIZE-1);
|
|
||||||
if(decompLen < 0) {
|
|
||||||
decompLen = -decompLen; // there was no decomposition
|
|
||||||
} else { // we need to check if we will hit a contraction trigger because of decomposition
|
|
||||||
int32_t i = decompLen;
|
|
||||||
for(i = 0; i < decompLen; i++) {
|
|
||||||
if(ucol_contractionEndCP(source->writableBuffer[i], coll)) {
|
|
||||||
reorder = FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
UChar *tempbuffer = source->writableBuffer +
|
|
||||||
(source->writableBufSize - 1);
|
|
||||||
uprv_memcpy(tempbuffer-decompLen + 1, source->writableBuffer, sizeof(UChar)*decompLen);
|
|
||||||
if(reorder) {
|
|
||||||
*(tempbuffer - decompLen) = *(tempbuffer - decompLen + 1);
|
|
||||||
*(tempbuffer - decompLen + 1) = peekCharacter(source, -1);
|
|
||||||
} else {
|
|
||||||
*(tempbuffer - decompLen) = peekCharacter(source, -1);
|
|
||||||
}
|
|
||||||
*(tempbuffer - decompLen - 1) = 0;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
UChar *tempbuffer = source->writableBuffer +
|
|
||||||
(source->writableBufSize - 1);
|
|
||||||
*(tempbuffer - 2) = 0;
|
|
||||||
*(tempbuffer - 1) = peekCharacter(source, 0);
|
|
||||||
*(tempbuffer) = peekCharacter(source, -1);
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
Indicate where to continue in main input string after exhausting
|
|
||||||
the writableBuffer
|
|
||||||
*/
|
|
||||||
if (source->pos - 1 == source->string) {
|
|
||||||
source->fcdPosition = NULL;
|
|
||||||
} else {
|
|
||||||
source->fcdPosition = source->pos-2;
|
|
||||||
}
|
|
||||||
|
|
||||||
source->pos = tempbuffer+1; // we're doing predecrement, right?
|
|
||||||
source->origFlags = source->flags;
|
|
||||||
source->flags |= UCOL_ITER_INNORMBUF;
|
|
||||||
source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
|
|
||||||
|
|
||||||
//CE = UCOL_IGNORABLE;
|
|
||||||
return(UCOL_IGNORABLE);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case SPEC_PROC_TAG:
|
case SPEC_PROC_TAG:
|
||||||
{
|
{
|
||||||
// Special processing is getting a CE that is preceded by a certain prefix
|
// Special processing is getting a CE that is preceded by a certain prefix
|
||||||
@ -3513,7 +3234,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
// it's easy for BMP code points
|
// it's easy for BMP code points
|
||||||
if(isZeroCE == 0) {
|
if(isZeroCE == 0) {
|
||||||
continue;
|
continue;
|
||||||
} else if(UTF_IS_TRAIL(schar) || UTF_IS_LEAD(schar)) {
|
} else if(U16_IS_TRAIL(schar) || U16_IS_LEAD(schar)) {
|
||||||
// for supplementary code points, we have to check the next one
|
// for supplementary code points, we have to check the next one
|
||||||
// situations where we are going to ignore
|
// situations where we are going to ignore
|
||||||
// 1. beginning of the string: schar is a lone surrogate
|
// 1. beginning of the string: schar is a lone surrogate
|
||||||
@ -3522,7 +3243,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
// that is explicitly set to zero.
|
// that is explicitly set to zero.
|
||||||
if (!collIter_bos(source)) {
|
if (!collIter_bos(source)) {
|
||||||
UChar lead;
|
UChar lead;
|
||||||
if(UTF_IS_LEAD(lead = getPrevNormalizedChar(source))) {
|
if(U16_IS_LEAD(lead = getPrevNormalizedChar(source))) {
|
||||||
isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, lead);
|
isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, lead);
|
||||||
if(getCETag(isZeroCE) == SURROGATE_TAG) {
|
if(getCETag(isZeroCE) == SURROGATE_TAG) {
|
||||||
uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, schar);
|
uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, schar);
|
||||||
@ -3581,9 +3302,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
*(UCharOffset --) = 0;
|
*(UCharOffset --) = 0;
|
||||||
noChars = 0;
|
noChars = 0;
|
||||||
// have to swap thai characters
|
// have to swap thai characters
|
||||||
while (ucol_unsafeCP(schar, coll) || UCOL_ISTHAIPREVOWEL(peekCharacter(source, -1))) {
|
while (ucol_unsafeCP(schar, coll)) {
|
||||||
// we might have ended here after trying to reorder Thai, but seeing that there are unsafe points
|
|
||||||
// in the backward processing
|
|
||||||
*(UCharOffset) = schar;
|
*(UCharOffset) = schar;
|
||||||
noChars++;
|
noChars++;
|
||||||
UCharOffset --;
|
UCharOffset --;
|
||||||
@ -3911,33 +3630,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
||||||
CE = *(CEOffset++);
|
CE = *(CEOffset++);
|
||||||
break;
|
break;
|
||||||
#if 0
|
|
||||||
/* find the offset to expansion table */
|
|
||||||
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
|
||||||
size = getExpansionCount(CE);
|
|
||||||
if (size != 0) {
|
|
||||||
/*
|
|
||||||
if there are less than 16 elements in expansion, we don't terminate
|
|
||||||
*/
|
|
||||||
uint32_t count;
|
|
||||||
for (count = 0; count < size; count++) {
|
|
||||||
*(source->CEpos ++) = *CEOffset++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* else, we do */
|
|
||||||
while (*CEOffset != 0) {
|
|
||||||
*(source->CEpos ++) = *CEOffset ++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
source->toReturn = source->CEpos - 1;
|
|
||||||
// in case of one element expansion, we
|
|
||||||
// want to immediately return CEpos
|
|
||||||
if(source->toReturn == source->CEs) {
|
|
||||||
source->CEpos = source->CEs;
|
|
||||||
}
|
|
||||||
return *(source->toReturn);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
|
case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
|
||||||
@ -4044,7 +3736,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
prevChar = *prev;
|
prevChar = *prev;
|
||||||
|
|
||||||
/* Handles Han and Supplementary characters here.*/
|
/* Handles Han and Supplementary characters here.*/
|
||||||
if (UTF_IS_FIRST_SURROGATE(prevChar)) {
|
if (U16_IS_LEAD(prevChar)) {
|
||||||
cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
|
cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
|
||||||
source->pos = prev;
|
source->pos = prev;
|
||||||
} else {
|
} else {
|
||||||
@ -6456,6 +6148,7 @@ saveState:
|
|||||||
} else {
|
} else {
|
||||||
state[0] = iterState;
|
state[0] = iterState;
|
||||||
iterSkips++;
|
iterSkips++;
|
||||||
|
iterSkips += s.consumedChars;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Store the number of elements processed. On CE levels, this is
|
// Store the number of elements processed. On CE levels, this is
|
||||||
@ -8325,9 +8018,6 @@ ucol_strcollIter( const UCollator *coll,
|
|||||||
|
|
||||||
while((sChar = sColl.iterator->next(sColl.iterator)) ==
|
while((sChar = sColl.iterator->next(sColl.iterator)) ==
|
||||||
(tChar = tColl.iterator->next(tColl.iterator))) {
|
(tChar = tColl.iterator->next(tColl.iterator))) {
|
||||||
if(UCOL_ISTHAIPREVOWEL(sChar)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if(sChar == U_SENTINEL) {
|
if(sChar == U_SENTINEL) {
|
||||||
result = UCOL_EQUAL;
|
result = UCOL_EQUAL;
|
||||||
goto end_compare;
|
goto end_compare;
|
||||||
@ -8422,9 +8112,6 @@ ucol_strcoll( const UCollator *coll,
|
|||||||
if ( *pSrc != *pTarg || *pSrc == 0) {
|
if ( *pSrc != *pTarg || *pSrc == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if(UCOL_ISTHAIPREVOWEL(*pSrc)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
pSrc++;
|
pSrc++;
|
||||||
pTarg++;
|
pTarg++;
|
||||||
}
|
}
|
||||||
@ -8458,9 +8145,6 @@ ucol_strcoll( const UCollator *coll,
|
|||||||
if (*pSrc != *pTarg) {
|
if (*pSrc != *pTarg) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if(UCOL_ISTHAIPREVOWEL(*pSrc)) { // they are the same here, so any will do
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
pSrc++;
|
pSrc++;
|
||||||
pTarg++;
|
pTarg++;
|
||||||
}
|
}
|
||||||
|
@ -67,29 +67,73 @@ isAcceptableInvUCA(void * /*context*/,
|
|||||||
}
|
}
|
||||||
U_CDECL_END
|
U_CDECL_END
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Takes two CEs (lead and continuation) and
|
||||||
|
* compares them as CEs should be compared:
|
||||||
|
* primary vs. primary, secondary vs. secondary
|
||||||
|
* tertiary vs. tertiary
|
||||||
|
*/
|
||||||
|
static int32_t compareCEs(uint32_t source0, uint32_t source1, uint32_t target0, uint32_t target1) {
|
||||||
|
uint32_t s1 = source0, s2, t1 = target0, t2;
|
||||||
|
if(isContinuation(source1)) {
|
||||||
|
s2 = source1;
|
||||||
|
} else {
|
||||||
|
s2 = 0;
|
||||||
|
}
|
||||||
|
if(isContinuation(target1)) {
|
||||||
|
t2 = target1;
|
||||||
|
} else {
|
||||||
|
t2 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t s = 0, t = 0;
|
||||||
|
if(s1 == t1 && s2 == t2) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
|
||||||
|
t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
|
||||||
|
if(s < t) {
|
||||||
|
return -1;
|
||||||
|
} else if(s > t) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
|
||||||
|
t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
|
||||||
|
if(s < t) {
|
||||||
|
return -1;
|
||||||
|
} else if(s > t) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
|
||||||
|
t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
|
||||||
|
if(s < t) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t SecondCE) {
|
int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t SecondCE) {
|
||||||
uint32_t bottom = 0, top = src->invUCA->tableSize;
|
uint32_t bottom = 0, top = src->invUCA->tableSize;
|
||||||
uint32_t i = 0;
|
uint32_t i = 0;
|
||||||
uint32_t first = 0, second = 0;
|
uint32_t first = 0, second = 0;
|
||||||
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
|
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
|
||||||
|
int32_t res = 0;
|
||||||
|
|
||||||
while(bottom < top-1) {
|
while(bottom < top-1) {
|
||||||
i = (top+bottom)/2;
|
i = (top+bottom)/2;
|
||||||
first = *(CETable+3*i);
|
first = *(CETable+3*i);
|
||||||
second = *(CETable+3*i+1);
|
second = *(CETable+3*i+1);
|
||||||
if(first > CE) {
|
res = compareCEs(first, second, CE, SecondCE);
|
||||||
|
if(res > 0) {
|
||||||
top = i;
|
top = i;
|
||||||
} else if(first < CE) {
|
} else if(res < 0) {
|
||||||
bottom = i;
|
bottom = i;
|
||||||
} else {
|
} else {
|
||||||
if(second > SecondCE) {
|
break;
|
||||||
top = i;
|
|
||||||
} else if(second < SecondCE) {
|
|
||||||
bottom = i;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -862,13 +906,6 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
|
|||||||
el.cSize = (tok->source >> 24);
|
el.cSize = (tok->source >> 24);
|
||||||
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
|
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
|
|
||||||
el.isThai = TRUE;
|
|
||||||
} else {
|
|
||||||
el.isThai = FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(src->UCA != NULL) {
|
if(src->UCA != NULL) {
|
||||||
for(i = 0; i<el.cSize; i++) {
|
for(i = 0; i<el.cSize; i++) {
|
||||||
if(UCOL_ISJAMO(el.cPoints[i])) {
|
if(UCOL_ISJAMO(el.cPoints[i])) {
|
||||||
@ -877,44 +914,12 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
// we do case bits in doCE now, since we will mess up expansions otherwise.
|
|
||||||
// Case bits handling
|
|
||||||
el.CEs[0] &= 0xFFFFFF3F; // Clean the case bits field
|
|
||||||
if(el.cSize > 1) {
|
|
||||||
// Do it manually
|
|
||||||
el.CEs[0] |= ucol_uprv_getCaseBits(src->UCA, el.cPoints, el.cSize, status);
|
|
||||||
} else {
|
|
||||||
// Copy it from the UCA
|
|
||||||
uint32_t caseCE = ucol_getFirstCE(src->UCA, el.cPoints[0], status);
|
|
||||||
el.CEs[0] |= (caseCE & 0xC0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* and then, add it */
|
/* and then, add it */
|
||||||
#if UCOL_DEBUG==2
|
#if UCOL_DEBUG==2
|
||||||
fprintf(stderr, "Adding: %04X with %08X\n", el.cPoints[0], el.CEs[0]);
|
fprintf(stderr, "Adding: %04X with %08X\n", el.cPoints[0], el.CEs[0]);
|
||||||
#endif
|
#endif
|
||||||
uprv_uca_addAnElement(t, &el, status);
|
uprv_uca_addAnElement(t, &el, status);
|
||||||
|
|
||||||
#if 0
|
|
||||||
if(el.cSize > 1) { // this is a contraction, we should check whether a composed form should also be included
|
|
||||||
UChar composed[256];
|
|
||||||
uint32_t compLen = unorm_normalize(el.cPoints, el.cSize, UNORM_NFC, 0, composed, 256, status);;
|
|
||||||
|
|
||||||
if(compLen != el.cSize || uprv_memcmp(composed, el.cPoints, el.cSize*sizeof(UChar))) {
|
|
||||||
// composed form of a contraction is different than the decomposed form!
|
|
||||||
// do it!
|
|
||||||
#ifdef UCOL_DEBUG
|
|
||||||
fprintf(stderr, "Adding composed for %04X->%04X\n", *element->cPoints, *composed);
|
|
||||||
#endif
|
|
||||||
el.cSize = compLen;
|
|
||||||
uprv_memcpy(el.cPoints, composed, el.cSize*sizeof(UChar));
|
|
||||||
uprv_uca_addAnElement(t, &el, status);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if UCOL_DEBUG_DUPLICATES
|
#if UCOL_DEBUG_DUPLICATES
|
||||||
if(*status != U_ZERO_ERROR) {
|
if(*status != U_ZERO_ERROR) {
|
||||||
fprintf(stderr, "replaced CE for %04X with CE for %04X\n", el.cPoints[0], tok->debugSource);
|
fprintf(stderr, "replaced CE for %04X with CE for %04X\n", el.cPoints[0], tok->debugSource);
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
|
|
||||||
#if !UCONFIG_NO_COLLATION
|
#if !UCONFIG_NO_COLLATION
|
||||||
|
#if !UCONFIG_NO_COLLATION_BUILDER
|
||||||
|
|
||||||
#include "ucol_imp.h"
|
#include "ucol_imp.h"
|
||||||
#include "ucol_tok.h"
|
#include "ucol_tok.h"
|
||||||
@ -55,6 +56,7 @@ typedef struct {
|
|||||||
uint32_t fHigh; /*forbidden High */
|
uint32_t fHigh; /*forbidden High */
|
||||||
} ucolCEGenerator;
|
} ucolCEGenerator;
|
||||||
|
|
||||||
|
#endif /* #if !UCONFIG_NO_COLLATION_BUILDER */
|
||||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -281,6 +281,7 @@ uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
|
|||||||
r->maxExpansions->position = t->maxExpansions->position;
|
r->maxExpansions->position = t->maxExpansions->position;
|
||||||
if(t->maxExpansions->endExpansionCE != NULL) {
|
if(t->maxExpansions->endExpansionCE != NULL) {
|
||||||
r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
|
r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
|
||||||
|
uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size);
|
||||||
/* test for NULL */
|
/* test for NULL */
|
||||||
if (r->maxExpansions->endExpansionCE == NULL) {
|
if (r->maxExpansions->endExpansionCE == NULL) {
|
||||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||||
@ -292,6 +293,7 @@ uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
|
|||||||
}
|
}
|
||||||
if(t->maxExpansions->expansionCESize != NULL) {
|
if(t->maxExpansions->expansionCESize != NULL) {
|
||||||
r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
|
r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
|
||||||
|
uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size);
|
||||||
/* test for NULL */
|
/* test for NULL */
|
||||||
if (r->maxExpansions->expansionCESize == NULL) {
|
if (r->maxExpansions->expansionCESize == NULL) {
|
||||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||||
@ -1016,14 +1018,7 @@ uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status)
|
|||||||
element->mapCE = 0; // clear mapCE so that we can catch expansions
|
element->mapCE = 0; // clear mapCE so that we can catch expansions
|
||||||
|
|
||||||
if(element->noOfCEs == 1) {
|
if(element->noOfCEs == 1) {
|
||||||
if(element->isThai == FALSE) {
|
element->mapCE = element->CEs[0];
|
||||||
element->mapCE = element->CEs[0];
|
|
||||||
} else { /* add thai - totally bad here */
|
|
||||||
expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (THAI_TAG<<UCOL_TAG_SHIFT)
|
|
||||||
| ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
|
|
||||||
| 0x1);
|
|
||||||
element->mapCE = expansion;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* ICU 2.1 long primaries */
|
/* ICU 2.1 long primaries */
|
||||||
/* unfortunately, it looks like we have to look for a long primary here */
|
/* unfortunately, it looks like we have to look for a long primary here */
|
||||||
@ -1425,15 +1420,15 @@ uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
|
|||||||
|
|
||||||
/* copy max expansion table */
|
/* copy max expansion table */
|
||||||
myData->endExpansionCE = tableOffset;
|
myData->endExpansionCE = tableOffset;
|
||||||
myData->endExpansionCECount = maxexpansion->position;
|
myData->endExpansionCECount = maxexpansion->position - 1;
|
||||||
/* not copying the first element which is a dummy */
|
/* not copying the first element which is a dummy */
|
||||||
uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
|
uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
|
||||||
maxexpansion->position * sizeof(uint32_t));
|
(maxexpansion->position - 1) * sizeof(uint32_t));
|
||||||
tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint32_t)));
|
tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t)));
|
||||||
myData->expansionCESize = tableOffset;
|
myData->expansionCESize = tableOffset;
|
||||||
uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
|
uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
|
||||||
maxexpansion->position * sizeof(uint8_t));
|
(maxexpansion->position - 1) * sizeof(uint8_t));
|
||||||
tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint8_t)));
|
tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t)));
|
||||||
|
|
||||||
/* Unsafe chars table. Finish it off, then copy it. */
|
/* Unsafe chars table. Finish it off, then copy it. */
|
||||||
uprv_uca_unsafeCPAddCCNZ(t, status);
|
uprv_uca_unsafeCPAddCCNZ(t, status);
|
||||||
@ -1546,12 +1541,6 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li
|
|||||||
// Since unsafeCPSet is static in ucol_elm, we are going
|
// Since unsafeCPSet is static in ucol_elm, we are going
|
||||||
// to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
|
// to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
|
||||||
}
|
}
|
||||||
if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
|
|
||||||
el.isThai = TRUE;
|
|
||||||
} else {
|
|
||||||
el.isThai = FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
uprv_uca_addAnElement(t, &el, status);
|
uprv_uca_addAnElement(t, &el, status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -279,6 +279,9 @@ typedef struct collIterate {
|
|||||||
uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */
|
uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */
|
||||||
UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */
|
UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */
|
||||||
UCharIterator *iterator;
|
UCharIterator *iterator;
|
||||||
|
uint32_t consumedChars; /* number of extra consumed chars in a contraction */
|
||||||
|
/* used in conjunction with iterator state for partial */
|
||||||
|
/* sortkeys */
|
||||||
/*int32_t iteratorIndex;*/
|
/*int32_t iteratorIndex;*/
|
||||||
} collIterate;
|
} collIterate;
|
||||||
|
|
||||||
@ -295,6 +298,7 @@ struct collIterateState {
|
|||||||
uint8_t origFlags;
|
uint8_t origFlags;
|
||||||
uint32_t iteratorIndex;
|
uint32_t iteratorIndex;
|
||||||
int32_t iteratorMove;
|
int32_t iteratorMove;
|
||||||
|
uint32_t consumedChars;
|
||||||
};
|
};
|
||||||
|
|
||||||
U_CAPI void U_EXPORT2
|
U_CAPI void U_EXPORT2
|
||||||
@ -558,7 +562,7 @@ enum {
|
|||||||
UCOL_BYTE_FIRST_TAILORED = 0x04,
|
UCOL_BYTE_FIRST_TAILORED = 0x04,
|
||||||
UCOL_BYTE_COMMON = 0x05,
|
UCOL_BYTE_COMMON = 0x05,
|
||||||
UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON,
|
UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON,
|
||||||
UCOL_CODAN_PLACEHOLDER = 0x24,
|
UCOL_CODAN_PLACEHOLDER = 0x26,
|
||||||
UCOL_BYTE_LAST_LATIN_PRIMARY = 0x4C,
|
UCOL_BYTE_LAST_LATIN_PRIMARY = 0x4C,
|
||||||
UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x4D,
|
UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x4D,
|
||||||
UCOL_BYTE_UNSHIFTED_MAX = 0xFF
|
UCOL_BYTE_UNSHIFTED_MAX = 0xFF
|
||||||
|
Loading…
Reference in New Issue
Block a user