ICU-1245 handler for special processing for iterating backwards
X-SVN-Rev: 5986
This commit is contained in:
parent
6fbdc4ddeb
commit
58a9839764
@ -2416,6 +2416,61 @@ uint32_t getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
|||||||
return(UCOL_IGNORABLE);
|
return(UCOL_IGNORABLE);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case SPEC_PROC_TAG:
|
||||||
|
{
|
||||||
|
// Special processing is getting a CE that is preceded by a certain prefix
|
||||||
|
// Currently this is only needed for optimizing Japanese length and iteration marks.
|
||||||
|
// When we encounter a special processing tag, we go backwards and try to see if
|
||||||
|
// we have a match.
|
||||||
|
// Contraction tables are used - so the whole process is not unlike contraction.
|
||||||
|
// prefix data is stored backwards in the table.
|
||||||
|
const UChar *UCharOffset;
|
||||||
|
UChar schar, tchar, *sourcePointer = source->pos;
|
||||||
|
Normalizer n(source->string, source->pos-source->string+1, UNORM_NFKC);
|
||||||
|
n.last();
|
||||||
|
for(;;) {
|
||||||
|
// This loop will run once per source string character, for as long as we
|
||||||
|
// are matching a potential contraction sequence
|
||||||
|
|
||||||
|
// First we position ourselves at the beginning of the contraction sequence
|
||||||
|
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
|
||||||
|
schar = (UChar)n.previous();
|
||||||
|
|
||||||
|
if(schar==Normalizer::DONE) {
|
||||||
|
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
|
||||||
|
UCharOffset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (schar == tchar) {
|
||||||
|
// Found the source string char in the table.
|
||||||
|
// Pick up the corresponding CE from the table.
|
||||||
|
CE = *(coll->contractionCEs +
|
||||||
|
(UCharOffset - coll->contractionIndex));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Source string char was not in the table.
|
||||||
|
// We have not found the prefix.
|
||||||
|
CE = *(coll->contractionCEs +
|
||||||
|
(ContractionStart - coll->contractionIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!isPrefix(CE)) {
|
||||||
|
// The source string char was in the contraction table, and the corresponding
|
||||||
|
// CE is not a prefix CE. We found the prefix, break
|
||||||
|
// out of loop, this CE will end up being returned. This is the normal
|
||||||
|
// way out of prefix handling when the source actually contained
|
||||||
|
// the prefix.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case CONTRACTION_TAG:
|
case CONTRACTION_TAG:
|
||||||
/* to ensure that the backwards and forwards iteration matches, we
|
/* to ensure that the backwards and forwards iteration matches, we
|
||||||
take the current region of most possible match and pass it through
|
take the current region of most possible match and pass it through
|
||||||
|
Loading…
Reference in New Issue
Block a user