ICU-1245 handler for special processing for iterating backwards

X-SVN-Rev: 5986
This commit is contained in:
Vladimir Weinstein 2001-10-02 01:25:25 +00:00
parent 6fbdc4ddeb
commit 58a9839764

View File

@ -2416,6 +2416,61 @@ uint32_t getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
return(UCOL_IGNORABLE); return(UCOL_IGNORABLE);
} }
break; break;
case SPEC_PROC_TAG:
{
// Special processing is getting a CE that is preceded by a certain prefix
// Currently this is only needed for optimizing Japanese length and iteration marks.
// When we encouter a special processing tag, we go backwards and try to see if
// we have a match.
// Contraction tables are used - so the whole process is not unlike contraction.
// prefix data is stored backwards in the table.
const UChar *UCharOffset;
UChar schar, tchar, *sourcePointer = source->pos;
Normalizer n(source->string, source->pos-source->string+1, UNORM_NFKC);
n.last();
for(;;) {
// This loop will run once per source string character, for as long as we
// are matching a potential contraction sequence
// First we position ourselves at the begining of contraction sequence
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
schar = (UChar)n.previous();
if(schar==Normalizer::DONE) {
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
break;
}
while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
UCharOffset++;
}
if (schar == tchar) {
// Found the source string char in the table.
// Pick up the corresponding CE from the table.
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
}
else
{
// Source string char was not in the table.
// We have not found the prefix.
CE = *(coll->contractionCEs +
(ContractionStart - coll->contractionIndex));
}
if(!isPrefix(CE)) {
// The source string char was in the contraction table, and the corresponding
// CE is not a prefix CE. We found the prefix, break
// out of loop, this CE will end up being returned. This is the normal
// way out of prefix handling when the source actually contained
// the prefix.
break;
}
}
break;
}
case CONTRACTION_TAG: case CONTRACTION_TAG:
/* to ensure that the backwards and forwards iteration matches, we /* to ensure that the backwards and forwards iteration matches, we
take the current region of most possible match and pass it through take the current region of most possible match and pass it through