ICU-5595 Fix scrambled dictionary break cache when built in reverse with multiple spans

X-SVN-Rev: 21421
This commit is contained in:
Deborah Goldsmith 2007-04-17 23:01:42 +00:00
parent 17717d9c7f
commit 8cf4403816
2 changed files with 32 additions and 21 deletions

View File

@ -527,6 +527,11 @@ int32_t RuleBasedBreakIterator::previous(void) {
if (fCachedBreakPositions != NULL) {
if (fPositionInCache > 0) {
--fPositionInCache;
// If we're at the beginning of the cache, we need to reevaluate the
// rule status.
if (fPositionInCache <= 0) {
fLastStatusIndexValid = FALSE;
}
int32_t pos = fCachedBreakPositions[fPositionInCache];
utext_setNativeIndex(fText, pos);
return pos;
@ -731,6 +736,11 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
&& offset > fCachedBreakPositions[fPositionInCache])
++fPositionInCache;
--fPositionInCache;
// If we're at the beginning of the cache, we need to reevaluate the
// rule status.
if (fPositionInCache <= 0) {
fLastStatusIndexValid = FALSE;
}
utext_setNativeIndex(fText, fCachedBreakPositions[fPositionInCache]);
return fCachedBreakPositions[fPositionInCache];
}
@ -1595,25 +1605,19 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
// Loop through the text, looking for ranges of dictionary characters.
// For each span, find the appropriate break engine, and ask it to find
// any breaks within the span.
// Note: we always do this in the forward direction, so that the break
// cache is built in the right order.
if (reverse) {
utext_setNativeIndex(fText, rangeStart);
}
while(U_SUCCESS(status)) {
if (reverse) {
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) > rangeStart && (category & 0x4000) == 0) {
c = UTEXT_PREVIOUS32(fText);
UTRIE_GET16(&fData->fTrie, c, category);
}
if (current <= rangeStart) {
break;
}
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
utext_next32(fText); // TODO: tweak for post-increment operation
c = utext_current32(fText);
UTRIE_GET16(&fData->fTrie, c, category);
}
else {
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
utext_next32(fText); // TODO: tweak for post-increment operation
c = utext_current32(fText);
UTRIE_GET16(&fData->fTrie, c, category);
}
if (current >= rangeEnd) {
break;
}
if (current >= rangeEnd) {
break;
}
// We now have a dictionary character. Get the appropriate language object
@ -1623,7 +1627,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
// Ask the language object if there are any breaks. It will leave the text
// pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, reverse, fBreakType, breaks);
foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks);
}
// Reload the loop variables for the next go-round
@ -1667,9 +1671,8 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
// If the allocation failed, just fall through to the "no breaks found" case.
}
// If we get here, there were no language-based breaks. As a result, the
// text pointer should be back to where it started, but set it just to
// make sure.
// If we get here, there were no language-based breaks. Set the text pointer
// to the original proposed break.
utext_setNativeIndex(fText, reverse ? startPos : endPos);
return (reverse ? startPos : endPos);
}

View File

@ -529,4 +529,12 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
#
<data>•สวัสดี<200>ครับ<200>สบาย<200>ดี<200>ไหม<200> •ครับ<200></data>
#
# Trac ticket 5595 Test Case
<data>•บท<200>ที่๑พายุ<200>ไซโคลน<200>โด<200>โรธี<200>อาศัย<200>อยู่<200>ท่ามกลาง<200>\
ทุ่งใหญ่<200>ใน<200>แคนซัส<200>กับ<200>ลุง<200>เฮ<200>นรี<200>ชาวไร่<200>และ<200>ป้า<200>เอ็ม<200>\
ภรรยา<200>ชาวไร่<200>บ้าน<200>ของ<200>พวก<200>เขา<200>หลัง<200>เล็ก<200>เพราะ<200>ไม้<200>\
สร้าง<200>บ้าน<200>ต้อง<200>ขน<200>มา<200>ด้วย<200>เกวียน<200>เป็น<200>ระยะ<200>ทาง<200>หลาย<200>\
ไมล์<200></data>