ICU-5595 Fix scrambled dictionary break cache when built in reverse with multiple spans
X-SVN-Rev: 21421
This commit is contained in:
parent
17717d9c7f
commit
8cf4403816
@ -527,6 +527,11 @@ int32_t RuleBasedBreakIterator::previous(void) {
|
||||
if (fCachedBreakPositions != NULL) {
|
||||
if (fPositionInCache > 0) {
|
||||
--fPositionInCache;
|
||||
// If we're at the beginning of the cache, need to reevaluate the
|
||||
// rule status
|
||||
if (fPositionInCache <= 0) {
|
||||
fLastStatusIndexValid = FALSE;
|
||||
}
|
||||
int32_t pos = fCachedBreakPositions[fPositionInCache];
|
||||
utext_setNativeIndex(fText, pos);
|
||||
return pos;
|
||||
@ -731,6 +736,11 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
||||
&& offset > fCachedBreakPositions[fPositionInCache])
|
||||
++fPositionInCache;
|
||||
--fPositionInCache;
|
||||
// If we're at the beginning of the cache, need to reevaluate the
|
||||
// rule status
|
||||
if (fPositionInCache <= 0) {
|
||||
fLastStatusIndexValid = FALSE;
|
||||
}
|
||||
utext_setNativeIndex(fText, fCachedBreakPositions[fPositionInCache]);
|
||||
return fCachedBreakPositions[fPositionInCache];
|
||||
}
|
||||
@ -1595,25 +1605,19 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
||||
// Loop through the text, looking for ranges of dictionary characters.
|
||||
// For each span, find the appropriate break engine, and ask it to find
|
||||
// any breaks within the span.
|
||||
// Note: we always do this in the forward direction, so that the break
|
||||
// cache is built in the right order.
|
||||
if (reverse) {
|
||||
utext_setNativeIndex(fText, rangeStart);
|
||||
}
|
||||
while(U_SUCCESS(status)) {
|
||||
if (reverse) {
|
||||
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) > rangeStart && (category & 0x4000) == 0) {
|
||||
c = UTEXT_PREVIOUS32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
}
|
||||
if (current <= rangeStart) {
|
||||
break;
|
||||
}
|
||||
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
|
||||
utext_next32(fText); // TODO: tweak for post-increment operation
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
}
|
||||
else {
|
||||
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
|
||||
utext_next32(fText); // TODO: tweak for post-increment operation
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
}
|
||||
if (current >= rangeEnd) {
|
||||
break;
|
||||
}
|
||||
if (current >= rangeEnd) {
|
||||
break;
|
||||
}
|
||||
|
||||
// We now have a dictionary character. Get the appropriate language object
|
||||
@ -1623,7 +1627,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
||||
// Ask the language object if there are any breaks. It will leave the text
|
||||
// pointer on the other side of its range, ready to search for the next one.
|
||||
if (lbe != NULL) {
|
||||
foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, reverse, fBreakType, breaks);
|
||||
foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks);
|
||||
}
|
||||
|
||||
// Reload the loop variables for the next go-round
|
||||
@ -1667,9 +1671,8 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
||||
// If the allocation failed, just fall through to the "no breaks found" case.
|
||||
}
|
||||
|
||||
// If we get here, there were no language-based breaks. As a result, the
|
||||
// text pointer should be back to where it started, but set it just to
|
||||
// make sure.
|
||||
// If we get here, there were no language-based breaks. Set the text pointer
|
||||
// to the original proposed break.
|
||||
utext_setNativeIndex(fText, reverse ? startPos : endPos);
|
||||
return (reverse ? startPos : endPos);
|
||||
}
|
||||
|
8
icu4c/source/test/testdata/rbbitst.txt
vendored
8
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -529,4 +529,12 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
#
|
||||
<data>•สวัสดี<200>ครับ<200>สบาย<200>ดี<200>ไหม<200> •ครับ<200></data>
|
||||
|
||||
#
|
||||
# Trac ticket 5595 Test Case
|
||||
<data>•บท<200>ที่๑พายุ<200>ไซโคลน<200>โด<200>โรธี<200>อาศัย<200>อยู่<200>ท่ามกลาง<200>\
|
||||
ทุ่งใหญ่<200>ใน<200>แคนซัส<200>กับ<200>ลุง<200>เฮ<200>นรี<200>ชาวไร่<200>และ<200>ป้า<200>เอ็ม<200>\
|
||||
ภรรยา<200>ชาวไร่<200>บ้าน<200>ของ<200>พวก<200>เขา<200>หลัง<200>เล็ก<200>เพราะ<200>ไม้<200>\
|
||||
สร้าง<200>บ้าน<200>ต้อง<200>ขน<200>มา<200>ด้วย<200>เกวียน<200>เป็น<200>ระยะ<200>ทาง<200>หลาย<200>\
|
||||
ไมล์<200></data>
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user