ICU-1250 backport bug fixes from icu4j re: dangling lead surrogates and cursorpos matching
X-SVN-Rev: 6012
This commit is contained in:
parent
5da9d23c58
commit
2c1b306ea4
@ -491,13 +491,13 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
|
||||
if (segments == NULL) {
|
||||
text.handleReplaceBetween(pos.start, keyLimit, output);
|
||||
lenDelta = output.length() - (keyLimit - pos.start);
|
||||
if (cursorPos >= 0 && cursorPos < keyLength) {
|
||||
// Within the key, the cursor refers to 16-bit code units
|
||||
if (cursorPos >= 0 && cursorPos <= output.length()) {
|
||||
// Within the output string, the cursor refers to 16-bit code units
|
||||
newStart = pos.start + cursorPos;
|
||||
} else {
|
||||
newStart = pos.start;
|
||||
int32_t n = cursorPos;
|
||||
// Outside the key, cursorPos counts code points
|
||||
// Outside the output string, cursorPos counts code points
|
||||
while (n > 0) {
|
||||
newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
|
||||
--n;
|
||||
|
@ -253,17 +253,7 @@ void Transliterator::transliterate(Replaceable& text,
|
||||
UChar32 insertion,
|
||||
UErrorCode& status) const {
|
||||
UnicodeString str(insertion);
|
||||
if (UTF_IS_LEAD(insertion)) {
|
||||
// Oops, the caller passed us a single lead surrogate. In
|
||||
// general, we don't support this, but we'll do the caller a
|
||||
// favor in the special case of LEAD followed by TRAIL
|
||||
// insertion. Anything else won't work.
|
||||
text.handleReplaceBetween(index.limit, index.limit, str);
|
||||
++index.limit;
|
||||
++index.contextLimit;
|
||||
} else {
|
||||
_transliterate(text, index, &str, status);
|
||||
}
|
||||
_transliterate(text, index, &str, status);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -329,6 +319,15 @@ void Transliterator::_transliterate(Replaceable& text,
|
||||
index.contextLimit += insertion->length();
|
||||
}
|
||||
|
||||
if (index.limit > 0 &&
|
||||
UTF_IS_LEAD(text.charAt(index.limit - 1))) {
|
||||
// Oops, there is a dangling lead surrogate in the buffer.
|
||||
// This will break most transliterators, since they will
|
||||
// assume it is part of a pair.  Don't transliterate until
|
||||
// more text comes in.
|
||||
return;
|
||||
}
|
||||
|
||||
filteredTransliterate(text, index, TRUE);
|
||||
|
||||
#if 0
|
||||
|
Loading…
Reference in New Issue
Block a user