ICU-21280 Correct source bytes counting in UTF8->UTF8 conversion

This commit is contained in:
Roman Savchenko 2020-09-11 15:50:41 +03:00 committed by Markus Scherer
parent 5ed09dc9b8
commit 5a42118a6f
3 changed files with 63 additions and 2 deletions

View File

@ -707,9 +707,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
int32_t length=count-toULength;
U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
count=toULimit+length;
count=toULength+length;
}
if(c!=0) {

View File

@ -77,6 +77,7 @@ ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestGetUnicodeSet2);
TESTCASE_AUTO(TestDefaultIgnorableCallback);
TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
TESTCASE_AUTO(TestUTF8ToUTF8Streaming);
TESTCASE_AUTO_END;
}
@ -830,6 +831,65 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
}
}
void
ConversionTest::TestUTF8ToUTF8Streaming() {
IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming");
LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
// UTF8 encoded cyrillic part of 'Lorem ipsum'
static const char* text =
"\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0"
"\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf"
"\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1"
"\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0"
"\xb5\x20\xd1\x85\xd0\xb0\xd1\x81";
int32_t chunk1 = 25; // partial lead at the end: 0xd0
int32_t chunk2 = 47; // partial tail at the beginning: 0xb0
char result[128];
int32_t sourceLen = (int32_t)strlen(text);
const char* source = text;
const char* sourceLimit = text + chunk1;
int32_t targetLen = sizeof(result);
char* target = result;
const char* targetLimit = result + targetLen;
UChar buffer16[20];
UChar* pivotSource = buffer16;
UChar* pivotTarget = buffer16;
const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
int32_t length;
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
&target, result + targetLen, &source, sourceLimit,
buffer16, &pivotSource, &pivotTarget, pivotLimit,
FALSE, FALSE, errorCode);
length = (int32_t)(target - result);
targetLen -= length;
assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length);
source = text + chunk1;
sourceLimit = source + chunk2;
// Convert the rest and flush.
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
&target, targetLimit, &source, sourceLimit,
buffer16, &pivotSource, &pivotTarget, pivotLimit,
FALSE, TRUE, errorCode);
length = (int32_t)(target - result - length);
targetLen -= length;
assertEquals("Second chunk + 2 doesn't match converted length", chunk2 + 1, length);
assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen);
assertSuccess("UTF-8->UTF-8", errorCode);
}
// open testdata or ICU data converter ------------------------------------- ***
UConverter *

View File

@ -77,6 +77,7 @@ public:
void TestGetUnicodeSet2();
void TestDefaultIgnorableCallback();
void TestUTF8ToUTF8Overflow();
void TestUTF8ToUTF8Streaming();
private:
UBool