ICU-21280 Correct source bytes counting in UTF8->UTF8 conversion
This commit is contained in:
parent
5ed09dc9b8
commit
5a42118a6f
@ -707,9 +707,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
|
||||
// Do not go back into the bytes that will be read for finishing a partial
|
||||
// sequence from the previous buffer.
|
||||
int32_t length=count-toULimit;
|
||||
int32_t length=count-toULength;
|
||||
U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
|
||||
count=toULimit+length;
|
||||
count=toULength+length;
|
||||
}
|
||||
|
||||
if(c!=0) {
|
||||
|
@ -77,6 +77,7 @@ ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
||||
TESTCASE_AUTO(TestGetUnicodeSet2);
|
||||
TESTCASE_AUTO(TestDefaultIgnorableCallback);
|
||||
TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
|
||||
TESTCASE_AUTO(TestUTF8ToUTF8Streaming);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -830,6 +831,65 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ConversionTest::TestUTF8ToUTF8Streaming() {
|
||||
IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming");
|
||||
LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
|
||||
LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
|
||||
|
||||
// UTF8 encoded cyrillic part of 'Lorem ipsum'
|
||||
static const char* text =
|
||||
"\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0"
|
||||
"\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf"
|
||||
"\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1"
|
||||
"\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0"
|
||||
"\xb5\x20\xd1\x85\xd0\xb0\xd1\x81";
|
||||
|
||||
int32_t chunk1 = 25; // partial lead at the end: 0xd0
|
||||
int32_t chunk2 = 47; // partial tail at the beginning: 0xb0
|
||||
|
||||
char result[128];
|
||||
|
||||
int32_t sourceLen = (int32_t)strlen(text);
|
||||
const char* source = text;
|
||||
const char* sourceLimit = text + chunk1;
|
||||
|
||||
int32_t targetLen = sizeof(result);
|
||||
char* target = result;
|
||||
const char* targetLimit = result + targetLen;
|
||||
|
||||
UChar buffer16[20];
|
||||
UChar* pivotSource = buffer16;
|
||||
UChar* pivotTarget = buffer16;
|
||||
const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
|
||||
|
||||
int32_t length;
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, result + targetLen, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, FALSE, errorCode);
|
||||
|
||||
length = (int32_t)(target - result);
|
||||
targetLen -= length;
|
||||
assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length);
|
||||
|
||||
source = text + chunk1;
|
||||
sourceLimit = source + chunk2;
|
||||
|
||||
// Convert the rest and flush.
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, targetLimit, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, TRUE, errorCode);
|
||||
|
||||
length = (int32_t)(target - result - length);
|
||||
targetLen -= length;
|
||||
assertEquals("Second chunk + 2 doesn't match converted length", chunk2 + 1, length);
|
||||
|
||||
assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen);
|
||||
assertSuccess("UTF-8->UTF-8", errorCode);
|
||||
}
|
||||
|
||||
// open testdata or ICU data converter ------------------------------------- ***
|
||||
|
||||
UConverter *
|
||||
|
@ -77,6 +77,7 @@ public:
|
||||
void TestGetUnicodeSet2();
|
||||
void TestDefaultIgnorableCallback();
|
||||
void TestUTF8ToUTF8Overflow();
|
||||
void TestUTF8ToUTF8Streaming();
|
||||
|
||||
private:
|
||||
UBool
|
||||
|
Loading…
Reference in New Issue
Block a user