ICU-13560 make some toUnicode converters check toULength, not toUnicodeStatus, for continuing after partial sequences; the ucnv.cpp framework code only resets toULength after an error
X-SVN-Rev: 40793
This commit is contained in:
parent
ac0972f12c
commit
0dc85d2408
@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
|
||||
uint32_t ch, i;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
||||
int32_t offsetNum = 0;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
|
||||
uint32_t ch, i;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
||||
int32_t offsetNum = 0;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
||||
int32_t i, inBytes;
|
||||
|
||||
/* Restore size of current sequence */
|
||||
if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (cnv->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
inBytes = cnv->mode; /* restore # of bytes to consume */
|
||||
i = cnv->toULength; /* restore # of bytes consumed */
|
||||
@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
|
||||
int32_t i, inBytes;
|
||||
|
||||
/* Restore size of current sequence */
|
||||
if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (cnv->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
inBytes = cnv->mode; /* restore # of bytes to consume */
|
||||
i = cnv->toULength; /* restore # of bytes consumed */
|
||||
@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
count=(int32_t)(sourceLimit-source)+oldToULength;
|
||||
|
@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if (utf8->toULength > 0) {
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
c = 0;
|
||||
}
|
||||
if(c!=0 && source<sourceLimit) {
|
||||
if(targetCapacity==0) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
|
||||
uint8_t c;
|
||||
|
||||
if(pToUArgs->converter->toUnicodeStatus!=0) {
|
||||
if(pToUArgs->converter->toULength > 0) {
|
||||
/* no handling of partial UTF-8 characters here, fall back to pivoting */
|
||||
*pErrorCode=U_USING_DEFAULT_WARNING;
|
||||
return;
|
||||
|
@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
|
||||
@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
||||
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
|
||||
|
@ -733,6 +733,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
|
||||
UChar *pivotSource = buffer16;
|
||||
UChar *pivotTarget = buffer16;
|
||||
const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
|
||||
int32_t length;
|
||||
|
||||
// Convert with insufficient target capacity.
|
||||
result[2] = 5;
|
||||
@ -741,7 +742,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, FALSE, errorCode);
|
||||
assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
|
||||
int32_t length = (int32_t)(target - result);
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("number of bytes written", 2, length);
|
||||
assertEquals("next byte not clobbered", 5, result[2]);
|
||||
|
||||
@ -790,6 +791,52 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
|
||||
if (length == 5) {
|
||||
assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
|
||||
}
|
||||
|
||||
ucnv_reset(cnv1.getAlias());
|
||||
ucnv_reset(cnv2.getAlias());
|
||||
memset(result, 0, sizeof(result));
|
||||
static const char *illFormed = "\xf1\x91\x93\x96\x91\x94"; // U+514D6 + two more trail bytes
|
||||
source = illFormed;
|
||||
sourceLimit = illFormed + strlen(illFormed);
|
||||
target = result;
|
||||
pivotSource = pivotTarget = buffer16;
|
||||
|
||||
ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
|
||||
|
||||
// Convert only two bytes and flush (but expect failure).
|
||||
char errorBytes[10];
|
||||
int8_t errorLength;
|
||||
result[0] = 5;
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, targetLimit, &source, source + 2,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, TRUE, errorCode);
|
||||
assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("illFormed number of bytes written", 0, length);
|
||||
errorLength = UPRV_LENGTHOF(errorBytes);
|
||||
ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
|
||||
assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
|
||||
if (errorLength == 2) {
|
||||
assertEquals("illFormed truncated errorBytes", 0xf191,
|
||||
((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
|
||||
}
|
||||
|
||||
// Continue conversion starting with a trail byte.
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, targetLimit, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, TRUE, errorCode);
|
||||
|
||||
assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("illFormed trail byte number of bytes written", 0, length);
|
||||
errorLength = UPRV_LENGTHOF(errorBytes);
|
||||
ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
|
||||
assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
|
||||
if (errorLength == 1) {
|
||||
assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// open testdata or ICU data converter ------------------------------------- ***
|
||||
|
Loading…
Reference in New Issue
Block a user