ICU-13560 make some toUnicode converters check toULength, not toUnicodeStatus, when continuing after partial sequences; the ucnv.cpp framework code only resets toULength after an error

X-SVN-Rev: 40793
This commit is contained in:
Markus Scherer 2018-01-23 21:32:36 +00:00
parent ac0972f12c
commit 0dc85d2408
5 changed files with 69 additions and 15 deletions

View File

@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;

View File

@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
int32_t i, inBytes;
/* Restore size of current sequence */
if (cnv->toUnicodeStatus && myTarget < targetLimit)
if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
int32_t i, inBytes;
/* Restore size of current sequence */
if (cnv->toUnicodeStatus && myTarget < targetLimit)
if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
count=(int32_t)(sourceLimit-source)+oldToULength;

View File

@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if (utf8->toULength > 0) {
c=(UChar32)utf8->toUnicodeStatus;
} else {
c = 0;
}
if(c!=0 && source<sourceLimit) {
if(targetCapacity==0) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t c;
if(pToUArgs->converter->toUnicodeStatus!=0) {
if(pToUArgs->converter->toULength > 0) {
/* no handling of partial UTF-8 characters here, fall back to pivoting */
*pErrorCode=U_USING_DEFAULT_WARNING;
return;

View File

@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.

View File

@ -733,6 +733,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
UChar *pivotSource = buffer16;
UChar *pivotTarget = buffer16;
const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
int32_t length;
// Convert with insufficient target capacity.
result[2] = 5;
@ -741,7 +742,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
buffer16, &pivotSource, &pivotTarget, pivotLimit,
FALSE, FALSE, errorCode);
assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
int32_t length = (int32_t)(target - result);
length = (int32_t)(target - result);
assertEquals("number of bytes written", 2, length);
assertEquals("next byte not clobbered", 5, result[2]);
@ -790,6 +791,52 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
if (length == 5) {
assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
}
ucnv_reset(cnv1.getAlias());
ucnv_reset(cnv2.getAlias());
memset(result, 0, sizeof(result));
static const char *illFormed = "\xf1\x91\x93\x96\x91\x94"; // U+514D6 + two more trail bytes
source = illFormed;
sourceLimit = illFormed + strlen(illFormed);
target = result;
pivotSource = pivotTarget = buffer16;
ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
// Convert only two bytes and flush (but expect failure).
char errorBytes[10];
int8_t errorLength;
result[0] = 5;
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
&target, targetLimit, &source, source + 2,
buffer16, &pivotSource, &pivotTarget, pivotLimit,
FALSE, TRUE, errorCode);
assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
length = (int32_t)(target - result);
assertEquals("illFormed number of bytes written", 0, length);
errorLength = UPRV_LENGTHOF(errorBytes);
ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
if (errorLength == 2) {
assertEquals("illFormed truncated errorBytes", 0xf191,
((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
}
// Continue conversion starting with a trail byte.
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
&target, targetLimit, &source, sourceLimit,
buffer16, &pivotSource, &pivotTarget, pivotLimit,
FALSE, TRUE, errorCode);
assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
length = (int32_t)(target - result);
assertEquals("illFormed trail byte number of bytes written", 0, length);
errorLength = UPRV_LENGTHOF(errorBytes);
ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
if (errorLength == 1) {
assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
}
}
// open testdata or ICU data converter ------------------------------------- ***