ICU-5574 Test and fix a buffer overflow in the offset handling when a single (unpaired) surrogate character is given.

X-SVN-Rev: 21144
This commit is contained in:
George Rhoten 2007-03-01 12:41:15 +00:00
parent 8dd6c746ba
commit a5d3c1ebfc
2 changed files with 44 additions and 11 deletions

View File

@ -3168,6 +3168,9 @@ getTrail:
}
} else {
/* no more input */
if (pArgs->flush) {
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
break;
}
} else {
@ -3235,6 +3238,14 @@ getTrail:
/* set offsets since the start or the last callback */
if(offsets!=NULL) {
size_t count=source-lastSource;
if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
/*
The caller gave us a partial supplementary character (an unpaired
surrogate at the end of the input), which this function cannot
convert in any case. Do not write an offset for it here;
the callback will handle the offset.
*/
count--;
}
while(count>0) {
*offsets++=sourceIndex++;
--count;
@ -4799,8 +4810,7 @@ ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
/* reset the selector for the next code point */
cnv->useSubChar1=FALSE;
switch(cnv->sharedData->mbcs.outputType) {
case MBCS_OUTPUT_2_SISO:
if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
p=buffer;
/* fromUnicodeStatus contains prevLength */
@ -4826,16 +4836,11 @@ ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
ucnv_cbFromUWriteBytes(pArgs,
buffer, (int32_t)(p-buffer),
offsetIndex, pErrorCode);
break;
default:
ucnv_cbFromUWriteBytes(pArgs,
subchar, length,
offsetIndex, pErrorCode);
break;
subchar=buffer;
length=(int32_t)(p-buffer);
}
ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode);
}
U_CFUNC UConverterType

View File

@ -517,6 +517,34 @@ conversion:table(nofallback) {
:intvector{ 0,1,1,1,1,1,2 },
:int{1}, :int{0}, "", "?=@$!@$", ""
}
{
"windows-1252",
"1\U00010001\u0085\U000500022\ud8003\udc014\ue001",
:bin{ 311a1a1a321a331a341a },
:intvector{ 0,1,3,4,6,7,8,9,10,11 },
:int{1}, :int{0}, "", "?", ""
}
{
"windows-1252",
"\uD87E", // lone surrogate can cause an offset overflow
:bin{ 1a },
:intvector{ 0 },
:int{1}, :int{0}, "", "?", ""
}
{
"windows-1252",
"\uD87E", // lone surrogate can cause an offset overflow
:bin{ 6875683f },
:intvector{ 0,0,0,0 },
:int{1}, :int{0}, "", "?=huh?", "" // Use a long substitution character
}
{
"*test4",
"\u30ab", // An incomplete multi-codepoint character
:bin{ ff },
:intvector{ 0 },
:int{1}, :int{0}, "", "?", ""
}
{
"ibm-930", // stateful MBCS
"a\ufdd0\u4e00\ufdd0e",