ICU-6677 add and fix u_strFromUTF32() input string validation

X-SVN-Rev: 25441
This commit is contained in:
Markus Scherer 2009-02-19 05:18:34 +00:00
parent 90eee0718a
commit a856eb2eba
2 changed files with 145 additions and 58 deletions

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2001-2007, International Business Machines
* Copyright (C) 2001-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -36,78 +36,87 @@ u_strFromUTF32(UChar *dest,
int32_t *pDestLength,
const UChar32 *src,
int32_t srcLength,
UErrorCode *pErrorCode)
{
int32_t reqLength = 0;
uint32_t ch =0;
UChar *pDestLimit =dest+destCapacity;
UChar *pDest = dest;
const uint32_t *pSrc = (const uint32_t *)src;
UErrorCode *pErrorCode) {
const UChar32 *srcLimit;
UChar32 ch;
UChar *destLimit;
UChar *pDest;
int32_t reqLength;
/* args check */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
if(U_FAILURE(*pErrorCode)){
return NULL;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
/* Check if the source is null terminated */
if(srcLength == -1 ){
while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
++pSrc;
if(ch<=0xFFFF){
*(pDest++)=(UChar)ch;
}else if(ch<=0x10ffff){
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit){
*(pDest++)=UTF16_TRAIL(ch);
}else{
reqLength++;
break;
}
}else{
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
pDest = dest;
destLimit = dest + destCapacity;
reqLength = 0;
if(srcLength < 0) {
/* simple loop for conversion of a NUL-terminated BMP string */
while((ch=*src) != 0 &&
((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) &&
pDest < destLimit) {
++src;
*pDest++ = (UChar)ch;
}
if(ch == 0) {
srcLength = 0;
} else {
/* "complicated" case, get the remaining string length */
const UChar32 *src0 = src;
while(*++src != 0) {}
srcLength = (int32_t)(src - src0);
src = src0;
}
}
/* convert with length */
srcLimit = src + srcLength;
while(src < srcLimit && pDest < destLimit) {
ch = *src++;
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
*pDest++ = (UChar)ch;
} else if(0x10000 <= ch && ch <= 0x10ffff) {
*pDest++ = U16_LEAD(ch);
if(pDest < destLimit) {
*pDest++ = U16_TRAIL(ch);
} else {
reqLength = 1;
break;
}
} else {
/* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
while((ch=*pSrc++) != 0){
reqLength+=UTF_CHAR_LENGTH(ch);
}
}else{
const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength;
while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
ch = *pSrc++;
if(ch<=0xFFFF){
*(pDest++)=(UChar)ch;
}else if(ch<=0x10FFFF){
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit){
*(pDest++)=UTF16_TRAIL(ch);
}else{
reqLength++;
break;
}
}else{
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
while(pSrc <pSrcLimit){
ch = *pSrc++;
reqLength+=UTF_CHAR_LENGTH(ch);
}
/* preflight the remaining string */
while(src < srcLimit) {
ch = *src++;
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
++reqLength;
} else if(0x10000 <= ch && ch <= 0x10ffff) {
reqLength += 2;
} else {
/* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
reqLength += (int32_t)(pDest - dest);
if(pDestLength){
if(pDestLength) {
*pDestLength = reqLength;
}
/* Terminate the buffer */
u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
return dest;
}
@ -126,11 +135,11 @@ u_strToUTF32(UChar32 *dest,
int32_t reqLength=0;
uint32_t ch=0;
uint32_t *pDest = (uint32_t *)dest;
uint32_t *pDestLimit = pDest + destCapacity;
uint32_t *destLimit = pDest + destCapacity;
UChar ch2=0;
/* args check */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
if(U_FAILURE(*pErrorCode)){
return NULL;
}
@ -141,7 +150,7 @@ u_strToUTF32(UChar32 *dest,
}
if(srcLength==-1) {
while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
while((ch=*pSrc)!=0 && pDest!=destLimit) {
++pSrc;
/*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
@ -158,7 +167,7 @@ u_strToUTF32(UChar32 *dest,
}
} else {
pSrcLimit = pSrc+srcLength;
while(pSrc<pSrcLimit && pDest<pDestLimit) {
while(pSrc<pSrcLimit && pDest<destLimit) {
ch=*pSrc++;
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;

View File

@ -32,6 +32,7 @@ void addUCharTransformTest(TestNode** root);
static void Test_strToUTF32(void);
static void Test_strFromUTF32(void);
static void Test_strFromUTF32_surrogates(void);
static void Test_UChar_UTF8_API(void);
static void Test_FromUTF8(void);
static void Test_FromUTF8Lenient(void);
@ -44,6 +45,7 @@ addUCharTransformTest(TestNode** root)
{
addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
@ -259,6 +261,82 @@ static void Test_strFromUTF32(void){
}
}
/*
 * Test input validation in u_strFromUTF32().
 *
 * Surrogate code points (0xd800..0xdfff), negative values and values above
 * 0x10ffff are expected to be rejected with U_INVALID_CHAR_FOUND. Each case
 * is exercised four ways: preflighting (zero capacity) and converting, each
 * with an explicit source length and with a NUL-terminated source (-1).
 * The well-formed tail of the same array is then expected to convert
 * normally and to report the correct output length.
 */
static void Test_strFromUTF32_surrogates(void) {
    /*
     * Every suffix that starts at index 0..5 contains at least one invalid
     * value (0xd900, 0xdc00, -1 or 0x110000) before any terminator.
     * The suffix that starts at index 6 is well-formed.
     */
    static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
    /* UTF-16 form of surr32[6..]: U+50000 is the surrogate pair d900 dc00. */
    static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };

    const int32_t len32 = LENGTHOF(surr32);
    /* Index of the first element of the well-formed tail of surr32[]. */
    const int32_t firstValid = 6;
    /* Number of UChars in expected[], not counting the terminating NUL. */
    const int32_t expectedLength = LENGTHOF(expected) - 1;

    UChar uTarget[400];
    UErrorCode err;
    int32_t uDestLen;
    int32_t i;

    /* Ill-formed input: every variant must fail with U_INVALID_CHAR_FOUND. */
    for(i = 0; i < firstValid; ++i) {
        /* preflight, with source length */
        err = U_ZERO_ERROR;
        u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* convert, with source length */
        err = U_ZERO_ERROR;
        u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* preflight, NUL-terminated source and no destination buffer */
        err = U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* convert, NUL-terminated source */
        err = U_ZERO_ERROR;
        u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }
    }

    /*
     * Well-formed tail. uDestLen is seeded with a sentinel before each call
     * so that a call which reports the expected status without writing the
     * length is caught instead of reading an indeterminate value.
     */

    /* preflight, with source length (the trailing NUL is excluded) */
    err = U_ZERO_ERROR;
    uDestLen = -1;
    u_strFromUTF32(uTarget, 0, &uDestLen, surr32+firstValid, len32-firstValid-1, &err);
    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != expectedLength) {
        log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
                u_errorName(err));
        return;
    }

    /* convert, with source length; the comparison includes the terminating NUL */
    err = U_ZERO_ERROR;
    uDestLen = -1;
    u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+firstValid, len32-firstValid-1, &err);
    if(err != U_ZERO_ERROR || uDestLen != expectedLength || u_memcmp(uTarget, expected, expectedLength+1)) {
        log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
                u_errorName(err));
        return;
    }

    /* preflight, NUL-terminated source and no destination buffer */
    err = U_ZERO_ERROR;
    uDestLen = -1;
    u_strFromUTF32(NULL, 0, &uDestLen, surr32+firstValid, -1, &err);
    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != expectedLength) {
        log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
                u_errorName(err));
        return;
    }

    /* convert, NUL-terminated source; the comparison includes the terminating NUL */
    err = U_ZERO_ERROR;
    uDestLen = -1;
    u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+firstValid, -1, &err);
    if(err != U_ZERO_ERROR || uDestLen != expectedLength || u_memcmp(uTarget, expected, expectedLength+1)) {
        log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
                u_errorName(err));
        return;
    }
}
static void Test_UChar_UTF8_API(void){