ICU-6677 add and fix u_strFromUTF32() input string validation
X-SVN-Rev: 25441
This commit is contained in:
parent
90eee0718a
commit
a856eb2eba
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2007, International Business Machines
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -36,78 +36,87 @@ u_strFromUTF32(UChar *dest,
|
||||
int32_t *pDestLength,
|
||||
const UChar32 *src,
|
||||
int32_t srcLength,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
int32_t reqLength = 0;
|
||||
uint32_t ch =0;
|
||||
UChar *pDestLimit =dest+destCapacity;
|
||||
UChar *pDest = dest;
|
||||
const uint32_t *pSrc = (const uint32_t *)src;
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar32 *srcLimit;
|
||||
UChar32 ch;
|
||||
UChar *destLimit;
|
||||
UChar *pDest;
|
||||
int32_t reqLength;
|
||||
|
||||
/* args check */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check if the source is null terminated */
|
||||
if(srcLength == -1 ){
|
||||
while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
|
||||
++pSrc;
|
||||
if(ch<=0xFFFF){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else if(ch<=0x10ffff){
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
}else{
|
||||
reqLength++;
|
||||
break;
|
||||
}
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
pDest = dest;
|
||||
destLimit = dest + destCapacity;
|
||||
reqLength = 0;
|
||||
|
||||
if(srcLength < 0) {
|
||||
/* simple loop for conversion of a NUL-terminated BMP string */
|
||||
while((ch=*src) != 0 &&
|
||||
((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) &&
|
||||
pDest < destLimit) {
|
||||
++src;
|
||||
*pDest++ = (UChar)ch;
|
||||
}
|
||||
if(ch == 0) {
|
||||
srcLength = 0;
|
||||
} else {
|
||||
/* "complicated" case, get the remaining string length */
|
||||
const UChar32 *src0 = src;
|
||||
while(*++src != 0) {}
|
||||
srcLength = (int32_t)(src - src0);
|
||||
src = src0;
|
||||
}
|
||||
}
|
||||
|
||||
/* convert with length */
|
||||
srcLimit = src + srcLength;
|
||||
while(src < srcLimit && pDest < destLimit) {
|
||||
ch = *src++;
|
||||
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
|
||||
*pDest++ = (UChar)ch;
|
||||
} else if(0x10000 <= ch && ch <= 0x10ffff) {
|
||||
*pDest++ = U16_LEAD(ch);
|
||||
if(pDest < destLimit) {
|
||||
*pDest++ = U16_TRAIL(ch);
|
||||
} else {
|
||||
reqLength = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* surrogate code point, or not a Unicode code point at all */
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
while((ch=*pSrc++) != 0){
|
||||
reqLength+=UTF_CHAR_LENGTH(ch);
|
||||
}
|
||||
}else{
|
||||
const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength;
|
||||
while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
|
||||
ch = *pSrc++;
|
||||
if(ch<=0xFFFF){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else if(ch<=0x10FFFF){
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
}else{
|
||||
reqLength++;
|
||||
break;
|
||||
}
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
while(pSrc <pSrcLimit){
|
||||
ch = *pSrc++;
|
||||
reqLength+=UTF_CHAR_LENGTH(ch);
|
||||
}
|
||||
|
||||
/* preflight the remaining string */
|
||||
while(src < srcLimit) {
|
||||
ch = *src++;
|
||||
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
|
||||
++reqLength;
|
||||
} else if(0x10000 <= ch && ch <= 0x10ffff) {
|
||||
reqLength += 2;
|
||||
} else {
|
||||
/* surrogate code point, or not a Unicode code point at all */
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
reqLength += (int32_t)(pDest - dest);
|
||||
if(pDestLength){
|
||||
if(pDestLength) {
|
||||
*pDestLength = reqLength;
|
||||
}
|
||||
|
||||
/* Terminate the buffer */
|
||||
u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
|
||||
u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
|
||||
|
||||
return dest;
|
||||
}
|
||||
@ -126,11 +135,11 @@ u_strToUTF32(UChar32 *dest,
|
||||
int32_t reqLength=0;
|
||||
uint32_t ch=0;
|
||||
uint32_t *pDest = (uint32_t *)dest;
|
||||
uint32_t *pDestLimit = pDest + destCapacity;
|
||||
uint32_t *destLimit = pDest + destCapacity;
|
||||
UChar ch2=0;
|
||||
|
||||
/* args check */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -141,7 +150,7 @@ u_strToUTF32(UChar32 *dest,
|
||||
}
|
||||
|
||||
if(srcLength==-1) {
|
||||
while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
|
||||
while((ch=*pSrc)!=0 && pDest!=destLimit) {
|
||||
++pSrc;
|
||||
/*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
|
||||
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
@ -158,7 +167,7 @@ u_strToUTF32(UChar32 *dest,
|
||||
}
|
||||
} else {
|
||||
pSrcLimit = pSrc+srcLength;
|
||||
while(pSrc<pSrcLimit && pDest<pDestLimit) {
|
||||
while(pSrc<pSrcLimit && pDest<destLimit) {
|
||||
ch=*pSrc++;
|
||||
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
|
@ -32,6 +32,7 @@ void addUCharTransformTest(TestNode** root);
|
||||
|
||||
static void Test_strToUTF32(void);
|
||||
static void Test_strFromUTF32(void);
|
||||
static void Test_strFromUTF32_surrogates(void);
|
||||
static void Test_UChar_UTF8_API(void);
|
||||
static void Test_FromUTF8(void);
|
||||
static void Test_FromUTF8Lenient(void);
|
||||
@ -44,6 +45,7 @@ addUCharTransformTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
|
||||
addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
|
||||
addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
|
||||
addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
|
||||
addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
|
||||
addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
|
||||
@ -259,6 +261,82 @@ static void Test_strFromUTF32(void){
|
||||
}
|
||||
}
|
||||
|
||||
/* test surrogate code points */
|
||||
static void Test_strFromUTF32_surrogates() {
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar uTarget[400];
|
||||
int32_t len32, uDestLen;
|
||||
int i;
|
||||
|
||||
static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
|
||||
static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
|
||||
len32 = LENGTHOF(surr32);
|
||||
for(i = 0; i < 6; ++i) {
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
|
||||
if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
|
||||
log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
|
||||
if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
|
||||
log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
|
||||
if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
|
||||
log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
|
||||
if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
|
||||
log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void Test_UChar_UTF8_API(void){
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user