ICU-12639 detect & handle malformed UTF-8, never call the low-level full case mapping functions with a negative value
X-SVN-Rev: 39295
This commit is contained in:
parent
bc0960b298
commit
104b90bc3f
@ -815,8 +815,9 @@ U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache)
|
||||
{
|
||||
const char *locale, int32_t *locCache) {
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
@ -961,6 +962,8 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache,
|
||||
UBool upperNotTitle) {
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
@ -1169,8 +1172,9 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
|
||||
const UChar **pString,
|
||||
uint32_t options)
|
||||
{
|
||||
uint32_t options) {
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
|
@ -206,6 +206,21 @@ appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
||||
return limit;
|
||||
}
|
||||
|
||||
static inline int32_t
|
||||
appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const uint8_t *s, int32_t length) {
|
||||
if(length>0) {
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, s, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
static UChar32 U_CALLCONV
|
||||
utf8_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
@ -263,9 +278,11 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
U8_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
if(c<0) {
|
||||
int32_t i=csc->cpStart;
|
||||
while(destIndex<destCapacity && i<srcIndex) {
|
||||
dest[destIndex++]=src[i++];
|
||||
// Malformed UTF-8.
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -297,7 +314,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
|
||||
int32_t prev, titleStart, titleLimit, idx, destIndex;
|
||||
UBool isFirstIndex;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
@ -363,21 +380,24 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
length=titleStart-prev;
|
||||
if(length>0) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+prev, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(titleStart<titleLimit) {
|
||||
/* titlecase c which is from [titleStart..titleLimit[ */
|
||||
csc.cpStart=titleStart;
|
||||
csc.cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
if(c>=0) {
|
||||
csc.cpStart=titleStart;
|
||||
csc.cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
} else {
|
||||
// Malformed UTF-8.
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
|
||||
}
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
@ -407,15 +427,11 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
}
|
||||
} else {
|
||||
/* Optionally just copy the rest of the word unchanged. */
|
||||
length=idx-titleLimit;
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+titleLimit, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -547,7 +563,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
} else if(c>=0) {
|
||||
const UChar *s;
|
||||
UChar32 c2 = 0;
|
||||
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
|
||||
@ -561,6 +577,13 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Malformed UTF-8.
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
i = nextIndex;
|
||||
state = nextState;
|
||||
@ -627,8 +650,11 @@ utf8_foldCase(const UCaseProps *csp,
|
||||
start=srcIndex;
|
||||
U8_NEXT(src, srcIndex, srcLength, c);
|
||||
if(c<0) {
|
||||
while(destIndex<destCapacity && start<srcIndex) {
|
||||
dest[destIndex++]=src[start++];
|
||||
// Malformed UTF-8.
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -98,6 +98,21 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
||||
return destIndex+1;
|
||||
}
|
||||
|
||||
static inline int32_t
|
||||
appendString(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const UChar *s, int32_t length) {
|
||||
if(length>0) {
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
u_memcpy(dest+destIndex, s, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
static UChar32 U_CALLCONV
|
||||
utf16_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
@ -182,7 +197,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
|
||||
int32_t prev, titleStart, titleLimit, idx, destIndex;
|
||||
UBool isFirstIndex;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
@ -248,12 +263,10 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
length=titleStart-prev;
|
||||
if(length>0) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
u_memcpy(dest+destIndex, src+prev, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -297,15 +310,11 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
||||
}
|
||||
} else {
|
||||
/* Optionally just copy the rest of the word unchanged. */
|
||||
length=idx-titleLimit;
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
|
||||
if(destIndex<0) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
u_memcpy(dest+destIndex, src+titleLimit, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
||||
TESTCASE_AUTO(TestFullCaseFoldingIterator);
|
||||
TESTCASE_AUTO(TestGreekUpper);
|
||||
TESTCASE_AUTO(TestLongUpper);
|
||||
TESTCASE_AUTO(TestMalformedUTF8);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -707,3 +708,44 @@ StringCaseTest::TestLongUpper() {
|
||||
errorCode.errorName(), (long)destLength);
|
||||
}
|
||||
}
|
||||
|
||||
void StringCaseTest::TestMalformedUTF8() {
|
||||
// ticket #12639
|
||||
IcuTestErrorCode errorCode(*this, "TestTitleMalformedUTF8");
|
||||
LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
|
||||
if (errorCode.isFailure()) {
|
||||
errln("ucasemap_open(English) failed - %s", errorCode.errorName());
|
||||
return;
|
||||
}
|
||||
char src[1] = { (char)0x85 }; // malformed UTF-8
|
||||
char dest[3] = { 0, 0, 0 };
|
||||
int32_t destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
|
||||
if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
|
||||
errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
|
||||
errorCode.errorName(), (int)destLength, dest[0]);
|
||||
}
|
||||
|
||||
errorCode.reset();
|
||||
dest[0] = 0;
|
||||
destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
|
||||
if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
|
||||
errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
|
||||
errorCode.errorName(), (int)destLength, dest[0]);
|
||||
}
|
||||
|
||||
errorCode.reset();
|
||||
dest[0] = 0;
|
||||
destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
|
||||
if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
|
||||
errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
|
||||
errorCode.errorName(), (int)destLength, dest[0]);
|
||||
}
|
||||
|
||||
errorCode.reset();
|
||||
dest[0] = 0;
|
||||
destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
|
||||
if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
|
||||
errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
|
||||
errorCode.errorName(), (int)destLength, dest[0]);
|
||||
}
|
||||
}
|
||||
|
@ -111,6 +111,7 @@ public:
|
||||
void TestFullCaseFoldingIterator();
|
||||
void TestGreekUpper();
|
||||
void TestLongUpper();
|
||||
void TestMalformedUTF8();
|
||||
|
||||
private:
|
||||
void assertGreekUpper(const char *s, const char *expected);
|
||||
|
Loading…
Reference in New Issue
Block a user