From 104b90bc3f450a2e688a4a173c9eff7e4f90f0b4 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 20 Sep 2016 20:32:12 +0000 Subject: [PATCH] ICU-12639 detect & handle malformed UTF-8, never call the low-level full case mapping functions with a negative value X-SVN-Rev: 39295 --- icu4c/source/common/ucase.cpp | 12 +++-- icu4c/source/common/ucasemap.cpp | 72 ++++++++++++++++++-------- icu4c/source/common/ustrcase.cpp | 35 ++++++++----- icu4c/source/test/intltest/strcase.cpp | 42 +++++++++++++++ icu4c/source/test/intltest/ustrtest.h | 1 + 5 files changed, 122 insertions(+), 40 deletions(-) diff --git a/icu4c/source/common/ucase.cpp b/icu4c/source/common/ucase.cpp index fe4335ea45..97ded9ee2d 100644 --- a/icu4c/source/common/ucase.cpp +++ b/icu4c/source/common/ucase.cpp @@ -815,8 +815,9 @@ U_CAPI int32_t U_EXPORT2 ucase_toFullLower(const UCaseProps *csp, UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache) -{ + const char *locale, int32_t *locCache) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&csp->trie, c); if(!PROPS_HAS_EXCEPTION(props)) { @@ -961,6 +962,8 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, const UChar **pString, const char *locale, int32_t *locCache, UBool upperNotTitle) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&csp->trie, c); if(!PROPS_HAS_EXCEPTION(props)) { @@ -1169,8 +1172,9 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { U_CAPI int32_t U_EXPORT2 ucase_toFullFolding(const UCaseProps *csp, UChar32 c, const UChar **pString, - uint32_t options) -{ + uint32_t options) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&csp->trie, c); if(!PROPS_HAS_EXCEPTION(props)) { diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index e8807dd9a5..c0d56c2873 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -206,6 +206,21 @@ appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) { return limit; } +static inline int32_t +appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity, + const uint8_t *s, int32_t length) { + if(length>0) { + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + uprv_memcpy(dest+destIndex, s, length); + } + destIndex+=length; + } + return destIndex; +} + static UChar32 U_CALLCONV utf8_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; @@ -263,9 +278,11 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, U8_NEXT(src, srcIndex, srcLimit, c); csc->cpLimit=srcIndex; if(c<0) { - int32_t i=csc->cpStart; - while(destIndexcpStart, srcIndex-csc->cpStart); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } continue; } @@ -297,7 +314,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, UErrorCode *pErrorCode) { const UChar *s; UChar32 c; - int32_t prev, titleStart, titleLimit, idx, destIndex, length; + int32_t prev, titleStart, titleLimit, idx, destIndex; UBool isFirstIndex; if(U_FAILURE(*pErrorCode)) { @@ -363,21 +380,24 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, break; /* cased letter at [titleStart..titleLimit[ */ } } - length=titleStart-prev; - if(length>0) { - if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+prev, length); - } - destIndex+=length; + destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } } if(titleStartcsp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); - destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(c>=0) { + csc.cpStart=titleStart; + csc.cpLimit=titleLimit; + c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); + destIndex=appendResult(dest, destIndex, destCapacity, c, s); + } else { + // Malformed UTF-8. + destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart); + } if(destIndex<0) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -407,15 +427,11 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, } } else { /* Optionally just copy the rest of the word unchanged. */ - length=idx-titleLimit; - if(length>(INT32_MAX-destIndex)) { + destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit); + if(destIndex<0) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } - if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+titleLimit, length); - } - destIndex+=length; } } } @@ -547,7 +563,7 @@ int32_t toUpper(const UCaseMap *csm, *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } - } else { + } else if(c>=0) { const UChar *s; UChar32 c2 = 0; c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); @@ -561,6 +577,13 @@ int32_t toUpper(const UCaseMap *csm, return 0; } } + } else { + // Malformed UTF-8. + destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } i = nextIndex; state = nextState; @@ -627,8 +650,11 @@ utf8_foldCase(const UCaseProps *csp, start=srcIndex; U8_NEXT(src, srcIndex, srcLength, c); if(c<0) { - while(destIndex0) { + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + u_memcpy(dest+destIndex, s, length); + } + destIndex+=length; + } + return destIndex; +} + static UChar32 U_CALLCONV utf16_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; @@ -182,7 +197,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, UErrorCode *pErrorCode) { const UChar *s; UChar32 c; - int32_t prev, titleStart, titleLimit, idx, destIndex, length; + int32_t prev, titleStart, titleLimit, idx, destIndex; UBool isFirstIndex; if(U_FAILURE(*pErrorCode)) { @@ -248,12 +263,10 @@ ustrcase_internalToTitle(const UCaseMap *csm, break; /* cased letter at [titleStart..titleLimit[ */ } } - length=titleStart-prev; - if(length>0) { - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, src+prev, length); - } - destIndex+=length; + destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } } @@ -297,15 +310,11 @@ ustrcase_internalToTitle(const UCaseMap *csm, } } else { /* Optionally just copy the rest of the word unchanged. */ - length=idx-titleLimit; - if(length>(INT32_MAX-destIndex)) { + destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit); + if(destIndex<0) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, src+titleLimit, length); - } - destIndex+=length; } } } diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp index af79f68e49..e5304d4fcc 100644 --- a/icu4c/source/test/intltest/strcase.cpp +++ b/icu4c/source/test/intltest/strcase.cpp @@ -48,6 +48,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO(TestFullCaseFoldingIterator); TESTCASE_AUTO(TestGreekUpper); TESTCASE_AUTO(TestLongUpper); + TESTCASE_AUTO(TestMalformedUTF8); TESTCASE_AUTO_END; } @@ -707,3 +708,44 @@ StringCaseTest::TestLongUpper() { errorCode.errorName(), (long)destLength); } } + +void StringCaseTest::TestMalformedUTF8() { + // ticket #12639 + IcuTestErrorCode errorCode(*this, "TestTitleMalformedUTF8"); + LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode)); + if (errorCode.isFailure()) { + errln("ucasemap_open(English) failed - %s", errorCode.errorName()); + return; + } + char src[1] = { (char)0x85 }; // malformed UTF-8 + char dest[3] = { 0, 0, 0 }; + int32_t destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode); + if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { + errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x", + errorCode.errorName(), (int)destLength, dest[0]); + } + + errorCode.reset(); + dest[0] = 0; + destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode); + if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { + errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x", + errorCode.errorName(), (int)destLength, dest[0]); + } + + errorCode.reset(); + dest[0] = 0; + destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode); + if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { + errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x", + errorCode.errorName(), (int)destLength, dest[0]); + } + + errorCode.reset(); + dest[0] = 0; + destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode); + if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { + errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x", + errorCode.errorName(), (int)destLength, dest[0]); + } +} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 8dfa750ac2..ef3f6cff8a 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -111,6 +111,7 @@ public: void TestFullCaseFoldingIterator(); void TestGreekUpper(); void TestLongUpper(); + void TestMalformedUTF8(); private: void assertGreekUpper(const char *s, const char *expected);