ICU-7848 make normalize-append restore the middle string section (the relevant suffix of the first string) when something goes wrong (especially C buffer overflow)

X-SVN-Rev: 30014
This commit is contained in:
Markus Scherer 2011-05-04 05:50:20 +00:00
parent 3d44c5dc4b
commit 56b28bd292
4 changed files with 112 additions and 22 deletions

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2010, International Business Machines
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -154,15 +154,24 @@ public:
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
ReorderingBuffer buffer(impl, first);
if(buffer.init(first.length()+second.length(), errorCode)) {
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
buffer, errorCode);
int32_t firstLength=first.length();
UnicodeString safeMiddle;
{
ReorderingBuffer buffer(impl, first);
if(buffer.init(firstLength+second.length(), errorCode)) {
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
safeMiddle, buffer, errorCode);
}
} // The ReorderingBuffer destructor finalizes the first string.
if(U_FAILURE(errorCode)) {
// Restore the modified suffix of the first string.
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
}
return first;
}
// Subclass hook used by the append paths in this file: processes the text
// [src, limit) into buffer, which was constructed on top of the first string.
// - doNormalize is handed through unchanged to the subclass implementation.
// - limit may be NULL; the C API wrapper passes NULL when the second string's
//   length is negative (NUL-terminated input).
// - safeMiddle is an output. After a failure, callers use it to restore the
//   suffix of the first string, starting at firstLength-safeMiddle.length().
// - errorCode is checked by the callers after the buffer has been destroyed.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
@ -233,8 +242,9 @@ private:
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
// Override that forwards every argument to impl.decomposeAndAppend();
// doNormalize is passed in the position of that function's doDecompose flag.
// NOTE(review): two forwarding calls are listed below, one without and one with
// safeMiddle. This looks like the before/after pair of a signature change;
// confirm that only the safeMiddle form is meant to remain.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
@ -263,8 +273,9 @@ private:
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
// Override that forwards to impl.composeAndAppend(), additionally passing this
// object's onlyContiguous setting; doNormalize takes the doCompose position.
// NOTE(review): two forwarding calls are listed below, one without and one with
// safeMiddle. This looks like the before/after pair of a signature change;
// confirm that only the safeMiddle form is meant to remain.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
}
virtual UBool
@ -332,8 +343,9 @@ private:
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
// Override that forwards every argument to impl.makeFCDAndAppend();
// doNormalize is passed in the position of that function's doMakeFCD flag.
// NOTE(review): two forwarding calls are listed below, one without and one with
// safeMiddle. This looks like the before/after pair of a signature change;
// confirm that only the safeMiddle form is meant to remain.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
@ -693,16 +705,29 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2,
return 0;
}
UnicodeString firstString(first, firstLength, firstCapacity);
firstLength=firstString.length(); // In case it was -1.
// secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
if(secondLength!=0) {
const Normalizer2 *n2=(const Normalizer2 *)norm2;
const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
if(n2wi!=NULL) {
// Avoid duplicate argument checking and support NUL-terminated src.
ReorderingBuffer buffer(n2wi->impl, firstString);
if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
doNormalize, buffer, *pErrorCode);
UnicodeString safeMiddle;
{
ReorderingBuffer buffer(n2wi->impl, firstString);
if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
doNormalize, safeMiddle, buffer, *pErrorCode);
}
} // The ReorderingBuffer destructor finalizes firstString.
if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
// Restore the modified suffix of the first string.
// This does not restore first[] array contents between firstLength and firstCapacity.
// (That might be uninitialized memory, as far as we know.)
safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
if(firstLength<firstCapacity) {
first[firstLength]=0; // NUL-terminate in case it was originally.
}
}
} else {
UnicodeString secondString(secondLength<0, second, secondLength);

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2010, International Business Machines
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -570,8 +570,10 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c
void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
UBool doDecompose,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const {
buffer.copyReorderableSuffixTo(safeMiddle);
if(doDecompose) {
decompose(src, limit, &buffer, errorCode);
return;
@ -584,6 +586,9 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
prevCC=cc;
cc=getCC(iter.next16());
};
if(limit==NULL) { // appendZeroCC() needs limit!=NULL
limit=u_strchr(iter.codePointStart, 0);
}
buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode) &&
buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
}
@ -1271,16 +1276,20 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
UBool doCompose,
UBool onlyContiguous,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const {
if(!buffer.isEmpty()) {
const UChar *firstStarterInSrc=findNextCompBoundary(src, limit);
if(src!=firstStarterInSrc) {
if(src==firstStarterInSrc) {
buffer.copyReorderableSuffixTo(safeMiddle);
} else {
const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
buffer.getLimit());
UnicodeString middle(lastStarterInDest,
(int32_t)(buffer.getLimit()-lastStarterInDest));
buffer.removeSuffix((int32_t)(buffer.getLimit()-lastStarterInDest));
int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
UnicodeString middle(lastStarterInDest, destSuffixLength);
buffer.removeSuffix(destSuffixLength);
safeMiddle=middle;
middle.append(src, (int32_t)(firstStarterInSrc-src));
const UChar *middleStart=middle.getBuffer();
compose(middleStart, middleStart+middle.length(), onlyContiguous,
@ -1294,6 +1303,9 @@ void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
if(doCompose) {
compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
} else {
if(limit==NULL) { // appendZeroCC() needs limit!=NULL
limit=u_strchr(src, 0);
}
buffer.appendZeroCC(src, limit, errorCode);
}
}
@ -1650,16 +1662,20 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
UBool doMakeFCD,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const {
if(!buffer.isEmpty()) {
const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
if(src!=firstBoundaryInSrc) {
if(src==firstBoundaryInSrc) {
buffer.copyReorderableSuffixTo(safeMiddle);
} else {
const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
buffer.getLimit());
UnicodeString middle(lastBoundaryInDest,
(int32_t)(buffer.getLimit()-lastBoundaryInDest));
buffer.removeSuffix((int32_t)(buffer.getLimit()-lastBoundaryInDest));
int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
UnicodeString middle(lastBoundaryInDest, destSuffixLength);
buffer.removeSuffix(destSuffixLength);
safeMiddle=middle;
middle.append(src, (int32_t)(firstBoundaryInSrc-src));
const UChar *middleStart=middle.getBuffer();
makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
@ -1672,6 +1688,9 @@ void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
if(doMakeFCD) {
makeFCD(src, limit, &buffer, errorCode);
} else {
if(limit==NULL) { // appendZeroCC() needs limit!=NULL
limit=u_strchr(src, 0);
}
buffer.appendZeroCC(src, limit, errorCode);
}
}

View File

@ -151,6 +151,9 @@ public:
reorderStart=limit=newLimit;
lastCC=0;
}
// Sets s to the buffer contents that lie between reorderStart and limit.
// The buffer itself is left untouched (const member).
void copyReorderableSuffixTo(UnicodeString &s) const {
    const int32_t suffixLength=(int32_t)(limit-reorderStart);
    s.setTo(reorderStart, suffixLength);
}
private:
/*
* TODO: Revisit whether it makes sense to track reorderStart.
@ -328,6 +331,7 @@ public:
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
// Appends [src, limit) to buffer.
// First copies the buffer's current reorderable suffix into safeMiddle, so a
// caller can put that text back if the operation fails.
// If doDecompose is set, the text is run through decompose(); otherwise it is
// appended as-is, with the leading characters handed to buffer.append() along
// with their combining classes and the remainder to buffer.appendZeroCC().
// In the non-decomposing path a NULL limit is resolved by searching for the
// terminating NUL.
void decomposeAndAppend(const UChar *src, const UChar *limit,
UBool doDecompose,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
UBool compose(const UChar *src, const UChar *limit,
@ -341,12 +345,14 @@ public:
// Appends [src, limit) to buffer, taking care of the seam between the existing
// buffer contents and the new text.
// When buffer is not empty, safeMiddle receives the text a caller needs in
// order to restore the first string after a failure:
// - if src already starts at a composition boundary, the buffer's reorderable
//   suffix;
// - otherwise the buffer suffix starting at its last composition boundary,
//   which is removed from buffer, joined with the part of src before its first
//   boundary, and passed through compose().
// Later in the function, text is composed when doCompose is set and otherwise
// appended via buffer.appendZeroCC() (a NULL limit is resolved by searching for
// the terminating NUL in that path).
void composeAndAppend(const UChar *src, const UChar *limit,
UBool doCompose,
UBool onlyContiguous,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
const UChar *makeFCD(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
// Appends [src, limit) to buffer, taking care of the seam between the existing
// buffer contents and the new text.
// When buffer is not empty, safeMiddle receives the text a caller needs in
// order to restore the first string after a failure:
// - if src already starts at an FCD boundary, the buffer's reorderable suffix;
// - otherwise the buffer suffix starting at its last FCD boundary, which is
//   removed from buffer, joined with the part of src before its first
//   boundary, and passed through makeFCD().
// Later in the function, text goes through makeFCD() when doMakeFCD is set and
// is otherwise appended via buffer.appendZeroCC() (a NULL limit is resolved by
// searching for the terminating NUL in that path).
void makeFCDAndAppend(const UChar *src, const UChar *limit,
UBool doMakeFCD,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -64,6 +64,8 @@ TestFCD(void);
static void
TestGetDecomposition(void);
static void TestAppendRestoreMiddle(void);
static const char* const canonTests[][3] = {
/* Input*/ /*Decomposed*/ /*Composed*/
{ "cat", "cat", "cat" },
@ -151,6 +153,7 @@ void addNormTest(TestNode** root)
addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
}
static const char* const modeStrings[]={
@ -1511,4 +1514,41 @@ TestGetDecomposition() {
}
}
/*
 * Regression test for ICU ticket #7848.
 * An overflowing unorm2_append() must leave the first string as it was;
 * a follow-up call with enough capacity must yield the full NFC result.
 */
static void
TestAppendRestoreMiddle() {
    /* Destination text: "abc", then 'A' and a combining cedilla. */
    UChar dest[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };
    /* Copy of the initial dest contents including the NUL, for the comparison after the overflow. */
    static const UChar destBefore[]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };
    /* Text to append: a combining ring above, then "def". */
    static const UChar second[]={ 0x30A, 0x64, 0x65, 0x66, 0 };
    /* NFC of the concatenation; C5 is 'A with ring above'. */
    static const UChar wanted[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
    int32_t resultLength;
    UErrorCode status=U_ZERO_ERROR;
    const UNormalizer2 *nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &status);
    if(U_FAILURE(status)) {
        log_err_status(status, "unorm2_getInstance(nfc/COMPOSE) failed: %s\n", u_errorName(status));
        return;
    }
    /*
     * Passing -1 as the first length defeats the ReorderingBuffer capacity estimate.
     * A capacity of 6 (7 would work too) leaves room in dest[] for the middle
     * sequence <41 327 30A> while the complete result still does not fit, so the
     * destination gets modified before the implementation reallocates internally.
     */
    resultLength=unorm2_append(nfc, dest, -1, 6, second, -1, &status);
    if(!(status==U_BUFFER_OVERFLOW_ERROR && resultLength==LENGTHOF(wanted))) {
        log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)resultLength);
        return;
    }
    /* The middle must be untouched or restored, and the NUL must still be there. */
    if(0!=u_memcmp(dest, destBefore, LENGTHOF(destBefore))) {
        log_err("unorm2_append(overflow) modified the first string\n");
        return;
    }
    /* Second attempt, this time with the whole array as capacity. */
    status=U_ZERO_ERROR;
    resultLength=unorm2_append(nfc, dest, -1, LENGTHOF(dest), second, -1, &status);
    if(U_FAILURE(status) || resultLength!=LENGTHOF(wanted) || 0!=u_memcmp(dest, wanted, resultLength)) {
        log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(status), (int)resultLength);
    }
}
#endif /* #if !UCONFIG_NO_NORMALIZATION */