From 65c6f66b2e21cb6d7da393eada9fa4e71909b6ae Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 30 Apr 2003 02:41:01 +0000 Subject: [PATCH] ICU-2356 all UnicodeString methods should treat NULL input pointers as empty strings X-SVN-Rev: 11740 --- icu4c/source/common/unicode/unistr.h | 55 +++++++++------- icu4c/source/common/unistr.cpp | 94 ++++++++++++++++++---------- 2 files changed, 96 insertions(+), 53 deletions(-) diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 020052dda4..9d1d705256 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -125,6 +125,25 @@ class BreakIterator; // unicode/brkiter.h * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).

* + * UnicodeString methods are more lenient with regard to input parameter values + * than other ICU APIs. In particular: + * - If indexes are out of bounds for a UnicodeString object + * (<0 or >length()) then they are "pinned" to the nearest boundary. + * - If primitive string pointer values (e.g., const UChar * or char *) + * for input strings are NULL, then those input string parameters are treated + * as if they pointed to an empty string. + * - Most UnicodeString methods do not take a UErrorCode parameter because + * there are usually very few opportunities for failure other than a shortage + * of memory, error codes in low-level C++ string methods would be inconvenient, + * and the error code as the last parameter (ICU convention) would prevent + * the use of default parameter values. + * Instead, such methods set the UnicodeString into a "bogus" state + * (see isBogus()) if an error occurs. + * + * In string comparisons, two UnicodeString objects that are both "bogus" + * compare equal (to be transitive and prevent endless loops in sorting), + * and a "bogus" string compares less than any non-"bogus" one. + * *

UnicodeString uses several storage methods. * String contents can be stored inside the UnicodeString object itself, * in an allocated and shared buffer, or in an outside buffer that is "aliased". @@ -3176,19 +3195,17 @@ UnicodeString::getBuffer() const { //======================================== inline int8_t UnicodeString::doCompare(int32_t start, - int32_t _length, + int32_t length, const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const { - const UChar *srcChars; - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - srcChars=srcText.getArrayStart(); + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { - srcChars=0; + srcText.pinIndices(srcStart, srcLength); + return doCompare(start, length, srcText.fArray, srcStart, srcLength); } - return doCompare(start, _length, srcChars, srcStart, srcLength); } inline UBool @@ -3272,19 +3289,17 @@ UnicodeString::compareBetween(int32_t start, inline int8_t UnicodeString::doCompareCodePointOrder(int32_t start, - int32_t _length, + int32_t length, const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const { - const UChar *srcChars; - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - srcChars=srcText.getArrayStart(); + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { - srcChars=0; + srcText.pinIndices(srcStart, srcLength); + return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength); } - return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } inline int8_t @@ -3335,20 +3350,18 @@ UnicodeString::compareCodePointOrderBetween(int32_t start, inline int8_t UnicodeString::doCaseCompare(int32_t start, - int32_t _length, + int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, uint32_t options) const { - const UChar *srcChars; - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - srcChars=srcText.getArrayStart(); + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { - srcChars=0; + srcText.pinIndices(srcStart, srcLength); + return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options); } - return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); } inline int8_t diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index ae3ce1b40f..42a34dccde 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -218,7 +218,7 @@ UnicodeString::UnicodeString(const UChar *text) fArray(fStackBuffer), fFlags(kShortString) { - doReplace(0, 0, text, 0, u_strlen(text)); + doReplace(0, 0, text, 0, -1); } UnicodeString::UnicodeString(const UChar *text, @@ -239,9 +239,15 @@ UnicodeString::UnicodeString(UBool isTerminated, fArray((UChar *)text), fFlags(kReadonlyAlias) { - if( text == 0 || textLength < -1 || - (textLength == -1 && !isTerminated) || - (textLength >= 0 && isTerminated && text[textLength] != 0) + if(text == NULL) { + // treat as an empty string, do not alias + fLength = 0; + fCapacity = US_STACKBUF_SIZE; + fArray = fStackBuffer; + fFlags = kShortString; + } else if(textLength < -1 || + (textLength == -1 && !isTerminated) || + (textLength >= 0 && isTerminated && text[textLength] != 0) ) { setToBogus(); } else if(textLength == -1) { @@ -259,10 +265,15 @@ UnicodeString::UnicodeString(UChar *buff, fArray(buff), fFlags(kWritableAlias) { - if(buff == 0 || buffLength < -1 || buffLength > buffCapacity) { + if(buff == NULL) { + // treat as an empty string, do not alias + fLength = 0; + fCapacity = US_STACKBUF_SIZE; + fArray = fStackBuffer; + fFlags = kShortString; + } else if(buff == 0 || buffLength < -1 || buffLength > buffCapacity) { setToBogus(); - } - if(buffLength == -1) { + } else if(buffLength == -1) { // fLength = u_strlen(buff); but do not look beyond buffCapacity const UChar *p = buff, *limit = buff + buffCapacity; while(p != limit && *p != 0) { @@ -308,7 +319,9 @@ UnicodeString::UnicodeString(const char *src, int32_t srcLength, { if(U_SUCCESS(errorCode)) { // check arguments - if(srcLength<-1 || (srcLength!=0 && src==0)) { + if(src==NULL) { + // treat as an empty string, do nothing more + } else if(srcLength<-1) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { // get input length @@ -579,19 +592,18 @@ UnicodeString::doCompare( int32_t start, int32_t srcLength) const { // compare illegal string values + // treat const UChar *srcChars==NULL as an empty string if(isBogus()) { - if(srcChars==0) { - return 0; - } else { - return -1; - } - } else if(srcChars==0) { - return 1; + return -1; } - + // pin indices to legal values pinIndices(start, length); + if(srcChars == NULL) { + srcStart = srcLength = 0; + } + // get the correct pointer const UChar *chars = getArrayStart(); @@ -659,19 +671,18 @@ UnicodeString::doCompareCodePointOrder(int32_t start, int32_t srcLength) const { // compare illegal string values + // treat const UChar *srcChars==NULL as an empty string if(isBogus()) { - if(srcChars==0) { - return 0; - } else { - return -1; - } - } else if(srcChars==0) { - return 1; + return -1; } // pin indices to legal values pinIndices(start, length); + if(srcChars == NULL) { + srcStart = srcLength = 0; + } + int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE); /* translate the 32-bit result into an 8-bit one */ if(diff!=0) { @@ -690,19 +701,18 @@ UnicodeString::doCaseCompare(int32_t start, uint32_t options) const { // compare illegal string values + // treat const UChar *srcChars==NULL as an empty string if(isBogus()) { - if(srcChars==0) { - return 0; - } else { - return -1; - } - } else if(srcChars==0) { - return 1; + return -1; } // pin indices to legal values pinIndices(start, length); + if(srcChars == NULL) { + srcStart = srcLength = 0; + } + // get the correct pointer const UChar *chars = getArrayStart(); @@ -1008,7 +1018,17 @@ UnicodeString::setTo(UBool isTerminated, return *this; } - if( text == 0 || textLength < -1 || + if(text == NULL) { + // treat as an empty string, do not alias + releaseArray(); + fLength = 0; + fCapacity = US_STACKBUF_SIZE; + fArray = fStackBuffer; + fFlags = kShortString; + return *this; + } + + if( textLength < -1 || (textLength == -1 && !isTerminated) || (textLength >= 0 && isTerminated && text[textLength] != 0) ) { @@ -1042,7 +1062,17 @@ UnicodeString::setTo(UChar *buffer, return *this; } - if(buffer == 0 || buffLength < 0 || buffLength > buffCapacity) { + if(buffer == NULL) { + // treat as an empty string, do not alias + releaseArray(); + fLength = 0; + fCapacity = US_STACKBUF_SIZE; + fArray = fStackBuffer; + fFlags = kShortString; + return *this; + } + + if(buffLength < 0 || buffLength > buffCapacity) { setToBogus(); return *this; }