ICU-2356 all UnicodeString methods should treat NULL input pointers as empty strings

X-SVN-Rev: 11740
2003-04-30 02:41:01 +00:00 · 2003-04-30 02:41:01 +00:00 · 65c6f66b2e
commit 65c6f66b2e
parent 959aa6bab4
2 changed files with 96 additions and 53 deletions
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -125,6 +125,25 @@ class BreakIterator;        // unicode/brkiter.h
 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
 *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ *   (&lt;0 or &gt;length()) then they are "pinned" to the nearest boundary.
+ * - If primitive string pointer values (e.g., const UChar * or char *)
+ *   for input strings are NULL, then those input string parameters are treated
+ *   as if they pointed to an empty string.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ *   there are usually very few opportunities for failure other than a shortage
+ *   of memory, error codes in low-level C++ string methods would be inconvenient,
+ *   and the error code as the last parameter (ICU convention) would prevent
+ *   the use of default parameter values.
+ *   Instead, such methods set the UnicodeString into a "bogus" state
+ *   (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
 * <p>UnicodeString uses several storage methods.
 * String contents can be stored inside the UnicodeString object itself,
 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
@ -3176,19 +3195,17 @@ UnicodeString::getBuffer() const {
 //========================================
 inline int8_t
 UnicodeString::doCompare(int32_t start,
-              int32_t _length,
+              int32_t length,
              const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength) const
 {
-  const UChar *srcChars;
-  if(!srcText.isBogus()) {
-    srcText.pinIndices(srcStart, srcLength);
-    srcChars=srcText.getArrayStart();
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
-    srcChars=0;
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompare(start, length, srcText.fArray, srcStart, srcLength);
  }
-  return doCompare(start, _length, srcChars, srcStart, srcLength);
 }

 inline UBool
@ -3272,19 +3289,17 @@ UnicodeString::compareBetween(int32_t start,

 inline int8_t
 UnicodeString::doCompareCodePointOrder(int32_t start,
-                                       int32_t _length,
+                                       int32_t length,
                                       const UnicodeString& srcText,
                                       int32_t srcStart,
                                       int32_t srcLength) const
 {
-  const UChar *srcChars;
-  if(!srcText.isBogus()) {
-    srcText.pinIndices(srcStart, srcLength);
-    srcChars=srcText.getArrayStart();
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
-    srcChars=0;
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
  }
-  return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
 }

 inline int8_t 
@ -3335,20 +3350,18 @@ UnicodeString::compareCodePointOrderBetween(int32_t start,

 inline int8_t
 UnicodeString::doCaseCompare(int32_t start,
-                             int32_t _length,
+                             int32_t length,
                             const UnicodeString &srcText,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
 {
-  const UChar *srcChars;
-  if(!srcText.isBogus()) {
-    srcText.pinIndices(srcStart, srcLength);
-    srcChars=srcText.getArrayStart();
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
-    srcChars=0;
+    srcText.pinIndices(srcStart, srcLength);
+    return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
  }
-  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
 }

 inline int8_t 
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -218,7 +218,7 @@ UnicodeString::UnicodeString(const UChar *text)
    fArray(fStackBuffer),
    fFlags(kShortString)
 {
-  doReplace(0, 0, text, 0, u_strlen(text));
+  doReplace(0, 0, text, 0, -1);
 }

 UnicodeString::UnicodeString(const UChar *text,
@ -239,9 +239,15 @@ UnicodeString::UnicodeString(UBool isTerminated,
    fArray((UChar *)text),
    fFlags(kReadonlyAlias)
 {
-  if( text == 0 || textLength < -1 ||
-      (textLength == -1 && !isTerminated) ||
-      (textLength >= 0 && isTerminated && text[textLength] != 0)
+  if(text == NULL) {
+    // treat as an empty string, do not alias
+    fLength = 0;
+    fCapacity = US_STACKBUF_SIZE;
+    fArray = fStackBuffer;
+    fFlags = kShortString;
+  } else if(textLength < -1 ||
+            (textLength == -1 && !isTerminated) ||
+            (textLength >= 0 && isTerminated && text[textLength] != 0)
  ) {
    setToBogus();
  } else if(textLength == -1) {
@ -259,10 +265,15 @@ UnicodeString::UnicodeString(UChar *buff,
    fArray(buff),
    fFlags(kWritableAlias)
 {
-  if(buff == 0 || buffLength < -1 || buffLength > buffCapacity) {
+  if(buff == NULL) {
+    // treat as an empty string, do not alias
+    fLength = 0;
+    fCapacity = US_STACKBUF_SIZE;
+    fArray = fStackBuffer;
+    fFlags = kShortString;
+  } else if(buff == 0 || buffLength < -1 || buffLength > buffCapacity) {
    setToBogus();
-  }
-  if(buffLength == -1) {
+  } else if(buffLength == -1) {
    // fLength = u_strlen(buff); but do not look beyond buffCapacity
    const UChar *p = buff, *limit = buff + buffCapacity;
    while(p != limit && *p != 0) {
@ -308,7 +319,9 @@ UnicodeString::UnicodeString(const char *src, int32_t srcLength,
 {
  if(U_SUCCESS(errorCode)) {
    // check arguments
-    if(srcLength<-1 || (srcLength!=0 && src==0)) {
+    if(src==NULL) {
+      // treat as an empty string, do nothing more
+    } else if(srcLength<-1) {
      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    } else {
      // get input length
@ -579,19 +592,18 @@ UnicodeString::doCompare( int32_t start,
              int32_t srcLength) const
 {
  // compare illegal string values
+  // treat const UChar *srcChars==NULL as an empty string
  if(isBogus()) {
-    if(srcChars==0) {
-      return 0;
-    } else {
-      return -1;
-    }
-  } else if(srcChars==0) {
-    return 1;
+    return -1;
  }
-
+  
  // pin indices to legal values
  pinIndices(start, length);

+  if(srcChars == NULL) {
+    srcStart = srcLength = 0;
+  }
+
  // get the correct pointer
  const UChar *chars = getArrayStart();

@ -659,19 +671,18 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
                                       int32_t srcLength) const
 {
  // compare illegal string values
+  // treat const UChar *srcChars==NULL as an empty string
  if(isBogus()) {
-    if(srcChars==0) {
-      return 0;
-    } else {
-      return -1;
-    }
-  } else if(srcChars==0) {
-    return 1;
+    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

+  if(srcChars == NULL) {
+    srcStart = srcLength = 0;
+  }
+
  int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
  /* translate the 32-bit result into an 8-bit one */
  if(diff!=0) {
@ -690,19 +701,18 @@ UnicodeString::doCaseCompare(int32_t start,
                             uint32_t options) const
 {
  // compare illegal string values
+  // treat const UChar *srcChars==NULL as an empty string
  if(isBogus()) {
-    if(srcChars==0) {
-      return 0;
-    } else {
-      return -1;
-    }
-  } else if(srcChars==0) {
-    return 1;
+    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

+  if(srcChars == NULL) {
+    srcStart = srcLength = 0;
+  }
+
  // get the correct pointer
  const UChar *chars = getArrayStart();

@ -1008,7 +1018,17 @@ UnicodeString::setTo(UBool isTerminated,
    return *this;
  }

-  if( text == 0 || textLength < -1 ||
+  if(text == NULL) {
+    // treat as an empty string, do not alias
+    releaseArray();
+    fLength = 0;
+    fCapacity = US_STACKBUF_SIZE;
+    fArray = fStackBuffer;
+    fFlags = kShortString;
+    return *this;
+  }
+
+  if( textLength < -1 ||
      (textLength == -1 && !isTerminated) ||
      (textLength >= 0 && isTerminated && text[textLength] != 0)
  ) {
@ -1042,7 +1062,17 @@ UnicodeString::setTo(UChar *buffer,
    return *this;
  }

-  if(buffer == 0 || buffLength < 0 || buffLength > buffCapacity) {
+  if(buffer == NULL) {
+    // treat as an empty string, do not alias
+    releaseArray();
+    fLength = 0;
+    fCapacity = US_STACKBUF_SIZE;
+    fArray = fStackBuffer;
+    fFlags = kShortString;
+    return *this;
+  }
+
+  if(buffLength < 0 || buffLength > buffCapacity) {
    setToBogus();
    return *this;
  }