diff --git a/icu4c/source/common/schriter.cpp b/icu4c/source/common/schriter.cpp index 29f15f9749..c6c42442a7 100644 --- a/icu4c/source/common/schriter.cpp +++ b/icu4c/source/common/schriter.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1998-2004, International Business Machines Corporation and * +* Copyright (C) 1998-2007, International Business Machines Corporation and * * others. All Rights Reserved. * ****************************************************************************** * @@ -28,31 +28,31 @@ StringCharacterIterator::StringCharacterIterator() } StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr) - : UCharCharacterIterator(textStr.fArray, textStr.length()), + : UCharCharacterIterator(textStr.getBuffer(), textStr.length()), text(textStr) { // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.fArray; + UCharCharacterIterator::text = this->text.getBuffer(); } StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, int32_t textPos) - : UCharCharacterIterator(textStr.fArray, textStr.length(), textPos), + : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos), text(textStr) { // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.fArray; + UCharCharacterIterator::text = this->text.getBuffer(); } StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, int32_t textBegin, int32_t textEnd, int32_t textPos) - : UCharCharacterIterator(textStr.fArray, textStr.length(), textBegin, textEnd, textPos), + : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos), text(textStr) { // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.fArray; + UCharCharacterIterator::text = this->text.getBuffer(); } StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that) @@ -60,7 +60,7 @@ StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& text(that.text) { // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.fArray; + UCharCharacterIterator::text = this->text.getBuffer(); } StringCharacterIterator::~StringCharacterIterator() { @@ -71,7 +71,7 @@ StringCharacterIterator::operator=(const StringCharacterIterator& that) { UCharCharacterIterator::operator=(that); text = that.text; // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.fArray; + UCharCharacterIterator::text = this->text.getBuffer(); return *this; } @@ -105,7 +105,7 @@ StringCharacterIterator::clone() const { void StringCharacterIterator::setText(const UnicodeString& newText) { text = newText; - UCharCharacterIterator::setText(text.fArray, text.length()); + UCharCharacterIterator::setText(text.getBuffer(), text.length()); } void diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 575998e4ed..18bf9e05a8 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3089,9 +3089,23 @@ private: int32_t doHashCode(void) const; // get pointer to start of array + // these do not check for kOpenGetBuffer, unlike the public getBuffer() function inline UChar* getArrayStart(void); inline const UChar* getArrayStart(void) const; + // A UnicodeString object (not necessarily its current buffer) + // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). + inline UBool isWritable() const; + + // Is the current buffer writable? + inline UBool isBufferWritable() const; + + // None of the following does releaseArray(). + inline void setLength(int32_t len); // sets only fShortLength and fLength + inline void setToEmpty(); // sets fFlags=kShortString + inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString + inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags + // allocate the array; result may be fStackBuffer // sets refCount to 1 if appropriate // sets fArray, fCapacity, and fFlags @@ -3177,7 +3191,10 @@ private: // constants enum { - US_STACKBUF_SIZE=7, // Size of stack buffer for small strings + // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer): + // 32-bit pointers: 4+1+1+13*2 = 32 bytes + // 64-bit pointers: 8+1+1+15*2 = 40 bytes + US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings kInvalidUChar=0xffff, // invalid UChar index kGrowSize=128, // grow size for this buffer kInvalidHashCode=0, // invalid hash code @@ -3198,7 +3215,6 @@ private: kWritableAlias=0 }; - friend class StringCharacterIterator; friend class StringThreadTest; /* @@ -3213,12 +3229,19 @@ private: * on 64-bit machines (8-byte pointers), it should be 40 bytes. */ // (implicit) *vtable; - int32_t fLength; // number of characters in fArray - int32_t fCapacity; // sizeof fArray - UChar *fArray; // the Unicode data - uint16_t fFlags; // bit flags: see constants above - UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings - + int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength + uint8_t fFlags; // bit flags: see constants above + union { + // fStackBuffer is used iff (fFlags&kUsingStackBuffer) + // else fFields is used + UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings + struct { + uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b) + int32_t fLength; // number of characters in fArray if >127; else undefined + UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b)) + int32_t fCapacity; // sizeof fArray + } fFields; + } fUnion; }; /** @@ -3246,8 +3269,8 @@ UnicodeString::pinIndex(int32_t& start) const // pin index if(start < 0) { start = 0; - } else if(start > fLength) { - start = fLength; + } else if(start > length()) { + start = length(); } } @@ -3256,36 +3279,37 @@ UnicodeString::pinIndices(int32_t& start, int32_t& _length) const { // pin indices + int32_t len = length(); if(start < 0) { start = 0; - } else if(start > fLength) { - start = fLength; + } else if(start > len) { + start = len; } if(_length < 0) { _length = 0; - } else if(_length > (fLength - start)) { - _length = (fLength - start); + } else if(_length > (len - start)) { + _length = (len - start); } } inline UChar* UnicodeString::getArrayStart() -{ return fArray; } +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } inline const UChar* UnicodeString::getArrayStart() const -{ return fArray; } +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } //======================================== // Read-only implementation methods //======================================== inline int32_t UnicodeString::length() const -{ return fLength; } +{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } inline int32_t UnicodeString::getCapacity() const -{ return fCapacity; } +{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } inline int32_t UnicodeString::hashCode() const @@ -3295,12 +3319,26 @@ inline UBool UnicodeString::isBogus() const { return (UBool)(fFlags & kIsBogus); } +inline UBool +UnicodeString::isWritable() const +{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } + +inline UBool +UnicodeString::isBufferWritable() const +{ + return (UBool)( + !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && + (!(fFlags&kRefCounted) || refCount()==1)); +} + inline const UChar * UnicodeString::getBuffer() const { - if(!(fFlags&(kIsBogus|kOpenGetBuffer))) { - return fArray; - } else { + if(fFlags&(kIsBogus|kOpenGetBuffer)) { return 0; + } else if(fFlags&kUsingStackBuffer) { + return fUnion.fStackBuffer; + } else { + return fUnion.fFields.fArray; } } @@ -3318,7 +3356,7 @@ UnicodeString::doCompare(int32_t start, return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); - return doCompare(start, length, srcText.fArray, srcStart, srcLength); + return doCompare(start, length, srcText.getArrayStart(), srcStart, srcLength); } } @@ -3328,10 +3366,11 @@ UnicodeString::operator== (const UnicodeString& text) const if(isBogus()) { return text.isBogus(); } else { + int32_t len = length(), textLength = text.length(); return !text.isBogus() && - fLength == text.fLength && - doCompare(0, fLength, text, 0, text.fLength) == 0; + len == textLength && + doCompare(0, len, text, 0, textLength) == 0; } } @@ -3341,34 +3380,34 @@ UnicodeString::operator!= (const UnicodeString& text) const inline UBool UnicodeString::operator> (const UnicodeString& text) const -{ return doCompare(0, fLength, text, 0, text.fLength) == 1; } +{ return doCompare(0, length(), text, 0, text.length()) == 1; } inline UBool UnicodeString::operator< (const UnicodeString& text) const -{ return doCompare(0, fLength, text, 0, text.fLength) == -1; } +{ return doCompare(0, length(), text, 0, text.length()) == -1; } inline UBool UnicodeString::operator>= (const UnicodeString& text) const -{ return doCompare(0, fLength, text, 0, text.fLength) != -1; } +{ return doCompare(0, length(), text, 0, text.length()) != -1; } inline UBool UnicodeString::operator<= (const UnicodeString& text) const -{ return doCompare(0, fLength, text, 0, text.fLength) != 1; } +{ return doCompare(0, length(), text, 0, text.length()) != 1; } inline int8_t UnicodeString::compare(const UnicodeString& text) const -{ return doCompare(0, fLength, text, 0, text.fLength); } +{ return doCompare(0, length(), text, 0, text.length()); } inline int8_t UnicodeString::compare(int32_t start, int32_t _length, const UnicodeString& srcText) const -{ return doCompare(start, _length, srcText, 0, srcText.fLength); } +{ return doCompare(start, _length, srcText, 0, srcText.length()); } inline int8_t UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const -{ return doCompare(0, fLength, srcChars, 0, srcLength); } +{ return doCompare(0, length(), srcChars, 0, srcLength); } inline int8_t UnicodeString::compare(int32_t start, @@ -3412,24 +3451,24 @@ UnicodeString::doCompareCodePointOrder(int32_t start, return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); - return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength); + return doCompareCodePointOrder(start, length, srcText.getArrayStart(), srcStart, srcLength); } } inline int8_t UnicodeString::compareCodePointOrder(const UnicodeString& text) const -{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); } +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, const UnicodeString& srcText) const -{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); } +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } inline int8_t UnicodeString::compareCodePointOrder(const UChar *srcChars, int32_t srcLength) const -{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); } +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, @@ -3474,13 +3513,13 @@ UnicodeString::doCaseCompare(int32_t start, return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); - return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options); + return doCaseCompare(start, length, srcText.getArrayStart(), srcStart, srcLength, options); } } inline int8_t UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { - return doCaseCompare(0, fLength, text, 0, text.fLength, options); + return doCaseCompare(0, length(), text, 0, text.length(), options); } inline int8_t @@ -3488,14 +3527,14 @@ UnicodeString::caseCompare(int32_t start, int32_t _length, const UnicodeString &srcText, uint32_t options) const { - return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options); + return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); } inline int8_t UnicodeString::caseCompare(const UChar *srcChars, int32_t srcLength, uint32_t options) const { - return doCaseCompare(0, fLength, srcChars, 0, srcLength, options); + return doCaseCompare(0, length(), srcChars, 0, srcLength, options); } inline int8_t @@ -3554,27 +3593,27 @@ UnicodeString::indexOf(const UnicodeString& srcText, inline int32_t UnicodeString::indexOf(const UnicodeString& text) const -{ return indexOf(text, 0, text.fLength, 0, fLength); } +{ return indexOf(text, 0, text.length(), 0, length()); } inline int32_t UnicodeString::indexOf(const UnicodeString& text, int32_t start) const { pinIndex(start); - return indexOf(text, 0, text.fLength, start, fLength - start); + return indexOf(text, 0, text.length(), start, length() - start); } inline int32_t UnicodeString::indexOf(const UnicodeString& text, int32_t start, int32_t _length) const -{ return indexOf(text, 0, text.fLength, start, _length); } +{ return indexOf(text, 0, text.length(), start, _length); } inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); - return indexOf(srcChars, 0, srcLength, start, fLength - start); + return indexOf(srcChars, 0, srcLength, start, length() - start); } inline int32_t @@ -3598,24 +3637,24 @@ UnicodeString::indexOf(UChar32 c, inline int32_t UnicodeString::indexOf(UChar c) const -{ return doIndexOf(c, 0, fLength); } +{ return doIndexOf(c, 0, length()); } inline int32_t UnicodeString::indexOf(UChar32 c) const -{ return indexOf(c, 0, fLength); } +{ return indexOf(c, 0, length()); } inline int32_t UnicodeString::indexOf(UChar c, int32_t start) const { pinIndex(start); - return doIndexOf(c, start, fLength - start); + return doIndexOf(c, start, length() - start); } inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start) const { pinIndex(start); - return indexOf(c, start, fLength - start); + return indexOf(c, start, length() - start); } inline int32_t @@ -3630,7 +3669,7 @@ UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); - return lastIndexOf(srcChars, 0, srcLength, start, fLength - start); + return lastIndexOf(srcChars, 0, srcLength, start, length() - start); } inline int32_t @@ -3653,18 +3692,18 @@ inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start, int32_t _length) const -{ return lastIndexOf(text, 0, text.fLength, start, _length); } +{ return lastIndexOf(text, 0, text.length(), start, _length); } inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start) const { pinIndex(start); - return lastIndexOf(text, 0, text.fLength, start, fLength - start); + return lastIndexOf(text, 0, text.length(), start, length() - start); } inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text) const -{ return lastIndexOf(text, 0, text.fLength, 0, fLength); } +{ return lastIndexOf(text, 0, text.length(), 0, length()); } inline int32_t UnicodeString::lastIndexOf(UChar c, @@ -3681,30 +3720,30 @@ UnicodeString::lastIndexOf(UChar32 c, inline int32_t UnicodeString::lastIndexOf(UChar c) const -{ return doLastIndexOf(c, 0, fLength); } +{ return doLastIndexOf(c, 0, length()); } inline int32_t UnicodeString::lastIndexOf(UChar32 c) const { - return lastIndexOf(c, 0, fLength); + return lastIndexOf(c, 0, length()); } inline int32_t UnicodeString::lastIndexOf(UChar c, int32_t start) const { pinIndex(start); - return doLastIndexOf(c, start, fLength - start); + return doLastIndexOf(c, start, length() - start); } inline int32_t UnicodeString::lastIndexOf(UChar32 c, int32_t start) const { pinIndex(start); - return lastIndexOf(c, start, fLength - start); + return lastIndexOf(c, start, length() - start); } inline UBool UnicodeString::startsWith(const UnicodeString& text) const -{ return compare(0, text.fLength, text, 0, text.fLength) == 0; } +{ return compare(0, text.length(), text, 0, text.length()) == 0; } inline UBool UnicodeString::startsWith(const UnicodeString& srcText, @@ -3725,15 +3764,15 @@ UnicodeString::startsWith(const UChar *srcChars, inline UBool UnicodeString::endsWith(const UnicodeString& text) const -{ return doCompare(fLength - text.fLength, text.fLength, - text, 0, text.fLength) == 0; } +{ return doCompare(length() - text.length(), text.length(), + text, 0, text.length()) == 0; } inline UBool UnicodeString::endsWith(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const { srcText.pinIndices(srcStart, srcLength); - return doCompare(fLength - srcLength, srcLength, + return doCompare(length() - srcLength, srcLength, srcText, srcStart, srcLength) == 0; } @@ -3743,7 +3782,7 @@ UnicodeString::endsWith(const UChar *srcChars, if(srcLength < 0) { srcLength = u_strlen(srcChars); } - return doCompare(fLength - srcLength, srcLength, + return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; } @@ -3754,7 +3793,7 @@ UnicodeString::endsWith(const UChar *srcChars, if(srcLength < 0) { srcLength = u_strlen(srcChars + srcStart); } - return doCompare(fLength - srcLength, srcLength, + return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -3765,7 +3804,7 @@ inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, const UnicodeString& srcText) -{ return doReplace(start, _length, srcText, 0, srcText.fLength); } +{ return doReplace(start, _length, srcText, 0, srcText.length()); } inline UnicodeString& UnicodeString::replace(int32_t start, @@ -3811,7 +3850,7 @@ inline UnicodeString& UnicodeString::replaceBetween(int32_t start, int32_t limit, const UnicodeString& srcText) -{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); } +{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } inline UnicodeString& UnicodeString::replaceBetween(int32_t start, @@ -3824,16 +3863,16 @@ UnicodeString::replaceBetween(int32_t start, inline UnicodeString& UnicodeString::findAndReplace(const UnicodeString& oldText, const UnicodeString& newText) -{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength, - newText, 0, newText.fLength); } +{ return findAndReplace(0, length(), oldText, 0, oldText.length(), + newText, 0, newText.length()); } inline UnicodeString& UnicodeString::findAndReplace(int32_t start, int32_t _length, const UnicodeString& oldText, const UnicodeString& newText) -{ return findAndReplace(start, _length, oldText, 0, oldText.fLength, - newText, 0, newText.fLength); } +{ return findAndReplace(start, _length, oldText, 0, oldText.length(), + newText, 0, newText.length()); } // ============================ // extract @@ -3842,7 +3881,7 @@ inline void UnicodeString::doExtract(int32_t start, int32_t _length, UnicodeString& target) const -{ target.replace(0, target.fLength, *this, start, _length); } +{ target.replace(0, target.length(), *this, start, _length); } inline void UnicodeString::extract(int32_t start, @@ -3885,8 +3924,8 @@ UnicodeString::extractBetween(int32_t start, inline UChar UnicodeString::doCharAt(int32_t offset) const { - if((uint32_t)offset < (uint32_t)fLength) { - return fArray[offset]; + if((uint32_t)offset < (uint32_t)length()) { + return getArrayStart()[offset]; } else { return kInvalidUChar; } @@ -3903,9 +3942,11 @@ UnicodeString::operator[] (int32_t offset) const inline UChar32 UnicodeString::char32At(int32_t offset) const { - if((uint32_t)offset < (uint32_t)fLength) { + int32_t len = length(); + if((uint32_t)offset < (uint32_t)len) { + const UChar *array = getArrayStart(); UChar32 c; - U16_GET(fArray, 0, offset, fLength, c); + U16_GET(array, 0, offset, len, c); return c; } else { return kInvalidUChar; @@ -3914,8 +3955,9 @@ UnicodeString::char32At(int32_t offset) const inline int32_t UnicodeString::getChar32Start(int32_t offset) const { - if((uint32_t)offset < (uint32_t)fLength) { - U16_SET_CP_START(fArray, 0, offset); + if((uint32_t)offset < (uint32_t)length()) { + const UChar *array = getArrayStart(); + U16_SET_CP_START(array, 0, offset); return offset; } else { return 0; @@ -3924,43 +3966,79 @@ UnicodeString::getChar32Start(int32_t offset) const { inline int32_t UnicodeString::getChar32Limit(int32_t offset) const { - if((uint32_t)offset < (uint32_t)fLength) { - U16_SET_CP_LIMIT(fArray, 0, offset, fLength); + int32_t len = length(); + if((uint32_t)offset < (uint32_t)len) { + const UChar *array = getArrayStart(); + U16_SET_CP_LIMIT(array, 0, offset, len); return offset; } else { - return fLength; + return len; } } inline UBool UnicodeString::isEmpty() const { - return fLength == 0; + return fShortLength == 0; } //======================================== // Write implementation methods //======================================== +inline void +UnicodeString::setLength(int32_t len) { + if(len <= 127) { + fShortLength = (int8_t)len; + } else { + fShortLength = (int8_t)-1; + fUnion.fFields.fLength = len; + } +} + +inline void +UnicodeString::setToEmpty() { + fShortLength = 0; + fFlags = kShortString; +} + +inline void +UnicodeString::setToStackBuffer(int32_t len) { + fShortLength = (int8_t)len; + fFlags = kShortString; +} + +inline void +UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { + setLength(len); + fUnion.fFields.fArray = array; + fUnion.fFields.fCapacity = capacity; +} + inline const UChar * UnicodeString::getTerminatedBuffer() { - if(fFlags&(kIsBogus|kOpenGetBuffer)) { + if(!isWritable()) { return 0; - } else if(fLength 0x10ffff) { @@ -170,13 +166,14 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) capacity = length; } if(allocate(capacity)) { + UChar *array = getArrayStart(); int32_t i = 0; // fill the new string with c if(unitCount == 1) { // fill with length UChars while(i < length) { - fArray[i++] = (UChar)c; + array[i++] = (UChar)c; } } else { // get the code units for c @@ -191,40 +188,34 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) while(i < length) { int32_t unitIdx = 0; while(unitIdx < unitCount) { - fArray[i++]=units[unitIdx++]; + array[i++]=units[unitIdx++]; } } } } - fLength = length; + setLength(length); } } UnicodeString::UnicodeString(UChar ch) - : fLength(1), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(1), fFlags(kShortString) { - fStackBuffer[0] = ch; + fUnion.fStackBuffer[0] = ch; } UnicodeString::UnicodeString(UChar32 ch) - : fLength(1), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { int32_t i = 0; UBool isError = FALSE; - U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); - fLength = i; + U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); + fShortLength = (int8_t)i; } UnicodeString::UnicodeString(const UChar *text) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { doReplace(0, 0, text, 0, -1); @@ -232,9 +223,7 @@ UnicodeString::UnicodeString(const UChar *text) UnicodeString::UnicodeString(const UChar *text, int32_t textLength) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { doReplace(0, 0, text, 0, textLength); @@ -243,59 +232,52 @@ UnicodeString::UnicodeString(const UChar *text, UnicodeString::UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength) - : fLength(textLength), - fCapacity(isTerminated ? textLength + 1 : textLength), - fArray((UChar *)text), + : fShortLength(0), fFlags(kReadonlyAlias) { if(text == NULL) { // treat as an empty string, do not alias - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); } else if(textLength < -1 || (textLength == -1 && !isTerminated) || (textLength >= 0 && isTerminated && text[textLength] != 0) ) { setToBogus(); - } else if(textLength == -1) { - // text is terminated, or else it would have failed the above test - fLength = u_strlen(text); - fCapacity = fLength + 1; + } else { + if(textLength == -1) { + // text is terminated, or else it would have failed the above test + textLength = u_strlen(text); + } + setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); } } UnicodeString::UnicodeString(UChar *buff, int32_t buffLength, int32_t buffCapacity) - : fLength(buffLength), - fCapacity(buffCapacity), - fArray(buff), + : fShortLength(0), fFlags(kWritableAlias) { if(buff == NULL) { // treat as an empty string, do not alias - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { setToBogus(); - } else if(buffLength == -1) { - // fLength = u_strlen(buff); but do not look beyond buffCapacity - const UChar *p = buff, *limit = buff + buffCapacity; - while(p != limit && *p != 0) { - ++p; + } else { + if(buffLength == -1) { + // fLength = u_strlen(buff); but do not look beyond buffCapacity + const UChar *p = buff, *limit = buff + buffCapacity; + while(p != limit && *p != 0) { + ++p; + } + buffLength = (int32_t)(p - buff); } - fLength = (int32_t)(p - buff); + setArray(buff, buffLength, buffCapacity); } } UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { if(src==NULL) { @@ -306,7 +288,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) } if(cloneArrayIfNeeded(length, length, FALSE)) { u_charsToUChars(src, getArrayStart(), length); - fLength = length; + setLength(length); } else { setToBogus(); } @@ -315,9 +297,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) UnicodeString::UnicodeString(const UnicodeString& that) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { copyFrom(that); @@ -326,9 +306,7 @@ UnicodeString::UnicodeString(const UnicodeString& that) UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { setTo(that, srcStart); @@ -338,9 +316,7 @@ UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart, int32_t srcLength) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { setTo(that, srcStart, srcLength); @@ -365,8 +341,6 @@ UnicodeString::clone() const { UBool UnicodeString::allocate(int32_t capacity) { if(capacity <= US_STACKBUF_SIZE) { - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; fFlags = kShortString; } else { // count bytes for the refCounter and the string capacity, and @@ -379,12 +353,13 @@ UnicodeString::allocate(int32_t capacity) { *array++ = 1; // have fArray point to the first UChar - fArray = (UChar *)array; - fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); + fUnion.fFields.fArray = (UChar *)array; + fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); fFlags = kLongString; } else { - fLength = 0; - fCapacity = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; return FALSE; } @@ -431,40 +406,38 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { // delete the current contents releaseArray(); - // we always copy the length - fLength = src.fLength; - if(fLength == 0) { + if(src.isEmpty()) { // empty string - use the stack buffer - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; - fFlags = kShortString; + setToEmpty(); return *this; } + // we always copy the length + int32_t srcLength = src.length(); + setLength(srcLength); + // fLength>0 and not an "open" src.getBuffer(minCapacity) switch(src.fFlags) { case kShortString: // short string using the stack buffer, do the same - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; fFlags = kShortString; - uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR); + uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, fShortLength * U_SIZEOF_UCHAR); break; case kLongString: // src uses a refCounted string buffer, use that buffer with refCount // src is const, use a cast - we don't really change it ((UnicodeString &)src).addRef(); // copy all fields, share the reference-counted buffer - fArray = src.fArray; - fCapacity = src.fCapacity; + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; fFlags = src.fFlags; break; case kReadonlyAlias: if(fastCopy) { // src is a readonly alias, do the same // -> maintain the readonly alias as such - fArray = src.fArray; - fCapacity = src.fCapacity; + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; fFlags = src.fFlags; break; } @@ -472,17 +445,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { // -> allocate a new buffer and copy the contents case kWritableAlias: // src is a writable alias; we make a copy of that instead - if(allocate(fLength)) { - uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR); + if(allocate(srcLength)) { + uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); break; } // if there is not enough memory, then fall through to setting to bogus default: // if src is bogus, set ourselves to bogus // do not call setToBogus() here because fArray and fFlags are not consistent here - fArray = 0; - fLength = 0; - fCapacity = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; break; } @@ -495,17 +468,25 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { //======================================== UnicodeString UnicodeString::unescape() const { - UnicodeString result; - for (int32_t i=0; i> 15 | 1); @@ -644,29 +625,31 @@ int32_t UnicodeString::countChar32(int32_t start, int32_t length) const { pinIndices(start, length); // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL - return u_countChar32(fArray+start, length); + return u_countChar32(getArrayStart()+start, length); } UBool UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { pinIndices(start, length); // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL - return u_strHasMoreChar32Than(fArray+start, length, number); + return u_strHasMoreChar32Than(getArrayStart()+start, length, number); } int32_t UnicodeString::moveIndex32(int32_t index, int32_t delta) const { // pin index + int32_t len = length(); if(index<0) { index=0; - } else if(index>fLength) { - index=fLength; + } else if(index>len) { + index=len; } + const UChar *array = getArrayStart(); if(delta>0) { - UTF_FWD_N(fArray, index, fLength, delta); + UTF_FWD_N(array, index, len, delta); } else { - UTF_BACK_N(fArray, 0, index, -delta); + UTF_BACK_N(array, 0, index, -delta); } return index; @@ -682,26 +665,29 @@ UnicodeString::doExtract(int32_t start, pinIndices(start, length); // do not copy anything if we alias dst itself - if(fArray + start != dst + dstStart) { - us_arrayCopy(getArrayStart(), start, dst, dstStart, length); + const UChar *array = getArrayStart(); + if(array + start != dst + dstStart) { + us_arrayCopy(array, start, dst, dstStart, length); } } int32_t UnicodeString::extract(UChar *dest, int32_t destCapacity, UErrorCode &errorCode) const { + int32_t len = length(); if(U_SUCCESS(errorCode)) { if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { - if(fLength>0 && fLength<=destCapacity && fArray!=dest) { - uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR); + const UChar *array = getArrayStart(); + if(len>0 && len<=destCapacity && array!=dest) { + uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); } - return u_terminateUChars(dest, destCapacity, fLength, &errorCode); + return u_terminateUChars(dest, destCapacity, len, &errorCode); } } - return fLength; + return len; } int32_t @@ -755,11 +741,12 @@ UnicodeString::indexOf(const UChar *srcChars, pinIndices(start, length); // find the first occurrence of the substring - const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength); + const UChar *array = getArrayStart(); + const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -772,11 +759,12 @@ UnicodeString::doIndexOf(UChar c, pinIndices(start, length); // find the first occurrence of c - const UChar *match = u_memchr(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memchr(array + start, c, length); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -788,11 +776,12 @@ UnicodeString::doIndexOf(UChar32 c, pinIndices(start, length); // find the first occurrence of c - const UChar *match = u_memchr32(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memchr32(array + start, c, length); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -816,11 +805,12 @@ UnicodeString::lastIndexOf(const UChar *srcChars, pinIndices(start, length); // find the last occurrence of the substring - const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength); + const UChar *array = getArrayStart(); + const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -837,11 +827,12 @@ UnicodeString::doLastIndexOf(UChar c, pinIndices(start, length); // find the last occurrence of c - const UChar *match = u_memrchr(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr(array + start, c, length); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -853,11 +844,12 @@ UnicodeString::doLastIndexOf(UChar32 c, pinIndices(start, length); // find the last occurrence of c - const UChar *match = u_memrchr32(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr32(array + start, c, length); if(match == NULL) { return -1; } else { - return (int32_t)(match - fArray); + return (int32_t)(match - array); } } @@ -909,8 +901,9 @@ UnicodeString::setToBogus() { releaseArray(); - fArray = 0; - fCapacity = fLength = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; } @@ -918,10 +911,7 @@ UnicodeString::setToBogus() void UnicodeString::unBogus() { if(fFlags & kIsBogus) { - fArray = fStackBuffer; - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fFlags = kShortString; + setToEmpty(); } } @@ -939,10 +929,7 @@ UnicodeString::setTo(UBool isTerminated, if(text == NULL) { // treat as an empty string, do not alias releaseArray(); - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); return *this; } @@ -956,15 +943,11 @@ UnicodeString::setTo(UBool isTerminated, releaseArray(); - fArray = (UChar *)text; - if(textLength != -1) { - fLength = textLength; - fCapacity = isTerminated ? fLength + 1 : fLength; - } else { + if(textLength == -1) { // text is terminated, or else it would have failed the above test - fLength = u_strlen(text); - fCapacity = fLength + 1; + textLength = u_strlen(text); } + setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); fFlags = kReadonlyAlias; return *this; @@ -983,10 +966,7 @@ UnicodeString::setTo(UChar *buffer, if(buffer == NULL) { // treat as an empty string, do not alias releaseArray(); - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); return *this; } @@ -1004,9 +984,7 @@ UnicodeString::setTo(UChar *buffer, releaseArray(); - fArray = buffer; - fLength = buffLength; - fCapacity = buffCapacity; + setArray(buffer, buffLength, buffCapacity); fFlags = kWritableAlias; return *this; } @@ -1015,14 +993,15 @@ UnicodeString& UnicodeString::setCharAt(int32_t offset, UChar c) { - if(cloneArrayIfNeeded() && fLength > 0) { + int32_t len = length(); + if(cloneArrayIfNeeded() && len > 0) { if(offset < 0) { offset = 0; - } else if(offset >= fLength) { - offset = fLength - 1; + } else if(offset >= len) { + offset = len - 1; } - fArray[offset] = c; + getArrayStart()[offset] = c; } return *this; } @@ -1054,7 +1033,7 @@ UnicodeString::doReplace(int32_t start, int32_t srcStart, int32_t srcLength) { - if(isBogus()) { + if(!isWritable()) { return *this; } @@ -1065,20 +1044,45 @@ UnicodeString::doReplace(int32_t start, srcLength = u_strlen(srcChars + srcStart); } - int32_t *bufferToDelete = 0; + int32_t oldLength = this->length(); + + // calculate the size of the string after the replace + int32_t newSize; + + // optimize append() onto a large-enough, owned string + if(start >= oldLength) { + newSize = oldLength + srcLength; + if(newSize <= getCapacity() && isBufferWritable()) { + us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength); + setLength(newSize); + return *this; + } else { + // pin the indices to legal values + start = oldLength; + length = 0; + } + } else { + // pin the indices to legal values + pinIndices(start, length); + + newSize = oldLength - length + srcLength; + } // the following may change fArray but will not copy the current contents; // therefore we need to keep the current fArray - UChar *oldArray = fArray; - int32_t oldLength = fLength; - - // pin the indices to legal values - pinIndices(start, length); - - // calculate the size of the string after the replace - int32_t newSize = oldLength - length + srcLength; + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); + oldArray = oldStackBuffer; + } else { + oldArray = getArrayStart(); + } // clone our array and allocate a bigger array if needed + int32_t *bufferToDelete = 0; if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, FALSE, &bufferToDelete) ) { @@ -1087,23 +1091,24 @@ UnicodeString::doReplace(int32_t start, // now do the replace - if(fArray != oldArray) { + UChar *newArray = getArrayStart(); + if(newArray != oldArray) { // if fArray changed, then we need to copy everything except what will change - us_arrayCopy(oldArray, 0, fArray, 0, start); + us_arrayCopy(oldArray, 0, newArray, 0, start); us_arrayCopy(oldArray, start + length, - fArray, start + srcLength, + newArray, start + srcLength, oldLength - (start + length)); } else if(length != srcLength) { // fArray did not change; copy only the portion that isn't changing, leaving a hole us_arrayCopy(oldArray, start + length, - fArray, start + srcLength, + newArray, start + srcLength, oldLength - (start + length)); } // now fill in the hole with the new string - us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength); + us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); - fLength = newSize; + setLength(newSize); // delayed delete in case srcChars == fArray when we started, and // to keep oldArray alive for the above operations @@ -1159,7 +1164,7 @@ UnicodeString& UnicodeString::doReverse(int32_t start, int32_t length) { - if(fLength <= 1 || !cloneArrayIfNeeded()) { + if(this->length() <= 1 || !cloneArrayIfNeeded()) { return *this; } @@ -1167,7 +1172,7 @@ UnicodeString::doReverse(int32_t start, pinIndices(start, length); UChar *left = getArrayStart() + start; - UChar *right = getArrayStart() + start + length; + UChar *right = left + length; UChar swap; UBool hasSupplementary = FALSE; @@ -1182,7 +1187,7 @@ UnicodeString::doReverse(int32_t start, UChar swap2; left = getArrayStart() + start; - right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left= targetLength || !cloneArrayIfNeeded(targetLength)) { + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { return FALSE; } else { // move contents up by padding width - int32_t start = targetLength - fLength; - us_arrayCopy(fArray, 0, fArray, start, fLength); + UChar *array = getArrayStart(); + int32_t start = targetLength - oldLength; + us_arrayCopy(array, 0, array, start, oldLength); // fill in padding character while(--start >= 0) { - fArray[start] = padChar; + array[start] = padChar; } - fLength = targetLength; + setLength(targetLength); return TRUE; } } @@ -1220,15 +1227,17 @@ UBool UnicodeString::padTrailing(int32_t targetLength, UChar padChar) { - if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { return FALSE; } else { // fill in padding character + UChar *array = getArrayStart(); int32_t length = targetLength; - while(--length >= fLength) { - fArray[length] = padChar; + while(--length >= oldLength) { + array[length] = padChar; } - fLength = targetLength; + setLength(targetLength); return TRUE; } } @@ -1241,7 +1250,7 @@ UnicodeString::doHashCode() const { /* Delegate hash computation to uhash. This makes UnicodeString * hashing consistent with UChar* hashing. */ - int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength); + int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); if (hashCode == kInvalidHashCode) { hashCode = kEmptyHashCode; } @@ -1256,8 +1265,8 @@ UChar * UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fFlags|=kOpenGetBuffer; - fLength=0; - return fArray; + fShortLength=0; + return getArrayStart(); } else { return 0; } @@ -1267,18 +1276,18 @@ void UnicodeString::releaseBuffer(int32_t newLength) { if(fFlags&kOpenGetBuffer && newLength>=-1) { // set the new fLength + int32_t capacity=getCapacity(); if(newLength==-1) { // the new length is the string length, capped by fCapacity - const UChar *p=fArray, *limit=fArray+fCapacity; + const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; while(pcapacity) { + newLength=capacity; } + setLength(newLength); fFlags&=~kOpenGetBuffer; } } @@ -1295,13 +1304,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, // default parameters need to be static, therefore // the defaults are -1 to have convenience defaults if(newCapacity == -1) { - newCapacity = fCapacity; + newCapacity = getCapacity(); } // while a getBuffer(minCapacity) is "open", // prevent any modifications of the string by returning FALSE here // if the string is bogus, then only an assignment or similar can revive it - if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) { + if(!isWritable()) { return FALSE; } @@ -1315,12 +1324,8 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, if(forceClone || fFlags & kBufferIsReadonly || fFlags & kRefCounted && refCount() > 1 || - newCapacity > fCapacity + newCapacity > getCapacity() ) { - // save old values - UChar *array = fArray; - uint16_t flags = fFlags; - // check growCapacity for default value and use of the stack buffer if(growCapacity == -1) { growCapacity = newCapacity; @@ -1328,25 +1333,46 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, growCapacity = US_STACKBUF_SIZE; } + // save old values + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + uint8_t flags = fFlags; + + if(flags&kUsingStackBuffer) { + if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); + oldArray = oldStackBuffer; + } else { + oldArray = 0; // no need to copy from stack buffer to itself + } + } else { + oldArray = fUnion.fFields.fArray; + } + // allocate a new array if(allocate(growCapacity) || newCapacity < growCapacity && allocate(newCapacity) ) { - if(doCopyArray) { + if(doCopyArray && oldArray != 0) { // copy the contents // do not copy more than what fits - it may be smaller than before - if(fCapacity < fLength) { - fLength = fCapacity; + int32_t minLength = length(); + newCapacity = getCapacity(); + if(newCapacity < minLength) { + minLength = newCapacity; + setLength(minLength); } - us_arrayCopy(array, 0, fArray, 0, fLength); + us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); } else { - fLength = 0; + fShortLength = 0; } // release the old array if(flags & kRefCounted) { // the array is refCounted; decrement and release if 0 - int32_t *pRefCount = ((int32_t *)array - 1); + int32_t *pRefCount = ((int32_t *)oldArray - 1); if(umtx_atomic_dec(pRefCount) == 0) { if(pBufferToDelete == 0) { uprv_free(pRefCount); @@ -1359,7 +1385,9 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, } else { // not enough memory for growCapacity and not even for the smaller newCapacity // reset the old values for setToBogus() to release the array - fArray = array; + if(!(flags&kUsingStackBuffer)) { + fUnion.fFields.fArray = oldArray; + } fFlags = flags; setToBogus(); return FALSE; diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index e557b4da7b..617b80d0ce 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -95,7 +95,7 @@ UnicodeString::caseMap(BreakIterator *titleIter, const char *locale, uint32_t options, int32_t toWhichCase) { - if(fLength <= 0) { + if(isEmpty() || !isWritable()) { // nothing to do return *this; } @@ -110,54 +110,62 @@ UnicodeString::caseMap(BreakIterator *titleIter, } // We need to allocate a new buffer for the internal string case mapping function. - // This is very similar to how doReplace() below keeps the old array pointer + // This is very similar to how doReplace() keeps the old array pointer // and deletes the old array itself after it is done. // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. - UChar *oldArray = fArray; - int32_t oldLength = fLength; - int32_t *bufferToDelete = 0; + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + int32_t oldLength; - // Make sure that if the string is in fStackBuffer we do not overwrite it! - int32_t capacity; - if(fLength <= US_STACKBUF_SIZE) { - if(fArray == fStackBuffer) { - capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer - } else { - capacity = US_STACKBUF_SIZE; - } + if(fFlags&kUsingStackBuffer) { + // copy the stack buffer contents because it will be overwritten + u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); + oldArray = oldStackBuffer; + oldLength = fShortLength; } else { - capacity = fLength + 20; + oldArray = getArrayStart(); + oldLength = length(); } + + int32_t capacity; + if(oldLength <= US_STACKBUF_SIZE) { + capacity = US_STACKBUF_SIZE; + } else { + capacity = oldLength + 20; + } + int32_t *bufferToDelete = 0; if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { return *this; } // Case-map, and if the result is too long, then reallocate and repeat. + int32_t newLength; do { errorCode = U_ZERO_ERROR; if(toWhichCase==TO_LOWER) { - fLength = ustr_toLower(csp, fArray, fCapacity, - oldArray, oldLength, - locale, &errorCode); + newLength = ustr_toLower(csp, getArrayStart(), getCapacity(), + oldArray, oldLength, + locale, &errorCode); } else if(toWhichCase==TO_UPPER) { - fLength = ustr_toUpper(csp, fArray, fCapacity, - oldArray, oldLength, - locale, &errorCode); + newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(), + oldArray, oldLength, + locale, &errorCode); } else if(toWhichCase==TO_TITLE) { #if UCONFIG_NO_BREAK_ITERATION errorCode=U_UNSUPPORTED_ERROR; #else - fLength = ustr_toTitle(csp, fArray, fCapacity, - oldArray, oldLength, - (UBreakIterator *)titleIter, locale, options, &errorCode); + newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(), + oldArray, oldLength, + (UBreakIterator *)titleIter, locale, options, &errorCode); #endif } else { - fLength = ustr_foldCase(csp, fArray, fCapacity, - oldArray, oldLength, - options, - &errorCode); + newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(), + oldArray, oldLength, + options, + &errorCode); } - } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE)); + setLength(newLength); + } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); if (bufferToDelete) { uprv_free(bufferToDelete); diff --git a/icu4c/source/common/unistr_cnv.cpp b/icu4c/source/common/unistr_cnv.cpp index a5166e09d8..1a6819e0d6 100644 --- a/icu4c/source/common/unistr_cnv.cpp +++ b/icu4c/source/common/unistr_cnv.cpp @@ -38,9 +38,7 @@ U_NAMESPACE_BEGIN UnicodeString::UnicodeString(const char *codepageData, const char *codepage) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { if(codepageData != 0) { @@ -52,9 +50,7 @@ UnicodeString::UnicodeString(const char *codepageData, UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { if(codepageData != 0) { @@ -65,9 +61,7 @@ UnicodeString::UnicodeString(const char *codepageData, UnicodeString::UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { if(U_SUCCESS(errorCode)) { @@ -183,7 +177,7 @@ UnicodeString::extract(char *dest, int32_t destCapacity, } // nothing to do? - if(fLength<=0) { + if(isEmpty()) { return u_terminateChars(dest, destCapacity, 0, &errorCode); } @@ -201,14 +195,14 @@ UnicodeString::extract(char *dest, int32_t destCapacity, } // convert - int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode); + int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode); // release the converter if(isDefaultConverter) { u_releaseDefaultConverter(cnv); } - return length; + return len; } int32_t @@ -224,7 +218,7 @@ UnicodeString::doExtract(int32_t start, int32_t length, return 0; } - const UChar *src=fArray+start, *srcLimit=src+length; + const UChar *src=getArrayStart()+start, *srcLimit=src+length; char *originalDest=dest; const char *destLimit; @@ -294,7 +288,7 @@ UnicodeString::doCodepageCreate(const char *codepageData, // use the "invariant characters" conversion if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { u_charsToUChars(codepageData, getArrayStart(), dataLength); - fLength = dataLength; + setLength(dataLength); } else { setToBogus(); } @@ -328,11 +322,17 @@ UnicodeString::doCodepageCreate(const char *codepageData, // set up the conversion parameters const char *mySource = codepageData; const char *mySourceEnd = mySource + dataLength; - UChar *myTarget; + UChar *array, *myTarget; // estimate the size needed: - // 1.25 UChar's per source byte should cover most cases - int32_t arraySize = dataLength + (dataLength >> 2); + int32_t arraySize; + if(dataLength <= US_STACKBUF_SIZE) { + // try to use the stack buffer + arraySize = US_STACKBUF_SIZE; + } else { + // 1.25 UChar's per source byte should cover most cases + arraySize = dataLength + (dataLength >> 2); + } // we do not care about the current contents UBool doCopyArray = FALSE; @@ -343,12 +343,13 @@ UnicodeString::doCodepageCreate(const char *codepageData, } // perform the conversion - myTarget = fArray + fLength; - ucnv_toUnicode(converter, &myTarget, fArray + fCapacity, + array = getArrayStart(); + myTarget = array + length(); + ucnv_toUnicode(converter, &myTarget, array + getCapacity(), &mySource, mySourceEnd, 0, TRUE, &status); // update the conversion parameters - fLength = (int32_t)(myTarget - fArray); + setLength((int32_t)(myTarget - array)); // allocate more space and copy data, if needed if(status == U_BUFFER_OVERFLOW_ERROR) { @@ -360,7 +361,7 @@ UnicodeString::doCodepageCreate(const char *codepageData, // estimate the new size needed, larger than before // try 2 UChar's per remaining source byte - arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource)); + arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource)); } else { break; } diff --git a/icu4c/source/common/unistr_props.cpp b/icu4c/source/common/unistr_props.cpp index a82408a558..7670de4655 100644 --- a/icu4c/source/common/unistr_props.cpp +++ b/icu4c/source/common/unistr_props.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2006, International Business Machines +* Copyright (C) 1999-2007, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -29,8 +29,10 @@ UnicodeString::trim() return *this; } + UChar *array = getArrayStart(); UChar32 c; - int32_t i = fLength, length; + int32_t oldLength = this->length(); + int32_t i = oldLength, length; // first cut off trailing white space for(;;) { @@ -38,13 +40,13 @@ UnicodeString::trim() if(i <= 0) { break; } - UTF_PREV_CHAR(fArray, 0, i, c); + U16_PREV(array, 0, i, c); if(!(c == 0x20 || u_isWhitespace(c))) { break; } } - if(length < fLength) { - fLength = length; + if(length < oldLength) { + setLength(length); } // find leading white space @@ -55,7 +57,7 @@ UnicodeString::trim() if(i >= length) { break; } - UTF_NEXT_CHAR(fArray, i, length, c); + U16_NEXT(array, i, length, c); if(!(c == 0x20 || u_isWhitespace(c))) { break; }