ICU-5835 optimize UnicodeString: increase US_STACKBUF_SIZE from 7 to 13; optimize append() and unescape()

X-SVN-Rev: 22718
This commit is contained in:
Markus Scherer 2007-09-28 04:50:25 +00:00
parent 63c858f9cb
commit 13270fbe6f
6 changed files with 488 additions and 371 deletions

View File

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1998-2004, International Business Machines Corporation and *
* Copyright (C) 1998-2007, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*
@ -28,31 +28,31 @@ StringCharacterIterator::StringCharacterIterator()
}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr)
: UCharCharacterIterator(textStr.fArray, textStr.length()),
: UCharCharacterIterator(textStr.getBuffer(), textStr.length()),
text(textStr)
{
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
UCharCharacterIterator::text = this->text.getBuffer();
}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
int32_t textPos)
: UCharCharacterIterator(textStr.fArray, textStr.length(), textPos),
: UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos),
text(textStr)
{
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
UCharCharacterIterator::text = this->text.getBuffer();
}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
int32_t textBegin,
int32_t textEnd,
int32_t textPos)
: UCharCharacterIterator(textStr.fArray, textStr.length(), textBegin, textEnd, textPos),
: UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos),
text(textStr)
{
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
UCharCharacterIterator::text = this->text.getBuffer();
}
StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
@ -60,7 +60,7 @@ StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator&
text(that.text)
{
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
UCharCharacterIterator::text = this->text.getBuffer();
}
StringCharacterIterator::~StringCharacterIterator() {
@ -71,7 +71,7 @@ StringCharacterIterator::operator=(const StringCharacterIterator& that) {
UCharCharacterIterator::operator=(that);
text = that.text;
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
UCharCharacterIterator::text = this->text.getBuffer();
return *this;
}
@ -105,7 +105,7 @@ StringCharacterIterator::clone() const {
void
StringCharacterIterator::setText(const UnicodeString& newText) {
text = newText;
UCharCharacterIterator::setText(text.fArray, text.length());
UCharCharacterIterator::setText(text.getBuffer(), text.length());
}
void

View File

@ -3089,9 +3089,23 @@ private:
int32_t doHashCode(void) const;
// get pointer to start of array
// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
inline UChar* getArrayStart(void);
inline const UChar* getArrayStart(void) const;
// A UnicodeString object (not necessarily its current buffer)
// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
inline UBool isWritable() const;
// Is the current buffer writable?
inline UBool isBufferWritable() const;
// None of the following does releaseArray().
inline void setLength(int32_t len); // sets only fShortLength and fLength
inline void setToEmpty(); // sets fFlags=kShortString
inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
// allocate the array; result may be fStackBuffer
// sets refCount to 1 if appropriate
// sets fArray, fCapacity, and fFlags
@ -3177,7 +3191,10 @@ private:
// constants
enum {
US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
// Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
// 32-bit pointers: 4+1+1+13*2 = 32 bytes
// 64-bit pointers: 8+1+1+15*2 = 40 bytes
US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
kInvalidUChar=0xffff, // invalid UChar index
kGrowSize=128, // grow size for this buffer
kInvalidHashCode=0, // invalid hash code
@ -3198,7 +3215,6 @@ private:
kWritableAlias=0
};
friend class StringCharacterIterator;
friend class StringThreadTest;
/*
@ -3213,12 +3229,19 @@ private:
* on 64-bit machines (8-byte pointers), it should be 40 bytes.
*/
// (implicit) *vtable;
int32_t fLength; // number of characters in fArray
int32_t fCapacity; // sizeof fArray
UChar *fArray; // the Unicode data
uint16_t fFlags; // bit flags: see constants above
UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
int8_t fShortLength; // 0..127: this is the string length; <0: the real length is in fUnion.fFields.fLength
uint8_t fFlags; // bit flags: see constants above
union {
// Storage for the string contents; the two members overlay each other.
// fStackBuffer is used iff (fFlags&kUsingStackBuffer)
// else fFields is used
UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
struct {
uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)
int32_t fLength; // number of UChars in fArray; only set when the length is >127 (fShortLength<0), else undefined
UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
int32_t fCapacity; // capacity of fArray, counted in UChars (not bytes)
} fFields;
} fUnion;
};
/**
@ -3246,8 +3269,8 @@ UnicodeString::pinIndex(int32_t& start) const
// pin index
if(start < 0) {
start = 0;
} else if(start > fLength) {
start = fLength;
} else if(start > length()) {
start = length();
}
}
@ -3256,36 +3279,37 @@ UnicodeString::pinIndices(int32_t& start,
int32_t& _length) const
{
// pin indices
int32_t len = length();
if(start < 0) {
start = 0;
} else if(start > fLength) {
start = fLength;
} else if(start > len) {
start = len;
}
if(_length < 0) {
_length = 0;
} else if(_length > (fLength - start)) {
_length = (fLength - start);
} else if(_length > (len - start)) {
_length = (len - start);
}
}
inline UChar*
UnicodeString::getArrayStart()
{ return fArray; }
{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
inline const UChar*
UnicodeString::getArrayStart() const
{ return fArray; }
{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
//========================================
// Read-only implementation methods
//========================================
inline int32_t
UnicodeString::length() const
{ return fLength; }
{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
inline int32_t
UnicodeString::getCapacity() const
{ return fCapacity; }
{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
inline int32_t
UnicodeString::hashCode() const
@ -3295,12 +3319,26 @@ inline UBool
UnicodeString::isBogus() const
{ return (UBool)(fFlags & kIsBogus); }
// Reports whether this object may be modified at all: true when neither
// the kOpenGetBuffer flag nor the kIsBogus flag is set in fFlags.
// This says nothing about whether the current buffer can be written in place.
inline UBool
UnicodeString::isWritable() const {
    return (UBool)((fFlags & (kIsBogus | kOpenGetBuffer)) == 0);
}
// Reports whether the current buffer can be written in place.
// Returns FALSE if any of kOpenGetBuffer, kIsBogus or kBufferIsReadonly is set.
// Otherwise a buffer that is not reference-counted is writable, and a
// reference-counted one is writable only while its reference count is exactly 1.
inline UBool
UnicodeString::isBufferWritable() const {
    if((fFlags & (kOpenGetBuffer | kIsBogus | kBufferIsReadonly)) != 0) {
        return (UBool)FALSE;
    }
    if((fFlags & kRefCounted) == 0) {
        return (UBool)TRUE;
    }
    // refCount() is consulted only for reference-counted buffers
    return (UBool)(refCount() == 1);
}
inline const UChar *
UnicodeString::getBuffer() const {
if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
return fArray;
} else {
if(fFlags&(kIsBogus|kOpenGetBuffer)) {
return 0;
} else if(fFlags&kUsingStackBuffer) {
return fUnion.fStackBuffer;
} else {
return fUnion.fFields.fArray;
}
}
@ -3318,7 +3356,7 @@ UnicodeString::doCompare(int32_t start,
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCompare(start, length, srcText.fArray, srcStart, srcLength);
return doCompare(start, length, srcText.getArrayStart(), srcStart, srcLength);
}
}
@ -3328,10 +3366,11 @@ UnicodeString::operator== (const UnicodeString& text) const
if(isBogus()) {
return text.isBogus();
} else {
int32_t len = length(), textLength = text.length();
return
!text.isBogus() &&
fLength == text.fLength &&
doCompare(0, fLength, text, 0, text.fLength) == 0;
len == textLength &&
doCompare(0, len, text, 0, textLength) == 0;
}
}
@ -3341,34 +3380,34 @@ UnicodeString::operator!= (const UnicodeString& text) const
inline UBool
UnicodeString::operator> (const UnicodeString& text) const
{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
{ return doCompare(0, length(), text, 0, text.length()) == 1; }
inline UBool
UnicodeString::operator< (const UnicodeString& text) const
{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
{ return doCompare(0, length(), text, 0, text.length()) == -1; }
inline UBool
UnicodeString::operator>= (const UnicodeString& text) const
{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
{ return doCompare(0, length(), text, 0, text.length()) != -1; }
inline UBool
UnicodeString::operator<= (const UnicodeString& text) const
{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
{ return doCompare(0, length(), text, 0, text.length()) != 1; }
inline int8_t
UnicodeString::compare(const UnicodeString& text) const
{ return doCompare(0, fLength, text, 0, text.fLength); }
{ return doCompare(0, length(), text, 0, text.length()); }
inline int8_t
UnicodeString::compare(int32_t start,
int32_t _length,
const UnicodeString& srcText) const
{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
{ return doCompare(start, _length, srcText, 0, srcText.length()); }
inline int8_t
UnicodeString::compare(const UChar *srcChars,
int32_t srcLength) const
{ return doCompare(0, fLength, srcChars, 0, srcLength); }
{ return doCompare(0, length(), srcChars, 0, srcLength); }
inline int8_t
UnicodeString::compare(int32_t start,
@ -3412,24 +3451,24 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
return doCompareCodePointOrder(start, length, srcText.getArrayStart(), srcStart, srcLength);
}
}
inline int8_t
UnicodeString::compareCodePointOrder(const UnicodeString& text) const
{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
int32_t _length,
const UnicodeString& srcText) const
{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
inline int8_t
UnicodeString::compareCodePointOrder(const UChar *srcChars,
int32_t srcLength) const
{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
@ -3474,13 +3513,13 @@ UnicodeString::doCaseCompare(int32_t start,
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
return doCaseCompare(start, length, srcText.getArrayStart(), srcStart, srcLength, options);
}
}
inline int8_t
UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
return doCaseCompare(0, fLength, text, 0, text.fLength, options);
return doCaseCompare(0, length(), text, 0, text.length(), options);
}
inline int8_t
@ -3488,14 +3527,14 @@ UnicodeString::caseCompare(int32_t start,
int32_t _length,
const UnicodeString &srcText,
uint32_t options) const {
return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
}
inline int8_t
UnicodeString::caseCompare(const UChar *srcChars,
int32_t srcLength,
uint32_t options) const {
return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
}
inline int8_t
@ -3554,27 +3593,27 @@ UnicodeString::indexOf(const UnicodeString& srcText,
inline int32_t
UnicodeString::indexOf(const UnicodeString& text) const
{ return indexOf(text, 0, text.fLength, 0, fLength); }
{ return indexOf(text, 0, text.length(), 0, length()); }
inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
int32_t start) const {
pinIndex(start);
return indexOf(text, 0, text.fLength, start, fLength - start);
return indexOf(text, 0, text.length(), start, length() - start);
}
inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
int32_t start,
int32_t _length) const
{ return indexOf(text, 0, text.fLength, start, _length); }
{ return indexOf(text, 0, text.length(), start, _length); }
inline int32_t
UnicodeString::indexOf(const UChar *srcChars,
int32_t srcLength,
int32_t start) const {
pinIndex(start);
return indexOf(srcChars, 0, srcLength, start, fLength - start);
return indexOf(srcChars, 0, srcLength, start, length() - start);
}
inline int32_t
@ -3598,24 +3637,24 @@ UnicodeString::indexOf(UChar32 c,
inline int32_t
UnicodeString::indexOf(UChar c) const
{ return doIndexOf(c, 0, fLength); }
{ return doIndexOf(c, 0, length()); }
inline int32_t
UnicodeString::indexOf(UChar32 c) const
{ return indexOf(c, 0, fLength); }
{ return indexOf(c, 0, length()); }
inline int32_t
UnicodeString::indexOf(UChar c,
int32_t start) const {
pinIndex(start);
return doIndexOf(c, start, fLength - start);
return doIndexOf(c, start, length() - start);
}
inline int32_t
UnicodeString::indexOf(UChar32 c,
int32_t start) const {
pinIndex(start);
return indexOf(c, start, fLength - start);
return indexOf(c, start, length() - start);
}
inline int32_t
@ -3630,7 +3669,7 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
int32_t srcLength,
int32_t start) const {
pinIndex(start);
return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
}
inline int32_t
@ -3653,18 +3692,18 @@ inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
int32_t start,
int32_t _length) const
{ return lastIndexOf(text, 0, text.fLength, start, _length); }
{ return lastIndexOf(text, 0, text.length(), start, _length); }
inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
int32_t start) const {
pinIndex(start);
return lastIndexOf(text, 0, text.fLength, start, fLength - start);
return lastIndexOf(text, 0, text.length(), start, length() - start);
}
inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text) const
{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
{ return lastIndexOf(text, 0, text.length(), 0, length()); }
inline int32_t
UnicodeString::lastIndexOf(UChar c,
@ -3681,30 +3720,30 @@ UnicodeString::lastIndexOf(UChar32 c,
inline int32_t
UnicodeString::lastIndexOf(UChar c) const
{ return doLastIndexOf(c, 0, fLength); }
{ return doLastIndexOf(c, 0, length()); }
inline int32_t
UnicodeString::lastIndexOf(UChar32 c) const {
return lastIndexOf(c, 0, fLength);
return lastIndexOf(c, 0, length());
}
inline int32_t
UnicodeString::lastIndexOf(UChar c,
int32_t start) const {
pinIndex(start);
return doLastIndexOf(c, start, fLength - start);
return doLastIndexOf(c, start, length() - start);
}
inline int32_t
UnicodeString::lastIndexOf(UChar32 c,
int32_t start) const {
pinIndex(start);
return lastIndexOf(c, start, fLength - start);
return lastIndexOf(c, start, length() - start);
}
inline UBool
UnicodeString::startsWith(const UnicodeString& text) const
{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
{ return compare(0, text.length(), text, 0, text.length()) == 0; }
inline UBool
UnicodeString::startsWith(const UnicodeString& srcText,
@ -3725,15 +3764,15 @@ UnicodeString::startsWith(const UChar *srcChars,
inline UBool
UnicodeString::endsWith(const UnicodeString& text) const
{ return doCompare(fLength - text.fLength, text.fLength,
text, 0, text.fLength) == 0; }
{ return doCompare(length() - text.length(), text.length(),
text, 0, text.length()) == 0; }
inline UBool
UnicodeString::endsWith(const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength) const {
srcText.pinIndices(srcStart, srcLength);
return doCompare(fLength - srcLength, srcLength,
return doCompare(length() - srcLength, srcLength,
srcText, srcStart, srcLength) == 0;
}
@ -3743,7 +3782,7 @@ UnicodeString::endsWith(const UChar *srcChars,
if(srcLength < 0) {
srcLength = u_strlen(srcChars);
}
return doCompare(fLength - srcLength, srcLength,
return doCompare(length() - srcLength, srcLength,
srcChars, 0, srcLength) == 0;
}
@ -3754,7 +3793,7 @@ UnicodeString::endsWith(const UChar *srcChars,
if(srcLength < 0) {
srcLength = u_strlen(srcChars + srcStart);
}
return doCompare(fLength - srcLength, srcLength,
return doCompare(length() - srcLength, srcLength,
srcChars, srcStart, srcLength) == 0;
}
@ -3765,7 +3804,7 @@ inline UnicodeString&
UnicodeString::replace(int32_t start,
int32_t _length,
const UnicodeString& srcText)
{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
{ return doReplace(start, _length, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::replace(int32_t start,
@ -3811,7 +3850,7 @@ inline UnicodeString&
UnicodeString::replaceBetween(int32_t start,
int32_t limit,
const UnicodeString& srcText)
{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::replaceBetween(int32_t start,
@ -3824,16 +3863,16 @@ UnicodeString::replaceBetween(int32_t start,
inline UnicodeString&
UnicodeString::findAndReplace(const UnicodeString& oldText,
const UnicodeString& newText)
{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
newText, 0, newText.fLength); }
{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
newText, 0, newText.length()); }
inline UnicodeString&
UnicodeString::findAndReplace(int32_t start,
int32_t _length,
const UnicodeString& oldText,
const UnicodeString& newText)
{ return findAndReplace(start, _length, oldText, 0, oldText.fLength,
newText, 0, newText.fLength); }
{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
newText, 0, newText.length()); }
// ============================
// extract
@ -3842,7 +3881,7 @@ inline void
UnicodeString::doExtract(int32_t start,
int32_t _length,
UnicodeString& target) const
{ target.replace(0, target.fLength, *this, start, _length); }
{ target.replace(0, target.length(), *this, start, _length); }
inline void
UnicodeString::extract(int32_t start,
@ -3885,8 +3924,8 @@ UnicodeString::extractBetween(int32_t start,
inline UChar
UnicodeString::doCharAt(int32_t offset) const
{
if((uint32_t)offset < (uint32_t)fLength) {
return fArray[offset];
if((uint32_t)offset < (uint32_t)length()) {
return getArrayStart()[offset];
} else {
return kInvalidUChar;
}
@ -3903,9 +3942,11 @@ UnicodeString::operator[] (int32_t offset) const
inline UChar32
UnicodeString::char32At(int32_t offset) const
{
if((uint32_t)offset < (uint32_t)fLength) {
int32_t len = length();
if((uint32_t)offset < (uint32_t)len) {
const UChar *array = getArrayStart();
UChar32 c;
U16_GET(fArray, 0, offset, fLength, c);
U16_GET(array, 0, offset, len, c);
return c;
} else {
return kInvalidUChar;
@ -3914,8 +3955,9 @@ UnicodeString::char32At(int32_t offset) const
inline int32_t
UnicodeString::getChar32Start(int32_t offset) const {
if((uint32_t)offset < (uint32_t)fLength) {
U16_SET_CP_START(fArray, 0, offset);
if((uint32_t)offset < (uint32_t)length()) {
const UChar *array = getArrayStart();
U16_SET_CP_START(array, 0, offset);
return offset;
} else {
return 0;
@ -3924,43 +3966,79 @@ UnicodeString::getChar32Start(int32_t offset) const {
inline int32_t
UnicodeString::getChar32Limit(int32_t offset) const {
if((uint32_t)offset < (uint32_t)fLength) {
U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
int32_t len = length();
if((uint32_t)offset < (uint32_t)len) {
const UChar *array = getArrayStart();
U16_SET_CP_LIMIT(array, 0, offset, len);
return offset;
} else {
return fLength;
return len;
}
}
inline UBool
UnicodeString::isEmpty() const {
return fLength == 0;
return fShortLength == 0;
}
//========================================
// Write implementation methods
//========================================
// Stores the string length without touching fFlags or the array.
// Lengths up to 127 live directly in fShortLength; larger lengths set
// fShortLength to -1 and are stored in fUnion.fFields.fLength.
inline void
UnicodeString::setLength(int32_t len) {
    if(len > 127) {
        // too large for the one-byte field: mark it negative, keep the real value in fFields
        fShortLength = (int8_t)-1;
        fUnion.fFields.fLength = len;
        return;
    }
    fShortLength = (int8_t)len;
}
// Makes this an empty short string: fFlags becomes kShortString and the
// short length becomes 0. Nothing else is modified here.
inline void
UnicodeString::setToEmpty() {
    fFlags = kShortString;
    fShortLength = 0;
}
// Marks this string as a short string (fFlags=kShortString) of the given length.
// len is narrowed to int8_t and stored in fShortLength; the buffer contents are not touched.
// NOTE(review): presumably callers only pass len values that fit the stack buffer -- confirm.
inline void
UnicodeString::setToStackBuffer(int32_t len) {
    fFlags = kShortString;
    fShortLength = (int8_t)len;
}
// Records an array pointer, its capacity and the string length in fUnion.fFields
// (the length goes through setLength()). fFlags is left unchanged.
inline void
UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
    fUnion.fFields.fCapacity = capacity;
    fUnion.fFields.fArray = array;
    setLength(len);
}
inline const UChar *
UnicodeString::getTerminatedBuffer() {
if(fFlags&(kIsBogus|kOpenGetBuffer)) {
if(!isWritable()) {
return 0;
} else if(fLength<fCapacity && fArray[fLength]==0) {
return fArray;
} else if(cloneArrayIfNeeded(fLength+1)) {
fArray[fLength]=0;
return fArray;
} else {
return 0;
UChar *array = getArrayStart();
int32_t len = length();
if(len < getCapacity() && array[len] == 0) {
return array;
} else if(cloneArrayIfNeeded(len+1)) {
array = getArrayStart();
array[len] = 0;
return array;
} else {
return 0;
}
}
}
inline UnicodeString&
UnicodeString::operator= (UChar ch)
{ return doReplace(0, fLength, &ch, 0, 1); }
{ return doReplace(0, length(), &ch, 0, 1); }
inline UnicodeString&
UnicodeString::operator= (UChar32 ch)
{ return replace(0, fLength, ch); }
{ return replace(0, length(), ch); }
inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText,
@ -3968,7 +4046,7 @@ UnicodeString::setTo(const UnicodeString& srcText,
int32_t srcLength)
{
unBogus();
return doReplace(0, fLength, srcText, srcStart, srcLength);
return doReplace(0, length(), srcText, srcStart, srcLength);
}
inline UnicodeString&
@ -3977,14 +4055,14 @@ UnicodeString::setTo(const UnicodeString& srcText,
{
unBogus();
srcText.pinIndex(srcStart);
return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
}
inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText)
{
unBogus();
return doReplace(0, fLength, srcText, 0, srcText.fLength);
return doReplace(0, length(), srcText, 0, srcText.length());
}
inline UnicodeString&
@ -3992,47 +4070,47 @@ UnicodeString::setTo(const UChar *srcChars,
int32_t srcLength)
{
unBogus();
return doReplace(0, fLength, srcChars, 0, srcLength);
return doReplace(0, length(), srcChars, 0, srcLength);
}
inline UnicodeString&
UnicodeString::setTo(UChar srcChar)
{
unBogus();
return doReplace(0, fLength, &srcChar, 0, 1);
return doReplace(0, length(), &srcChar, 0, 1);
}
inline UnicodeString&
UnicodeString::setTo(UChar32 srcChar)
{
unBogus();
return replace(0, fLength, srcChar);
return replace(0, length(), srcChar);
}
inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength)
{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText)
{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::append(const UChar *srcChars,
int32_t srcStart,
int32_t srcLength)
{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
inline UnicodeString&
UnicodeString::append(const UChar *srcChars,
int32_t srcLength)
{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
{ return doReplace(length(), 0, srcChars, 0, srcLength); }
inline UnicodeString&
UnicodeString::append(UChar srcChar)
{ return doReplace(fLength, 0, &srcChar, 0, 1); }
{ return doReplace(length(), 0, &srcChar, 0, 1); }
inline UnicodeString&
UnicodeString::append(UChar32 srcChar) {
@ -4040,12 +4118,12 @@ UnicodeString::append(UChar32 srcChar) {
int32_t _length = 0;
UBool isError = FALSE;
U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
return doReplace(fLength, 0, buffer, 0, _length);
return doReplace(length(), 0, buffer, 0, _length);
}
inline UnicodeString&
UnicodeString::operator+= (UChar ch)
{ return doReplace(fLength, 0, &ch, 0, 1); }
{ return doReplace(length(), 0, &ch, 0, 1); }
inline UnicodeString&
UnicodeString::operator+= (UChar32 ch) {
@ -4054,7 +4132,7 @@ UnicodeString::operator+= (UChar32 ch) {
inline UnicodeString&
UnicodeString::operator+= (const UnicodeString& srcText)
{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::insert(int32_t start,
@ -4066,7 +4144,7 @@ UnicodeString::insert(int32_t start,
inline UnicodeString&
UnicodeString::insert(int32_t start,
const UnicodeString& srcText)
{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
{ return doReplace(start, 0, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::insert(int32_t start,
@ -4099,7 +4177,7 @@ UnicodeString::remove()
if(isBogus()) {
unBogus();
} else {
fLength = 0;
setLength(0);
}
return *this;
}
@ -4127,8 +4205,8 @@ UnicodeString::truncate(int32_t targetLength)
// truncate(0) of a bogus string makes the string empty and non-bogus
unBogus();
return FALSE;
} else if((uint32_t)targetLength < (uint32_t)fLength) {
fLength = targetLength;
} else if((uint32_t)targetLength < (uint32_t)length()) {
setLength(targetLength);
return TRUE;
} else {
return FALSE;
@ -4137,7 +4215,7 @@ UnicodeString::truncate(int32_t targetLength)
inline UnicodeString&
UnicodeString::reverse()
{ return doReverse(0, fLength); }
{ return doReverse(0, length()); }
inline UnicodeString&
UnicodeString::reverse(int32_t start,

View File

@ -118,11 +118,11 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2) {
void
UnicodeString::addRef()
{ umtx_atomic_inc((int32_t *)fArray - 1);}
{ umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
int32_t
UnicodeString::removeRef()
{ return umtx_atomic_dec((int32_t *)fArray - 1);}
{ return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
int32_t
UnicodeString::refCount() const
@ -130,7 +130,7 @@ UnicodeString::refCount() const
umtx_lock(NULL);
// Note: without the lock to force a memory barrier, we might see a very
// stale value on some multi-processor systems.
int32_t count = *((int32_t *)fArray - 1);
int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
umtx_unlock(NULL);
return count;
}
@ -138,7 +138,7 @@ UnicodeString::refCount() const
void
UnicodeString::releaseArray() {
if((fFlags & kRefCounted) && removeRef() == 0) {
uprv_free((int32_t *)fArray - 1);
uprv_free((int32_t *)fUnion.fFields.fArray - 1);
}
}
@ -148,16 +148,12 @@ UnicodeString::releaseArray() {
// Constructors
//========================================
UnicodeString::UnicodeString()
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{}
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(0),
: fShortLength(0),
fFlags(0)
{
if(count <= 0 || (uint32_t)c > 0x10ffff) {
@ -170,13 +166,14 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
capacity = length;
}
if(allocate(capacity)) {
UChar *array = getArrayStart();
int32_t i = 0;
// fill the new string with c
if(unitCount == 1) {
// fill with length UChars
while(i < length) {
fArray[i++] = (UChar)c;
array[i++] = (UChar)c;
}
} else {
// get the code units for c
@ -191,40 +188,34 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
while(i < length) {
int32_t unitIdx = 0;
while(unitIdx < unitCount) {
fArray[i++]=units[unitIdx++];
array[i++]=units[unitIdx++];
}
}
}
}
fLength = length;
setLength(length);
}
}
UnicodeString::UnicodeString(UChar ch)
: fLength(1),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(1),
fFlags(kShortString)
{
fStackBuffer[0] = ch;
fUnion.fStackBuffer[0] = ch;
}
UnicodeString::UnicodeString(UChar32 ch)
: fLength(1),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
int32_t i = 0;
UBool isError = FALSE;
U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
fLength = i;
U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
fShortLength = (int8_t)i;
}
UnicodeString::UnicodeString(const UChar *text)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
doReplace(0, 0, text, 0, -1);
@ -232,9 +223,7 @@ UnicodeString::UnicodeString(const UChar *text)
UnicodeString::UnicodeString(const UChar *text,
int32_t textLength)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
doReplace(0, 0, text, 0, textLength);
@ -243,59 +232,52 @@ UnicodeString::UnicodeString(const UChar *text,
UnicodeString::UnicodeString(UBool isTerminated,
const UChar *text,
int32_t textLength)
: fLength(textLength),
fCapacity(isTerminated ? textLength + 1 : textLength),
fArray((UChar *)text),
: fShortLength(0),
fFlags(kReadonlyAlias)
{
if(text == NULL) {
// treat as an empty string, do not alias
fLength = 0;
fCapacity = US_STACKBUF_SIZE;
fArray = fStackBuffer;
fFlags = kShortString;
setToEmpty();
} else if(textLength < -1 ||
(textLength == -1 && !isTerminated) ||
(textLength >= 0 && isTerminated && text[textLength] != 0)
) {
setToBogus();
} else if(textLength == -1) {
// text is terminated, or else it would have failed the above test
fLength = u_strlen(text);
fCapacity = fLength + 1;
} else {
if(textLength == -1) {
// text is terminated, or else it would have failed the above test
textLength = u_strlen(text);
}
setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
}
}
UnicodeString::UnicodeString(UChar *buff,
int32_t buffLength,
int32_t buffCapacity)
: fLength(buffLength),
fCapacity(buffCapacity),
fArray(buff),
: fShortLength(0),
fFlags(kWritableAlias)
{
if(buff == NULL) {
// treat as an empty string, do not alias
fLength = 0;
fCapacity = US_STACKBUF_SIZE;
fArray = fStackBuffer;
fFlags = kShortString;
setToEmpty();
} else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
setToBogus();
} else if(buffLength == -1) {
// fLength = u_strlen(buff); but do not look beyond buffCapacity
const UChar *p = buff, *limit = buff + buffCapacity;
while(p != limit && *p != 0) {
++p;
} else {
if(buffLength == -1) {
// buffLength = u_strlen(buff); but do not look beyond buffCapacity
const UChar *p = buff, *limit = buff + buffCapacity;
while(p != limit && *p != 0) {
++p;
}
buffLength = (int32_t)(p - buff);
}
fLength = (int32_t)(p - buff);
setArray(buff, buffLength, buffCapacity);
}
}
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
if(src==NULL) {
@ -306,7 +288,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
}
if(cloneArrayIfNeeded(length, length, FALSE)) {
u_charsToUChars(src, getArrayStart(), length);
fLength = length;
setLength(length);
} else {
setToBogus();
}
@ -315,9 +297,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
UnicodeString::UnicodeString(const UnicodeString& that)
: Replaceable(),
fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fShortLength(0),
fFlags(kShortString)
{
copyFrom(that);
@ -326,9 +306,7 @@ UnicodeString::UnicodeString(const UnicodeString& that)
UnicodeString::UnicodeString(const UnicodeString& that,
int32_t srcStart)
: Replaceable(),
fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fShortLength(0),
fFlags(kShortString)
{
setTo(that, srcStart);
@ -338,9 +316,7 @@ UnicodeString::UnicodeString(const UnicodeString& that,
int32_t srcStart,
int32_t srcLength)
: Replaceable(),
fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fShortLength(0),
fFlags(kShortString)
{
setTo(that, srcStart, srcLength);
@ -365,8 +341,6 @@ UnicodeString::clone() const {
UBool
UnicodeString::allocate(int32_t capacity) {
if(capacity <= US_STACKBUF_SIZE) {
fArray = fStackBuffer;
fCapacity = US_STACKBUF_SIZE;
fFlags = kShortString;
} else {
// count bytes for the refCounter and the string capacity, and
@ -379,12 +353,13 @@ UnicodeString::allocate(int32_t capacity) {
*array++ = 1;
// have fArray point to the first UChar
fArray = (UChar *)array;
fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
fUnion.fFields.fArray = (UChar *)array;
fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
fFlags = kLongString;
} else {
fLength = 0;
fCapacity = 0;
fShortLength = 0;
fUnion.fFields.fArray = 0;
fUnion.fFields.fCapacity = 0;
fFlags = kIsBogus;
return FALSE;
}
@ -431,40 +406,38 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
// delete the current contents
releaseArray();
// we always copy the length
fLength = src.fLength;
if(fLength == 0) {
if(src.isEmpty()) {
// empty string - use the stack buffer
fArray = fStackBuffer;
fCapacity = US_STACKBUF_SIZE;
fFlags = kShortString;
setToEmpty();
return *this;
}
// we always copy the length
int32_t srcLength = src.length();
setLength(srcLength);
// fLength>0 and not an "open" src.getBuffer(minCapacity)
switch(src.fFlags) {
case kShortString:
// short string using the stack buffer, do the same
fArray = fStackBuffer;
fCapacity = US_STACKBUF_SIZE;
fFlags = kShortString;
uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR);
uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, fShortLength * U_SIZEOF_UCHAR);
break;
case kLongString:
// src uses a refCounted string buffer, use that buffer with refCount
// src is const, use a cast - we don't really change it
((UnicodeString &)src).addRef();
// copy all fields, share the reference-counted buffer
fArray = src.fArray;
fCapacity = src.fCapacity;
fUnion.fFields.fArray = src.fUnion.fFields.fArray;
fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
fFlags = src.fFlags;
break;
case kReadonlyAlias:
if(fastCopy) {
// src is a readonly alias, do the same
// -> maintain the readonly alias as such
fArray = src.fArray;
fCapacity = src.fCapacity;
fUnion.fFields.fArray = src.fUnion.fFields.fArray;
fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
fFlags = src.fFlags;
break;
}
@ -472,17 +445,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
// -> allocate a new buffer and copy the contents
case kWritableAlias:
// src is a writable alias; we make a copy of that instead
if(allocate(fLength)) {
uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR);
if(allocate(srcLength)) {
uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
break;
}
// if there is not enough memory, then fall through to setting to bogus
default:
// if src is bogus, set ourselves to bogus
// do not call setToBogus() here because fArray and fFlags are not consistent here
fArray = 0;
fLength = 0;
fCapacity = 0;
fShortLength = 0;
fUnion.fFields.fArray = 0;
fUnion.fFields.fCapacity = 0;
fFlags = kIsBogus;
break;
}
@ -495,17 +468,25 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
//========================================
// Returns a copy of this string in which every backslash (U+005C) escape
// sequence has been replaced by the code point that unescapeAt() decodes.
// If unescapeAt() reports an invalid escape, the result is emptied and
// returned as an empty string.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped, so two versions of the loop are interleaved.
// The charAt()-based loop that appends one code point per iteration looks
// like the removed version; the loop that walks getBuffer() and appends
// whole runs between escapes looks like the added version - confirm against
// the repository before editing.
UnicodeString UnicodeString::unescape() const {
UnicodeString result;
for (int32_t i=0; i<length(); ) {
UChar32 c = charAt(i++);
if (c == 0x005C /*'\\'*/) {
c = unescapeAt(i); // advances i
if (c == (UChar32)0xFFFFFFFF) {
UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
const UChar *array = getBuffer();
int32_t len = length();
// prev marks the start of the run of code units not yet appended to result
int32_t prev = 0;
for (int32_t i=0;;) {
if (i == len) {
// end of input: flush the trailing run
result.append(array, prev, len - prev);
break;
}
if (array[i++] == 0x5C /*'\\'*/) {
// flush the run before the backslash (i already points past it)
result.append(array, prev, (i - 1) - prev);
UChar32 c = unescapeAt(i); // advances i
if (c < 0) {
result.remove(); // return empty string
break; // invalid escape sequence
}
result.append(c);
// the next run starts right after the consumed escape sequence
prev = i;
}
result.append(c);
}
return result;
}
@ -616,7 +597,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
srcStart = srcLength = 0;
}
int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
/* translate the 32-bit result into an 8-bit one */
if(diff!=0) {
return (int8_t)(diff >> 15 | 1);
@ -644,29 +625,31 @@ int32_t
// Pins start/length via pinIndices() and returns the result of
// u_countChar32() for that range of the string's array.
// NOTE(review): two return statements appear because this span looks like a
// diff with stripped +/- markers; the fArray form is presumably the removed
// line and the getArrayStart() form the added one - confirm.
UnicodeString::countChar32(int32_t start, int32_t length) const {
pinIndices(start, length);
// if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
return u_countChar32(fArray+start, length);
return u_countChar32(getArrayStart()+start, length);
}
// Pins start/length via pinIndices() and returns the result of
// u_strHasMoreChar32Than() for that range of the string's array and the
// given number.
// NOTE(review): two return statements appear because this span looks like a
// diff with stripped +/- markers; the fArray form is presumably the removed
// line and the getArrayStart() form the added one - confirm.
UBool
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
pinIndices(start, length);
// if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
return u_strHasMoreChar32Than(fArray+start, length, number);
return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
}
int32_t
UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
// pin index
int32_t len = length();
if(index<0) {
index=0;
} else if(index>fLength) {
index=fLength;
} else if(index>len) {
index=len;
}
const UChar *array = getArrayStart();
if(delta>0) {
UTF_FWD_N(fArray, index, fLength, delta);
UTF_FWD_N(array, index, len, delta);
} else {
UTF_BACK_N(fArray, 0, index, -delta);
UTF_BACK_N(array, 0, index, -delta);
}
return index;
@ -682,26 +665,29 @@ UnicodeString::doExtract(int32_t start,
pinIndices(start, length);
// do not copy anything if we alias dst itself
if(fArray + start != dst + dstStart) {
us_arrayCopy(getArrayStart(), start, dst, dstStart, length);
const UChar *array = getArrayStart();
if(array + start != dst + dstStart) {
us_arrayCopy(array, start, dst, dstStart, length);
}
}
// Copies the string contents into dest and terminates/reports through
// u_terminateUChars().
//   dest         - destination buffer; may be 0 only when destCapacity is 0
//   destCapacity - size of dest in UChars; must not be negative
//   errorCode    - in/out; nothing is copied if it already indicates failure.
//                  Set to U_ILLEGAL_ARGUMENT_ERROR if this string is bogus
//                  or the dest/destCapacity arguments are invalid.
// Returns the value of u_terminateUChars() on the copy path, otherwise the
// string length.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; statements using fLength/fArray look like the
// removed version and those using len/array like the added one - confirm.
int32_t
UnicodeString::extract(UChar *dest, int32_t destCapacity,
UErrorCode &errorCode) const {
int32_t len = length();
if(U_SUCCESS(errorCode)) {
if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else {
if(fLength>0 && fLength<=destCapacity && fArray!=dest) {
uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR);
const UChar *array = getArrayStart();
// copy only if there is something to copy, it fits, and dest is not our own array
if(len>0 && len<=destCapacity && array!=dest) {
uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
}
return u_terminateUChars(dest, destCapacity, fLength, &errorCode);
return u_terminateUChars(dest, destCapacity, len, &errorCode);
}
}
// reached on pre-existing failure or after setting U_ILLEGAL_ARGUMENT_ERROR
return fLength;
return len;
}
int32_t
@ -755,11 +741,12 @@ UnicodeString::indexOf(const UChar *srcChars,
pinIndices(start, length);
// find the first occurrence of the substring
const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength);
const UChar *array = getArrayStart();
const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -772,11 +759,12 @@ UnicodeString::doIndexOf(UChar c,
pinIndices(start, length);
// find the first occurrence of c
const UChar *match = u_memchr(fArray + start, c, length);
const UChar *array = getArrayStart();
const UChar *match = u_memchr(array + start, c, length);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -788,11 +776,12 @@ UnicodeString::doIndexOf(UChar32 c,
pinIndices(start, length);
// find the first occurrence of c
const UChar *match = u_memchr32(fArray + start, c, length);
const UChar *array = getArrayStart();
const UChar *match = u_memchr32(array + start, c, length);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -816,11 +805,12 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
pinIndices(start, length);
// find the last occurrence of the substring
const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength);
const UChar *array = getArrayStart();
const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -837,11 +827,12 @@ UnicodeString::doLastIndexOf(UChar c,
pinIndices(start, length);
// find the last occurrence of c
const UChar *match = u_memrchr(fArray + start, c, length);
const UChar *array = getArrayStart();
const UChar *match = u_memrchr(array + start, c, length);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -853,11 +844,12 @@ UnicodeString::doLastIndexOf(UChar32 c,
pinIndices(start, length);
// find the last occurrence of c
const UChar *match = u_memrchr32(fArray + start, c, length);
const UChar *array = getArrayStart();
const UChar *match = u_memrchr32(array + start, c, length);
if(match == NULL) {
return -1;
} else {
return (int32_t)(match - fArray);
return (int32_t)(match - array);
}
}
@ -909,8 +901,9 @@ UnicodeString::setToBogus()
{
releaseArray();
fArray = 0;
fCapacity = fLength = 0;
fShortLength = 0;
fUnion.fFields.fArray = 0;
fUnion.fFields.fCapacity = 0;
fFlags = kIsBogus;
}
@ -918,10 +911,7 @@ UnicodeString::setToBogus()
// If the kIsBogus bit is set in fFlags, resets this object to an empty
// string; otherwise does nothing.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; the four field assignments look like the removed
// version and the single setToEmpty() call like the added one - confirm.
void
UnicodeString::unBogus() {
if(fFlags & kIsBogus) {
fArray = fStackBuffer;
fLength = 0;
fCapacity = US_STACKBUF_SIZE;
fFlags = kShortString;
setToEmpty();
}
}
@ -939,10 +929,7 @@ UnicodeString::setTo(UBool isTerminated,
if(text == NULL) {
// treat as an empty string, do not alias
releaseArray();
fLength = 0;
fCapacity = US_STACKBUF_SIZE;
fArray = fStackBuffer;
fFlags = kShortString;
setToEmpty();
return *this;
}
@ -956,15 +943,11 @@ UnicodeString::setTo(UBool isTerminated,
releaseArray();
fArray = (UChar *)text;
if(textLength != -1) {
fLength = textLength;
fCapacity = isTerminated ? fLength + 1 : fLength;
} else {
if(textLength == -1) {
// text is terminated, or else it would have failed the above test
fLength = u_strlen(text);
fCapacity = fLength + 1;
textLength = u_strlen(text);
}
setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
fFlags = kReadonlyAlias;
return *this;
@ -983,10 +966,7 @@ UnicodeString::setTo(UChar *buffer,
if(buffer == NULL) {
// treat as an empty string, do not alias
releaseArray();
fLength = 0;
fCapacity = US_STACKBUF_SIZE;
fArray = fStackBuffer;
fFlags = kShortString;
setToEmpty();
return *this;
}
@ -1004,9 +984,7 @@ UnicodeString::setTo(UChar *buffer,
releaseArray();
fArray = buffer;
fLength = buffLength;
fCapacity = buffCapacity;
setArray(buffer, buffLength, buffCapacity);
fFlags = kWritableAlias;
return *this;
}
@ -1015,14 +993,15 @@ UnicodeString&
// Overwrites the code unit at offset with c and returns *this.
//   offset - index to write; values below 0 are pinned to 0 and values at
//            or beyond the length are pinned to the last index
//   c      - code unit to store
// Nothing is written if cloneArrayIfNeeded() returns false or the string is
// empty.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; statements using fLength/fArray look like the
// removed version and those using len/getArrayStart() like the added one -
// confirm.
UnicodeString::setCharAt(int32_t offset,
UChar c)
{
if(cloneArrayIfNeeded() && fLength > 0) {
int32_t len = length();
if(cloneArrayIfNeeded() && len > 0) {
if(offset < 0) {
offset = 0;
} else if(offset >= fLength) {
offset = fLength - 1;
} else if(offset >= len) {
offset = len - 1;
}
fArray[offset] = c;
getArrayStart()[offset] = c;
}
return *this;
}
@ -1054,7 +1033,7 @@ UnicodeString::doReplace(int32_t start,
int32_t srcStart,
int32_t srcLength)
{
if(isBogus()) {
if(!isWritable()) {
return *this;
}
@ -1065,20 +1044,45 @@ UnicodeString::doReplace(int32_t start,
srcLength = u_strlen(srcChars + srcStart);
}
int32_t *bufferToDelete = 0;
int32_t oldLength = this->length();
// calculate the size of the string after the replace
int32_t newSize;
// optimize append() onto a large-enough, owned string
if(start >= oldLength) {
newSize = oldLength + srcLength;
if(newSize <= getCapacity() && isBufferWritable()) {
us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
setLength(newSize);
return *this;
} else {
// pin the indices to legal values
start = oldLength;
length = 0;
}
} else {
// pin the indices to legal values
pinIndices(start, length);
newSize = oldLength - length + srcLength;
}
// the following may change fArray but will not copy the current contents;
// therefore we need to keep the current fArray
UChar *oldArray = fArray;
int32_t oldLength = fLength;
// pin the indices to legal values
pinIndices(start, length);
// calculate the size of the string after the replace
int32_t newSize = oldLength - length + srcLength;
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
// copy the stack buffer contents because it will be overwritten with
// fUnion.fFields values
u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
oldArray = oldStackBuffer;
} else {
oldArray = getArrayStart();
}
// clone our array and allocate a bigger array if needed
int32_t *bufferToDelete = 0;
if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
FALSE, &bufferToDelete)
) {
@ -1087,23 +1091,24 @@ UnicodeString::doReplace(int32_t start,
// now do the replace
if(fArray != oldArray) {
UChar *newArray = getArrayStart();
if(newArray != oldArray) {
// if fArray changed, then we need to copy everything except what will change
us_arrayCopy(oldArray, 0, fArray, 0, start);
us_arrayCopy(oldArray, 0, newArray, 0, start);
us_arrayCopy(oldArray, start + length,
fArray, start + srcLength,
newArray, start + srcLength,
oldLength - (start + length));
} else if(length != srcLength) {
// fArray did not change; copy only the portion that isn't changing, leaving a hole
us_arrayCopy(oldArray, start + length,
fArray, start + srcLength,
newArray, start + srcLength,
oldLength - (start + length));
}
// now fill in the hole with the new string
us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength);
us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
fLength = newSize;
setLength(newSize);
// delayed delete in case srcChars == fArray when we started, and
// to keep oldArray alive for the above operations
@ -1159,7 +1164,7 @@ UnicodeString&
UnicodeString::doReverse(int32_t start,
int32_t length)
{
if(fLength <= 1 || !cloneArrayIfNeeded()) {
if(this->length() <= 1 || !cloneArrayIfNeeded()) {
return *this;
}
@ -1167,7 +1172,7 @@ UnicodeString::doReverse(int32_t start,
pinIndices(start, length);
UChar *left = getArrayStart() + start;
UChar *right = getArrayStart() + start + length;
UChar *right = left + length;
UChar swap;
UBool hasSupplementary = FALSE;
@ -1182,7 +1187,7 @@ UnicodeString::doReverse(int32_t start,
UChar swap2;
left = getArrayStart() + start;
right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left<right
right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
while(left < right) {
if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
*left++ = swap2;
@ -1200,18 +1205,20 @@ UBool
// Pads the front of the string with padChar until its length equals
// targetLength.
//   targetLength - desired length after padding
//   padChar      - code unit used to fill the new leading positions
// Returns FALSE without changing the contents if the string is already at
// least targetLength long or cloneArrayIfNeeded(targetLength) fails;
// returns TRUE after padding.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; statements using fLength/fArray look like the
// removed version and those using oldLength/array/setLength() like the
// added one - confirm.
UnicodeString::padLeading(int32_t targetLength,
UChar padChar)
{
if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
int32_t oldLength = length();
if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
return FALSE;
} else {
// move contents up by padding width
int32_t start = targetLength - fLength;
us_arrayCopy(fArray, 0, fArray, start, fLength);
UChar *array = getArrayStart();
int32_t start = targetLength - oldLength;
// NOTE(review): source and destination ranges overlap here; presumably
// us_arrayCopy is overlap-safe - confirm against its definition
us_arrayCopy(array, 0, array, start, oldLength);
// fill in padding character
while(--start >= 0) {
fArray[start] = padChar;
array[start] = padChar;
}
fLength = targetLength;
setLength(targetLength);
return TRUE;
}
}
@ -1220,15 +1227,17 @@ UBool
// Pads the end of the string with padChar until its length equals
// targetLength.
//   targetLength - desired length after padding
//   padChar      - code unit used to fill the new trailing positions
// Returns FALSE without changing the contents if the string is already at
// least targetLength long or cloneArrayIfNeeded(targetLength) fails;
// returns TRUE after padding.
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; statements using fLength/fArray look like the
// removed version and those using oldLength/array/setLength() like the
// added one - confirm.
UnicodeString::padTrailing(int32_t targetLength,
UChar padChar)
{
if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
int32_t oldLength = length();
if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
return FALSE;
} else {
// fill in padding character
UChar *array = getArrayStart();
// walk backwards from the new last index down to the old end
int32_t length = targetLength;
while(--length >= fLength) {
fArray[length] = padChar;
while(--length >= oldLength) {
array[length] = padChar;
}
fLength = targetLength;
setLength(targetLength);
return TRUE;
}
}
@ -1241,7 +1250,7 @@ UnicodeString::doHashCode() const
{
/* Delegate hash computation to uhash. This makes UnicodeString
* hashing consistent with UChar* hashing. */
int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength);
int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
if (hashCode == kInvalidHashCode) {
hashCode = kEmptyHashCode;
}
@ -1256,8 +1265,8 @@ UChar *
UnicodeString::getBuffer(int32_t minCapacity) {
if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
fFlags|=kOpenGetBuffer;
fLength=0;
return fArray;
fShortLength=0;
return getArrayStart();
} else {
return 0;
}
@ -1267,18 +1276,18 @@ void
// Ends an "open" buffer state: acts only when kOpenGetBuffer is set in
// fFlags and newLength >= -1, then clears kOpenGetBuffer.
//   newLength - new string length; -1 means "scan for the first NUL",
//               limited to the capacity; a value larger than the capacity
//               is reduced to the capacity
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped; statements using fLength/fArray/fCapacity look like
// the removed version and those using capacity/array/setLength() like the
// added one - confirm.
UnicodeString::releaseBuffer(int32_t newLength) {
if(fFlags&kOpenGetBuffer && newLength>=-1) {
// set the new fLength
int32_t capacity=getCapacity();
if(newLength==-1) {
// the new length is the string length, capped by fCapacity
const UChar *p=fArray, *limit=fArray+fCapacity;
const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
while(p<limit && *p!=0) {
++p;
}
fLength=(int32_t)(p-fArray);
} else if(newLength<=fCapacity) {
fLength=newLength;
} else {
fLength=fCapacity;
newLength=(int32_t)(p-array);
} else if(newLength>capacity) {
newLength=capacity;
}
setLength(newLength);
// the buffer is no longer open
fFlags&=~kOpenGetBuffer;
}
}
@ -1295,13 +1304,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
// default parameters need to be static, therefore
// the defaults are -1 to have convenience defaults
if(newCapacity == -1) {
newCapacity = fCapacity;
newCapacity = getCapacity();
}
// while a getBuffer(minCapacity) is "open",
// prevent any modifications of the string by returning FALSE here
// if the string is bogus, then only an assignment or similar can revive it
if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) {
if(!isWritable()) {
return FALSE;
}
@ -1315,12 +1324,8 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
if(forceClone ||
fFlags & kBufferIsReadonly ||
fFlags & kRefCounted && refCount() > 1 ||
newCapacity > fCapacity
newCapacity > getCapacity()
) {
// save old values
UChar *array = fArray;
uint16_t flags = fFlags;
// check growCapacity for default value and use of the stack buffer
if(growCapacity == -1) {
growCapacity = newCapacity;
@ -1328,25 +1333,46 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
growCapacity = US_STACKBUF_SIZE;
}
// save old values
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
uint8_t flags = fFlags;
if(flags&kUsingStackBuffer) {
if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
// copy the stack buffer contents because it will be overwritten with
// fUnion.fFields values
us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
oldArray = oldStackBuffer;
} else {
oldArray = 0; // no need to copy from stack buffer to itself
}
} else {
oldArray = fUnion.fFields.fArray;
}
// allocate a new array
if(allocate(growCapacity) ||
newCapacity < growCapacity && allocate(newCapacity)
) {
if(doCopyArray) {
if(doCopyArray && oldArray != 0) {
// copy the contents
// do not copy more than what fits - it may be smaller than before
if(fCapacity < fLength) {
fLength = fCapacity;
int32_t minLength = length();
newCapacity = getCapacity();
if(newCapacity < minLength) {
minLength = newCapacity;
setLength(minLength);
}
us_arrayCopy(array, 0, fArray, 0, fLength);
us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
} else {
fLength = 0;
fShortLength = 0;
}
// release the old array
if(flags & kRefCounted) {
// the array is refCounted; decrement and release if 0
int32_t *pRefCount = ((int32_t *)array - 1);
int32_t *pRefCount = ((int32_t *)oldArray - 1);
if(umtx_atomic_dec(pRefCount) == 0) {
if(pBufferToDelete == 0) {
uprv_free(pRefCount);
@ -1359,7 +1385,9 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
} else {
// not enough memory for growCapacity and not even for the smaller newCapacity
// reset the old values for setToBogus() to release the array
fArray = array;
if(!(flags&kUsingStackBuffer)) {
fUnion.fFields.fArray = oldArray;
}
fFlags = flags;
setToBogus();
return FALSE;

View File

@ -95,7 +95,7 @@ UnicodeString::caseMap(BreakIterator *titleIter,
const char *locale,
uint32_t options,
int32_t toWhichCase) {
if(fLength <= 0) {
if(isEmpty() || !isWritable()) {
// nothing to do
return *this;
}
@ -110,54 +110,62 @@ UnicodeString::caseMap(BreakIterator *titleIter,
}
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() below keeps the old array pointer
// This is very similar to how doReplace() keeps the old array pointer
// and deletes the old array itself after it is done.
// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
UChar *oldArray = fArray;
int32_t oldLength = fLength;
int32_t *bufferToDelete = 0;
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
int32_t oldLength;
// Make sure that if the string is in fStackBuffer we do not overwrite it!
int32_t capacity;
if(fLength <= US_STACKBUF_SIZE) {
if(fArray == fStackBuffer) {
capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
} else {
capacity = US_STACKBUF_SIZE;
}
if(fFlags&kUsingStackBuffer) {
// copy the stack buffer contents because it will be overwritten
u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
oldArray = oldStackBuffer;
oldLength = fShortLength;
} else {
capacity = fLength + 20;
oldArray = getArrayStart();
oldLength = length();
}
int32_t capacity;
if(oldLength <= US_STACKBUF_SIZE) {
capacity = US_STACKBUF_SIZE;
} else {
capacity = oldLength + 20;
}
int32_t *bufferToDelete = 0;
if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
return *this;
}
// Case-map, and if the result is too long, then reallocate and repeat.
int32_t newLength;
do {
errorCode = U_ZERO_ERROR;
if(toWhichCase==TO_LOWER) {
fLength = ustr_toLower(csp, fArray, fCapacity,
oldArray, oldLength,
locale, &errorCode);
newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
locale, &errorCode);
} else if(toWhichCase==TO_UPPER) {
fLength = ustr_toUpper(csp, fArray, fCapacity,
oldArray, oldLength,
locale, &errorCode);
newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
locale, &errorCode);
} else if(toWhichCase==TO_TITLE) {
#if UCONFIG_NO_BREAK_ITERATION
errorCode=U_UNSUPPORTED_ERROR;
#else
fLength = ustr_toTitle(csp, fArray, fCapacity,
oldArray, oldLength,
(UBreakIterator *)titleIter, locale, options, &errorCode);
newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
(UBreakIterator *)titleIter, locale, options, &errorCode);
#endif
} else {
fLength = ustr_foldCase(csp, fArray, fCapacity,
oldArray, oldLength,
options,
&errorCode);
newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
options,
&errorCode);
}
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
setLength(newLength);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
if (bufferToDelete) {
uprv_free(bufferToDelete);

View File

@ -38,9 +38,7 @@ U_NAMESPACE_BEGIN
UnicodeString::UnicodeString(const char *codepageData,
const char *codepage)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
if(codepageData != 0) {
@ -52,9 +50,7 @@ UnicodeString::UnicodeString(const char *codepageData,
UnicodeString::UnicodeString(const char *codepageData,
int32_t dataLength,
const char *codepage)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
if(codepageData != 0) {
@ -65,9 +61,7 @@ UnicodeString::UnicodeString(const char *codepageData,
UnicodeString::UnicodeString(const char *src, int32_t srcLength,
UConverter *cnv,
UErrorCode &errorCode)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
: fShortLength(0),
fFlags(kShortString)
{
if(U_SUCCESS(errorCode)) {
@ -183,7 +177,7 @@ UnicodeString::extract(char *dest, int32_t destCapacity,
}
// nothing to do?
if(fLength<=0) {
if(isEmpty()) {
return u_terminateChars(dest, destCapacity, 0, &errorCode);
}
@ -201,14 +195,14 @@ UnicodeString::extract(char *dest, int32_t destCapacity,
}
// convert
int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode);
int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
// release the converter
if(isDefaultConverter) {
u_releaseDefaultConverter(cnv);
}
return length;
return len;
}
int32_t
@ -224,7 +218,7 @@ UnicodeString::doExtract(int32_t start, int32_t length,
return 0;
}
const UChar *src=fArray+start, *srcLimit=src+length;
const UChar *src=getArrayStart()+start, *srcLimit=src+length;
char *originalDest=dest;
const char *destLimit;
@ -294,7 +288,7 @@ UnicodeString::doCodepageCreate(const char *codepageData,
// use the "invariant characters" conversion
if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
u_charsToUChars(codepageData, getArrayStart(), dataLength);
fLength = dataLength;
setLength(dataLength);
} else {
setToBogus();
}
@ -328,11 +322,17 @@ UnicodeString::doCodepageCreate(const char *codepageData,
// set up the conversion parameters
const char *mySource = codepageData;
const char *mySourceEnd = mySource + dataLength;
UChar *myTarget;
UChar *array, *myTarget;
// estimate the size needed:
// 1.25 UChar's per source byte should cover most cases
int32_t arraySize = dataLength + (dataLength >> 2);
int32_t arraySize;
if(dataLength <= US_STACKBUF_SIZE) {
// try to use the stack buffer
arraySize = US_STACKBUF_SIZE;
} else {
// 1.25 UChar's per source byte should cover most cases
arraySize = dataLength + (dataLength >> 2);
}
// we do not care about the current contents
UBool doCopyArray = FALSE;
@ -343,12 +343,13 @@ UnicodeString::doCodepageCreate(const char *codepageData,
}
// perform the conversion
myTarget = fArray + fLength;
ucnv_toUnicode(converter, &myTarget, fArray + fCapacity,
array = getArrayStart();
myTarget = array + length();
ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
&mySource, mySourceEnd, 0, TRUE, &status);
// update the conversion parameters
fLength = (int32_t)(myTarget - fArray);
setLength((int32_t)(myTarget - array));
// allocate more space and copy data, if needed
if(status == U_BUFFER_OVERFLOW_ERROR) {
@ -360,7 +361,7 @@ UnicodeString::doCodepageCreate(const char *codepageData,
// estimate the new size needed, larger than before
// try 2 UChar's per remaining source byte
arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource));
arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
} else {
break;
}

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2006, International Business Machines
* Copyright (C) 1999-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -29,8 +29,10 @@ UnicodeString::trim()
return *this;
}
UChar *array = getArrayStart();
UChar32 c;
int32_t i = fLength, length;
int32_t oldLength = this->length();
int32_t i = oldLength, length;
// first cut off trailing white space
for(;;) {
@ -38,13 +40,13 @@ UnicodeString::trim()
if(i <= 0) {
break;
}
UTF_PREV_CHAR(fArray, 0, i, c);
U16_PREV(array, 0, i, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}
}
if(length < fLength) {
fLength = length;
if(length < oldLength) {
setLength(length);
}
// find leading white space
@ -55,7 +57,7 @@ UnicodeString::trim()
if(i >= length) {
break;
}
UTF_NEXT_CHAR(fArray, i, length, c);
U16_NEXT(array, i, length, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}