ICU-312 jitterbug 136: new UnicodeString memory allocation/aliasing implementation
X-SVN-Rev: 1012
This commit is contained in:
parent
0ebfa05d46
commit
eb3ba827c6
@ -142,7 +142,7 @@ UnicodeConverterCPP::fromUnicodeString(char* target,
|
|||||||
|
|
||||||
|
|
||||||
mySourceLength = source.length();
|
mySourceLength = source.length();
|
||||||
mySource = source.getUChars();
|
mySource = source.getArrayStart();
|
||||||
myTarget = target;
|
myTarget = target;
|
||||||
ucnv_fromUnicode(&myConverter,
|
ucnv_fromUnicode(&myConverter,
|
||||||
&myTarget,
|
&myTarget,
|
||||||
|
@ -13,26 +13,32 @@
|
|||||||
* Simple utility to set output buffer parameters
|
* Simple utility to set output buffer parameters
|
||||||
******************************************************/
|
******************************************************/
|
||||||
void T_fillOutputParams(const UnicodeString* temp,
|
void T_fillOutputParams(const UnicodeString* temp,
|
||||||
UChar* result,
|
UChar* result,
|
||||||
const int32_t resultLength,
|
const int32_t resultLength,
|
||||||
int32_t* resultLengthOut,
|
int32_t* resultLengthOut,
|
||||||
UErrorCode* status)
|
UErrorCode* status)
|
||||||
{
|
{
|
||||||
|
int32_t actual = temp->length();
|
||||||
const int32_t actual = temp->length();
|
|
||||||
const bool_t overflowed = actual >= resultLength;
|
|
||||||
const int32_t returnedSize = uprv_min(actual, resultLength-1);
|
|
||||||
if ((temp->length() < resultLength) && (result != temp->getUChars()) && (returnedSize > 0)) {
|
|
||||||
u_strcpy(result, temp->getUChars());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (resultLength > 0) {
|
if (resultLength > 0) {
|
||||||
result[returnedSize] = 0;
|
// copy the contents; extract() will check if it needs to copy anything at all
|
||||||
}
|
temp->extract(0, resultLength - 1, result, 0);
|
||||||
if (resultLengthOut) {
|
|
||||||
*resultLengthOut = actual;
|
// zero-terminate the result buffer
|
||||||
if (U_SUCCESS(*status) && overflowed) {
|
if (actual < resultLength) {
|
||||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
result[actual] = 0;
|
||||||
|
} else {
|
||||||
|
result[resultLength - 1] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set the output length to the actual string length
|
||||||
|
if (resultLengthOut != 0) {
|
||||||
|
*resultLengthOut = actual;
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the error code according to the necessary buffer length
|
||||||
|
if (actual >= resultLength && U_SUCCESS(*status)) {
|
||||||
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
* getLanguagesForCountry()
|
* getLanguagesForCountry()
|
||||||
* 03/16/99 bertrand rehaul.
|
* 03/16/99 bertrand rehaul.
|
||||||
* 07/21/99 stephen Added U_CFUNC setDefault
|
* 07/21/99 stephen Added U_CFUNC setDefault
|
||||||
* 11/09/99 weiv Added const char * getName() const;
|
* 11/09/99 weiv Added const char * getName() const;
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -322,32 +322,7 @@ void
|
|||||||
Locale::setHashCode()
|
Locale::setHashCode()
|
||||||
{
|
{
|
||||||
UnicodeString fullNameUString(language, "");
|
UnicodeString fullNameUString(language, "");
|
||||||
fullNameUString += UnicodeString(country, "");
|
khashCode = fullNameUString.append(UnicodeString(country, "")).append(UnicodeString(variant, "")).hashCode();
|
||||||
fullNameUString += UnicodeString(variant, "");
|
|
||||||
const UChar *key = fullNameUString.getUChars();
|
|
||||||
int32_t len = fullNameUString.length();
|
|
||||||
int32_t hash = 0;
|
|
||||||
const UChar *limit = key + len;
|
|
||||||
int32_t inc = (len >= 128 ? len/64 : 1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
We compute the hash by iterating sparsely over 64 (at most) characters
|
|
||||||
spaced evenly through the string. For each character, we multiply the
|
|
||||||
previous hash value by a prime number and add the new character in,
|
|
||||||
in the manner of a additive linear congruential random number generator,
|
|
||||||
thus producing a pseudorandom deterministic value which should be well
|
|
||||||
distributed over the output range. [LIU]
|
|
||||||
*/
|
|
||||||
|
|
||||||
while(key < limit)
|
|
||||||
{
|
|
||||||
hash = (hash * 37) + (char)*key;
|
|
||||||
key += inc;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(hash == 0) hash = 1;
|
|
||||||
|
|
||||||
khashCode = hash & 0x7FFFFFFF;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -753,13 +728,15 @@ Locale::getLanguagesForCountry(const UnicodeString& country, int32_t& count)
|
|||||||
// lookups.
|
// lookups.
|
||||||
if(ctry2LangMapping == 0) {
|
if(ctry2LangMapping == 0) {
|
||||||
UErrorCode err = U_ZERO_ERROR;
|
UErrorCode err = U_ZERO_ERROR;
|
||||||
UHashtable *temp = uhash_open(uhash_hashUChars, uhash_compareUChars, &err);
|
UHashtable *temp = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, &err);
|
||||||
if (U_FAILURE(err))
|
if (U_FAILURE(err))
|
||||||
{
|
{
|
||||||
count = 0;
|
count = 0;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uhash_setKeyDeleter(temp, uhash_deleteUnicodeString);
|
||||||
|
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
int32_t j;
|
int32_t j;
|
||||||
int32_t count = sizeof(compressedCtry2LangMapping) / sizeof(compressedCtry2LangMapping[0]);
|
int32_t count = sizeof(compressedCtry2LangMapping) / sizeof(compressedCtry2LangMapping[0]);
|
||||||
@ -768,15 +745,15 @@ Locale::getLanguagesForCountry(const UnicodeString& country, int32_t& count)
|
|||||||
compressedCtry2LangMapping.extractBetween(i, i + 2, key);
|
compressedCtry2LangMapping.extractBetween(i, i + 2, key);
|
||||||
i += 2;
|
i += 2;
|
||||||
for(j = i; j < count; j += 2)
|
for(j = i; j < count; j += 2)
|
||||||
if(Unicode::isUpperCase(compressedCtry2LangMapping[j]))
|
if(Unicode::isUpperCase(compressedCtry2LangMapping[j]))
|
||||||
break;
|
break;
|
||||||
UnicodeString compressedValues;
|
UnicodeString compressedValues;
|
||||||
compressedCtry2LangMapping.extractBetween(i, j, compressedValues);
|
compressedCtry2LangMapping.extractBetween(i, j, compressedValues);
|
||||||
UnicodeString *values = new UnicodeString[compressedValues.length() / 2];
|
UnicodeString *values = new UnicodeString[compressedValues.length() / 2];
|
||||||
int32_t valLen = sizeof(values) / sizeof(values[0]);
|
int32_t valLen = sizeof(values) / sizeof(values[0]);
|
||||||
for (int32_t k = 0; k < valLen; ++k)
|
for (int32_t k = 0; k < valLen; ++k)
|
||||||
compressedValues.extractBetween(k * 2, (k * 2) + 2, values[k]);
|
compressedValues.extractBetween(k * 2, (k * 2) + 2, values[k]);
|
||||||
uhash_put(temp, (void*)key.getUChars(), values, &err);
|
uhash_put(temp, new UnicodeString(key), values, &err);
|
||||||
i = j;
|
i = j;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -786,9 +763,8 @@ Locale::getLanguagesForCountry(const UnicodeString& country, int32_t& count)
|
|||||||
else
|
else
|
||||||
ctry2LangMapping = temp;
|
ctry2LangMapping = temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
const UnicodeString *result = (const UnicodeString*)
|
const UnicodeString *result = (const UnicodeString*)uhash_get(ctry2LangMapping, &country);
|
||||||
uhash_get(ctry2LangMapping, country.getUChars());
|
|
||||||
if(result == 0)
|
if(result == 0)
|
||||||
count = 0;
|
count = 0;
|
||||||
else
|
else
|
||||||
|
@ -36,7 +36,7 @@
|
|||||||
/* UnicodeString stuff */
|
/* UnicodeString stuff */
|
||||||
typedef struct UnicodeString UnicodeString;
|
typedef struct UnicodeString UnicodeString;
|
||||||
|
|
||||||
U_CAPI const UChar* T_UnicodeString_getUChars(const UnicodeString *s);
|
U_CFUNC int32_t T_UnicodeString_length(const UnicodeString *s);
|
||||||
|
|
||||||
U_CAPI int32_t
|
U_CAPI int32_t
|
||||||
T_UnicodeString_extract(const UnicodeString *s, char *dst);
|
T_UnicodeString_extract(const UnicodeString *s, char *dst);
|
||||||
@ -1051,7 +1051,7 @@ void _lazyEvaluate_installedLocales()
|
|||||||
|
|
||||||
for (i = 0; i < _installedLocalesCount; i++)
|
for (i = 0; i < _installedLocalesCount; i++)
|
||||||
{
|
{
|
||||||
strSize = u_strlen(T_UnicodeString_getUChars(temp[i]));
|
strSize = T_UnicodeString_length(temp[i]);
|
||||||
|
|
||||||
temp2[i] = (char*) uprv_malloc(sizeof(char) *
|
temp2[i] = (char*) uprv_malloc(sizeof(char) *
|
||||||
(strSize + 1));
|
(strSize + 1));
|
||||||
|
@ -27,12 +27,14 @@
|
|||||||
#include "unicode/ucnv.h"
|
#include "unicode/ucnv.h"
|
||||||
#include "unicode/rep.h"
|
#include "unicode/rep.h"
|
||||||
|
|
||||||
// Size of stack buffer for small strings
|
|
||||||
#define US_STACKBUF_SIZE 10
|
|
||||||
|
|
||||||
class Locale;
|
class Locale;
|
||||||
class UCharReference;
|
class UCharReference;
|
||||||
class UnicodeStringStreamer;
|
class UnicodeStringStreamer;
|
||||||
|
class UnicodeConverterCPP;
|
||||||
|
|
||||||
|
// for unistrm.h
|
||||||
|
class ostream;
|
||||||
|
U_COMMON_API ostream &operator<<(ostream& stream, const UnicodeString& s);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unicode String literals in C++.
|
* Unicode String literals in C++.
|
||||||
@ -59,9 +61,43 @@ class UnicodeStringStreamer;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* UnicodeString is a concrete implementation of the abstract class
|
* UnicodeString is a concrete implementation of the abstract class Replaceable.
|
||||||
* UnicodeText. UnicodeString performs codeset conversion from char*
|
* It is a string class that stores Unicode characters directly and provides
|
||||||
* data based on the type of data specified.
|
* functionality similar to that of the Java String class.
|
||||||
|
*
|
||||||
|
* UnicodeString uses four storage models:
|
||||||
|
* <ol>
|
||||||
|
* <li>Short strings are normally stored inside the UnicodeString object itself.
|
||||||
|
* The object has fields for the "bookkeeping" and a small UChar array.
|
||||||
|
* When the object is copied, then the internal characters are copied
|
||||||
|
* into the destination object.</li>
|
||||||
|
* <li>Longer strings are normally stored in allocated memory.
|
||||||
|
* The allocated UChar array is preceded by a reference counter.
|
||||||
|
* When the string object is copied, then the allocated buffer is shared by
|
||||||
|
* incrementing the reference counter.</li>
|
||||||
|
* <li>A UnicodeString can be constructed or setTo() such that it aliases a read-only
|
||||||
|
* buffer instead of copying the characters. In this case, the string object
|
||||||
|
* uses this aliased buffer for as long as it is not modified, and it will never
|
||||||
|
* attempt to modify or release the buffer. This has copy-on-write semantics:
|
||||||
|
* When the string object is modified, then the buffer contents are first copied
|
||||||
|
* into writeable memory (inside the object for short strings, or allocated
|
||||||
|
* buffer for longer strings). When a UnicodeString with a read-only alias
|
||||||
|
* is assigned to another UnicodeString, then both string objects will
|
||||||
|
* share the same read-only alias.</li>
|
||||||
|
* <li>A UnicodeString can be constructed or setTo() such that it aliases a writeable
|
||||||
|
* buffer instead of copying the characters. The difference from the above is that
|
||||||
|
* the string object will write through to this aliased buffer for write
|
||||||
|
* operations. Only when the capacity of the buffer is not sufficient is
|
||||||
|
* a new buffer allocated and the contents copied.
|
||||||
|
* An efficient way to get the string contents into the original buffer is
|
||||||
|
* to use the extract(..., UChar *dst, ...) function: It will only copy the
|
||||||
|
* string contents if the dst buffer is different from the buffer of the string
|
||||||
|
* object itself. If a string grows and shrinks during a sequence of operations,
|
||||||
|
* then it will not use the same buffer any more, but may fit into it again.
|
||||||
|
* When a UnicodeString with a writeable alias is assigned to another UnicodeString,
|
||||||
|
* then the contents are always copied. The destination string will not alias
|
||||||
|
* to the buffer that the source string aliases.</li>
|
||||||
|
* </ol>
|
||||||
*/
|
*/
|
||||||
class U_COMMON_API UnicodeString : public Replaceable
|
class U_COMMON_API UnicodeString : public Replaceable
|
||||||
{
|
{
|
||||||
@ -678,6 +714,9 @@ public:
|
|||||||
* Copy the characters in the range
|
* Copy the characters in the range
|
||||||
* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
|
* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
|
||||||
* beginning at <tt>dstStart</tt>.
|
* beginning at <tt>dstStart</tt>.
|
||||||
|
* If the string aliases to <code>dst</code> itself as an external buffer,
|
||||||
|
* then extract() will not copy the contents.
|
||||||
|
*
|
||||||
* @param start offset of first character which will be copied into the array
|
* @param start offset of first character which will be copied into the array
|
||||||
* @param length the number of characters to extract
|
* @param length the number of characters to extract
|
||||||
* @param dst array in which to copy characters. The length of <tt>dst</tt>
|
* @param dst array in which to copy characters. The length of <tt>dst</tt>
|
||||||
@ -841,23 +880,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
inline UnicodeString& setTo(const UnicodeString& srcText);
|
inline UnicodeString& setTo(const UnicodeString& srcText);
|
||||||
|
|
||||||
/**
|
|
||||||
* Set the characters in the UnicodeString object to the characters
|
|
||||||
* in <TT>srcChars</TT> in the range
|
|
||||||
* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
|
|
||||||
* <TT>srcChars</TT> is not modified.
|
|
||||||
* @param srcChars the source for the new characters
|
|
||||||
* @param srcStart the offset into <TT>srcChars</TT> where new characters
|
|
||||||
* will be obtained
|
|
||||||
* @param srcLength the number of characters in <TT>srcChars</TT> in the
|
|
||||||
* replace string
|
|
||||||
* @return a reference to this
|
|
||||||
* @stable
|
|
||||||
*/
|
|
||||||
inline UnicodeString& setTo(const UChar *srcChars,
|
|
||||||
UTextOffset srcStart,
|
|
||||||
int32_t srcLength);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the characters in the UnicodeString object to the characters
|
* Set the characters in the UnicodeString object to the characters
|
||||||
* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
|
* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
|
||||||
@ -879,6 +901,51 @@ public:
|
|||||||
*/
|
*/
|
||||||
UnicodeString& setTo(UChar srcChar);
|
UnicodeString& setTo(UChar srcChar);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
|
||||||
|
* The text will be used for the UnicodeString object, but
|
||||||
|
* it will not be released when the UnicodeString is destroyed.
|
||||||
|
* This has copy-on-write semantics:
|
||||||
|
* When the string is modified, then the buffer is first copied into
|
||||||
|
* newly allocated memory.
|
||||||
|
* The aliased buffer is never modified.
|
||||||
|
* In an assignment to another UnicodeString, the text will be aliased again,
|
||||||
|
* so that both strings then alias the same readonly-text.
|
||||||
|
*
|
||||||
|
* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
|
||||||
|
* This must be true if <code>textLength==-1</code>.
|
||||||
|
* @param text The characters to alias for the UnicodeString.
|
||||||
|
* @param textLength The number of Unicode characters in <code>text</code> to alias.
|
||||||
|
* If -1, then this function will determine the length
|
||||||
|
* by calling <code>u_strlen()</code>.
|
||||||
|
* @draft
|
||||||
|
*/
|
||||||
|
UnicodeString &setTo(bool_t isTerminated,
|
||||||
|
const UChar *text,
|
||||||
|
int32_t textLength);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aliasing setTo() function, analogous to the writeable-aliasing UChar* constructor.
|
||||||
|
* The text will be used for the UnicodeString object, but
|
||||||
|
* it will not be released when the UnicodeString is destroyed.
|
||||||
|
* This has write-through semantics:
|
||||||
|
* For as long as the capacity of the buffer is sufficient, write operations
|
||||||
|
* will directly affect the buffer. When more capacity is necessary, then
|
||||||
|
* a new buffer will be allocated and the contents copied as with regularly
|
||||||
|
* constructed strings.
|
||||||
|
* In an assignment to another UnicodeString, the buffer will be copied.
|
||||||
|
* The extract(UChar *dst) function detects whether the dst pointer is the same
|
||||||
|
* as the string buffer itself and will in this case not copy the contents.
|
||||||
|
*
|
||||||
|
* @param buffer The characters to alias for the UnicodeString.
|
||||||
|
* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
|
||||||
|
* @param buffCapacity The size of <code>buffer</code> in UChars.
|
||||||
|
* @draft
|
||||||
|
*/
|
||||||
|
UnicodeString &setTo(UChar *buffer,
|
||||||
|
int32_t buffLength,
|
||||||
|
int32_t buffCapacity);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the character at the specified offset to the specified character.
|
* Set the character at the specified offset to the specified character.
|
||||||
* @param offset A valid offset into the text of the character to set
|
* @param offset A valid offset into the text of the character to set
|
||||||
@ -1426,12 +1493,15 @@ public:
|
|||||||
int32_t textLength);
|
int32_t textLength);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Aliasing UChar* constructor.
|
* Readonly-aliasing UChar* constructor.
|
||||||
* The text will be used for the new UnicodeString object, but
|
* The text will be used for the UnicodeString object, but
|
||||||
* it will not be released when the UnicodeString is destroyed.
|
* it will not be released when the UnicodeString is destroyed.
|
||||||
* Be careful not to attempt to modify the contents of the UnicodeString
|
* This has copy-on-write semantics:
|
||||||
* if the text is read-only. Operations that allocate an entirely
|
* When the string is modified, then the buffer is first copied into
|
||||||
* new buffer are harmless.
|
* newly allocated memory.
|
||||||
|
* The aliased buffer is never modified.
|
||||||
|
* In an assignment to another UnicodeString, the text will be aliased again,
|
||||||
|
* so that both strings then alias the same readonly-text.
|
||||||
*
|
*
|
||||||
* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
|
* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
|
||||||
* This must be true if <code>textLength==-1</code>.
|
* This must be true if <code>textLength==-1</code>.
|
||||||
@ -1445,6 +1515,26 @@ public:
|
|||||||
UChar *text,
|
UChar *text,
|
||||||
int32_t textLength);
|
int32_t textLength);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writeable-aliasing UChar* constructor.
|
||||||
|
* The text will be used for the UnicodeString object, but
|
||||||
|
* it will not be released when the UnicodeString is destroyed.
|
||||||
|
* This has write-through semantics:
|
||||||
|
* For as long as the capacity of the buffer is sufficient, write operations
|
||||||
|
* will directly affect the buffer. When more capacity is necessary, then
|
||||||
|
* a new buffer will be allocated and the contents copied as with regularly
|
||||||
|
* constructed strings.
|
||||||
|
* In an assignment to another UnicodeString, the buffer will be copied.
|
||||||
|
* The extract(UChar *dst) function detects whether the dst pointer is the same
|
||||||
|
* as the string buffer itself and will in this case not copy the contents.
|
||||||
|
*
|
||||||
|
* @param buffer The characters to alias for the UnicodeString.
|
||||||
|
* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
|
||||||
|
* @param buffCapacity The size of <code>buffer</code> in UChars.
|
||||||
|
* @draft
|
||||||
|
*/
|
||||||
|
UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* char* constructor.
|
* char* constructor.
|
||||||
* @param codepageData an array of bytes, null-terminated
|
* @param codepageData an array of bytes, null-terminated
|
||||||
@ -1480,7 +1570,7 @@ public:
|
|||||||
* @param that The UnicodeString object to copy.
|
* @param that The UnicodeString object to copy.
|
||||||
* @stable
|
* @stable
|
||||||
*/
|
*/
|
||||||
inline UnicodeString(const UnicodeString& that);
|
UnicodeString(const UnicodeString& that);
|
||||||
|
|
||||||
/** Destructor.
|
/** Destructor.
|
||||||
* @stable
|
* @stable
|
||||||
@ -1519,24 +1609,6 @@ public:
|
|||||||
|
|
||||||
UCharReference operator[] (UTextOffset pos);
|
UCharReference operator[] (UTextOffset pos);
|
||||||
|
|
||||||
// {sfb} remove these later?
|
|
||||||
/* Hack to avoid circular dependencies */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert the characters in this to UPPER CASE following the conventions of
|
|
||||||
* the default locale.
|
|
||||||
* @retrurn A reference to this.
|
|
||||||
*/
|
|
||||||
// UnicodeString& toUpper();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert the characters in this to lower case following the conventions of
|
|
||||||
* the default locale.
|
|
||||||
* @retrurn A reference to this.
|
|
||||||
*/
|
|
||||||
// UnicodeString& toLower();
|
|
||||||
|
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
// Implementation methods
|
// Implementation methods
|
||||||
//========================================
|
//========================================
|
||||||
@ -1575,9 +1647,6 @@ private:
|
|||||||
|
|
||||||
inline UChar doCharAt(UTextOffset offset) const;
|
inline UChar doCharAt(UTextOffset offset) const;
|
||||||
|
|
||||||
UnicodeString& doSetCharAt(UTextOffset offset,
|
|
||||||
UChar c);
|
|
||||||
|
|
||||||
UnicodeString& doReplace(UTextOffset start,
|
UnicodeString& doReplace(UTextOffset start,
|
||||||
int32_t length,
|
int32_t length,
|
||||||
const UnicodeString& srcText,
|
const UnicodeString& srcText,
|
||||||
@ -1603,6 +1672,15 @@ private:
|
|||||||
// get the "real" capacity of the array, adjusted for ref count
|
// get the "real" capacity of the array, adjusted for ref count
|
||||||
inline int32_t getCapacity(void) const;
|
inline int32_t getCapacity(void) const;
|
||||||
|
|
||||||
|
// allocate the array; result may be fStackBuffer
|
||||||
|
// sets refCount to 1 if appropriate
|
||||||
|
// sets fArray, fCapacity, and fFlags
|
||||||
|
// returns boolean for success or failure
|
||||||
|
bool_t allocate(int32_t capacity);
|
||||||
|
|
||||||
|
// release the array if owned
|
||||||
|
inline void releaseArray();
|
||||||
|
|
||||||
// utility method to get around lack of exception handling
|
// utility method to get around lack of exception handling
|
||||||
void setToBogus(void);
|
void setToBogus(void);
|
||||||
|
|
||||||
@ -1621,36 +1699,53 @@ private:
|
|||||||
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
||||||
*/
|
*/
|
||||||
void doCodepageCreate(const char *codepageData,
|
void doCodepageCreate(const char *codepageData,
|
||||||
int32_t dataLength,
|
int32_t dataLength,
|
||||||
const char *codepage);
|
const char *codepage);
|
||||||
|
|
||||||
// clones array if refCount > 1
|
/*
|
||||||
void cloneArrayIfNeeded(void);
|
* This function is called when write access to the array
|
||||||
|
* is necessary.
|
||||||
|
*
|
||||||
|
* We need to make a copy of the array if
|
||||||
|
* the buffer is read-only, or
|
||||||
|
* the buffer is refCounted (shared), and refCount>1, or
|
||||||
|
* the buffer is too small.
|
||||||
|
*
|
||||||
|
* Return FALSE if memory could not be allocated.
|
||||||
|
*/
|
||||||
|
bool_t cloneArrayIfNeeded(int32_t newCapacity = -1,
|
||||||
|
int32_t growCapacity = -1,
|
||||||
|
bool_t doCopyArray = TRUE,
|
||||||
|
int32_t **pBufferToDelete = 0);
|
||||||
|
|
||||||
// ref counting
|
// ref counting
|
||||||
inline uint16_t addRef(void);
|
inline int32_t addRef(void);
|
||||||
inline uint16_t removeRef(void);
|
inline int32_t removeRef(void);
|
||||||
inline uint16_t refCount(void) const;
|
inline int32_t refCount(void) const;
|
||||||
inline uint16_t setRefCount(uint16_t count);
|
inline int32_t setRefCount(int32_t count);
|
||||||
|
|
||||||
UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
|
|
||||||
UChar *fArray; // the Unicode data
|
|
||||||
int32_t fLength; // number characters in fArray
|
|
||||||
int32_t fCapacity; // sizeof fArray
|
|
||||||
int32_t fHashCode; // the hash code
|
|
||||||
bool_t fRefCounted; // indicates if we own storage
|
|
||||||
bool_t fBogus; // indicates if an operation failed
|
|
||||||
|
|
||||||
// constants
|
// constants
|
||||||
static const UChar fgInvalidUChar; // invalid UChar index
|
enum {
|
||||||
static const int32_t kGrowSize; // grow size for this buffer
|
US_STACKBUF_SIZE=9, // Size of stack buffer for small strings
|
||||||
static const int32_t kInvalidHashCode; // invalid hash code
|
kInvalidUChar=0xffff, // invalid UChar index
|
||||||
static const int32_t kEmptyHashCode; // hash code for empty string
|
kGrowSize=128, // grow size for this buffer
|
||||||
|
kInvalidHashCode=0, // invalid hash code
|
||||||
|
kEmptyHashCode=1, // hash code for empty string
|
||||||
|
|
||||||
|
// bit flag values for fFlags
|
||||||
|
kIsBogus=1, // this string is bogus, i.e., not valid
|
||||||
|
kUsingStackBuffer=2, // fArray==fStackBuffer
|
||||||
|
kRefCounted=4, // there is a refCount field before the characters in fArray
|
||||||
|
kBufferIsReadonly=8, // do not write to this buffer
|
||||||
|
|
||||||
|
// combined values for convenience
|
||||||
|
kShortString=kUsingStackBuffer,
|
||||||
|
kLongString=kRefCounted,
|
||||||
|
kReadonlyAlias=kBufferIsReadonly,
|
||||||
|
kWriteableAlias=0
|
||||||
|
};
|
||||||
|
|
||||||
// statics
|
// statics
|
||||||
inline static int32_t allocation(int32_t minSize); // allocation algorithm
|
|
||||||
inline static UChar* allocate(int32_t minSize, // allocate buffer >= minSize
|
|
||||||
int32_t& actualSize);
|
|
||||||
|
|
||||||
// default converter cache
|
// default converter cache
|
||||||
static UConverter* getDefaultConverter(UErrorCode& status);
|
static UConverter* getDefaultConverter(UErrorCode& status);
|
||||||
@ -1659,6 +1754,27 @@ private:
|
|||||||
static UConverter *fgDefaultConverter;
|
static UConverter *fgDefaultConverter;
|
||||||
|
|
||||||
friend class UnicodeStringStreamer;
|
friend class UnicodeStringStreamer;
|
||||||
|
friend class UnicodeConverterCPP;
|
||||||
|
friend U_COMMON_API ostream &operator<<(ostream& stream, const UnicodeString& s);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following are all the class fields that are stored
|
||||||
|
* in each UnicodeString object.
|
||||||
|
* Note that UnicodeString has virtual functions,
|
||||||
|
* therefore there is an implicit vtable pointer
|
||||||
|
* as the first real field.
|
||||||
|
* The fields should be aligned such that no padding is
|
||||||
|
* necessary, mostly by having larger types first.
|
||||||
|
* On 32-bit machines, the size should be 40 bytes,
|
||||||
|
* on 64-bit machines (8-byte pointers), it should be 48 bytes.
|
||||||
|
*/
|
||||||
|
// (implicit) *vtable;
|
||||||
|
UChar *fArray; // the Unicode data
|
||||||
|
int32_t fLength; // number characters in fArray
|
||||||
|
int32_t fCapacity; // sizeof fArray
|
||||||
|
int32_t fHashCode; // the hash code
|
||||||
|
uint16_t fFlags; // bit flags: see constants above
|
||||||
|
UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -1684,19 +1800,11 @@ public:
|
|||||||
/* @deprecated */
|
/* @deprecated */
|
||||||
inline void operator delete(void *location);
|
inline void operator delete(void *location);
|
||||||
|
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
// Non-public API - will be removed!
|
// Non-public API - will be removed!
|
||||||
//========================================
|
//========================================
|
||||||
/* @deprecated */
|
/* @deprecated */
|
||||||
UnicodeString(UChar *buff, int32_t bufLength, int32_t buffCapacity);
|
const UChar* getUChars() const;
|
||||||
/* @deprecated */
|
|
||||||
const UChar* getUChars(void) const;
|
|
||||||
/* @deprecated */
|
|
||||||
inline const UChar* getUniChars(void) const;
|
|
||||||
/* @deprecated */
|
|
||||||
UChar* orphanStorage(void);
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
@ -1716,12 +1824,6 @@ uprv_arrayCopy(const UnicodeString *src, int32_t srcStart,
|
|||||||
//========================================
|
//========================================
|
||||||
// Inline members
|
// Inline members
|
||||||
//========================================
|
//========================================
|
||||||
inline
|
|
||||||
UnicodeString::UnicodeString(const UnicodeString& that)
|
|
||||||
: fArray(fStackBuffer), fLength(0), fCapacity(US_STACKBUF_SIZE),
|
|
||||||
fRefCounted(FALSE), fHashCode(kEmptyHashCode), fBogus(FALSE)
|
|
||||||
{ *this = that; }
|
|
||||||
|
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
// Read-only alias methods
|
// Read-only alias methods
|
||||||
@ -2089,10 +2191,10 @@ UnicodeString::extractBetween(UTextOffset start,
|
|||||||
inline UChar
|
inline UChar
|
||||||
UnicodeString::doCharAt(UTextOffset offset) const
|
UnicodeString::doCharAt(UTextOffset offset) const
|
||||||
{
|
{
|
||||||
if(offset < 0 || offset >= fLength)
|
if(offset < 0 || offset >= fLength) {
|
||||||
return fgInvalidUChar;
|
return kInvalidUChar;
|
||||||
// in ref-counted implementation, first char is ref count
|
}
|
||||||
return fArray[ fRefCounted ? offset + 1 : offset ];
|
return fArray[ offset ];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline UChar
|
inline UChar
|
||||||
@ -2136,11 +2238,13 @@ inline UnicodeString&
|
|||||||
UnicodeString::setTo(const UnicodeString& srcText)
|
UnicodeString::setTo(const UnicodeString& srcText)
|
||||||
{ return doReplace(0, fLength, srcText, 0, srcText.fLength); }
|
{ return doReplace(0, fLength, srcText, 0, srcText.fLength); }
|
||||||
|
|
||||||
|
#if 0
|
||||||
inline UnicodeString&
|
inline UnicodeString&
|
||||||
UnicodeString::setTo(const UChar *srcChars,
|
UnicodeString::setTo(const UChar *srcChars,
|
||||||
UTextOffset srcStart,
|
UTextOffset srcStart,
|
||||||
int32_t srcLength)
|
int32_t srcLength)
|
||||||
{ return doReplace(0, fLength, srcChars, srcStart, srcLength); }
|
{ return doReplace(0, fLength, srcChars, srcStart, srcLength); }
|
||||||
|
#endif
|
||||||
|
|
||||||
inline UnicodeString&
|
inline UnicodeString&
|
||||||
UnicodeString::setTo(const UChar *srcChars,
|
UnicodeString::setTo(const UChar *srcChars,
|
||||||
@ -2311,7 +2415,7 @@ UnicodeString::reverse(UTextOffset start,
|
|||||||
//========================================
|
//========================================
|
||||||
inline bool_t
|
inline bool_t
|
||||||
UnicodeString::isBogus() const
|
UnicodeString::isBogus() const
|
||||||
{ return fBogus; }
|
{ return fFlags & kIsBogus; }
|
||||||
|
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
@ -2320,31 +2424,38 @@ UnicodeString::isBogus() const
|
|||||||
|
|
||||||
inline UChar*
|
inline UChar*
|
||||||
UnicodeString::getArrayStart()
|
UnicodeString::getArrayStart()
|
||||||
{ return (fRefCounted ? fArray + 1 : fArray); }
|
{ return fArray; }
|
||||||
|
|
||||||
inline const UChar*
|
inline const UChar*
|
||||||
UnicodeString::getArrayStart() const
|
UnicodeString::getArrayStart() const
|
||||||
{ return (fRefCounted ? fArray + 1 : fArray); }
|
{ return fArray; }
|
||||||
|
|
||||||
inline int32_t
|
inline int32_t
|
||||||
UnicodeString::getCapacity() const
|
UnicodeString::getCapacity() const
|
||||||
{ return (fRefCounted ? fCapacity - 1 : fCapacity); }
|
{ return fCapacity; }
|
||||||
|
|
||||||
inline uint16_t
|
inline void
|
||||||
|
UnicodeString::releaseArray() {
|
||||||
|
if((fFlags & kRefCounted) && removeRef() == 0) {
|
||||||
|
delete [] ((int32_t *)fArray - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int32_t
|
||||||
UnicodeString::addRef()
|
UnicodeString::addRef()
|
||||||
{ return ++(fArray[0]); }
|
{ return ++*((int32_t *)fArray - 1); }
|
||||||
|
|
||||||
inline uint16_t
|
inline int32_t
|
||||||
UnicodeString::removeRef()
|
UnicodeString::removeRef()
|
||||||
{ return --(fArray[0]); }
|
{ return --*((int32_t *)fArray - 1); }
|
||||||
|
|
||||||
inline uint16_t
|
inline int32_t
|
||||||
UnicodeString::refCount() const
|
UnicodeString::refCount() const
|
||||||
{ return fArray[0]; }
|
{ return *((int32_t *)fArray - 1); }
|
||||||
|
|
||||||
inline uint16_t
|
inline int32_t
|
||||||
UnicodeString::setRefCount(uint16_t count)
|
UnicodeString::setRefCount(int32_t count)
|
||||||
{ fRefCounted = TRUE; return (fArray[0] = count); }
|
{ return (*((int32_t *)fArray - 1) = count); }
|
||||||
|
|
||||||
|
|
||||||
// deprecated API - remove later
|
// deprecated API - remove later
|
||||||
@ -2352,10 +2463,6 @@ inline int32_t
|
|||||||
UnicodeString::size() const
|
UnicodeString::size() const
|
||||||
{ return fLength; }
|
{ return fLength; }
|
||||||
|
|
||||||
inline const UChar*
|
|
||||||
UnicodeString::getUniChars() const
|
|
||||||
{ return getUChars(); }
|
|
||||||
|
|
||||||
inline UnicodeString&
|
inline UnicodeString&
|
||||||
UnicodeString::findAndReplace(const UnicodeString& oldText,
|
UnicodeString::findAndReplace(const UnicodeString& oldText,
|
||||||
const UnicodeString& newText,
|
const UnicodeString& newText,
|
||||||
@ -2380,14 +2487,6 @@ UnicodeString::operator delete(void *location)
|
|||||||
//========================================
|
//========================================
|
||||||
// Static members
|
// Static members
|
||||||
//========================================
|
//========================================
|
||||||
inline int32_t
|
|
||||||
UnicodeString::allocation(int32_t minSize)
|
|
||||||
{ return minSize < kGrowSize ? kGrowSize
|
|
||||||
: (minSize * 2 + kGrowSize) & ~(kGrowSize - 1); }
|
|
||||||
|
|
||||||
inline UChar*
|
|
||||||
UnicodeString::allocate(int32_t minSize, int32_t& actualSize)
|
|
||||||
{ actualSize = allocation(minSize); return new UChar[ actualSize ]; }
|
|
||||||
|
|
||||||
//========================================
|
//========================================
|
||||||
// class UCharReference
|
// class UCharReference
|
||||||
@ -2442,6 +2541,3 @@ UCharReference::operator UChar()
|
|||||||
{ return fString->charAt(fPos); }
|
{ return fString->charAt(fPos); }
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -549,17 +549,6 @@ UnicodeStringTest::TestMiscellaneous()
|
|||||||
for (i = 0; i < test2.length(); i++)
|
for (i = 0; i < test2.length(); i++)
|
||||||
if (test2[i] != test4[i])
|
if (test2[i] != test4[i])
|
||||||
errln(UnicodeString("getUChars() failed: strings differ at position ") + i);
|
errln(UnicodeString("getUChars() failed: strings differ at position ") + i);
|
||||||
|
|
||||||
test4 = test1.orphanStorage();
|
|
||||||
|
|
||||||
if (test1.length() != 0)
|
|
||||||
errln("orphanStorage() failed: orphaned string's contents is " + test1);
|
|
||||||
|
|
||||||
for (i = 0; i < test2.length(); i++)
|
|
||||||
if (test2[i] != test4[i])
|
|
||||||
errln(UnicodeString("orphanStorage() failed: strings differ at position ") + i);
|
|
||||||
|
|
||||||
delete (UChar*)test4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -606,13 +595,9 @@ UnicodeStringTest::TestStackAllocation()
|
|||||||
errln("insert() on stack-allocated UnicodeString didn't work right");
|
errln("insert() on stack-allocated UnicodeString didn't work right");
|
||||||
if (guardWord2 != 0x4DED)
|
if (guardWord2 != 0x4DED)
|
||||||
errln("insert() on stack-allocated UnicodeString overwrote guard word!");
|
errln("insert() on stack-allocated UnicodeString overwrote guard word!");
|
||||||
#if 0
|
|
||||||
// the current implementation will always reallocate the memory
|
|
||||||
// after it was aliased in case it was read-only;
|
|
||||||
// therefore, this test must fail and we don't perform it
|
|
||||||
if (workingBuffer[24] != 0x67)
|
if (workingBuffer[24] != 0x67)
|
||||||
errln("insert() on stack-allocated UnicodeString didn't affect backing store");
|
errln("insert() on stack-allocated UnicodeString didn't affect backing store");
|
||||||
#endif
|
|
||||||
|
|
||||||
*test += " to the aid of their country.";
|
*test += " to the aid of their country.";
|
||||||
if (*test != "Now is the time for all good men to come to the aid of their country.")
|
if (*test != "Now is the time for all good men to come to the aid of their country.")
|
||||||
@ -624,9 +609,32 @@ UnicodeStringTest::TestStackAllocation()
|
|||||||
if (*test != "ha!")
|
if (*test != "ha!")
|
||||||
errln("Assignment to stack-allocated UnicodeString didn't work");
|
errln("Assignment to stack-allocated UnicodeString didn't work");
|
||||||
if (workingBuffer[0] != 0x4e)
|
if (workingBuffer[0] != 0x4e)
|
||||||
errln("Change to UnicodeString after overflow are stil affecting original buffer");
|
errln("Change to UnicodeString after overflow are still affecting original buffer");
|
||||||
if (guardWord2 != 0x4DED)
|
if (guardWord2 != 0x4DED)
|
||||||
errln("Change to UnicodeString after overflow overwrote guard word!");
|
errln("Change to UnicodeString after overflow overwrote guard word!");
|
||||||
|
|
||||||
|
// test read-only aliasing with setTo()
|
||||||
|
workingBuffer[0] = 0x20ac;
|
||||||
|
workingBuffer[1] = 0x125;
|
||||||
|
workingBuffer[2] = 0;
|
||||||
|
test->setTo(TRUE, workingBuffer, 2);
|
||||||
|
if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
|
||||||
|
errln("UnicodeString.setTo(readonly alias) does not alias correctly");
|
||||||
|
}
|
||||||
|
workingBuffer[1] = 0x109;
|
||||||
|
if(test->charAt(1) != 0x109) {
|
||||||
|
errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
test->setTo(TRUE, workingBuffer, -1);
|
||||||
|
if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
|
||||||
|
errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
|
||||||
|
}
|
||||||
|
|
||||||
|
test->setTo(FALSE, workingBuffer, -1);
|
||||||
|
if(!test->isBogus()) {
|
||||||
|
errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
|
||||||
|
}
|
||||||
|
|
||||||
delete test;
|
delete test;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user