diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h index b655361527..4b86c835f9 100644 --- a/icu4c/source/common/charstr.h +++ b/icu4c/source/common/charstr.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2001-2011, International Business Machines +* Copyright (c) 2001-2012, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -61,9 +61,9 @@ public: */ CharString ©From(const CharString &other, UErrorCode &errorCode); - UBool isEmpty() { return len==0; } + UBool isEmpty() const { return len==0; } int32_t length() const { return len; } - char operator[] (int32_t index) const { return buffer[index]; } + char operator[](int32_t index) const { return buffer[index]; } StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } const char *data() const { return buffer.getAlias(); } diff --git a/icu4c/source/i18n/sortkey.cpp b/icu4c/source/i18n/sortkey.cpp index 4023b81fb5..bf702cb0de 100644 --- a/icu4c/source/i18n/sortkey.cpp +++ b/icu4c/source/i18n/sortkey.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* -* Copyright (C) 1996-2011, International Business Machines Corporation and * -* others. All Rights Reserved. * +* Copyright (C) 1996-2012, International Business Machines Corporation and +* others. All Rights Reserved. ******************************************************************************* */ //=============================================================================== @@ -38,77 +38,80 @@ U_NAMESPACE_BEGIN -// A hash code of kInvalidHashCode indicates that the has code needs +// A hash code of kInvalidHashCode indicates that the hash code needs // to be computed. A hash code of kEmptyHashCode is used for empty keys // and for any key whose computed hash code is kInvalidHashCode. -#define kInvalidHashCode ((int32_t)0) -#define kEmptyHashCode ((int32_t)1) +static const int32_t kInvalidHashCode = 0; +static const int32_t kEmptyHashCode = 1; +// The "bogus hash code" replaces a separate fBogus flag. +static const int32_t kBogusHashCode = 2; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) CollationKey::CollationKey() - : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), - fHashCode(kEmptyHashCode), fBytes(NULL) + : UObject(), fFlagAndLength(0), + fHashCode(kEmptyHashCode) { } // Create a collation key from a bit array. CollationKey::CollationKey(const uint8_t* newValues, int32_t count) - : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), + : UObject(), fFlagAndLength(count), fHashCode(kInvalidHashCode) { - fBytes = (uint8_t *)uprv_malloc(count); - - if (fBytes == NULL) - { + if (count < 0 || (newValues == NULL && count != 0) || + (count > getCapacity() && reallocate(count, 0) == NULL)) { setToBogus(); return; } - uprv_memcpy(fBytes, newValues, fCount); + if (count > 0) { + uprv_memcpy(getBytes(), newValues, count); + } } CollationKey::CollationKey(const CollationKey& other) -: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), - fHashCode(other.fHashCode), fBytes(NULL) + : UObject(other), fFlagAndLength(other.getLength()), + fHashCode(other.fHashCode) { - if (other.fBogus) + if (other.isBogus()) { setToBogus(); return; } - fBytes = (uint8_t *)uprv_malloc(fCapacity); - - if (fBytes == NULL) - { + int32_t length = fFlagAndLength; + if (length > getCapacity() && reallocate(length, 0) == NULL) { setToBogus(); return; } - uprv_memcpy(fBytes, other.fBytes, other.fCount); - if(fCapacity>fCount) { - uprv_memset(fBytes+fCount, 0, fCapacity-fCount); + if (length > 0) { + uprv_memcpy(getBytes(), other.getBytes(), length); } } CollationKey::~CollationKey() { - uprv_free(fBytes); + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } } -void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) { - if(fBytes != NULL) { - uprv_free(fBytes); +uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { + uint8_t *newBytes = static_cast(uprv_malloc(newCapacity)); + if(newBytes == NULL) { return NULL; } + if(length > 0) { + uprv_memcpy(newBytes, getBytes(), length); } - fBytes = values; - fCapacity = capacity; - setLength(count); + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } + fUnion.fFields.fBytes = newBytes; + fUnion.fFields.fCapacity = newCapacity; + fFlagAndLength |= 0x80000000; + return newBytes; } void CollationKey::setLength(int32_t newLength) { - fBogus = FALSE; - fCount = newLength; + // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); + fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; fHashCode = kInvalidHashCode; } @@ -116,8 +119,7 @@ void CollationKey::setLength(int32_t newLength) { CollationKey& CollationKey::reset() { - fCount = 0; - fBogus = FALSE; + fFlagAndLength &= 0x80000000; fHashCode = kEmptyHashCode; return *this; @@ -127,12 +129,8 @@ CollationKey::reset() CollationKey& CollationKey::setToBogus() { - uprv_free(fBytes); - fBytes = NULL; - - fCapacity = 0; - fCount = 0; - fHashCode = kInvalidHashCode; + fFlagAndLength &= 0x80000000; + fHashCode = kBogusHashCode; return *this; } @@ -140,9 +138,9 @@ CollationKey::setToBogus() UBool CollationKey::operator==(const CollationKey& source) const { - return (this->fCount == source.fCount && - (this->fBytes == source.fBytes || - uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); + return getLength() == source.getLength() && + (this == &source || + uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); } const CollationKey& @@ -155,106 +153,26 @@ CollationKey::operator=(const CollationKey& other) return setToBogus(); } - if (other.fBytes != NULL) - { - ensureCapacity(other.fCount); - - if (isBogus()) - { - return *this; - } - - fHashCode = other.fHashCode; - uprv_memcpy(fBytes, other.fBytes, fCount); + int32_t length = other.getLength(); + if (length > getCapacity() && reallocate(length, 0) == NULL) { + return setToBogus(); } - else - { - fCount = 0; - fBogus = FALSE; - fHashCode = kEmptyHashCode; + if (length > 0) { + uprv_memcpy(getBytes(), other.getBytes(), length); } + fFlagAndLength = (fFlagAndLength & 0x80000000) | length; + fHashCode = other.fHashCode; } return *this; } // Bitwise comparison for the collation keys. -// NOTE: this is somewhat messy 'cause we can't count -// on memcmp returning the exact values which match -// Collator::EComparisonResult Collator::EComparisonResult CollationKey::compareTo(const CollationKey& target) const { - uint8_t *src = this->fBytes; - uint8_t *tgt = target.fBytes; - - // are we comparing the same string - if (src == tgt) - return Collator::EQUAL; - - /* - int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; - if (count == 0) - { - // If count is 0, at least one of the keys is empty. - // An empty key is always LESS than a non-empty one - // and EQUAL to another empty - if (this->fCount < target.fCount) - { - return Collator::LESS; - } - - if (this->fCount > target.fCount) - { - return Collator::GREATER; - } - return Collator::EQUAL; - } - */ - - int minLength; - Collator::EComparisonResult result; - - // are we comparing different lengths? - if (this->fCount != target.fCount) { - if (this->fCount < target.fCount) { - minLength = this->fCount; - result = Collator::LESS; - } - else { - minLength = target.fCount; - result = Collator::GREATER; - } - } - else { - minLength = target.fCount; - result = Collator::EQUAL; - } - - if (minLength > 0) { - int diff = uprv_memcmp(src, tgt, minLength); - if (diff > 0) { - return Collator::GREATER; - } - else - if (diff < 0) { - return Collator::LESS; - } - } - - return result; - /* - if (result < 0) - { - return Collator::LESS; - } - - if (result > 0) - { - return Collator::GREATER; - } - return Collator::EQUAL; - */ + UErrorCode errorCode = U_ZERO_ERROR; + return static_cast(compareTo(target, errorCode)); } // Bitwise comparison for the collation keys. @@ -262,30 +180,25 @@ UCollationResult CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const { if(U_SUCCESS(status)) { - uint8_t *src = this->fBytes; - uint8_t *tgt = target.fBytes; + const uint8_t *src = getBytes(); + const uint8_t *tgt = target.getBytes(); // are we comparing the same string if (src == tgt) return UCOL_EQUAL; - int minLength; UCollationResult result; // are we comparing different lengths? - if (this->fCount != target.fCount) { - if (this->fCount < target.fCount) { - minLength = this->fCount; - result = UCOL_LESS; - } - else { - minLength = target.fCount; - result = UCOL_GREATER; - } - } - else { - minLength = target.fCount; - result = UCOL_EQUAL; + int32_t minLength = getLength(); + int32_t targetLength = target.getLength(); + if (minLength < targetLength) { + result = UCOL_LESS; + } else if (minLength == targetLength) { + result = UCOL_EQUAL; + } else { + minLength = targetLength; + result = UCOL_GREATER; } if (minLength > 0) { @@ -305,31 +218,6 @@ CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const } } -CollationKey& -CollationKey::ensureCapacity(int32_t newSize) -{ - if (fCapacity < newSize) - { - uprv_free(fBytes); - - fBytes = (uint8_t *)uprv_malloc(newSize); - - if (fBytes == NULL) - { - return setToBogus(); - } - - uprv_memset(fBytes, 0, fCapacity); - fCapacity = newSize; - } - - fBogus = FALSE; - fCount = newSize; - fHashCode = kInvalidHashCode; - - return *this; -} - #ifdef U_USE_COLLATION_KEY_DEPRECATES // Create a copy of the byte array. uint8_t* @@ -344,13 +232,30 @@ CollationKey::toByteArray(int32_t& count) const else { count = fCount; - uprv_memcpy(result, fBytes, fCount); + if (count > 0) { + uprv_memcpy(result, fBytes, fCount); + } } return result; } #endif +static int32_t +computeHashCode(const uint8_t *key, int32_t length) { + const char *s = reinterpret_cast(key); + int32_t hash; + if (s == NULL || length == 0) { + hash = kEmptyHashCode; + } else { + hash = ustr_hashCharsN(s, length); + if (hash == kInvalidHashCode || hash == kBogusHashCode) { + hash = kEmptyHashCode; + } + } + return hash; +} + int32_t CollationKey::hashCode() const { @@ -362,33 +267,7 @@ CollationKey::hashCode() const if (fHashCode == kInvalidHashCode) { - const char *s = reinterpret_cast(fBytes); - ((CollationKey *)this)->fHashCode = s == NULL ? 0 : ustr_hashCharsN(s, fCount); -#if 0 - // We compute the hash by iterating sparsely over 64 (at most) characters - // spaced evenly through the string. For each character, we multiply the - // previous hash value by a prime number and add the new character in, - // in the manner of a additive linear congruential random number generator, - // thus producing a pseudorandom deterministic value which should be well - // distributed over the output range. [LIU] - const uint8_t *p = fBytes, *limit = fBytes + fCount; - int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); - int32_t hash = 0; - - while (p < limit) - { - hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); - p += inc; - } - - // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode - if (hash == kInvalidHashCode) - { - hash = kEmptyHashCode; - } - - ((CollationKey *)this)->fHashCode = hash; // cast away const -#endif + fHashCode = computeHashCode(getBytes(), getLength()); } return fHashCode; @@ -400,8 +279,7 @@ U_CAPI int32_t U_EXPORT2 ucol_keyHashCode(const uint8_t *key, int32_t length) { - icu::CollationKey newKey(key, length); - return newKey.hashCode(); + return computeHashCode(key, length); } #endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/icu4c/source/i18n/tblcoll.cpp b/icu4c/source/i18n/tblcoll.cpp index 96ff6875f2..cbe0394930 100644 --- a/icu4c/source/i18n/tblcoll.cpp +++ b/icu4c/source/i18n/tblcoll.cpp @@ -427,29 +427,11 @@ CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, return sortkey.reset(); } - uint8_t *result; - int32_t resultCapacity; - if (sortkey.fCapacity >= (sourceLen * 3)) { - // Try to reuse the CollationKey.fBytes. - result = sortkey.fBytes; - resultCapacity = sortkey.fCapacity; - } else { - result = NULL; - resultCapacity = 0; - } - int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen, - result, resultCapacity, &status); + int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status); if (U_SUCCESS(status)) { - if (result == sortkey.fBytes) { - sortkey.setLength(resultLen); - } else { - sortkey.adopt(result, resultCapacity, resultLen); - } + sortkey.setLength(resultLen); } else { - if (result != sortkey.fBytes) { - uprv_free(result); - } sortkey.setToBogus(); } return sortkey; diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 882a387379..9926135a28 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -4274,94 +4274,63 @@ U_NAMESPACE_BEGIN class SortKeyByteSink : public ByteSink { public: - static const uint32_t FILL_ORIGINAL_BUFFER = 1; - static const uint32_t DONT_GROW = 2; - SortKeyByteSink(char *dest, int32_t destCapacity, uint32_t flags=0) - : ownedBuffer_(NULL), buffer_(dest), capacity_(destCapacity), - appended_(0), - fill_(flags & FILL_ORIGINAL_BUFFER), - grow_((flags & DONT_GROW) == 0) { - if (buffer_ == NULL || capacity_ < 0) { - buffer_ = reinterpret_cast(&lastResortByte_); + SortKeyByteSink(char *dest, int32_t destCapacity) + : buffer_(dest), capacity_(destCapacity), + appended_(0) { + if (buffer_ == NULL) { + capacity_ = 0; + } else if(capacity_ < 0) { + buffer_ = NULL; capacity_ = 0; } } - virtual ~SortKeyByteSink(); virtual void Append(const char *bytes, int32_t n); - void Append(const uint8_t *bytes, int32_t n) { Append(reinterpret_cast(bytes), n); } - void Append(uint8_t b) { - if (appended_ < capacity_) { - buffer_[appended_++] = (char)b; - } else { - Append(&b, 1); + void Append(uint32_t b) { + if (appended_ < capacity_ || Resize(1, appended_)) { + buffer_[appended_] = (char)b; } + ++appended_; } - void Append(uint8_t b1, uint8_t b2) { + void Append(uint32_t b1, uint32_t b2) { int32_t a2 = appended_ + 2; - if (a2 <= capacity_) { + if (a2 <= capacity_ || Resize(2, appended_)) { buffer_[appended_] = (char)b1; buffer_[appended_ + 1] = (char)b2; - appended_ = a2; - } else { - char bytes[2] = { (char)b1, (char)b2 }; - Append(bytes, 2); + } else if(appended_ < capacity_) { + buffer_[appended_] = (char)b1; } + appended_ = a2; } - void Append(const SortKeyByteSink &other) { Append(other.buffer_, other.appended_); } virtual char *GetAppendBuffer(int32_t min_capacity, int32_t desired_capacity_hint, char *scratch, int32_t scratch_capacity, int32_t *result_capacity); int32_t NumberOfBytesAppended() const { return appended_; } - uint8_t &LastByte() { - if (buffer_ != NULL && appended_ > 0) { - return reinterpret_cast(buffer_)[appended_ - 1]; - } else { - return lastResortByte_; - } - } - uint8_t *GetLastFewBytes(int32_t n) { - if (buffer_ != NULL && appended_ >= n) { - return reinterpret_cast(buffer_) + appended_ - n; - } else { - return NULL; - } - } - char *GetBuffer() { return buffer_; } - uint8_t *GetUnsignedBuffer() { return reinterpret_cast(buffer_); } - uint8_t *OrphanUnsignedBuffer(int32_t &orphanedCapacity); - UBool IsOk() const { return buffer_ != NULL; } // otherwise out-of-memory + /** @return FALSE if memory allocation failed */ + UBool IsOk() const { return buffer_ != NULL; } -private: - SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented - SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented +protected: + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; + virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; - UBool Resize(int32_t appendCapacity, int32_t length); void SetNotOk() { buffer_ = NULL; capacity_ = 0; } - static uint8_t lastResortByte_; // last-resort return value from LastByte() - - char *ownedBuffer_; char *buffer_; int32_t capacity_; int32_t appended_; - UBool fill_; - UBool grow_; + +private: + SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented + SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented }; -uint8_t SortKeyByteSink::lastResortByte_ = 0; - -SortKeyByteSink::~SortKeyByteSink() { - uprv_free(ownedBuffer_); -} - void SortKeyByteSink::Append(const char *bytes, int32_t n) { - if (n <= 0) { + if (n <= 0 || bytes == NULL) { return; } int32_t length = appended_; @@ -4369,37 +4338,12 @@ SortKeyByteSink::Append(const char *bytes, int32_t n) { if ((buffer_ + length) == bytes) { return; // the caller used GetAppendBuffer() and wrote the bytes already } - if (buffer_ == NULL) { - return; // allocation failed before already - } int32_t available = capacity_ - length; - if (bytes == NULL) { - // assume that the caller failed to allocate memory - if (fill_) { - if (n > available) { - n = available; - } - uprv_memset(buffer_, 0, n); - } - SetNotOk(); // propagate the out-of-memory error - return; + if (n <= available) { + uprv_memcpy(buffer_ + length, bytes, n); + } else { + AppendBeyondCapacity(bytes, n, length); } - if (n > available) { - if (fill_ && available > 0) { - // Fill the original buffer completely. - uprv_memcpy(buffer_ + length, bytes, available); - bytes += available; - length += available; - n -= available; - available = 0; - } - fill_ = FALSE; - if (!Resize(n, length)) { - SetNotOk(); - return; - } - } - uprv_memcpy(buffer_ + length, bytes, n); } char * @@ -4425,53 +4369,142 @@ SortKeyByteSink::GetAppendBuffer(int32_t min_capacity, } } +class FixedSortKeyByteSink : public SortKeyByteSink { +public: + FixedSortKeyByteSink(char *dest, int32_t destCapacity) + : SortKeyByteSink(dest, destCapacity) {} + +private: + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length); + virtual UBool Resize(int32_t appendCapacity, int32_t length); +}; + +void +FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) { + // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ + // Fill the buffer completely. + int32_t available = capacity_ - length; + if (available > 0) { + uprv_memcpy(buffer_ + length, bytes, available); + } +} + UBool -SortKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { - if (!grow_) { - return FALSE; +FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) { + return FALSE; +} + +class CollationKeyByteSink : public SortKeyByteSink { +public: + CollationKeyByteSink(CollationKey &key) + : SortKeyByteSink(reinterpret_cast(key.getBytes()), key.getCapacity()), + key_(key) {} + +private: + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length); + virtual UBool Resize(int32_t appendCapacity, int32_t length); + + CollationKey &key_; +}; + +void +CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) { + // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ + if (Resize(n, length)) { + uprv_memcpy(buffer_ + length, bytes, n); + } +} + +UBool +CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { + if (buffer_ == NULL) { + return FALSE; // allocation failed before already } int32_t newCapacity = 2 * capacity_; int32_t altCapacity = length + 2 * appendCapacity; if (newCapacity < altCapacity) { newCapacity = altCapacity; } - if (newCapacity < 1024) { - newCapacity = 1024; + if (newCapacity < 200) { + newCapacity = 200; } - char *newBuffer = (char *)uprv_malloc(newCapacity); + uint8_t *newBuffer = key_.reallocate(newCapacity, length); if (newBuffer == NULL) { + SetNotOk(); return FALSE; } - uprv_memcpy(newBuffer, buffer_, length); - uprv_free(ownedBuffer_); - ownedBuffer_ = buffer_ = newBuffer; + buffer_ = reinterpret_cast(newBuffer); capacity_ = newCapacity; return TRUE; } -uint8_t * -SortKeyByteSink::OrphanUnsignedBuffer(int32_t &orphanedCapacity) { - if (buffer_ == NULL || appended_ == 0) { - orphanedCapacity = 0; - return NULL; +/** + * uint8_t byte buffer, similar to CharString but simpler. + */ +class SortKeyLevel : public UMemory { +public: + SortKeyLevel() : len(0), ok(TRUE) {} + ~SortKeyLevel() {} + + /** @return FALSE if memory allocation failed */ + UBool isOk() const { return ok; } + UBool isEmpty() const { return len == 0; } + int32_t length() const { return len; } + const uint8_t *data() const { return buffer.getAlias(); } + uint8_t operator[](int32_t index) const { return buffer[index]; } + + void appendByte(uint32_t b); + + void appendTo(ByteSink &sink) const { + sink.Append(reinterpret_cast(buffer.getAlias()), len); } - if (ownedBuffer_ != NULL) { - // orphan & forget the ownedBuffer_ - uint8_t *returnBuffer = reinterpret_cast(ownedBuffer_); - ownedBuffer_ = buffer_ = NULL; - orphanedCapacity = capacity_; - capacity_ = appended_ = 0; - return returnBuffer; + + uint8_t &lastByte() { + U_ASSERT(len > 0); + return buffer[len - 1]; } - // clone the buffer_ - uint8_t *newBuffer = (uint8_t *)uprv_malloc(appended_); - if (newBuffer == NULL) { - orphanedCapacity = 0; - return NULL; + + uint8_t *getLastFewBytes(int32_t n) { + if (ok && len >= n) { + return buffer.getAlias() + len - n; + } else { + return NULL; + } } - uprv_memcpy(newBuffer, buffer_, appended_); - orphanedCapacity = appended_; - return newBuffer; + +private: + MaybeStackArray buffer; + int32_t len; + UBool ok; + + UBool ensureCapacity(int32_t appendCapacity); + + SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class + SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class +}; + +void SortKeyLevel::appendByte(uint32_t b) { + if(len < buffer.getCapacity() || ensureCapacity(1)) { + buffer[len++] = (uint8_t)b; + } +} + +UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) { + if(!ok) { + return FALSE; + } + int32_t newCapacity = 2 * buffer.getCapacity(); + int32_t altCapacity = len + 2 * appendCapacity; + if (newCapacity < altCapacity) { + newCapacity = altCapacity; + } + if (newCapacity < 200) { + newCapacity = 200; + } + if(buffer.resize(newCapacity, len)==NULL) { + return ok = FALSE; + } + return TRUE; } U_NAMESPACE_END @@ -4507,33 +4540,31 @@ ucol_getSortKey(const UCollator *coll, /*ucol_calcSortKey(...);*/ /*ucol_calcSortKeySimpleTertiary(...);*/ - SortKeyByteSink sink(reinterpret_cast(result), resultLength, - SortKeyByteSink::FILL_ORIGINAL_BUFFER | SortKeyByteSink::DONT_GROW); + uint8_t noDest[1] = { 0 }; + if(result == NULL) { + // Distinguish pure preflighting from an allocation error. + result = noDest; + resultLength = 0; + } + FixedSortKeyByteSink sink(reinterpret_cast(result), resultLength); coll->sortKeyGen(coll, source, sourceLength, sink, &status); - keySize = sink.NumberOfBytesAppended(); + if(U_SUCCESS(status)) { + keySize = sink.NumberOfBytesAppended(); + } } UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); UTRACE_EXIT_STATUS(status); return keySize; } -/* this function is called by the C++ API for sortkey generation */ U_CFUNC int32_t -ucol_getSortKeyWithAllocation(const UCollator *coll, - const UChar *source, int32_t sourceLength, - uint8_t *&result, int32_t &resultCapacity, - UErrorCode *pErrorCode) { - SortKeyByteSink sink(reinterpret_cast(result), resultCapacity); - coll->sortKeyGen(coll, source, sourceLength, sink, pErrorCode); - int32_t resultLen = sink.NumberOfBytesAppended(); - if (U_SUCCESS(*pErrorCode)) { - if (!sink.IsOk()) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - } else if (result != sink.GetUnsignedBuffer()) { - result = sink.OrphanUnsignedBuffer(resultCapacity); - } - } - return resultLen; +ucol_getCollationKey(const UCollator *coll, + const UChar *source, int32_t sourceLength, + CollationKey &key, + UErrorCode &errorCode) { + CollationKeyByteSink sink(key); + coll->sortKeyGen(coll, source, sourceLength, sink, &errorCode); + return sink.NumberOfBytesAppended(); } // Is this primary weight compressible? @@ -4545,16 +4576,16 @@ isCompressible(const UCollator * /*coll*/, uint8_t primary1) { } static -inline void doCaseShift(SortKeyByteSink &cases, uint32_t &caseShift) { +inline void doCaseShift(SortKeyLevel &cases, uint32_t &caseShift) { if (caseShift == 0) { - cases.Append(UCOL_CASE_BYTE_START); + cases.appendByte(UCOL_CASE_BYTE_START); caseShift = UCOL_CASE_SHIFT_START; } } // Packs the secondary buffer when processing French locale. static void -packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) { +packFrench(const uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) { secondaries += secsize; // We read the secondary-level bytes back to front. uint8_t secondary; int32_t count2 = 0; @@ -4569,16 +4600,16 @@ packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) { if (count2 > 0) { if (secondary > UCOL_COMMON2) { // not necessary for 4th level. while (count2 > UCOL_TOP_COUNT2) { - result.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); + result.Append(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); count2 -= (uint32_t)UCOL_TOP_COUNT2; } - result.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); + result.Append(UCOL_COMMON_TOP2 - (count2-1)); } else { while (count2 > UCOL_BOT_COUNT2) { - result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + result.Append(UCOL_COMMON_BOT2 + (count2-1)); } count2 = 0; } @@ -4587,10 +4618,10 @@ packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) { } if (count2 > 0) { while (count2 > UCOL_BOT_COUNT2) { - result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + result.Append(UCOL_COMMON_BOT2 + (count2-1)); } } @@ -4608,15 +4639,11 @@ ucol_calcSortKey(const UCollator *coll, return; } - /* Stack allocated buffers for buffers we use */ - char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER]; - char caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER]; - SortKeyByteSink &primaries = result; - SortKeyByteSink secondaries(second, LENGTHOF(second)); - SortKeyByteSink tertiaries(tert, LENGTHOF(tert)); - SortKeyByteSink cases(caseB, LENGTHOF(caseB)); - SortKeyByteSink quads(quad, LENGTHOF(quad)); + SortKeyLevel secondaries; + SortKeyLevel tertiaries; + SortKeyLevel cases; + SortKeyLevel quads; UnicodeString normSource; @@ -4735,19 +4762,19 @@ ucol_calcSortKey(const UCollator *coll, if(compareQuad == 0) { if(count4 > 0) { while (count4 > UCOL_BOT_COUNT4) { - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); + quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); count4 -= UCOL_BOT_COUNT4; } - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); + quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); count4 = 0; } /* We are dealing with a variable and we're treating them as shifted */ /* This is a shifted ignorable */ if(primary1 != 0) { /* we need to check this since we could be in continuation */ - quads.Append(primary1); + quads.appendByte(primary1); } if(primary2 != 0) { - quads.Append(primary2); + quads.appendByte(primary2); } } wasShifted = TRUE; @@ -4762,7 +4789,7 @@ ucol_calcSortKey(const UCollator *coll, primaries.Append(primary2); } else { if(leadPrimary != 0) { - primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN)); + primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); } if(primary2 == UCOL_IGNORABLE) { /* one byter, not compressed */ @@ -4794,20 +4821,20 @@ ucol_calcSortKey(const UCollator *coll, if (count2 > 0) { if (secondary > UCOL_COMMON2) { // not necessary for 4th level. while (count2 > UCOL_TOP_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); + secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); count2 -= (uint32_t)UCOL_TOP_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); + secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1)); } else { while (count2 > UCOL_BOT_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); } count2 = 0; } - secondaries.Append(secondary); + secondaries.appendByte(secondary); } } else { /* Do the special handling for French secondaries */ @@ -4815,7 +4842,7 @@ ucol_calcSortKey(const UCollator *coll, /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */ if(notIsContinuation) { if (lastSecondaryLength > 1) { - uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength); + uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength); if (frenchStartPtr != NULL) { /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1; @@ -4826,7 +4853,7 @@ ucol_calcSortKey(const UCollator *coll, } else { ++lastSecondaryLength; } - secondaries.Append(secondary); + secondaries.appendByte(secondary); } } @@ -4841,21 +4868,21 @@ ucol_calcSortKey(const UCollator *coll, if(tertiary != 0) { if(coll->caseFirst == UCOL_UPPER_FIRST) { if((caseBits & 0xC0) == 0) { - cases.LastByte() |= 1 << (--caseShift); + cases.lastByte() |= 1 << (--caseShift); } else { - cases.LastByte() |= 0 << (--caseShift); + cases.lastByte() |= 0 << (--caseShift); /* second bit */ doCaseShift(cases, caseShift); - cases.LastByte() |= ((caseBits>>6)&1) << (--caseShift); + cases.lastByte() |= ((caseBits>>6)&1) << (--caseShift); } } else { if((caseBits & 0xC0) == 0) { - cases.LastByte() |= 0 << (--caseShift); + cases.lastByte() |= 0 << (--caseShift); } else { - cases.LastByte() |= 1 << (--caseShift); + cases.lastByte() |= 1 << (--caseShift); /* second bit */ doCaseShift(cases, caseShift); - cases.LastByte() |= ((caseBits>>7)&1) << (--caseShift); + cases.lastByte() |= ((caseBits>>7)&1) << (--caseShift); } } } @@ -4881,20 +4908,20 @@ ucol_calcSortKey(const UCollator *coll, if (count3 > 0) { if ((tertiary > tertiaryCommon)) { while (count3 > coll->tertiaryTopCount) { - tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); + tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount); count3 -= (uint32_t)coll->tertiaryTopCount; } - tertiaries.Append((uint8_t)(tertiaryTop - (count3-1))); + tertiaries.appendByte(tertiaryTop - (count3-1)); } else { while (count3 > coll->tertiaryBottomCount) { - tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); + tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount); count3 -= (uint32_t)coll->tertiaryBottomCount; } - tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); + tertiaries.appendByte(tertiaryBottom + (count3-1)); } count3 = 0; } - tertiaries.Append(tertiary); + tertiaries.appendByte(tertiary); } } @@ -4902,13 +4929,13 @@ ucol_calcSortKey(const UCollator *coll, if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it if(count4>0) { // Close this part while (count4 > UCOL_BOT_COUNT4) { - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); + quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); count4 -= UCOL_BOT_COUNT4; } - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); + quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); count4 = 0; } - quads.Append(UCOL_HIRAGANA_QUAD); // Add the Hiragana + quads.appendByte(UCOL_HIRAGANA_QUAD); // Add the Hiragana } else { // This wasn't Hiragana, so we can continue adding stuff count4++; } @@ -4919,68 +4946,74 @@ ucol_calcSortKey(const UCollator *coll, /* Here, we are generally done with processing */ /* bailing out would not be too productive */ + UBool ok = TRUE; if(U_SUCCESS(*status)) { /* we have done all the CE's, now let's put them together to form a key */ if(compareSec == 0) { if (count2 > 0) { while (count2 > UCOL_BOT_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); } result.Append(UCOL_LEVELTERMINATOR); - if(!isFrenchSec || !secondaries.IsOk()) { - result.Append(secondaries); + if(!secondaries.isOk()) { + ok = FALSE; + } else if(!isFrenchSec) { + secondaries.appendTo(result); } else { // If there are any unresolved continuation secondaries, // reverse them here so that we can reverse the whole secondary thing. if (lastSecondaryLength > 1) { - uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength); + uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength); if (frenchStartPtr != NULL) { /* reverse secondaries from frenchStartPtr up to frenchEndPtr */ uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1; uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr); } } - packFrench(secondaries.GetUnsignedBuffer(), secondaries.NumberOfBytesAppended(), result); + packFrench(secondaries.data(), secondaries.length(), result); } } if(doCase) { + ok &= cases.isOk(); result.Append(UCOL_LEVELTERMINATOR); - result.Append(cases); + cases.appendTo(result); } if(compareTer == 0) { if (count3 > 0) { if (coll->tertiaryCommon != UCOL_COMMON_BOT3) { while (count3 >= coll->tertiaryTopCount) { - tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); + tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount); count3 -= (uint32_t)coll->tertiaryTopCount; } - tertiaries.Append((uint8_t)(tertiaryTop - count3)); + tertiaries.appendByte(tertiaryTop - count3); } else { while (count3 > coll->tertiaryBottomCount) { - tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); + tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount); count3 -= (uint32_t)coll->tertiaryBottomCount; } - tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); + tertiaries.appendByte(tertiaryBottom + (count3-1)); } } + ok &= tertiaries.isOk(); result.Append(UCOL_LEVELTERMINATOR); - result.Append(tertiaries); + tertiaries.appendTo(result); if(compareQuad == 0/*qShifted == TRUE*/) { if(count4 > 0) { while (count4 > UCOL_BOT_COUNT4) { - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)); + quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); count4 -= UCOL_BOT_COUNT4; } - quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1))); + quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); } + ok &= quads.isOk(); result.Append(UCOL_LEVELTERMINATOR); - result.Append(quads); + quads.appendTo(result); } if(compareIdent) { @@ -4993,6 +5026,9 @@ ucol_calcSortKey(const UCollator *coll, /* To avoid memory leak, free the offset buffer if necessary. */ ucol_freeOffsetBuffer(&s); + + ok &= result.IsOk(); + if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; } } @@ -5009,12 +5045,9 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, return; } - /* Stack allocated buffers for buffers we use */ - char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER]; - SortKeyByteSink &primaries = result; - SortKeyByteSink secondaries(second, LENGTHOF(second)); - SortKeyByteSink tertiaries(tert, LENGTHOF(tert)); + SortKeyLevel secondaries; + SortKeyLevel tertiaries; UnicodeString normSource; @@ -5096,7 +5129,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, primaries.Append(primary2); } else { if(leadPrimary != 0) { - primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN)); + primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); } if(primary2 == UCOL_IGNORABLE) { /* one byter, not compressed */ @@ -5127,20 +5160,20 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, if (count2 > 0) { if (secondary > UCOL_COMMON2) { // not necessary for 4th level. while (count2 > UCOL_TOP_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2)); + secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); count2 -= (uint32_t)UCOL_TOP_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1))); + secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1)); } else { while (count2 > UCOL_BOT_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); } count2 = 0; } - secondaries.Append(secondary); + secondaries.appendByte(secondary); } } @@ -5162,53 +5195,56 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, if (count3 > 0) { if ((tertiary > tertiaryCommon)) { while (count3 > coll->tertiaryTopCount) { - tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); + tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount); count3 -= (uint32_t)coll->tertiaryTopCount; } - tertiaries.Append((uint8_t)(tertiaryTop - (count3-1))); + tertiaries.appendByte(tertiaryTop - (count3-1)); } else { while (count3 > coll->tertiaryBottomCount) { - tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); + tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount); count3 -= (uint32_t)coll->tertiaryBottomCount; } - tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); + tertiaries.appendByte(tertiaryBottom + (count3-1)); } count3 = 0; } - tertiaries.Append(tertiary); + tertiaries.appendByte(tertiary); } } } + UBool ok = TRUE; if(U_SUCCESS(*status)) { /* we have done all the CE's, now let's put them together to form a key */ if (count2 > 0) { while (count2 > UCOL_BOT_COUNT2) { - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2)); + secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); count2 -= (uint32_t)UCOL_BOT_COUNT2; } - secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1))); + secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); } + ok &= secondaries.isOk(); result.Append(UCOL_LEVELTERMINATOR); - result.Append(secondaries); + secondaries.appendTo(result); if (count3 > 0) { if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) { while (count3 >= coll->tertiaryTopCount) { - tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount)); + tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount); count3 -= (uint32_t)coll->tertiaryTopCount; } - tertiaries.Append((uint8_t)(tertiaryTop - count3)); + tertiaries.appendByte(tertiaryTop - count3); } else { while (count3 > coll->tertiaryBottomCount) { - tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount)); + tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount); count3 -= (uint32_t)coll->tertiaryBottomCount; } - tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1))); + tertiaries.appendByte(tertiaryBottom + (count3-1)); } } + ok &= tertiaries.isOk(); result.Append(UCOL_LEVELTERMINATOR); - result.Append(tertiaries); + tertiaries.appendTo(result); result.Append(0); } @@ -5216,9 +5252,8 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, /* To avoid memory leak, free the offset buffer if necessary. */ ucol_freeOffsetBuffer(&s); - if (U_SUCCESS(*status) && !result.IsOk()) { - *status = U_BUFFER_OVERFLOW_ERROR; - } + ok &= result.IsOk(); + if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; } } static inline diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h index b54ed04be4..301188accf 100644 --- a/icu4c/source/i18n/ucol_imp.h +++ b/icu4c/source/i18n/ucol_imp.h @@ -205,21 +205,6 @@ /* if it is too small, heap allocation will occur.*/ /* you can change this value if you need memory - it will affect the performance, though, since we're going to malloc */ #define UCOL_MAX_BUFFER 128 -#define UCOL_PRIMARY_MAX_BUFFER 8*UCOL_MAX_BUFFER -#define UCOL_SECONDARY_MAX_BUFFER UCOL_MAX_BUFFER -#define UCOL_TERTIARY_MAX_BUFFER UCOL_MAX_BUFFER -/* -#define UCOL_CASE_MAX_BUFFER UCOL_MAX_BUFFER/4 - -UCOL_CASE_MAX_BUFFER as previously defined above was too small. A single collation element can -generate two caseShift values, and UCOL_CASE_SHIFT_START (=7) caseShift values are compressed into -one byte. UCOL_MAX_BUFFER should effectively be multipled by 2/UCOL_CASE_SHIFT_START (2/7), not 1/4. -Perhaps UCOL_CASE_SHIFT_START used to be 8; then this would have been correct. We should dynamically -define UCOL_CASE_MAX_BUFFER in terms of both UCOL_MAX_BUFFER and UCOL_CASE_SHIFT_START. Since -UCOL_CASE_SHIFT_START is defined lower down, we move the real definition of UCOL_CASE_MAX_BUFFER -after it, further down. -*/ -#define UCOL_QUAD_MAX_BUFFER 2*UCOL_MAX_BUFFER #define UCOL_NORMALIZATION_GROWTH 2 #define UCOL_NORMALIZATION_MAX_BUFFER UCOL_MAX_BUFFER*UCOL_NORMALIZATION_GROWTH @@ -423,15 +408,6 @@ uprv_init_pce(const struct UCollationElements *elems); #define UCOL_CASE_BYTE_START 0x80 #define UCOL_CASE_SHIFT_START 7 -/* -The definition of UCOL_CASE_MAX_BUFFER is moved down here so it can use UCOL_CASE_SHIFT_START. - -A single collation element can generate two caseShift values, and UCOL_CASE_SHIFT_START caseShift -values are compressed into one byte. The UCOL_CASE_MAX_BUFFER should effectively be UCOL_MAX_BUFFER -multipled by 2/UCOL_CASE_SHIFT_START, with suitable rounding up. -*/ -#define UCOL_CASE_MAX_BUFFER (((2*UCOL_MAX_BUFFER) + UCOL_CASE_SHIFT_START - 1)/UCOL_CASE_SHIFT_START) - #define UCOL_IGNORABLE 0 /* get weights from a CE */ @@ -555,16 +531,17 @@ void *ucol_getABuffer(const UCollator *coll, uint32_t size); U_NAMESPACE_BEGIN +class CollationKey; class SortKeyByteSink; U_NAMESPACE_END /* function used by C++ getCollationKey to prevent restarting the calculation */ U_CFUNC int32_t -ucol_getSortKeyWithAllocation(const UCollator *coll, - const UChar *source, int32_t sourceLength, - uint8_t *&result, int32_t &resultCapacity, - UErrorCode *pErrorCode); +ucol_getCollationKey(const UCollator *coll, + const UChar *source, int32_t sourceLength, + icu::CollationKey &key, + UErrorCode &errorCode); typedef void U_CALLCONV SortKeyGenerator(const UCollator *coll, diff --git a/icu4c/source/i18n/unicode/sortkey.h b/icu4c/source/i18n/unicode/sortkey.h index 17b651173b..45c9002ed5 100644 --- a/icu4c/source/i18n/unicode/sortkey.h +++ b/icu4c/source/i18n/unicode/sortkey.h @@ -1,6 +1,6 @@ /* ***************************************************************************** - * Copyright (C) 1996-2011, International Business Machines Corporation and others. + * Copyright (C) 1996-2012, International Business Machines Corporation and others. * All Rights Reserved. ***************************************************************************** * @@ -241,30 +241,27 @@ public: private: /** - * Returns an array of the collation key values as 16-bit integers. - * The caller owns the storage and must delete it. - * @param values Output param of the collation key values. - * @param capacity Size of the values array. - * @param count output parameter of the number of collation key values - * @return a pointer to an array of 16-bit collation key values. - */ - void adopt(uint8_t *values, int32_t capacity, int32_t count); + * Replaces the current bytes buffer with a new one of newCapacity + * and copies length bytes from the old buffer to the new one. + * @return the new buffer, or NULL if the allocation failed + */ + uint8_t *reallocate(int32_t newCapacity, int32_t length); /** * Set a new length for a new sort key in the existing fBytes. */ void setLength(int32_t newLength); - /* - * Creates a collation key with a string. - */ + uint8_t *getBytes() { + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; + } + const uint8_t *getBytes() const { + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; + } + int32_t getCapacity() const { + return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity; + } + int32_t getLength() const { return fFlagAndLength & 0x7fffffff; } - /** - * If this CollationKey has capacity less than newSize, - * its internal capacity will be increased to newSize. - * @param newSize minimum size this CollationKey has to have - * @return this CollationKey - */ - CollationKey& ensureCapacity(int32_t newSize); /** * Set the CollationKey to a "bogus" or invalid state * @return this CollationKey @@ -275,33 +272,42 @@ private: * @return this CollationKey */ CollationKey& reset(void); - + /** * Allow private access to RuleBasedCollator */ friend class RuleBasedCollator; - /** - * Bogus status - */ - UBool fBogus; - /** - * Size of fBytes used to store the sortkey. i.e. up till the - * null-termination. - */ - int32_t fCount; - /** - * Full size of the fBytes - */ - int32_t fCapacity; - /** - * Unique hash value of this CollationKey - */ - int32_t fHashCode; - /** - * Array to store the sortkey - */ - uint8_t* fBytes; + friend class CollationKeyByteSink; + // Class fields. sizeof(CollationKey) is intended to be 48 bytes + // on a machine with 64-bit pointers. + // We use a union to maximize the size of the internal buffer, + // similar to UnicodeString but not as tight and complex. + + // (implicit) *vtable; + /** + * Sort key length and flag. + * Bit 31 is set if the buffer is heap-allocated. + * Bits 30..0 contain the sort key length. + */ + int32_t fFlagAndLength; + /** + * Unique hash value of this CollationKey. + * Special value 2 if the key is bogus. + */ + mutable int32_t fHashCode; + /** + * fUnion provides 32 bytes for the internal buffer or for + * pointer+capacity. + */ + union StackBufferOrFields { + /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */ + uint8_t fStackBuffer[32]; + struct { + uint8_t *fBytes; + int32_t fCapacity; + } fFields; + } fUnion; }; inline UBool @@ -313,14 +319,14 @@ CollationKey::operator!=(const CollationKey& other) const inline UBool CollationKey::isBogus() const { - return fBogus; + return fHashCode == 2; // kBogusHashCode } inline const uint8_t* CollationKey::getByteArray(int32_t &count) const { - count = fCount; - return fBytes; + count = getLength(); + return getBytes(); } U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/apicoll.cpp b/icu4c/source/test/intltest/apicoll.cpp index b5dbafaf7c..2b35d2b21f 100644 --- a/icu4c/source/test/intltest/apicoll.cpp +++ b/icu4c/source/test/intltest/apicoll.cpp @@ -559,7 +559,7 @@ CollationAPITest::TestCollationKey(/* char* par */) // bogus key returned here key1Status = U_ILLEGAL_ARGUMENT_ERROR; col->getCollationKey(NULL, 0, sortk1, key1Status); - doAssert(sortk1.getByteArray(length) == NULL && length == 0, + doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0, "Error code should return bogus collation key"); key1Status = U_ZERO_ERROR;