ICU-9556 CollationKey with internal array; cleaner more understandable ByteSink code and usage in calcSortKey
X-SVN-Rev: 32408
This commit is contained in:
parent
d455b9984e
commit
44d515a063
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Copyright (c) 2001-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -61,9 +61,9 @@ public:
|
||||
*/
|
||||
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
|
||||
|
||||
UBool isEmpty() { return len==0; }
|
||||
UBool isEmpty() const { return len==0; }
|
||||
int32_t length() const { return len; }
|
||||
char operator[] (int32_t index) const { return buffer[index]; }
|
||||
char operator[](int32_t index) const { return buffer[index]; }
|
||||
StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
|
||||
|
||||
const char *data() const { return buffer.getAlias(); }
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
//===============================================================================
|
||||
@ -38,77 +38,80 @@
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// A hash code of kInvalidHashCode indicates that the has code needs
|
||||
// A hash code of kInvalidHashCode indicates that the hash code needs
|
||||
// to be computed. A hash code of kEmptyHashCode is used for empty keys
|
||||
// and for any key whose computed hash code is kInvalidHashCode.
|
||||
#define kInvalidHashCode ((int32_t)0)
|
||||
#define kEmptyHashCode ((int32_t)1)
|
||||
static const int32_t kInvalidHashCode = 0;
|
||||
static const int32_t kEmptyHashCode = 1;
|
||||
// The "bogus hash code" replaces a separate fBogus flag.
|
||||
static const int32_t kBogusHashCode = 2;
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
|
||||
|
||||
CollationKey::CollationKey()
|
||||
: UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
|
||||
fHashCode(kEmptyHashCode), fBytes(NULL)
|
||||
: UObject(), fFlagAndLength(0),
|
||||
fHashCode(kEmptyHashCode)
|
||||
{
|
||||
}
|
||||
|
||||
// Create a collation key from a byte array.
|
||||
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
|
||||
: UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
|
||||
: UObject(), fFlagAndLength(count),
|
||||
fHashCode(kInvalidHashCode)
|
||||
{
|
||||
fBytes = (uint8_t *)uprv_malloc(count);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
if (count < 0 || (newValues == NULL && count != 0) ||
|
||||
(count > getCapacity() && reallocate(count, 0) == NULL)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy(fBytes, newValues, fCount);
|
||||
if (count > 0) {
|
||||
uprv_memcpy(getBytes(), newValues, count);
|
||||
}
|
||||
}
|
||||
|
||||
CollationKey::CollationKey(const CollationKey& other)
|
||||
: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
|
||||
fHashCode(other.fHashCode), fBytes(NULL)
|
||||
: UObject(other), fFlagAndLength(other.getLength()),
|
||||
fHashCode(other.fHashCode)
|
||||
{
|
||||
if (other.fBogus)
|
||||
if (other.isBogus())
|
||||
{
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
fBytes = (uint8_t *)uprv_malloc(fCapacity);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
int32_t length = fFlagAndLength;
|
||||
if (length > getCapacity() && reallocate(length, 0) == NULL) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy(fBytes, other.fBytes, other.fCount);
|
||||
if(fCapacity>fCount) {
|
||||
uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
|
||||
if (length > 0) {
|
||||
uprv_memcpy(getBytes(), other.getBytes(), length);
|
||||
}
|
||||
}
|
||||
|
||||
CollationKey::~CollationKey()
|
||||
{
|
||||
uprv_free(fBytes);
|
||||
if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
|
||||
}
|
||||
|
||||
void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
|
||||
if(fBytes != NULL) {
|
||||
uprv_free(fBytes);
|
||||
uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
|
||||
uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
|
||||
if(newBytes == NULL) { return NULL; }
|
||||
if(length > 0) {
|
||||
uprv_memcpy(newBytes, getBytes(), length);
|
||||
}
|
||||
fBytes = values;
|
||||
fCapacity = capacity;
|
||||
setLength(count);
|
||||
if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
|
||||
fUnion.fFields.fBytes = newBytes;
|
||||
fUnion.fFields.fCapacity = newCapacity;
|
||||
fFlagAndLength |= 0x80000000;
|
||||
return newBytes;
|
||||
}
|
||||
|
||||
void CollationKey::setLength(int32_t newLength) {
|
||||
fBogus = FALSE;
|
||||
fCount = newLength;
|
||||
// U_ASSERT(newLength >= 0 && newLength <= getCapacity());
|
||||
fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
|
||||
fHashCode = kInvalidHashCode;
|
||||
}
|
||||
|
||||
@ -116,8 +119,7 @@ void CollationKey::setLength(int32_t newLength) {
|
||||
CollationKey&
|
||||
CollationKey::reset()
|
||||
{
|
||||
fCount = 0;
|
||||
fBogus = FALSE;
|
||||
fFlagAndLength &= 0x80000000;
|
||||
fHashCode = kEmptyHashCode;
|
||||
|
||||
return *this;
|
||||
@ -127,12 +129,8 @@ CollationKey::reset()
|
||||
CollationKey&
|
||||
CollationKey::setToBogus()
|
||||
{
|
||||
uprv_free(fBytes);
|
||||
fBytes = NULL;
|
||||
|
||||
fCapacity = 0;
|
||||
fCount = 0;
|
||||
fHashCode = kInvalidHashCode;
|
||||
fFlagAndLength &= 0x80000000;
|
||||
fHashCode = kBogusHashCode;
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -140,9 +138,9 @@ CollationKey::setToBogus()
|
||||
UBool
|
||||
CollationKey::operator==(const CollationKey& source) const
|
||||
{
|
||||
return (this->fCount == source.fCount &&
|
||||
(this->fBytes == source.fBytes ||
|
||||
uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
|
||||
return getLength() == source.getLength() &&
|
||||
(this == &source ||
|
||||
uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
|
||||
}
|
||||
|
||||
const CollationKey&
|
||||
@ -155,106 +153,26 @@ CollationKey::operator=(const CollationKey& other)
|
||||
return setToBogus();
|
||||
}
|
||||
|
||||
if (other.fBytes != NULL)
|
||||
{
|
||||
ensureCapacity(other.fCount);
|
||||
|
||||
if (isBogus())
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
fHashCode = other.fHashCode;
|
||||
uprv_memcpy(fBytes, other.fBytes, fCount);
|
||||
int32_t length = other.getLength();
|
||||
if (length > getCapacity() && reallocate(length, 0) == NULL) {
|
||||
return setToBogus();
|
||||
}
|
||||
else
|
||||
{
|
||||
fCount = 0;
|
||||
fBogus = FALSE;
|
||||
fHashCode = kEmptyHashCode;
|
||||
if (length > 0) {
|
||||
uprv_memcpy(getBytes(), other.getBytes(), length);
|
||||
}
|
||||
fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
|
||||
fHashCode = other.fHashCode;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Bitwise comparison for the collation keys.
|
||||
// NOTE: this is somewhat messy 'cause we can't count
|
||||
// on memcmp returning the exact values which match
|
||||
// Collator::EComparisonResult
|
||||
Collator::EComparisonResult
|
||||
CollationKey::compareTo(const CollationKey& target) const
|
||||
{
|
||||
uint8_t *src = this->fBytes;
|
||||
uint8_t *tgt = target.fBytes;
|
||||
|
||||
// are we comparing the same string
|
||||
if (src == tgt)
|
||||
return Collator::EQUAL;
|
||||
|
||||
/*
|
||||
int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
|
||||
if (count == 0)
|
||||
{
|
||||
// If count is 0, at least one of the keys is empty.
|
||||
// An empty key is always LESS than a non-empty one
|
||||
// and EQUAL to another empty
|
||||
if (this->fCount < target.fCount)
|
||||
{
|
||||
return Collator::LESS;
|
||||
}
|
||||
|
||||
if (this->fCount > target.fCount)
|
||||
{
|
||||
return Collator::GREATER;
|
||||
}
|
||||
return Collator::EQUAL;
|
||||
}
|
||||
*/
|
||||
|
||||
int minLength;
|
||||
Collator::EComparisonResult result;
|
||||
|
||||
// are we comparing different lengths?
|
||||
if (this->fCount != target.fCount) {
|
||||
if (this->fCount < target.fCount) {
|
||||
minLength = this->fCount;
|
||||
result = Collator::LESS;
|
||||
}
|
||||
else {
|
||||
minLength = target.fCount;
|
||||
result = Collator::GREATER;
|
||||
}
|
||||
}
|
||||
else {
|
||||
minLength = target.fCount;
|
||||
result = Collator::EQUAL;
|
||||
}
|
||||
|
||||
if (minLength > 0) {
|
||||
int diff = uprv_memcmp(src, tgt, minLength);
|
||||
if (diff > 0) {
|
||||
return Collator::GREATER;
|
||||
}
|
||||
else
|
||||
if (diff < 0) {
|
||||
return Collator::LESS;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
/*
|
||||
if (result < 0)
|
||||
{
|
||||
return Collator::LESS;
|
||||
}
|
||||
|
||||
if (result > 0)
|
||||
{
|
||||
return Collator::GREATER;
|
||||
}
|
||||
return Collator::EQUAL;
|
||||
*/
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
|
||||
}
|
||||
|
||||
// Bitwise comparison for the collation keys.
|
||||
@ -262,30 +180,25 @@ UCollationResult
|
||||
CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
|
||||
{
|
||||
if(U_SUCCESS(status)) {
|
||||
uint8_t *src = this->fBytes;
|
||||
uint8_t *tgt = target.fBytes;
|
||||
const uint8_t *src = getBytes();
|
||||
const uint8_t *tgt = target.getBytes();
|
||||
|
||||
// are we comparing the same string
|
||||
if (src == tgt)
|
||||
return UCOL_EQUAL;
|
||||
|
||||
int minLength;
|
||||
UCollationResult result;
|
||||
|
||||
// are we comparing different lengths?
|
||||
if (this->fCount != target.fCount) {
|
||||
if (this->fCount < target.fCount) {
|
||||
minLength = this->fCount;
|
||||
result = UCOL_LESS;
|
||||
}
|
||||
else {
|
||||
minLength = target.fCount;
|
||||
result = UCOL_GREATER;
|
||||
}
|
||||
}
|
||||
else {
|
||||
minLength = target.fCount;
|
||||
result = UCOL_EQUAL;
|
||||
int32_t minLength = getLength();
|
||||
int32_t targetLength = target.getLength();
|
||||
if (minLength < targetLength) {
|
||||
result = UCOL_LESS;
|
||||
} else if (minLength == targetLength) {
|
||||
result = UCOL_EQUAL;
|
||||
} else {
|
||||
minLength = targetLength;
|
||||
result = UCOL_GREATER;
|
||||
}
|
||||
|
||||
if (minLength > 0) {
|
||||
@ -305,31 +218,6 @@ CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
|
||||
}
|
||||
}
|
||||
|
||||
CollationKey&
|
||||
CollationKey::ensureCapacity(int32_t newSize)
|
||||
{
|
||||
if (fCapacity < newSize)
|
||||
{
|
||||
uprv_free(fBytes);
|
||||
|
||||
fBytes = (uint8_t *)uprv_malloc(newSize);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
return setToBogus();
|
||||
}
|
||||
|
||||
uprv_memset(fBytes, 0, fCapacity);
|
||||
fCapacity = newSize;
|
||||
}
|
||||
|
||||
fBogus = FALSE;
|
||||
fCount = newSize;
|
||||
fHashCode = kInvalidHashCode;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#ifdef U_USE_COLLATION_KEY_DEPRECATES
|
||||
// Create a copy of the byte array.
|
||||
uint8_t*
|
||||
@ -344,13 +232,30 @@ CollationKey::toByteArray(int32_t& count) const
|
||||
else
|
||||
{
|
||||
count = fCount;
|
||||
uprv_memcpy(result, fBytes, fCount);
|
||||
if (count > 0) {
|
||||
uprv_memcpy(result, fBytes, fCount);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int32_t
|
||||
computeHashCode(const uint8_t *key, int32_t length) {
|
||||
const char *s = reinterpret_cast<const char *>(key);
|
||||
int32_t hash;
|
||||
if (s == NULL || length == 0) {
|
||||
hash = kEmptyHashCode;
|
||||
} else {
|
||||
hash = ustr_hashCharsN(s, length);
|
||||
if (hash == kInvalidHashCode || hash == kBogusHashCode) {
|
||||
hash = kEmptyHashCode;
|
||||
}
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
int32_t
|
||||
CollationKey::hashCode() const
|
||||
{
|
||||
@ -362,33 +267,7 @@ CollationKey::hashCode() const
|
||||
|
||||
if (fHashCode == kInvalidHashCode)
|
||||
{
|
||||
const char *s = reinterpret_cast<const char *>(fBytes);
|
||||
((CollationKey *)this)->fHashCode = s == NULL ? 0 : ustr_hashCharsN(s, fCount);
|
||||
#if 0
|
||||
// We compute the hash by iterating sparsely over 64 (at most) characters
|
||||
// spaced evenly through the string. For each character, we multiply the
|
||||
// previous hash value by a prime number and add the new character in,
|
||||
// in the manner of a additive linear congruential random number generator,
|
||||
// thus producing a pseudorandom deterministic value which should be well
|
||||
// distributed over the output range. [LIU]
|
||||
const uint8_t *p = fBytes, *limit = fBytes + fCount;
|
||||
int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
|
||||
int32_t hash = 0;
|
||||
|
||||
while (p < limit)
|
||||
{
|
||||
hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
|
||||
p += inc;
|
||||
}
|
||||
|
||||
// If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
|
||||
if (hash == kInvalidHashCode)
|
||||
{
|
||||
hash = kEmptyHashCode;
|
||||
}
|
||||
|
||||
((CollationKey *)this)->fHashCode = hash; // cast away const
|
||||
#endif
|
||||
fHashCode = computeHashCode(getBytes(), getLength());
|
||||
}
|
||||
|
||||
return fHashCode;
|
||||
@ -400,8 +279,7 @@ U_CAPI int32_t U_EXPORT2
|
||||
ucol_keyHashCode(const uint8_t *key,
|
||||
int32_t length)
|
||||
{
|
||||
icu::CollationKey newKey(key, length);
|
||||
return newKey.hashCode();
|
||||
return computeHashCode(key, length);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
@ -427,29 +427,11 @@ CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
|
||||
return sortkey.reset();
|
||||
}
|
||||
|
||||
uint8_t *result;
|
||||
int32_t resultCapacity;
|
||||
if (sortkey.fCapacity >= (sourceLen * 3)) {
|
||||
// Try to reuse the CollationKey.fBytes.
|
||||
result = sortkey.fBytes;
|
||||
resultCapacity = sortkey.fCapacity;
|
||||
} else {
|
||||
result = NULL;
|
||||
resultCapacity = 0;
|
||||
}
|
||||
int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
|
||||
result, resultCapacity, &status);
|
||||
int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status);
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
if (result == sortkey.fBytes) {
|
||||
sortkey.setLength(resultLen);
|
||||
} else {
|
||||
sortkey.adopt(result, resultCapacity, resultLen);
|
||||
}
|
||||
sortkey.setLength(resultLen);
|
||||
} else {
|
||||
if (result != sortkey.fBytes) {
|
||||
uprv_free(result);
|
||||
}
|
||||
sortkey.setToBogus();
|
||||
}
|
||||
return sortkey;
|
||||
|
@ -4274,94 +4274,63 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
class SortKeyByteSink : public ByteSink {
|
||||
public:
|
||||
static const uint32_t FILL_ORIGINAL_BUFFER = 1;
|
||||
static const uint32_t DONT_GROW = 2;
|
||||
SortKeyByteSink(char *dest, int32_t destCapacity, uint32_t flags=0)
|
||||
: ownedBuffer_(NULL), buffer_(dest), capacity_(destCapacity),
|
||||
appended_(0),
|
||||
fill_(flags & FILL_ORIGINAL_BUFFER),
|
||||
grow_((flags & DONT_GROW) == 0) {
|
||||
if (buffer_ == NULL || capacity_ < 0) {
|
||||
buffer_ = reinterpret_cast<char *>(&lastResortByte_);
|
||||
SortKeyByteSink(char *dest, int32_t destCapacity)
|
||||
: buffer_(dest), capacity_(destCapacity),
|
||||
appended_(0) {
|
||||
if (buffer_ == NULL) {
|
||||
capacity_ = 0;
|
||||
} else if(capacity_ < 0) {
|
||||
buffer_ = NULL;
|
||||
capacity_ = 0;
|
||||
}
|
||||
}
|
||||
virtual ~SortKeyByteSink();
|
||||
|
||||
virtual void Append(const char *bytes, int32_t n);
|
||||
void Append(const uint8_t *bytes, int32_t n) { Append(reinterpret_cast<const char *>(bytes), n); }
|
||||
void Append(uint8_t b) {
|
||||
if (appended_ < capacity_) {
|
||||
buffer_[appended_++] = (char)b;
|
||||
} else {
|
||||
Append(&b, 1);
|
||||
void Append(uint32_t b) {
|
||||
if (appended_ < capacity_ || Resize(1, appended_)) {
|
||||
buffer_[appended_] = (char)b;
|
||||
}
|
||||
++appended_;
|
||||
}
|
||||
void Append(uint8_t b1, uint8_t b2) {
|
||||
void Append(uint32_t b1, uint32_t b2) {
|
||||
int32_t a2 = appended_ + 2;
|
||||
if (a2 <= capacity_) {
|
||||
if (a2 <= capacity_ || Resize(2, appended_)) {
|
||||
buffer_[appended_] = (char)b1;
|
||||
buffer_[appended_ + 1] = (char)b2;
|
||||
appended_ = a2;
|
||||
} else {
|
||||
char bytes[2] = { (char)b1, (char)b2 };
|
||||
Append(bytes, 2);
|
||||
} else if(appended_ < capacity_) {
|
||||
buffer_[appended_] = (char)b1;
|
||||
}
|
||||
appended_ = a2;
|
||||
}
|
||||
void Append(const SortKeyByteSink &other) { Append(other.buffer_, other.appended_); }
|
||||
virtual char *GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char *scratch, int32_t scratch_capacity,
|
||||
int32_t *result_capacity);
|
||||
int32_t NumberOfBytesAppended() const { return appended_; }
|
||||
uint8_t &LastByte() {
|
||||
if (buffer_ != NULL && appended_ > 0) {
|
||||
return reinterpret_cast<uint8_t *>(buffer_)[appended_ - 1];
|
||||
} else {
|
||||
return lastResortByte_;
|
||||
}
|
||||
}
|
||||
uint8_t *GetLastFewBytes(int32_t n) {
|
||||
if (buffer_ != NULL && appended_ >= n) {
|
||||
return reinterpret_cast<uint8_t *>(buffer_) + appended_ - n;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
char *GetBuffer() { return buffer_; }
|
||||
uint8_t *GetUnsignedBuffer() { return reinterpret_cast<uint8_t *>(buffer_); }
|
||||
uint8_t *OrphanUnsignedBuffer(int32_t &orphanedCapacity);
|
||||
UBool IsOk() const { return buffer_ != NULL; } // otherwise out-of-memory
|
||||
/** @return FALSE if memory allocation failed */
|
||||
UBool IsOk() const { return buffer_ != NULL; }
|
||||
|
||||
private:
|
||||
SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
|
||||
SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
|
||||
protected:
|
||||
virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
|
||||
virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
|
||||
|
||||
UBool Resize(int32_t appendCapacity, int32_t length);
|
||||
void SetNotOk() {
|
||||
buffer_ = NULL;
|
||||
capacity_ = 0;
|
||||
}
|
||||
|
||||
static uint8_t lastResortByte_; // last-resort return value from LastByte()
|
||||
|
||||
char *ownedBuffer_;
|
||||
char *buffer_;
|
||||
int32_t capacity_;
|
||||
int32_t appended_;
|
||||
UBool fill_;
|
||||
UBool grow_;
|
||||
|
||||
private:
|
||||
SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
|
||||
SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
|
||||
};
|
||||
|
||||
uint8_t SortKeyByteSink::lastResortByte_ = 0;
|
||||
|
||||
SortKeyByteSink::~SortKeyByteSink() {
|
||||
uprv_free(ownedBuffer_);
|
||||
}
|
||||
|
||||
void
|
||||
SortKeyByteSink::Append(const char *bytes, int32_t n) {
|
||||
if (n <= 0) {
|
||||
if (n <= 0 || bytes == NULL) {
|
||||
return;
|
||||
}
|
||||
int32_t length = appended_;
|
||||
@ -4369,37 +4338,12 @@ SortKeyByteSink::Append(const char *bytes, int32_t n) {
|
||||
if ((buffer_ + length) == bytes) {
|
||||
return; // the caller used GetAppendBuffer() and wrote the bytes already
|
||||
}
|
||||
if (buffer_ == NULL) {
|
||||
return; // allocation failed before already
|
||||
}
|
||||
int32_t available = capacity_ - length;
|
||||
if (bytes == NULL) {
|
||||
// assume that the caller failed to allocate memory
|
||||
if (fill_) {
|
||||
if (n > available) {
|
||||
n = available;
|
||||
}
|
||||
uprv_memset(buffer_, 0, n);
|
||||
}
|
||||
SetNotOk(); // propagate the out-of-memory error
|
||||
return;
|
||||
if (n <= available) {
|
||||
uprv_memcpy(buffer_ + length, bytes, n);
|
||||
} else {
|
||||
AppendBeyondCapacity(bytes, n, length);
|
||||
}
|
||||
if (n > available) {
|
||||
if (fill_ && available > 0) {
|
||||
// Fill the original buffer completely.
|
||||
uprv_memcpy(buffer_ + length, bytes, available);
|
||||
bytes += available;
|
||||
length += available;
|
||||
n -= available;
|
||||
available = 0;
|
||||
}
|
||||
fill_ = FALSE;
|
||||
if (!Resize(n, length)) {
|
||||
SetNotOk();
|
||||
return;
|
||||
}
|
||||
}
|
||||
uprv_memcpy(buffer_ + length, bytes, n);
|
||||
}
|
||||
|
||||
char *
|
||||
@ -4425,53 +4369,142 @@ SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
|
||||
}
|
||||
}
|
||||
|
||||
class FixedSortKeyByteSink : public SortKeyByteSink {
|
||||
public:
|
||||
FixedSortKeyByteSink(char *dest, int32_t destCapacity)
|
||||
: SortKeyByteSink(dest, destCapacity) {}
|
||||
|
||||
private:
|
||||
virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
|
||||
virtual UBool Resize(int32_t appendCapacity, int32_t length);
|
||||
};
|
||||
|
||||
void
|
||||
FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
|
||||
// buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
|
||||
// Fill the buffer completely.
|
||||
int32_t available = capacity_ - length;
|
||||
if (available > 0) {
|
||||
uprv_memcpy(buffer_ + length, bytes, available);
|
||||
}
|
||||
}
|
||||
|
||||
UBool
|
||||
SortKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
|
||||
if (!grow_) {
|
||||
return FALSE;
|
||||
FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
class CollationKeyByteSink : public SortKeyByteSink {
|
||||
public:
|
||||
CollationKeyByteSink(CollationKey &key)
|
||||
: SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
|
||||
key_(key) {}
|
||||
|
||||
private:
|
||||
virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
|
||||
virtual UBool Resize(int32_t appendCapacity, int32_t length);
|
||||
|
||||
CollationKey &key_;
|
||||
};
|
||||
|
||||
void
|
||||
CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
|
||||
// buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
|
||||
if (Resize(n, length)) {
|
||||
uprv_memcpy(buffer_ + length, bytes, n);
|
||||
}
|
||||
}
|
||||
|
||||
UBool
|
||||
CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
|
||||
if (buffer_ == NULL) {
|
||||
return FALSE; // allocation failed before already
|
||||
}
|
||||
int32_t newCapacity = 2 * capacity_;
|
||||
int32_t altCapacity = length + 2 * appendCapacity;
|
||||
if (newCapacity < altCapacity) {
|
||||
newCapacity = altCapacity;
|
||||
}
|
||||
if (newCapacity < 1024) {
|
||||
newCapacity = 1024;
|
||||
if (newCapacity < 200) {
|
||||
newCapacity = 200;
|
||||
}
|
||||
char *newBuffer = (char *)uprv_malloc(newCapacity);
|
||||
uint8_t *newBuffer = key_.reallocate(newCapacity, length);
|
||||
if (newBuffer == NULL) {
|
||||
SetNotOk();
|
||||
return FALSE;
|
||||
}
|
||||
uprv_memcpy(newBuffer, buffer_, length);
|
||||
uprv_free(ownedBuffer_);
|
||||
ownedBuffer_ = buffer_ = newBuffer;
|
||||
buffer_ = reinterpret_cast<char *>(newBuffer);
|
||||
capacity_ = newCapacity;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
uint8_t *
|
||||
SortKeyByteSink::OrphanUnsignedBuffer(int32_t &orphanedCapacity) {
|
||||
if (buffer_ == NULL || appended_ == 0) {
|
||||
orphanedCapacity = 0;
|
||||
return NULL;
|
||||
/**
|
||||
* uint8_t byte buffer, similar to CharString but simpler.
|
||||
*/
|
||||
class SortKeyLevel : public UMemory {
|
||||
public:
|
||||
SortKeyLevel() : len(0), ok(TRUE) {}
|
||||
~SortKeyLevel() {}
|
||||
|
||||
/** @return FALSE if memory allocation failed */
|
||||
UBool isOk() const { return ok; }
|
||||
UBool isEmpty() const { return len == 0; }
|
||||
int32_t length() const { return len; }
|
||||
const uint8_t *data() const { return buffer.getAlias(); }
|
||||
uint8_t operator[](int32_t index) const { return buffer[index]; }
|
||||
|
||||
void appendByte(uint32_t b);
|
||||
|
||||
void appendTo(ByteSink &sink) const {
|
||||
sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len);
|
||||
}
|
||||
if (ownedBuffer_ != NULL) {
|
||||
// orphan & forget the ownedBuffer_
|
||||
uint8_t *returnBuffer = reinterpret_cast<uint8_t *>(ownedBuffer_);
|
||||
ownedBuffer_ = buffer_ = NULL;
|
||||
orphanedCapacity = capacity_;
|
||||
capacity_ = appended_ = 0;
|
||||
return returnBuffer;
|
||||
|
||||
uint8_t &lastByte() {
|
||||
U_ASSERT(len > 0);
|
||||
return buffer[len - 1];
|
||||
}
|
||||
// clone the buffer_
|
||||
uint8_t *newBuffer = (uint8_t *)uprv_malloc(appended_);
|
||||
if (newBuffer == NULL) {
|
||||
orphanedCapacity = 0;
|
||||
return NULL;
|
||||
|
||||
uint8_t *getLastFewBytes(int32_t n) {
|
||||
if (ok && len >= n) {
|
||||
return buffer.getAlias() + len - n;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
uprv_memcpy(newBuffer, buffer_, appended_);
|
||||
orphanedCapacity = appended_;
|
||||
return newBuffer;
|
||||
|
||||
private:
|
||||
MaybeStackArray<uint8_t, 40> buffer;
|
||||
int32_t len;
|
||||
UBool ok;
|
||||
|
||||
UBool ensureCapacity(int32_t appendCapacity);
|
||||
|
||||
SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
|
||||
SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
void SortKeyLevel::appendByte(uint32_t b) {
|
||||
if(len < buffer.getCapacity() || ensureCapacity(1)) {
|
||||
buffer[len++] = (uint8_t)b;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Enlarges the buffer so that more bytes can be appended.
 * The requested capacity is the largest of: twice the current capacity,
 * len plus twice appendCapacity, and 200.
 *
 * @param appendCapacity number of additional bytes the caller wants to append
 * @return TRUE if the buffer was resized; FALSE if this object was already
 *         marked not-ok or if buffer.resize() returned NULL (in which case
 *         ok is set to FALSE)
 */
UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
    if (!ok) {
        return FALSE;
    }
    const int32_t doubled = 2 * buffer.getCapacity();
    const int32_t needed = len + 2 * appendCapacity;
    int32_t target = (doubled < needed) ? needed : doubled;
    if (target < 200) {
        target = 200;
    }
    // Ask the buffer to resize while keeping the first len bytes.
    if (buffer.resize(target, len) != NULL) {
        return TRUE;
    }
    ok = FALSE;
    return FALSE;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
@ -4507,33 +4540,31 @@ ucol_getSortKey(const UCollator *coll,
|
||||
/*ucol_calcSortKey(...);*/
|
||||
/*ucol_calcSortKeySimpleTertiary(...);*/
|
||||
|
||||
SortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength,
|
||||
SortKeyByteSink::FILL_ORIGINAL_BUFFER | SortKeyByteSink::DONT_GROW);
|
||||
uint8_t noDest[1] = { 0 };
|
||||
if(result == NULL) {
|
||||
// Distinguish pure preflighting from an allocation error.
|
||||
result = noDest;
|
||||
resultLength = 0;
|
||||
}
|
||||
FixedSortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength);
|
||||
coll->sortKeyGen(coll, source, sourceLength, sink, &status);
|
||||
keySize = sink.NumberOfBytesAppended();
|
||||
if(U_SUCCESS(status)) {
|
||||
keySize = sink.NumberOfBytesAppended();
|
||||
}
|
||||
}
|
||||
UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
|
||||
UTRACE_EXIT_STATUS(status);
|
||||
return keySize;
|
||||
}
|
||||
|
||||
/* this function is called by the C++ API for sortkey generation */
|
||||
U_CFUNC int32_t
|
||||
ucol_getSortKeyWithAllocation(const UCollator *coll,
|
||||
const UChar *source, int32_t sourceLength,
|
||||
uint8_t *&result, int32_t &resultCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
SortKeyByteSink sink(reinterpret_cast<char *>(result), resultCapacity);
|
||||
coll->sortKeyGen(coll, source, sourceLength, sink, pErrorCode);
|
||||
int32_t resultLen = sink.NumberOfBytesAppended();
|
||||
if (U_SUCCESS(*pErrorCode)) {
|
||||
if (!sink.IsOk()) {
|
||||
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else if (result != sink.GetUnsignedBuffer()) {
|
||||
result = sink.OrphanUnsignedBuffer(resultCapacity);
|
||||
}
|
||||
}
|
||||
return resultLen;
|
||||
ucol_getCollationKey(const UCollator *coll,
|
||||
const UChar *source, int32_t sourceLength,
|
||||
CollationKey &key,
|
||||
UErrorCode &errorCode) {
|
||||
CollationKeyByteSink sink(key);
|
||||
coll->sortKeyGen(coll, source, sourceLength, sink, &errorCode);
|
||||
return sink.NumberOfBytesAppended();
|
||||
}
|
||||
|
||||
// Is this primary weight compressible?
|
||||
@ -4545,16 +4576,16 @@ isCompressible(const UCollator * /*coll*/, uint8_t primary1) {
|
||||
}
|
||||
|
||||
static
|
||||
inline void doCaseShift(SortKeyByteSink &cases, uint32_t &caseShift) {
|
||||
inline void doCaseShift(SortKeyLevel &cases, uint32_t &caseShift) {
|
||||
if (caseShift == 0) {
|
||||
cases.Append(UCOL_CASE_BYTE_START);
|
||||
cases.appendByte(UCOL_CASE_BYTE_START);
|
||||
caseShift = UCOL_CASE_SHIFT_START;
|
||||
}
|
||||
}
|
||||
|
||||
// Packs the secondary buffer when processing French locale.
|
||||
static void
|
||||
packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
|
||||
packFrench(const uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
|
||||
secondaries += secsize; // We read the secondary-level bytes back to front.
|
||||
uint8_t secondary;
|
||||
int32_t count2 = 0;
|
||||
@ -4569,16 +4600,16 @@ packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
|
||||
if (count2 > 0) {
|
||||
if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
|
||||
while (count2 > UCOL_TOP_COUNT2) {
|
||||
result.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
|
||||
result.Append(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_TOP_COUNT2;
|
||||
}
|
||||
result.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
|
||||
result.Append(UCOL_COMMON_TOP2 - (count2-1));
|
||||
} else {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
result.Append(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
count2 = 0;
|
||||
}
|
||||
@ -4587,10 +4618,10 @@ packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
|
||||
}
|
||||
if (count2 > 0) {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
result.Append(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -4608,15 +4639,11 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
return;
|
||||
}
|
||||
|
||||
/* Stack allocated buffers for buffers we use */
|
||||
char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
|
||||
char caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER];
|
||||
|
||||
SortKeyByteSink &primaries = result;
|
||||
SortKeyByteSink secondaries(second, LENGTHOF(second));
|
||||
SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
|
||||
SortKeyByteSink cases(caseB, LENGTHOF(caseB));
|
||||
SortKeyByteSink quads(quad, LENGTHOF(quad));
|
||||
SortKeyLevel secondaries;
|
||||
SortKeyLevel tertiaries;
|
||||
SortKeyLevel cases;
|
||||
SortKeyLevel quads;
|
||||
|
||||
UnicodeString normSource;
|
||||
|
||||
@ -4735,19 +4762,19 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
if(compareQuad == 0) {
|
||||
if(count4 > 0) {
|
||||
while (count4 > UCOL_BOT_COUNT4) {
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
|
||||
count4 -= UCOL_BOT_COUNT4;
|
||||
}
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
|
||||
count4 = 0;
|
||||
}
|
||||
/* We are dealing with a variable and we're treating them as shifted */
|
||||
/* This is a shifted ignorable */
|
||||
if(primary1 != 0) { /* we need to check this since we could be in continuation */
|
||||
quads.Append(primary1);
|
||||
quads.appendByte(primary1);
|
||||
}
|
||||
if(primary2 != 0) {
|
||||
quads.Append(primary2);
|
||||
quads.appendByte(primary2);
|
||||
}
|
||||
}
|
||||
wasShifted = TRUE;
|
||||
@ -4762,7 +4789,7 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
primaries.Append(primary2);
|
||||
} else {
|
||||
if(leadPrimary != 0) {
|
||||
primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
|
||||
primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
|
||||
}
|
||||
if(primary2 == UCOL_IGNORABLE) {
|
||||
/* one byter, not compressed */
|
||||
@ -4794,20 +4821,20 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
if (count2 > 0) {
|
||||
if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
|
||||
while (count2 > UCOL_TOP_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_TOP_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1));
|
||||
} else {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
count2 = 0;
|
||||
}
|
||||
secondaries.Append(secondary);
|
||||
secondaries.appendByte(secondary);
|
||||
}
|
||||
} else {
|
||||
/* Do the special handling for French secondaries */
|
||||
@ -4815,7 +4842,7 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
/* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */
|
||||
if(notIsContinuation) {
|
||||
if (lastSecondaryLength > 1) {
|
||||
uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
|
||||
uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength);
|
||||
if (frenchStartPtr != NULL) {
|
||||
/* reverse secondaries from frenchStartPtr up to frenchEndPtr */
|
||||
uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
|
||||
@ -4826,7 +4853,7 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
} else {
|
||||
++lastSecondaryLength;
|
||||
}
|
||||
secondaries.Append(secondary);
|
||||
secondaries.appendByte(secondary);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4841,21 +4868,21 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
if(tertiary != 0) {
|
||||
if(coll->caseFirst == UCOL_UPPER_FIRST) {
|
||||
if((caseBits & 0xC0) == 0) {
|
||||
cases.LastByte() |= 1 << (--caseShift);
|
||||
cases.lastByte() |= 1 << (--caseShift);
|
||||
} else {
|
||||
cases.LastByte() |= 0 << (--caseShift);
|
||||
cases.lastByte() |= 0 << (--caseShift);
|
||||
/* second bit */
|
||||
doCaseShift(cases, caseShift);
|
||||
cases.LastByte() |= ((caseBits>>6)&1) << (--caseShift);
|
||||
cases.lastByte() |= ((caseBits>>6)&1) << (--caseShift);
|
||||
}
|
||||
} else {
|
||||
if((caseBits & 0xC0) == 0) {
|
||||
cases.LastByte() |= 0 << (--caseShift);
|
||||
cases.lastByte() |= 0 << (--caseShift);
|
||||
} else {
|
||||
cases.LastByte() |= 1 << (--caseShift);
|
||||
cases.lastByte() |= 1 << (--caseShift);
|
||||
/* second bit */
|
||||
doCaseShift(cases, caseShift);
|
||||
cases.LastByte() |= ((caseBits>>7)&1) << (--caseShift);
|
||||
cases.lastByte() |= ((caseBits>>7)&1) << (--caseShift);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4881,20 +4908,20 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
if (count3 > 0) {
|
||||
if ((tertiary > tertiaryCommon)) {
|
||||
while (count3 > coll->tertiaryTopCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
|
||||
tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
|
||||
count3 -= (uint32_t)coll->tertiaryTopCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
|
||||
tertiaries.appendByte(tertiaryTop - (count3-1));
|
||||
} else {
|
||||
while (count3 > coll->tertiaryBottomCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
|
||||
tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
|
||||
count3 -= (uint32_t)coll->tertiaryBottomCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
|
||||
tertiaries.appendByte(tertiaryBottom + (count3-1));
|
||||
}
|
||||
count3 = 0;
|
||||
}
|
||||
tertiaries.Append(tertiary);
|
||||
tertiaries.appendByte(tertiary);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4902,13 +4929,13 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
|
||||
if(count4>0) { // Close this part
|
||||
while (count4 > UCOL_BOT_COUNT4) {
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
|
||||
count4 -= UCOL_BOT_COUNT4;
|
||||
}
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
|
||||
count4 = 0;
|
||||
}
|
||||
quads.Append(UCOL_HIRAGANA_QUAD); // Add the Hiragana
|
||||
quads.appendByte(UCOL_HIRAGANA_QUAD); // Add the Hiragana
|
||||
} else { // This wasn't Hiragana, so we can continue adding stuff
|
||||
count4++;
|
||||
}
|
||||
@ -4919,68 +4946,74 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
/* Here, we are generally done with processing */
|
||||
/* bailing out would not be too productive */
|
||||
|
||||
UBool ok = TRUE;
|
||||
if(U_SUCCESS(*status)) {
|
||||
/* we have done all the CE's, now let's put them together to form a key */
|
||||
if(compareSec == 0) {
|
||||
if (count2 > 0) {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
if(!isFrenchSec || !secondaries.IsOk()) {
|
||||
result.Append(secondaries);
|
||||
if(!secondaries.isOk()) {
|
||||
ok = FALSE;
|
||||
} else if(!isFrenchSec) {
|
||||
secondaries.appendTo(result);
|
||||
} else {
|
||||
// If there are any unresolved continuation secondaries,
|
||||
// reverse them here so that we can reverse the whole secondary thing.
|
||||
if (lastSecondaryLength > 1) {
|
||||
uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
|
||||
uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength);
|
||||
if (frenchStartPtr != NULL) {
|
||||
/* reverse secondaries from frenchStartPtr up to frenchEndPtr */
|
||||
uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
|
||||
uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
|
||||
}
|
||||
}
|
||||
packFrench(secondaries.GetUnsignedBuffer(), secondaries.NumberOfBytesAppended(), result);
|
||||
packFrench(secondaries.data(), secondaries.length(), result);
|
||||
}
|
||||
}
|
||||
|
||||
if(doCase) {
|
||||
ok &= cases.isOk();
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
result.Append(cases);
|
||||
cases.appendTo(result);
|
||||
}
|
||||
|
||||
if(compareTer == 0) {
|
||||
if (count3 > 0) {
|
||||
if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
|
||||
while (count3 >= coll->tertiaryTopCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
|
||||
tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
|
||||
count3 -= (uint32_t)coll->tertiaryTopCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - count3));
|
||||
tertiaries.appendByte(tertiaryTop - count3);
|
||||
} else {
|
||||
while (count3 > coll->tertiaryBottomCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
|
||||
tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
|
||||
count3 -= (uint32_t)coll->tertiaryBottomCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
|
||||
tertiaries.appendByte(tertiaryBottom + (count3-1));
|
||||
}
|
||||
}
|
||||
ok &= tertiaries.isOk();
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
result.Append(tertiaries);
|
||||
tertiaries.appendTo(result);
|
||||
|
||||
if(compareQuad == 0/*qShifted == TRUE*/) {
|
||||
if(count4 > 0) {
|
||||
while (count4 > UCOL_BOT_COUNT4) {
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
|
||||
count4 -= UCOL_BOT_COUNT4;
|
||||
}
|
||||
quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
|
||||
quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
|
||||
}
|
||||
ok &= quads.isOk();
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
result.Append(quads);
|
||||
quads.appendTo(result);
|
||||
}
|
||||
|
||||
if(compareIdent) {
|
||||
@ -4993,6 +5026,9 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
|
||||
/* To avoid memory leak, free the offset buffer if necessary. */
|
||||
ucol_freeOffsetBuffer(&s);
|
||||
|
||||
ok &= result.IsOk();
|
||||
if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; }
|
||||
}
|
||||
|
||||
|
||||
@ -5009,12 +5045,9 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
return;
|
||||
}
|
||||
|
||||
/* Stack allocated buffers for buffers we use */
|
||||
char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
|
||||
|
||||
SortKeyByteSink &primaries = result;
|
||||
SortKeyByteSink secondaries(second, LENGTHOF(second));
|
||||
SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
|
||||
SortKeyLevel secondaries;
|
||||
SortKeyLevel tertiaries;
|
||||
|
||||
UnicodeString normSource;
|
||||
|
||||
@ -5096,7 +5129,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
primaries.Append(primary2);
|
||||
} else {
|
||||
if(leadPrimary != 0) {
|
||||
primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
|
||||
primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
|
||||
}
|
||||
if(primary2 == UCOL_IGNORABLE) {
|
||||
/* one byter, not compressed */
|
||||
@ -5127,20 +5160,20 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
if (count2 > 0) {
|
||||
if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
|
||||
while (count2 > UCOL_TOP_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_TOP_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1));
|
||||
} else {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
count2 = 0;
|
||||
}
|
||||
secondaries.Append(secondary);
|
||||
secondaries.appendByte(secondary);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5162,53 +5195,56 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
if (count3 > 0) {
|
||||
if ((tertiary > tertiaryCommon)) {
|
||||
while (count3 > coll->tertiaryTopCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
|
||||
tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
|
||||
count3 -= (uint32_t)coll->tertiaryTopCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
|
||||
tertiaries.appendByte(tertiaryTop - (count3-1));
|
||||
} else {
|
||||
while (count3 > coll->tertiaryBottomCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
|
||||
tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
|
||||
count3 -= (uint32_t)coll->tertiaryBottomCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
|
||||
tertiaries.appendByte(tertiaryBottom + (count3-1));
|
||||
}
|
||||
count3 = 0;
|
||||
}
|
||||
tertiaries.Append(tertiary);
|
||||
tertiaries.appendByte(tertiary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UBool ok = TRUE;
|
||||
if(U_SUCCESS(*status)) {
|
||||
/* we have done all the CE's, now let's put them together to form a key */
|
||||
if (count2 > 0) {
|
||||
while (count2 > UCOL_BOT_COUNT2) {
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
|
||||
count2 -= (uint32_t)UCOL_BOT_COUNT2;
|
||||
}
|
||||
secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
|
||||
secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
|
||||
}
|
||||
ok &= secondaries.isOk();
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
result.Append(secondaries);
|
||||
secondaries.appendTo(result);
|
||||
|
||||
if (count3 > 0) {
|
||||
if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
|
||||
while (count3 >= coll->tertiaryTopCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
|
||||
tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
|
||||
count3 -= (uint32_t)coll->tertiaryTopCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryTop - count3));
|
||||
tertiaries.appendByte(tertiaryTop - count3);
|
||||
} else {
|
||||
while (count3 > coll->tertiaryBottomCount) {
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
|
||||
tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
|
||||
count3 -= (uint32_t)coll->tertiaryBottomCount;
|
||||
}
|
||||
tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
|
||||
tertiaries.appendByte(tertiaryBottom + (count3-1));
|
||||
}
|
||||
}
|
||||
ok &= tertiaries.isOk();
|
||||
result.Append(UCOL_LEVELTERMINATOR);
|
||||
result.Append(tertiaries);
|
||||
tertiaries.appendTo(result);
|
||||
|
||||
result.Append(0);
|
||||
}
|
||||
@ -5216,9 +5252,8 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
/* To avoid memory leak, free the offset buffer if necessary. */
|
||||
ucol_freeOffsetBuffer(&s);
|
||||
|
||||
if (U_SUCCESS(*status) && !result.IsOk()) {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
ok &= result.IsOk();
|
||||
if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; }
|
||||
}
|
||||
|
||||
static inline
|
||||
|
@ -205,21 +205,6 @@
|
||||
/* if it is too small, heap allocation will occur.*/
|
||||
/* you can change this value if you need memory - it will affect the performance, though, since we're going to malloc */
|
||||
#define UCOL_MAX_BUFFER 128
|
||||
#define UCOL_PRIMARY_MAX_BUFFER 8*UCOL_MAX_BUFFER
|
||||
#define UCOL_SECONDARY_MAX_BUFFER UCOL_MAX_BUFFER
|
||||
#define UCOL_TERTIARY_MAX_BUFFER UCOL_MAX_BUFFER
|
||||
/*
|
||||
#define UCOL_CASE_MAX_BUFFER UCOL_MAX_BUFFER/4
|
||||
|
||||
UCOL_CASE_MAX_BUFFER as previously defined above was too small. A single collation element can
|
||||
generate two caseShift values, and UCOL_CASE_SHIFT_START (=7) caseShift values are compressed into
|
||||
one byte. UCOL_MAX_BUFFER should effectively be multiplied by 2/UCOL_CASE_SHIFT_START (2/7), not 1/4.
|
||||
Perhaps UCOL_CASE_SHIFT_START used to be 8; then this would have been correct. We should dynamically
|
||||
define UCOL_CASE_MAX_BUFFER in terms of both UCOL_MAX_BUFFER and UCOL_CASE_SHIFT_START. Since
|
||||
UCOL_CASE_SHIFT_START is defined lower down, we move the real definition of UCOL_CASE_MAX_BUFFER
|
||||
after it, further down.
|
||||
*/
|
||||
#define UCOL_QUAD_MAX_BUFFER 2*UCOL_MAX_BUFFER
|
||||
|
||||
#define UCOL_NORMALIZATION_GROWTH 2
|
||||
#define UCOL_NORMALIZATION_MAX_BUFFER UCOL_MAX_BUFFER*UCOL_NORMALIZATION_GROWTH
|
||||
@ -423,15 +408,6 @@ uprv_init_pce(const struct UCollationElements *elems);
|
||||
#define UCOL_CASE_BYTE_START 0x80
|
||||
#define UCOL_CASE_SHIFT_START 7
|
||||
|
||||
/*
|
||||
The definition of UCOL_CASE_MAX_BUFFER is moved down here so it can use UCOL_CASE_SHIFT_START.
|
||||
|
||||
A single collation element can generate two caseShift values, and UCOL_CASE_SHIFT_START caseShift
|
||||
values are compressed into one byte. The UCOL_CASE_MAX_BUFFER should effectively be UCOL_MAX_BUFFER
|
||||
multiplied by 2/UCOL_CASE_SHIFT_START, with suitable rounding up.
|
||||
*/
|
||||
#define UCOL_CASE_MAX_BUFFER (((2*UCOL_MAX_BUFFER) + UCOL_CASE_SHIFT_START - 1)/UCOL_CASE_SHIFT_START)
|
||||
|
||||
#define UCOL_IGNORABLE 0
|
||||
|
||||
/* get weights from a CE */
|
||||
@ -555,16 +531,17 @@ void *ucol_getABuffer(const UCollator *coll, uint32_t size);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CollationKey;
|
||||
class SortKeyByteSink;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* function used by C++ getCollationKey to prevent restarting the calculation */
|
||||
U_CFUNC int32_t
|
||||
ucol_getSortKeyWithAllocation(const UCollator *coll,
|
||||
const UChar *source, int32_t sourceLength,
|
||||
uint8_t *&result, int32_t &resultCapacity,
|
||||
UErrorCode *pErrorCode);
|
||||
ucol_getCollationKey(const UCollator *coll,
|
||||
const UChar *source, int32_t sourceLength,
|
||||
icu::CollationKey &key,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
typedef void U_CALLCONV
|
||||
SortKeyGenerator(const UCollator *coll,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*****************************************************************************
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and others.
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
*****************************************************************************
|
||||
*
|
||||
@ -241,30 +241,27 @@ public:
|
||||
|
||||
private:
|
||||
/**
|
||||
* Returns an array of the collation key values as 16-bit integers.
|
||||
* The caller owns the storage and must delete it.
|
||||
* @param values Output param of the collation key values.
|
||||
* @param capacity Size of the values array.
|
||||
* @param count output parameter of the number of collation key values
|
||||
* @return a pointer to an array of 16-bit collation key values.
|
||||
*/
|
||||
void adopt(uint8_t *values, int32_t capacity, int32_t count);
|
||||
* Replaces the current bytes buffer with a new one of newCapacity
|
||||
* and copies length bytes from the old buffer to the new one.
|
||||
* @return the new buffer, or NULL if the allocation failed
|
||||
*/
|
||||
uint8_t *reallocate(int32_t newCapacity, int32_t length);
|
||||
/**
|
||||
* Set a new length for a new sort key in the existing fBytes.
|
||||
*/
|
||||
void setLength(int32_t newLength);
|
||||
|
||||
/*
|
||||
* Creates a collation key with a string.
|
||||
*/
|
||||
uint8_t *getBytes() {
|
||||
return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
|
||||
}
|
||||
const uint8_t *getBytes() const {
|
||||
return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
|
||||
}
|
||||
int32_t getCapacity() const {
|
||||
return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity;
|
||||
}
|
||||
int32_t getLength() const { return fFlagAndLength & 0x7fffffff; }
|
||||
|
||||
/**
|
||||
* If this CollationKey has capacity less than newSize,
|
||||
* its internal capacity will be increased to newSize.
|
||||
* @param newSize minimum size this CollationKey has to have
|
||||
* @return this CollationKey
|
||||
*/
|
||||
CollationKey& ensureCapacity(int32_t newSize);
|
||||
/**
|
||||
* Set the CollationKey to a "bogus" or invalid state
|
||||
* @return this CollationKey
|
||||
@ -275,33 +272,42 @@ private:
|
||||
* @return this CollationKey
|
||||
*/
|
||||
CollationKey& reset(void);
|
||||
|
||||
|
||||
/**
|
||||
* Allow private access to RuleBasedCollator
|
||||
*/
|
||||
friend class RuleBasedCollator;
|
||||
/**
|
||||
* Bogus status
|
||||
*/
|
||||
UBool fBogus;
|
||||
/**
|
||||
* Size of fBytes used to store the sortkey. i.e. up till the
|
||||
* null-termination.
|
||||
*/
|
||||
int32_t fCount;
|
||||
/**
|
||||
* Full size of the fBytes
|
||||
*/
|
||||
int32_t fCapacity;
|
||||
/**
|
||||
* Unique hash value of this CollationKey
|
||||
*/
|
||||
int32_t fHashCode;
|
||||
/**
|
||||
* Array to store the sortkey
|
||||
*/
|
||||
uint8_t* fBytes;
|
||||
friend class CollationKeyByteSink;
|
||||
|
||||
// Class fields. sizeof(CollationKey) is intended to be 48 bytes
|
||||
// on a machine with 64-bit pointers.
|
||||
// We use a union to maximize the size of the internal buffer,
|
||||
// similar to UnicodeString but not as tight and complex.
|
||||
|
||||
// (implicit) *vtable;
|
||||
/**
|
||||
* Sort key length and flag.
|
||||
* Bit 31 is set if the buffer is heap-allocated.
|
||||
* Bits 30..0 contain the sort key length.
|
||||
*/
|
||||
int32_t fFlagAndLength;
|
||||
/**
|
||||
* Unique hash value of this CollationKey.
|
||||
* Special value 2 if the key is bogus.
|
||||
*/
|
||||
mutable int32_t fHashCode;
|
||||
/**
|
||||
* fUnion provides 32 bytes for the internal buffer or for
|
||||
* pointer+capacity.
|
||||
*/
|
||||
union StackBufferOrFields {
|
||||
/** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
|
||||
uint8_t fStackBuffer[32];
|
||||
struct {
|
||||
uint8_t *fBytes;
|
||||
int32_t fCapacity;
|
||||
} fFields;
|
||||
} fUnion;
|
||||
};
|
||||
|
||||
inline UBool
|
||||
@ -313,14 +319,14 @@ CollationKey::operator!=(const CollationKey& other) const
|
||||
inline UBool
|
||||
CollationKey::isBogus() const
|
||||
{
|
||||
return fBogus;
|
||||
return fHashCode == 2; // kBogusHashCode
|
||||
}
|
||||
|
||||
inline const uint8_t*
|
||||
CollationKey::getByteArray(int32_t &count) const
|
||||
{
|
||||
count = fCount;
|
||||
return fBytes;
|
||||
count = getLength();
|
||||
return getBytes();
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -559,7 +559,7 @@ CollationAPITest::TestCollationKey(/* char* par */)
|
||||
// bogus key returned here
|
||||
key1Status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
col->getCollationKey(NULL, 0, sortk1, key1Status);
|
||||
doAssert(sortk1.getByteArray(length) == NULL && length == 0,
|
||||
doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
|
||||
"Error code should return bogus collation key");
|
||||
|
||||
key1Status = U_ZERO_ERROR;
|
||||
|
Loading…
Reference in New Issue
Block a user