1999-08-16 21:50:52 +00:00
|
|
|
/*
|
1999-11-22 20:25:35 +00:00
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 1996-1999, International Business Machines Corporation and *
|
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*/
|
|
|
|
//===============================================================================
|
|
|
|
//
|
|
|
|
// File sortkey.cpp
|
|
|
|
//
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// Created by: Helena Shih
|
|
|
|
//
|
|
|
|
// Modification History:
|
|
|
|
//
|
|
|
|
// Date Name Description
|
|
|
|
//
|
|
|
|
// 6/20/97 helena Java class name change.
|
|
|
|
// 6/23/97 helena Added comments to make code more readable.
|
|
|
|
// 6/26/98 erm Changed to use byte arrays instead of UnicodeString
|
|
|
|
// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
|
|
|
|
// Cleaned up operator=
|
|
|
|
// 07/12/99 helena HPUX 11 CC port.
|
|
|
|
//===============================================================================
|
|
|
|
|
|
|
|
#ifndef _SORTKEY
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/sortkey.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef _CMEMORY
|
|
|
|
#include "cmemory.h"
|
|
|
|
#endif
|
|
|
|
|
2000-12-12 01:15:30 +00:00
|
|
|
#include "uhash.h"
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
// A hash code of kInvalidHashCode indicates that the has code needs
|
|
|
|
// to be computed. A hash code of kEmptyHashCode is used for empty keys
|
|
|
|
// and for any key whose computed hash code is kInvalidHashCode.
|
|
|
|
const int32_t CollationKey::kInvalidHashCode = 0;
|
|
|
|
const int32_t CollationKey::kEmptyHashCode = 1;
|
|
|
|
|
|
|
|
// Default constructor: builds an empty, non-bogus key that owns no storage.
// The cached hash code starts out as kEmptyHashCode.
CollationKey::CollationKey()
    : fBogus(FALSE),
      fCount(0),
      fCapacity(0),
      fHashCode(kEmptyHashCode),
      fBytes(NULL)
{
}
|
|
|
|
|
2000-12-12 01:15:30 +00:00
|
|
|
// Adopt bytes allocated with malloc
|
|
|
|
CollationKey::CollationKey(int32_t count, uint8_t *values)
|
2001-02-12 20:52:49 +00:00
|
|
|
: fBogus(FALSE), fCount(count), fCapacity(count),
|
|
|
|
fHashCode(kInvalidHashCode), fBytes(values)
|
2000-12-12 01:15:30 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
// Create a collation key from a bit array.
|
|
|
|
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
|
2000-08-14 23:23:20 +00:00
|
|
|
: fBogus(FALSE), fCount(count), fCapacity(count),
|
1999-08-16 21:50:52 +00:00
|
|
|
fHashCode(kInvalidHashCode)
|
|
|
|
{
|
2000-12-12 01:15:30 +00:00
|
|
|
fBytes = (uint8_t *)uprv_malloc(count);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
if (fBytes == NULL)
|
|
|
|
{
|
|
|
|
setToBogus();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
uprv_memcpy(fBytes, newValues, fCount);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create a collation key from the contents of a UnicodeString.
//
// All members are initialized to the empty state first: copyUnicodeString()
// goes through ensureCapacity(), which compares against fCapacity and frees
// fBytes, so neither may be left uninitialized.
CollationKey::CollationKey(const UnicodeString& value)
    : fBogus(FALSE), fCount(0), fCapacity(0),
      fHashCode(kEmptyHashCode), fBytes(NULL)
{
    copyUnicodeString(value);
}
|
|
|
|
|
|
|
|
// Copy constructor: makes a deep copy of another key.
//
// A bogus source yields a bogus copy. A source that owns no storage yields an
// empty copy without allocating, so copying an empty key cannot fail. If the
// buffer cannot be allocated the new key is set to bogus.
CollationKey::CollationKey(const CollationKey& other)
    : fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
      fHashCode(other.fHashCode), fBytes(NULL)
{
    if (other.fBogus)
    {
        setToBogus();
        return;
    }

    if (other.fBytes == NULL || other.fCapacity <= 0)
    {
        // Nothing to copy; avoid a zero-byte allocation and a copy from NULL.
        fCount = 0;
        fCapacity = 0;
        return;
    }

    fBytes = (uint8_t *)uprv_malloc(fCapacity);

    if (fBytes == NULL)
    {
        setToBogus();
        return;
    }

    uprv_memcpy(fBytes, other.fBytes, other.fCount);

    // Zero the unused tail so the whole buffer has defined contents.
    if (fCapacity > fCount)
    {
        uprv_memset(fBytes + fCount, 0, fCapacity - fCount);
    }
}
|
|
|
|
|
|
|
|
// Destructor: hands the key's byte storage back through uprv_free().
CollationKey::~CollationKey()
{
    uprv_free(fBytes);
}
|
|
|
|
|
2000-12-12 01:15:30 +00:00
|
|
|
// Replace the key's contents with a caller-supplied buffer and take ownership
// of it. Storage currently held by the key is released first.
//
//   values - buffer to adopt
//   count  - number of bytes in the buffer; becomes both length and capacity
void CollationKey::adopt(uint8_t *values, int32_t count)
{
    if (fBytes != NULL)
    {
        uprv_free(fBytes);
    }

    fBytes    = values;
    fCount    = count;
    fCapacity = count;
    fBogus    = FALSE;
    fHashCode = kInvalidHashCode;   // contents changed: recompute on demand
}
|
1999-08-16 21:50:52 +00:00
|
|
|
// set the key to an empty state
|
|
|
|
CollationKey&
|
|
|
|
CollationKey::reset()
|
|
|
|
{
|
|
|
|
fCount = 0;
|
|
|
|
fBogus = FALSE;
|
|
|
|
fHashCode = kEmptyHashCode;
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
// set the key to a "bogus" or invalid state
|
|
|
|
CollationKey&
|
|
|
|
CollationKey::setToBogus()
|
|
|
|
{
|
|
|
|
delete[] fBytes;
|
|
|
|
fBytes = NULL;
|
|
|
|
|
|
|
|
fCapacity = 0;
|
|
|
|
fCount = 0;
|
|
|
|
fHashCode = kInvalidHashCode;
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
// Equality: two keys are equal when they have the same length and either
// share the same storage or contain identical bytes.
UBool
CollationKey::operator==(const CollationKey& source) const
{
    // Keys of different length can never be equal.
    if (fCount != source.fCount)
    {
        return FALSE;
    }

    // Same length: identical storage is trivially equal, otherwise compare
    // the bytes.
    return (fBytes == source.fBytes) ||
           (uprv_memcmp(fBytes, source.fBytes, fCount) == 0);
}
|
|
|
|
|
|
|
|
// Assignment: makes this key a copy of another one.
//
// Self-assignment is a no-op. A bogus source makes this key bogus. A source
// without storage resets this key to the empty state. Otherwise the buffer is
// resized via ensureCapacity() and the source's bytes and cached hash code
// are copied; if resizing leaves the key bogus nothing is copied.
const CollationKey&
CollationKey::operator=(const CollationKey& other)
{
    if (this == &other)
    {
        return *this;
    }

    if (other.isBogus())
    {
        return setToBogus();
    }

    if (other.fBytes == NULL)
    {
        reset();
        return *this;
    }

    // ensureCapacity() also sets fCount to other.fCount.
    ensureCapacity(other.fCount);

    if (isBogus())
    {
        return *this;
    }

    fHashCode = other.fHashCode;
    uprv_memcpy(fBytes, other.fBytes, fCount);

    return *this;
}
|
|
|
|
|
|
|
|
// Bitwise comparison for the collation keys.
|
|
|
|
// NOTE: this is somewhat messy 'cause we can't count
|
|
|
|
// on memcmp returning the exact values which match
|
|
|
|
// Collator::EComparisonResult
|
|
|
|
Collator::EComparisonResult
|
|
|
|
CollationKey::compareTo(const CollationKey& target) const
|
|
|
|
{
|
|
|
|
int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
|
|
|
|
|
|
|
|
if (count == 0)
|
|
|
|
{
|
|
|
|
// If count is 0, at least one of the keys is empty.
|
|
|
|
// An empty key is always LESS than a non-empty one
|
|
|
|
// and EQUAL to another empty
|
|
|
|
if (this->fCount < target.fCount)
|
|
|
|
{
|
|
|
|
return Collator::LESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (this->fCount > target.fCount)
|
|
|
|
{
|
|
|
|
return Collator::GREATER;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Collator::EQUAL;
|
|
|
|
}
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
int result = uprv_memcmp(this->fBytes, target.fBytes, count);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
if (result < 0)
|
|
|
|
{
|
|
|
|
return Collator::LESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (result > 0)
|
|
|
|
{
|
|
|
|
return Collator::GREATER;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Collator::EQUAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure the key can hold newSize bytes and set its length to newSize.
//
// If the current capacity is too small the old buffer is discarded (its
// contents are NOT preserved) and a new, zero-filled buffer of newSize bytes
// is allocated. On allocation failure the key is set to bogus. On success the
// key is marked non-bogus and its cached hash code is invalidated.
// Returns *this.
CollationKey&
CollationKey::ensureCapacity(int32_t newSize)
{
    if (fCapacity < newSize)
    {
        uprv_free(fBytes);

        fBytes = (uint8_t *)uprv_malloc(newSize);

        if (fBytes == NULL)
        {
            return setToBogus();
        }

        // Zero the whole new buffer; using the old fCapacity here would
        // leave part (or all) of it uninitialized.
        uprv_memset(fBytes, 0, newSize);
        fCapacity = newSize;
    }

    fBogus = FALSE;
    fCount = newSize;
    fHashCode = kInvalidHashCode;

    return *this;
}
|
|
|
|
|
|
|
|
// Store every code unit of a UnicodeString into the key through storeBytes(),
// starting at cursor, and finish with a terminating zero value.
//
//   cursor - position handed to the first storeBytes() call
//   value  - string whose code units are stored
//
// Returns the cursor produced by the final storeBytes() call.
// NOTE(review): storeBytes() is defined elsewhere; presumably it writes one
// 16-bit value as two bytes and returns the advanced cursor - confirm.
int32_t
CollationKey::storeUnicodeString(int32_t cursor, const UnicodeString &value)
{
    const int32_t length = value.length();

    for (UTextOffset index = 0; index < length; ++index)
    {
        cursor = storeBytes(cursor, value[index]);
    }

    return storeBytes(cursor, 0);
}
|
|
|
|
|
|
|
|
// Fill the key from a UnicodeString.
//
// The key is sized for two bytes per code unit plus two null bytes at the
// end. If sizing leaves the key bogus, nothing is stored.
// Returns *this.
CollationKey&
CollationKey::copyUnicodeString(const UnicodeString &value)
{
    const int32_t bytesNeeded = (value.length() * 2) + 2;

    ensureCapacity(bytesNeeded);

    if (!isBogus())
    {
        storeUnicodeString(0, value);
    }

    return *this;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
CollationKey::reverseBytes(UTextOffset from, UTextOffset to)
|
|
|
|
{
|
|
|
|
uint8_t *left = &fBytes[from];
|
|
|
|
uint8_t *right = &fBytes[to - 2];
|
|
|
|
|
|
|
|
while (left < right)
|
|
|
|
{
|
|
|
|
uint8_t swap[2];
|
|
|
|
|
|
|
|
swap[0] = right[0];
|
|
|
|
swap[1] = right[1];
|
|
|
|
|
|
|
|
right[0] = left[0];
|
|
|
|
right[1] = left[1];
|
|
|
|
|
|
|
|
left[0] = swap[0];
|
|
|
|
left[1] = swap[1];
|
|
|
|
|
|
|
|
left += 2;
|
|
|
|
right -= 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a copy of the byte array.
|
|
|
|
uint8_t*
|
|
|
|
CollationKey::toByteArray(int32_t& count) const
|
|
|
|
{
|
|
|
|
uint8_t *result = new uint8_t[fCount];
|
|
|
|
|
|
|
|
if (result == NULL)
|
|
|
|
{
|
|
|
|
count = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
count = fCount;
|
1999-12-28 23:57:50 +00:00
|
|
|
uprv_memcpy(result, fBytes, fCount);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the key's contents as an array of 16-bit values, combining each pair
// of bytes with the first byte as the high-order half.
//
//   size - receives the number of 16-bit values returned (fCount / 2), or 0
//          if the array could not be allocated
//
// Returns a buffer allocated with new[], or NULL when the allocation yields
// NULL.
uint16_t*
CollationKey::copyValues(int32_t &size) const
{
    size = fCount / 2;

    uint16_t *values = new uint16_t[size];

    if (values == NULL)
    {
        size = 0;
        return values;
    }

    const uint8_t *pair = fBytes;

    for (UTextOffset index = 0; index < size; ++index, pair += 2)
    {
        values[index] = (uint16_t)((pair[0] << 8) | pair[1]);
    }

    return values;
}
|
|
|
|
|
|
|
|
// Return the hash code of this key, computing and caching it on first use.
//
// (Cribbed from UnicodeString)
// We cache the hashCode; when it becomes invalid, due to any change to the
// key, we note this by setting it to kInvalidHashCode. [LIU]
//
// An empty key (no storage or zero length) hashes to kEmptyHashCode without
// touching the buffer. A computed hash that happens to equal kInvalidHashCode
// is replaced by kEmptyHashCode, so a cached value is never mistaken for
// "not yet computed".
//
// Note: This method is semantically const, but physically non-const.
//
// NOTE(review): uhash_hashChars() is given only the pointer, not fCount;
// presumably it hashes up to the first zero byte - confirm that every
// non-empty key reaching this point is zero-terminated.
int32_t
CollationKey::hashCode() const
{
    if (fHashCode == kInvalidHashCode)
    {
        int32_t hash = kEmptyHashCode;

        if (fBytes != NULL && fCount > 0)
        {
            hash = uhash_hashChars(fBytes);

            // If we happened to get kInvalidHashCode, replace it with
            // kEmptyHashCode.
            if (hash == kInvalidHashCode)
            {
                hash = kEmptyHashCode;
            }
        }

        ((CollationKey *)this)->fHashCode = hash; // cast away const
    }

    return fHashCode;
}
|