scuffed-code/icu4c/source/i18n/sortkey.cpp

338 lines
8.2 KiB
C++
Raw Normal View History

1999-08-16 21:50:52 +00:00
/*
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
1999-08-16 21:50:52 +00:00
*/
//===============================================================================
//
// File sortkey.cpp
//
//
1999-08-16 21:50:52 +00:00
//
// Created by: Helena Shih
//
// Modification History:
//
// Date Name Description
//
// 6/20/97 helena Java class name change.
// 6/23/97 helena Added comments to make code more readable.
// 6/26/98 erm Changed to use byte arrays instead of UnicodeString
// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
// Cleaned up operator=
// 07/12/99 helena HPUX 11 CC port.
// 03/06/01 synwee Modified compareTo, to handle the result of
// 2 string similar in contents, but one is longer
// than the other
1999-08-16 21:50:52 +00:00
//===============================================================================
#include "unicode/sortkey.h"
1999-08-16 21:50:52 +00:00
#include "cmemory.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
1999-08-16 21:50:52 +00:00
// Sentinel values stored in CollationKey::fHashCode.
// A hash code of kInvalidHashCode indicates that the hash code needs
// to be computed. A hash code of kEmptyHashCode is used for empty keys
// and is intended for any key whose computed hash code is kInvalidHashCode.
#define kInvalidHashCode ((int32_t)0)
#define kEmptyHashCode ((int32_t)1)
1999-08-16 21:50:52 +00:00
// Default constructor: an empty, non-bogus key with no byte buffer.
// The hash code is preset to kEmptyHashCode, so it never has to be computed.
CollationKey::CollationKey()
    : fBogus(FALSE),
      fCount(0),
      fCapacity(0),
      fHashCode(kEmptyHashCode),
      fBytes(NULL)
{
    // nothing to allocate for an empty key
}
// Create a collation key from a byte array.
//
// The first `count` bytes of `newValues` are copied into a newly allocated
// buffer held by the key; the caller's array is not retained. The hash code
// is left as kInvalidHashCode so that it is computed on first use.
//
// setToBogus() is called on the key when
//   - count is negative,
//   - newValues is NULL while count is positive, or
//   - the buffer cannot be allocated.
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
    : fBogus(FALSE), fCount(count), fCapacity(count),
      fHashCode(kInvalidHashCode), fBytes(NULL)
{
    // fBytes starts out NULL so that setToBogus() never frees an
    // uninitialized pointer on the early-exit paths below.
    if (count < 0 || (newValues == NULL && count > 0))
    {
        setToBogus();
        return;
    }

    fBytes = (uint8_t *)uprv_malloc(count);

    if (fBytes == NULL)
    {
        setToBogus();
        return;
    }

    // Skip the copy for an empty key: the source pointer may be NULL then.
    if (count > 0)
    {
        uprv_memcpy(fBytes, newValues, count);
    }
}
// Copy constructor.
// Duplicates the byte count, capacity and cached hash code of `other`, and
// copies its bytes into a freshly allocated buffer of the same capacity.
// Any capacity beyond the copied bytes is zero-filled. If `other` is bogus,
// or the allocation fails, setToBogus() is called on the new key instead.
CollationKey::CollationKey(const CollationKey& other)
    : fBogus(FALSE),
      fCount(other.fCount),
      fCapacity(other.fCapacity),
      fHashCode(other.fHashCode),
      fBytes(NULL)
{
    // Only attempt the allocation when there is a valid key to copy;
    // otherwise fBytes stays NULL and we fall into the failure path.
    if (!other.fBogus)
    {
        fBytes = (uint8_t *)uprv_malloc(fCapacity);
    }

    if (fBytes == NULL)
    {
        setToBogus();
        return;
    }

    uprv_memcpy(fBytes, other.fBytes, other.fCount);

    // Clear the unused tail of the buffer, if there is one.
    if (fCount < fCapacity)
    {
        uint8_t *tail = fBytes + fCount;
        uprv_memset(tail, 0, fCapacity - fCount);
    }
}
// Destructor: releases the byte buffer held by this key.
CollationKey::~CollationKey()
{
    uprv_free(fBytes);
}
// Replace this key's contents with the caller-supplied buffer.
// The pointer `values` is stored as-is (no copy is made); the buffer held
// before the call, if any, is released. `count` becomes both the length and
// the capacity of the key, the bogus flag is cleared and the cached hash
// code is invalidated.
void CollationKey::adopt(uint8_t *values, int32_t count)
{
    // Let go of the buffer we held so far.
    if (fBytes != NULL)
    {
        uprv_free(fBytes);
    }

    fBytes = values;
    fCapacity = count;
    fCount = count;
    fHashCode = kInvalidHashCode;
    fBogus = FALSE;
}
1999-08-16 21:50:52 +00:00
// Put the key into the empty state: zero length, not bogus, and with the
// hash code preset to kEmptyHashCode. The buffer and its capacity are left
// untouched. Returns a reference to this key.
CollationKey&
CollationKey::reset()
{
    fBogus = FALSE;
    fHashCode = kEmptyHashCode;
    fCount = 0;

    return *this;
}
// Set the key to a "bogus" or invalid state.
// Releases the byte buffer, zeroes the length and capacity, invalidates the
// cached hash code and raises the bogus flag. Returns a reference to this key.
CollationKey&
CollationKey::setToBogus()
{
    uprv_free(fBytes);
    fBytes = NULL;

    fCapacity = 0;
    fCount = 0;
    fHashCode = kInvalidHashCode;

    // Actually flag the key as bogus. Every other state-changing routine in
    // this file clears fBogus, and the copy constructor and operator= test
    // it after a failed allocation, so it has to be raised here.
    fBogus = TRUE;

    return *this;
}
// Equality: two keys are equal when they have the same length and either
// share the same buffer or hold identical bytes over that length.
UBool
CollationKey::operator==(const CollationKey& source) const
{
    const UBool sameLength = (fCount == source.fCount);

    // The byte comparison is only evaluated for keys of equal length,
    // and skipped entirely when both keys point at the same buffer.
    const UBool sameContents = sameLength &&
        (fBytes == source.fBytes ||
         uprv_memcmp(fBytes, source.fBytes, fCount) == 0);

    return sameContents;
}
// Assignment.
//  - Self-assignment is a no-op.
//  - Assigning a bogus key calls setToBogus() on this key.
//  - Assigning a key without a buffer puts this key into the empty state.
//  - Otherwise this key is resized via ensureCapacity() and receives a copy
//    of the other key's bytes and cached hash code; if resizing leaves this
//    key bogus, nothing is copied.
// Returns a reference to this key.
const CollationKey&
CollationKey::operator=(const CollationKey& other)
{
    if (this == &other)
    {
        return *this;
    }

    if (other.isBogus())
    {
        return setToBogus();
    }

    if (other.fBytes == NULL)
    {
        // Nothing to copy: become an empty key.
        fCount = 0;
        fBogus = FALSE;
        fHashCode = kEmptyHashCode;
        return *this;
    }

    // ensureCapacity() also sets fCount to other.fCount.
    ensureCapacity(other.fCount);

    if (!isBogus())
    {
        fHashCode = other.fHashCode;
        uprv_memcpy(fBytes, other.fBytes, fCount);
    }

    return *this;
}
// Bitwise comparison for the collation keys.
// The bytes the two keys have in common are compared first; if they are
// identical, the shorter key sorts before the longer one.
// NOTE: the result of memcmp is mapped explicitly because we can't count
// on it returning the exact values which match Collator::EComparisonResult.
Collator::EComparisonResult
CollationKey::compareTo(const CollationKey& target) const
{
    const uint8_t *mine = this->fBytes;
    const uint8_t *theirs = target.fBytes;

    // Same buffer (or both without one): nothing to compare.
    if (mine == theirs)
    {
        return Collator::EQUAL;
    }

    // Length of the prefix present in both keys.
    const int commonLength =
        (this->fCount < target.fCount) ? this->fCount : target.fCount;

    if (commonLength > 0)
    {
        const int diff = uprv_memcmp(mine, theirs, commonLength);

        if (diff > 0)
        {
            return Collator::GREATER;
        }
        if (diff < 0)
        {
            return Collator::LESS;
        }
    }

    // Common prefix is identical (or empty): the lengths decide.
    if (this->fCount < target.fCount)
    {
        return Collator::LESS;
    }
    if (this->fCount > target.fCount)
    {
        return Collator::GREATER;
    }
    return Collator::EQUAL;
}
// Make sure the key can hold `newSize` bytes and set its length to newSize.
//
// When the current capacity is too small the old buffer is released (its
// contents are NOT preserved) and a new, zero-filled buffer of newSize
// bytes is allocated; if that allocation fails, setToBogus() is called and
// its result returned. When the capacity already suffices the existing
// buffer is kept as-is.
//
// On success the bogus flag is cleared and the cached hash code is
// invalidated. Returns a reference to this key.
CollationKey&
CollationKey::ensureCapacity(int32_t newSize)
{
    if (fCapacity < newSize)
    {
        uprv_free(fBytes);

        fBytes = (uint8_t *)uprv_malloc(newSize);

        if (fBytes == NULL)
        {
            return setToBogus();
        }

        // Zero the whole new buffer. (Using the old fCapacity here would
        // leave the bytes between the old and new capacity uninitialized.)
        uprv_memset(fBytes, 0, newSize);
        fCapacity = newSize;
    }

    fBogus = FALSE;
    fCount = newSize;
    fHashCode = kInvalidHashCode;

    return *this;
}
1999-08-16 21:50:52 +00:00
// Create a copy of the byte array.
// Returns a newly allocated buffer (obtained from uprv_malloc) holding the
// key's bytes and stores the number of bytes in `count`. If the allocation
// fails, NULL is returned and `count` is set to 0.
uint8_t*
CollationKey::toByteArray(int32_t& count) const
{
    uint8_t *copy = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );

    if (copy != NULL)
    {
        count = fCount;
        uprv_memcpy(copy, fBytes, fCount);
        return copy;
    }

    // allocation failed
    count = 0;
    return copy;
}
// Return the hash code of this key, computing and caching it on first use.
//
// (Cribbed from UnicodeString)
// We cache the hashCode; when it becomes invalid, due to any change to the
// key, we note this by setting it to kInvalidHashCode. [LIU]
// Note: This method is semantically const, but physically non-const.
int32_t
CollationKey::hashCode() const
{
    if (fHashCode == kInvalidHashCode)
    {
        UHashTok key;
        key.pointer = fBytes;

        // NOTE(review): uhash_hashChars presumably treats the buffer as a
        // NUL-terminated string and ignores fCount -- confirm that every
        // key buffer is zero-terminated.
        int32_t hash = uhash_hashChars(key);

        // kInvalidHashCode is reserved to mean "not computed yet". If the
        // computed value happens to collide with it, store kEmptyHashCode
        // instead so the hash is not recomputed on every call.
        if (hash == kInvalidHashCode)
        {
            hash = kEmptyHashCode;
        }

        ((CollationKey *)this)->fHashCode = hash; // cast away const
    }

    return fHashCode;
}
U_NAMESPACE_END