363ee387f6
X-SVN-Rev: 362
574 lines
14 KiB
C
574 lines
14 KiB
C
/*
|
|
**********************************************************************
|
|
* Copyright (C) 1997-1999, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
*
|
|
* File uhash.c
|
|
*
|
|
* Modification History:
|
|
*
|
|
* Date Name Description
|
|
* 03/12/99 stephen Creation.
|
|
*******************************************************************************
|
|
*/
|
|
|
|
#include "uhash.h"
|
|
#include "ustring.h"
|
|
#include "cstring.h"
|
|
#include "cmemory.h"
|
|
|
|
/* Private function prototypes */
|
|
void
|
|
uhash_initialize(UHashtable *hash,
|
|
int32_t primeIndex,
|
|
UErrorCode *status);
|
|
|
|
int32_t
|
|
uhash_leastGreaterPrimeIndex(int32_t source);
|
|
|
|
void
|
|
uhash_rehash(UHashtable *hash,
|
|
UErrorCode *status);
|
|
|
|
void
|
|
uhash_putInternal(UHashtable *hash,
|
|
int32_t hashCode,
|
|
void *value);
|
|
|
|
int32_t
|
|
uhash_find(const UHashtable *hash,
|
|
int32_t hashCode);
|
|
|
|
|
|
|
|
/*
|
|
INVARIANT: the size of the table MUST be prime for this algorithm to work!
|
|
Prime table can be tuned for different performance/storage characteristics
|
|
We avoid computing primes by precomputing a table that we use.
|
|
*/
|
|
int32_t UHASH_PRIMES [] =
|
|
{
|
|
17, 37, 67, 131, 257,
|
|
521, 1031, 2053, 4099, 8209, 16411, 32771, 65537,
|
|
131101, 262147, 524309, 1048583, 2097169, 4194319, 8388617, 16777259,
|
|
33554467, 67108879, 134217757, 268435459, 536870923, 1073741827, 2147483647
|
|
};
|
|
|
|
#define UHASH_PRIMES_LENGTH 28
|
|
|
|
#define UHASH_SLOT_DELETED ((int32_t) 0x80000000)
|
|
#define UHASH_SLOT_EMPTY ((int32_t) UHASH_SLOT_DELETED + 1)
|
|
#define UHASH_MAX_UNUSED ((int32_t) UHASH_SLOT_EMPTY)
|
|
|
|
/*
|
|
INVARIANTS:
|
|
DELETED <= MAX_UNUSED
|
|
EMPTY <= MAX_UNUSED
|
|
Any hash > MAX_UNUSED*
|
|
* hashcodes may not start out this way, but internally,
|
|
they are adjusted so that they are always positive, and this is always true.
|
|
Note here that we are assuming 32-bit ints.
|
|
*/
|
|
|
|
U_CAPI UHashtable*
|
|
uhash_open(UHashFunction func,
|
|
UErrorCode *status)
|
|
{
|
|
UHashtable* myUHT = uhash_openSize(func, 3, status);
|
|
if (U_SUCCESS(*status)) myUHT->isGrowable = TRUE;
|
|
|
|
return myUHT;
|
|
}
|
|
|
|
U_CAPI UHashtable*
|
|
uhash_openSize(UHashFunction func,
|
|
int32_t size,
|
|
UErrorCode *status)
|
|
{
|
|
UHashtable *result;
|
|
|
|
if(U_FAILURE(*status)) return NULL;
|
|
|
|
result = (UHashtable*) icu_malloc(sizeof(UHashtable));
|
|
if(result == 0) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
result->highWaterFactor = 0.5F;
|
|
result->lowWaterFactor = 0.0F;
|
|
result->hashFunction = func;
|
|
result->valueDelete = NULL;
|
|
result->toBeDeleted = NULL;
|
|
result->toBeDeletedCount = 0;
|
|
result->isGrowable = FALSE;
|
|
|
|
uhash_initialize(result, uhash_leastGreaterPrimeIndex(size), status);
|
|
|
|
if(U_FAILURE(*status)) {
|
|
icu_free(result);
|
|
return 0;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
U_CAPI void
|
|
uhash_setValueDeleter(UHashtable *hash, ValueDeleter del )
|
|
{
|
|
hash->valueDelete = del;
|
|
}
|
|
|
|
U_CAPI void
|
|
uhash_close(UHashtable *hash)
|
|
{
|
|
if (hash->valueDelete)
|
|
{
|
|
ValueDeleter my_free = hash->valueDelete;
|
|
void** vals = hash->values;
|
|
void** toBeDeleted = hash->toBeDeleted;
|
|
int32_t i;
|
|
int32_t count = hash->count;
|
|
int32_t toBeDeletedCount = hash->toBeDeletedCount;
|
|
for (i = 0; i < count; i++) my_free(vals[i]);
|
|
while (toBeDeletedCount--) my_free(toBeDeleted[toBeDeletedCount]);
|
|
|
|
}
|
|
icu_free(hash->values);
|
|
icu_free(hash->hashes);
|
|
icu_free(hash->toBeDeleted);
|
|
}
|
|
|
|
U_CAPI int32_t
|
|
uhash_size(const UHashtable *hash)
|
|
{
|
|
return hash->count;
|
|
}
|
|
U_CAPI int32_t
|
|
uhash_putKey(UHashtable *hash,
|
|
int32_t valueKey,
|
|
void *value,
|
|
UErrorCode *status)
|
|
{
|
|
/* Put finds the position in the table for the new value. */
|
|
|
|
int32_t hashCode;
|
|
int32_t index;
|
|
|
|
if(U_FAILURE(*status)) return UHASH_INVALID;
|
|
|
|
if(hash->count > hash->highWaterMark) {
|
|
if (hash->isGrowable) uhash_rehash(hash, status);
|
|
else {
|
|
*status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return UHASH_INVALID;
|
|
}
|
|
}
|
|
|
|
hashCode = valueKey;
|
|
index = uhash_find(hash, hashCode);
|
|
|
|
/* deleted or empty */
|
|
if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
|
|
/* make new object */
|
|
hash->hashes[index] = hashCode;
|
|
++(hash->count);
|
|
}
|
|
|
|
/* delete old value? */
|
|
if (hash->valueDelete)
|
|
{
|
|
void * result = hash->values[index];
|
|
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
|
|
{
|
|
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
|
|
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
|
|
}
|
|
hash->values[index] = 0;
|
|
}
|
|
|
|
|
|
/* store value */
|
|
hash->values[index] = value;
|
|
|
|
/* return the hash code to the user */
|
|
return hashCode;
|
|
}
|
|
|
|
U_CAPI int32_t
|
|
uhash_put(UHashtable *hash,
|
|
void *value,
|
|
UErrorCode *status)
|
|
{
|
|
/* Put finds the position in the table for the new value. */
|
|
|
|
int32_t hashCode;
|
|
int32_t index;
|
|
|
|
if(U_FAILURE(*status)) return UHASH_INVALID;
|
|
|
|
if(hash->count > hash->highWaterMark) {
|
|
if (hash->isGrowable) uhash_rehash(hash, status);
|
|
else {
|
|
*status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return UHASH_INVALID;
|
|
}
|
|
}
|
|
|
|
hashCode = (hash->hashFunction)(value);
|
|
index = uhash_find(hash, hashCode);
|
|
|
|
/* deleted or empty */
|
|
if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
|
|
/* make new object */
|
|
hash->hashes[index] = hashCode;
|
|
++(hash->count);
|
|
}
|
|
|
|
/* delete old value? */
|
|
if (hash->valueDelete)
|
|
{
|
|
void* result = hash->values[index];
|
|
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
|
|
{
|
|
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted,
|
|
sizeof(void*)*(++(hash->toBeDeletedCount)));
|
|
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
|
|
}
|
|
hash->values[index] = 0;
|
|
}
|
|
|
|
|
|
/* store value */
|
|
hash->values[index] = value;
|
|
|
|
/* return the hash code to the user */
|
|
return hashCode;
|
|
}
|
|
|
|
U_CAPI void*
|
|
uhash_get(const UHashtable *hash,
|
|
int32_t key)
|
|
{
|
|
/* srl: Shouldn't we check to see if hash->hashes[uhash_find(hash, key)] == UHASH_SLOT_DELETED ?
|
|
Perhaps in theory hash->values[...] should have been set to 0, but can we depend
|
|
on this?
|
|
*/
|
|
|
|
void *result = hash->values[uhash_find(hash, key)];
|
|
return result;
|
|
}
|
|
|
|
U_CAPI void*
|
|
uhash_remove(UHashtable *hash,
|
|
int32_t key,
|
|
UErrorCode *status)
|
|
{
|
|
/*
|
|
First find the position of the key in the table
|
|
If the object has not be removed already, remove it.
|
|
We have to put a special value in that position that means that
|
|
something has been deleted, since when we do a find,
|
|
we have to continue PAST any deleted values
|
|
*/
|
|
int32_t index = uhash_find(hash, key);
|
|
void *result = 0;
|
|
|
|
/* neither deleted nor empty */
|
|
if(hash->hashes[index] > UHASH_MAX_UNUSED) {
|
|
/* set to deleted */
|
|
hash->hashes[index] = UHASH_SLOT_DELETED;
|
|
result = hash->values[index];
|
|
|
|
/* delete old value? */
|
|
if (hash->valueDelete)
|
|
{
|
|
hash->valueDelete(result);
|
|
}
|
|
hash->values[index] = 0; /* srl .. always null out the value even if there's no deletor!! */
|
|
|
|
--(hash->count);
|
|
|
|
if(hash->count < hash->lowWaterMark) {
|
|
uhash_rehash(hash, status);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
U_CAPI void*
|
|
uhash_nextElement(const UHashtable *hash,
|
|
int32_t *pos)
|
|
{
|
|
/*
|
|
Walk through the array until you find an element that is not EMPTY and
|
|
not DELETED
|
|
*/
|
|
|
|
int32_t i;
|
|
void *value;
|
|
|
|
for(i = *pos + 1; i < hash->length; ++i) {
|
|
if(hash->hashes[i] > UHASH_MAX_UNUSED) {
|
|
*pos = i;
|
|
value = hash->values[i];
|
|
return value;
|
|
}
|
|
}
|
|
|
|
/* No more elements */
|
|
return 0;
|
|
}
|
|
|
|
/* ================================================== */
|
|
/* Private functions */
|
|
/* ================================================== */
|
|
void
|
|
uhash_initialize(UHashtable *hash,
|
|
int32_t primeIndex,
|
|
UErrorCode *status)
|
|
{
|
|
int32_t i;
|
|
|
|
if(U_FAILURE(*status)) return;
|
|
|
|
if(primeIndex < 0) {
|
|
primeIndex = 0;
|
|
}
|
|
else if(primeIndex >= UHASH_PRIMES_LENGTH) {
|
|
primeIndex = UHASH_PRIMES_LENGTH - 1;
|
|
}
|
|
|
|
hash->primeIndex = primeIndex;
|
|
hash->length = UHASH_PRIMES[primeIndex];
|
|
|
|
hash->values = (void**) icu_malloc(sizeof(void*) * hash->length);
|
|
if(hash->values == 0) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
|
|
hash->hashes = (int32_t*) icu_malloc(sizeof(int32_t) * hash->length);
|
|
if(hash->values == 0) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
icu_free(hash->values);
|
|
return;
|
|
}
|
|
|
|
for(i = 0; i < hash->length; ++i) {
|
|
hash->hashes[i] = UHASH_SLOT_EMPTY;
|
|
hash->values[i] = 0;
|
|
}
|
|
|
|
hash->count = 0;
|
|
hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterFactor);
|
|
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterFactor);
|
|
}
|
|
|
|
int32_t
|
|
uhash_leastGreaterPrimeIndex(int32_t source)
|
|
{
|
|
int32_t i;
|
|
for(i = 0; i < UHASH_PRIMES_LENGTH; ++i) {
|
|
if(source < UHASH_PRIMES[i]) {
|
|
break;
|
|
}
|
|
}
|
|
return (i == 0 ? 0 : (i - 1));
|
|
}
|
|
|
|
void
|
|
uhash_rehash(UHashtable *hash,
|
|
UErrorCode *status)
|
|
{
|
|
/*
|
|
Rebuild the table from the start. This clears out deadwood, in case
|
|
we have a lot of deleted values. See Find
|
|
It is also used when the table grows or shrinks a lot.
|
|
INVARIANT: The size of the table MUST be prime for this algorithm to work!
|
|
*/
|
|
|
|
void **oldValues = hash->values;
|
|
int32_t *oldHashList = hash->hashes;
|
|
int32_t old_length = hash->length;
|
|
int32_t newPrimeIndex = hash->primeIndex;
|
|
int32_t i;
|
|
|
|
if(U_FAILURE(*status)) return;
|
|
|
|
if(hash->count > hash->highWaterMark) {
|
|
++newPrimeIndex;
|
|
}
|
|
else if(hash->count < hash->lowWaterMark) {
|
|
newPrimeIndex -= 2;
|
|
}
|
|
|
|
uhash_initialize(hash, newPrimeIndex, status);
|
|
for(i = old_length - 1; i >= 0; --i) {
|
|
void *value = oldValues[i];
|
|
if(value != 0) {
|
|
uhash_putInternal(hash, oldHashList[i], value);
|
|
}
|
|
}
|
|
|
|
icu_free(oldValues);
|
|
icu_free(oldHashList);
|
|
}
|
|
|
|
void
|
|
uhash_putInternal(UHashtable *hash,
|
|
int32_t hashCode,
|
|
void *value)
|
|
{
|
|
int32_t index = uhash_find(hash, hashCode);
|
|
if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
|
|
/* deleted or empty */
|
|
hash->hashes[index] = hashCode;
|
|
++(hash->count);
|
|
}
|
|
|
|
/* reset value */
|
|
hash->values[index] = value;
|
|
}
|
|
|
|
int32_t
|
|
uhash_find(const UHashtable *hash,
|
|
int32_t hashCode)
|
|
{
|
|
/*
|
|
This is the key routine. It looks for a particular key in the following
|
|
way. First find the start position, which is basically the key modulo the
|
|
length. Test it to see if it is
|
|
a. Identical (same hash values)
|
|
b. Deleted
|
|
c. Empty
|
|
Stop if it is identical or empty, otherwise continue by adding a "jump"
|
|
value (moduloing by the length again to keep it within range) and
|
|
retesting. For efficiency, it needs enough empty values so that the
|
|
searches stop within a reasonable amount of time. This can be changed by
|
|
changing the high/low water marks.
|
|
INVARIANT: the size of the table MUST be prime for this algorithm to work!
|
|
*/
|
|
|
|
int32_t firstDeleted = -1;
|
|
int32_t index = (hashCode ^ 0x4000000) % hash->length;
|
|
int32_t jump = 0;
|
|
|
|
while(TRUE) {
|
|
int32_t tableHash = hash->hashes[index];
|
|
|
|
/* Compare hash codes */
|
|
if(tableHash == hashCode) {
|
|
return index;
|
|
}
|
|
|
|
/* neither correct nor unused */
|
|
else if(tableHash > UHASH_MAX_UNUSED) {
|
|
/* ignore */
|
|
}
|
|
|
|
/* empty, end o' the line */
|
|
else if(tableHash == UHASH_SLOT_EMPTY) {
|
|
if(firstDeleted >= 0) {
|
|
/* reset if had deleted slot */
|
|
index = firstDeleted;
|
|
}
|
|
return index;
|
|
}
|
|
|
|
/* remember first deleted */
|
|
else if(firstDeleted < 0) {
|
|
firstDeleted = index;
|
|
}
|
|
|
|
/* lazy compute jump */
|
|
if(jump == 0) {
|
|
jump = (hashCode % (hash->length - 1)) + 1;
|
|
}
|
|
|
|
index = (index + jump) % hash->length;
|
|
}
|
|
|
|
/* This never happens -- just make the compiler happy */
|
|
return -1;
|
|
}
|
|
|
|
/* Predefined hash functions */
|
|
|
|
U_CAPI int32_t
|
|
uhash_hashUString(const void *parm)
|
|
{
|
|
if(parm != NULL) {
|
|
const UChar *key = (const UChar*) parm;
|
|
int32_t len = u_strlen(key);
|
|
int32_t hash = UHASH_INVALID;
|
|
const UChar *limit = key + len;
|
|
int32_t inc = (len >= 128 ? len/64 : 1);
|
|
|
|
/*
|
|
We compute the hash by iterating sparsely over 64 (at most) characters
|
|
spaced evenly through the string. For each character, we multiply the
|
|
previous hash value by a prime number and add the new character in,
|
|
in the manner of a additive linear congruential random number generator,
|
|
thus producing a pseudorandom deterministic value which should be well
|
|
distributed over the output range. [LIU]
|
|
*/
|
|
|
|
while(key < limit) {
|
|
hash = (hash * 37) + *key;
|
|
key += inc;
|
|
}
|
|
|
|
if(hash == UHASH_INVALID) {
|
|
hash = UHASH_EMPTY;
|
|
}
|
|
|
|
return hash & 0x7FFFFFFF;
|
|
} else {
|
|
return UHASH_INVALID;
|
|
}
|
|
}
|
|
|
|
U_CAPI int32_t
|
|
uhash_hashString(const void *parm)
|
|
{
|
|
if(parm != NULL) {
|
|
const char *key = (const char*) parm;
|
|
int32_t len = icu_strlen(key);
|
|
int32_t hash = UHASH_INVALID;
|
|
const char *limit = key + len;
|
|
int32_t inc = (len >= 128 ? len/64 : 1);
|
|
|
|
/*
|
|
We compute the hash by iterating sparsely over 64 (at most) characters
|
|
spaced evenly through the string. For each character, we multiply the
|
|
previous hash value by a prime number and add the new character in,
|
|
in the manner of a additive linear congruential random number generator,
|
|
thus producing a pseudorandom deterministic value which should be well
|
|
distributed over the output range. [LIU]
|
|
*/
|
|
|
|
while(key < limit) {
|
|
hash = (hash * 37) + *key;
|
|
key += inc;
|
|
}
|
|
|
|
if(hash == UHASH_INVALID) {
|
|
hash = UHASH_EMPTY;
|
|
}
|
|
|
|
return hash & 0x7FFFFFFF;
|
|
} else {
|
|
return UHASH_INVALID;
|
|
}
|
|
}
|
|
|
|
U_CAPI int32_t
|
|
uhash_hashLong(const void *parm)
|
|
{
|
|
int32_t hash = (int32_t) parm;
|
|
return (int32_t) (hash & 0x7FFFFFFF);
|
|
}
|