/* ******************************************************************************* * * * COPYRIGHT: * * (C) Copyright Taligent, Inc., 1997 * * (C) Copyright International Business Machines Corporation, 1997-1999 * * Licensed Material - Program-Property of IBM - All Rights Reserved. * * US Government Users Restricted Rights - Use, duplication, or disclosure * * restricted by GSA ADP Schedule Contract with IBM Corp. * * * ******************************************************************************* * * File uhash.c * * Modification History: * * Date Name Description * 03/12/99 stephen Creation. ******************************************************************************* */ #include "uhash.h" #include "ustring.h" #include "cmemory.h" /* Private function prototypes */ void uhash_initialize(UHashtable *hash, int32_t primeIndex, UErrorCode *status); int32_t uhash_leastGreaterPrimeIndex(int32_t source); void uhash_rehash(UHashtable *hash, UErrorCode *status); void uhash_putInternal(UHashtable *hash, int32_t hashCode, void *value); int32_t uhash_find(const UHashtable *hash, int32_t hashCode); /* INVARIANT: the size of the table MUST be prime for this algorithm to work! Prime table can be tuned for different performance/storage characteristics We avoid computing primes by precomputing a table that we use. */ int32_t UHASH_PRIMES [] = { 17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209, 16411, 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, 4194319, 8388617, 16777259, 33554467, 67108879, 134217757, 268435459, 536870923, 1073741827, 2147483647 }; #define UHASH_PRIMES_LENGTH 28 #define UHASH_SLOT_DELETED ((int32_t) 0x80000000) #define UHASH_SLOT_EMPTY ((int32_t) UHASH_SLOT_DELETED + 1) #define UHASH_MAX_UNUSED ((int32_t) UHASH_SLOT_EMPTY) /* INVARIANTS: DELETED <= MAX_UNUSED EMPTY <= MAX_UNUSED Any hash > MAX_UNUSED* * hashcodes may not start out this way, but internally, they are adjusted so that they are always positive, and this is always true. Note here that we are assuming 32-bit ints. */ CAPI UHashtable* uhash_open(UHashFunction func, UErrorCode *status) { UHashtable* myUHT = uhash_openSize(func, 3, status); if (SUCCESS(*status)) myUHT->isGrowable = TRUE; return myUHT; } CAPI UHashtable* uhash_openSize(UHashFunction func, int32_t size, UErrorCode *status) { UHashtable *result; if(FAILURE(*status)) return NULL; result = (UHashtable*) icu_malloc(sizeof(UHashtable)); if(result == 0) { *status = U_MEMORY_ALLOCATION_ERROR; return 0; } result->highWaterFactor = 0.5F; result->lowWaterFactor = 0.0F; result->hashFunction = func; result->valueDelete = NULL; result->toBeDeleted = NULL; result->toBeDeletedCount = 0; result->isGrowable = FALSE; uhash_initialize(result, uhash_leastGreaterPrimeIndex(size), status); if(FAILURE(*status)) { icu_free(result); return 0; } return result; } CAPI void uhash_setValueDeleter(UHashtable *hash, ValueDeleter del ) { hash->valueDelete = del; } CAPI void uhash_close(UHashtable *hash) { if (hash->valueDelete) { ValueDeleter my_free = hash->valueDelete; void** vals = hash->values; void** toBeDeleted = hash->toBeDeleted; int32_t i; int32_t count = hash->count; int32_t toBeDeletedCount = hash->toBeDeletedCount; for (i = 0; i < count; i++) my_free(vals[i]); while (toBeDeletedCount--) my_free(toBeDeleted[toBeDeletedCount]); } icu_free(hash->values); icu_free(hash->hashes); icu_free(hash->toBeDeleted); } CAPI int32_t uhash_size(const UHashtable *hash) { return hash->count; } CAPI int32_t uhash_putKey(UHashtable *hash, int32_t valueKey, void *value, UErrorCode *status) { /* Put finds the position in the table for the new value. */ int32_t hashCode; int32_t index; if(FAILURE(*status)) return UHASH_INVALID; if(hash->count > hash->highWaterMark) { if (hash->isGrowable) uhash_rehash(hash, status); else { *status = U_INDEX_OUTOFBOUNDS_ERROR; return UHASH_INVALID; } } hashCode = valueKey; index = uhash_find(hash, hashCode); /* deleted or empty */ if(hash->hashes[index] <= UHASH_MAX_UNUSED) { /* make new object */ hash->hashes[index] = hashCode; ++(hash->count); } /* delete old value? */ if (hash->valueDelete) { void * result = hash->values[index]; if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/ { hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount))); hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result; } hash->values[index] = 0; } /* store value */ hash->values[index] = value; /* return the hash code to the user */ return hashCode; } CAPI int32_t uhash_put(UHashtable *hash, void *value, UErrorCode *status) { /* Put finds the position in the table for the new value. */ int32_t hashCode; int32_t index; if(FAILURE(*status)) return UHASH_INVALID; if(hash->count > hash->highWaterMark) { if (hash->isGrowable) uhash_rehash(hash, status); else { *status = U_INDEX_OUTOFBOUNDS_ERROR; return UHASH_INVALID; } } hashCode = (hash->hashFunction)(value); index = uhash_find(hash, hashCode); /* deleted or empty */ if(hash->hashes[index] <= UHASH_MAX_UNUSED) { /* make new object */ hash->hashes[index] = hashCode; ++(hash->count); } /* delete old value? */ if (hash->valueDelete) { void* result = hash->values[index]; if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/ { hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount))); hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result; } hash->values[index] = 0; } /* store value */ hash->values[index] = value; /* return the hash code to the user */ return hashCode; } CAPI void* uhash_get(const UHashtable *hash, int32_t key) { /* srl: Shouldn't we check to see if hash->hashes[uhash_find(hash, key)] == UHASH_SLOT_DELETED ? Perhaps in theory hash->values[...] should have been set to 0, but can we depend on this? */ void *result = hash->values[uhash_find(hash, key)]; return result; } CAPI void* uhash_remove(UHashtable *hash, int32_t key, UErrorCode *status) { /* First find the position of the key in the table If the object has not be removed already, remove it. We have to put a special value in that position that means that something has been deleted, since when we do a find, we have to continue PAST any deleted values */ int32_t index = uhash_find(hash, key); void *result = 0; /* neither deleted nor empty */ if(hash->hashes[index] > UHASH_MAX_UNUSED) { /* set to deleted */ hash->hashes[index] = UHASH_SLOT_DELETED; result = hash->values[index]; /* delete old value? */ if (hash->valueDelete) { hash->valueDelete(result); } hash->values[index] = 0; /* srl .. always null out the value even if there's no deletor!! */ --(hash->count); if(hash->count < hash->lowWaterMark) { uhash_rehash(hash, status); } } return result; } CAPI void* uhash_nextElement(const UHashtable *hash, int32_t *pos) { /* Walk through the array until you find an element that is not EMPTY and not DELETED */ int32_t i; void *value; for(i = *pos + 1; i < hash->length; ++i) { if(hash->hashes[i] > UHASH_MAX_UNUSED) { *pos = i; value = hash->values[i]; return value; } } /* No more elements */ return 0; } /* ================================================== */ /* Private functions */ /* ================================================== */ void uhash_initialize(UHashtable *hash, int32_t primeIndex, UErrorCode *status) { int32_t i; if(FAILURE(*status)) return; if(primeIndex < 0) { primeIndex = 0; } else if(primeIndex >= UHASH_PRIMES_LENGTH) { primeIndex = UHASH_PRIMES_LENGTH - 1; } hash->primeIndex = primeIndex; hash->length = UHASH_PRIMES[primeIndex]; hash->values = (void**) icu_malloc(sizeof(void*) * hash->length); if(hash->values == 0) { *status = U_MEMORY_ALLOCATION_ERROR; return; } hash->hashes = (int32_t*) icu_malloc(sizeof(int32_t) * hash->length); if(hash->values == 0) { *status = U_MEMORY_ALLOCATION_ERROR; icu_free(hash->values); return; } for(i = 0; i < hash->length; ++i) { hash->hashes[i] = UHASH_SLOT_EMPTY; hash->values[i] = 0; } hash->count = 0; hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterFactor); hash->highWaterMark = (int32_t)(hash->length * hash->highWaterFactor); } int32_t uhash_leastGreaterPrimeIndex(int32_t source) { int32_t i; for(i = 0; i < UHASH_PRIMES_LENGTH; ++i) { if(source < UHASH_PRIMES[i]) { break; } } return (i == 0 ? 0 : (i - 1)); } void uhash_rehash(UHashtable *hash, UErrorCode *status) { /* Rebuild the table from the start. This clears out deadwood, in case we have a lot of deleted values. See Find It is also used when the table grows or shrinks a lot. INVARIANT: The size of the table MUST be prime for this algorithm to work! */ void **oldValues = hash->values; int32_t *oldHashList = hash->hashes; int32_t old_length = hash->length; int32_t newPrimeIndex = hash->primeIndex; int32_t i; if(FAILURE(*status)) return; if(hash->count > hash->highWaterMark) { ++newPrimeIndex; } else if(hash->count < hash->lowWaterMark) { newPrimeIndex -= 2; } uhash_initialize(hash, newPrimeIndex, status); for(i = old_length - 1; i >= 0; --i) { void *value = oldValues[i]; if(value != 0) { uhash_putInternal(hash, oldHashList[i], value); } } icu_free(oldValues); icu_free(oldHashList); } void uhash_putInternal(UHashtable *hash, int32_t hashCode, void *value) { int32_t index = uhash_find(hash, hashCode); if(hash->hashes[index] <= UHASH_MAX_UNUSED) { /* deleted or empty */ hash->hashes[index] = hashCode; ++(hash->count); } /* reset value */ hash->values[index] = value; } int32_t uhash_find(const UHashtable *hash, int32_t hashCode) { /* This is the key routine. It looks for a particular key in the following way. First find the start position, which is basically the key modulo the length. Test it to see if it is a. Identical (same hash values) b. Deleted c. Empty Stop if it is identical or empty, otherwise continue by adding a "jump" value (moduloing by the length again to keep it within range) and retesting. For efficiency, it needs enough empty values so that the searches stop within a reasonable amount of time. This can be changed by changing the high/low water marks. INVARIANT: the size of the table MUST be prime for this algorithm to work! */ int32_t firstDeleted = -1; int32_t index = (hashCode ^ 0x4000000) % hash->length; int32_t jump = 0; while(TRUE) { int32_t tableHash = hash->hashes[index]; /* Compare hash codes */ if(tableHash == hashCode) { return index; } /* neither correct nor unused */ else if(tableHash > UHASH_MAX_UNUSED) { /* ignore */ } /* empty, end o' the line */ else if(tableHash == UHASH_SLOT_EMPTY) { if(firstDeleted >= 0) { /* reset if had deleted slot */ index = firstDeleted; } return index; } /* remember first deleted */ else if(firstDeleted < 0) { firstDeleted = index; } /* lazy compute jump */ if(jump == 0) { jump = (hashCode % (hash->length - 1)) + 1; } index = (index + jump) % hash->length; } /* This never happens -- just make the compiler happy */ return -1; } /* Predefined hash functions */ CAPI int32_t uhash_hashUString(const void *parm) { const UChar *key = (const UChar*) parm; int32_t len = u_strlen(key); int32_t hash = UHASH_INVALID; const UChar *limit = key + len; int32_t inc = (len >= 128 ? len/64 : 1); /* We compute the hash by iterating sparsely over 64 (at most) characters spaced evenly through the string. For each character, we multiply the previous hash value by a prime number and add the new character in, in the manner of a additive linear congruential random number generator, thus producing a pseudorandom deterministic value which should be well distributed over the output range. [LIU] */ while(key < limit) { hash = (hash * 37) + *key; key += inc; } if(hash == UHASH_INVALID) hash = UHASH_EMPTY; return hash & 0x7FFFFFFF; } CAPI int32_t uhash_hashString(const void *parm) { const char *key = (const char*) parm; int32_t len = strlen(key); int32_t hash = UHASH_INVALID; const char *limit = key + len; int32_t inc = (len >= 128 ? len/64 : 1); /* We compute the hash by iterating sparsely over 64 (at most) characters spaced evenly through the string. For each character, we multiply the previous hash value by a prime number and add the new character in, in the manner of a additive linear congruential random number generator, thus producing a pseudorandom deterministic value which should be well distributed over the output range. [LIU] */ while(key < limit) { hash = (hash * 37) + *key; key += inc; } if(hash == UHASH_INVALID) hash = UHASH_EMPTY; return hash & 0x7FFFFFFF; } CAPI int32_t uhash_hashLong(const void *parm) { int32_t hash = (int32_t) parm; return (int32_t) (hash & 0x7FFFFFFF); }