scuffed-code/icu4c/source/common/ucmp16.c

659 lines
19 KiB
C
Raw Normal View History

1999-08-16 21:50:52 +00:00
/*
******************************************************************************
*
* Copyright (C) 1997-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
1999-08-16 21:50:52 +00:00
*/
/*============================================================================
1999-08-16 21:50:52 +00:00
*
* File cmpshrta.cpp
*
* Modification History:
*
* Date Name Description
* 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods.
* 3/4/97 aliu Tuned performance of CompactIntArray constructor,
* 05/07/97 helena Added isBogus()
* based on performance data indicating that this_obj was slow.
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
* 07/30/98 erm Added changes from 07/29/98 code review.
* 11/01/99 weiv Added getArray, getIndex and getCount based on Jitterbug 4
*============================================================================
1999-08-16 21:50:52 +00:00
*/
#include "ucmp16.h"
#include "cmemory.h"
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (uprv_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
1999-08-16 21:50:52 +00:00
static const int32_t UCMP16_kMaxUnicode = UCMP16_kMaxUnicode_int;
static const int32_t UCMP16_kUnicodeCount = UCMP16_kUnicodeCount_int;
static const int32_t UCMP16_kBlockShift = UCMP16_kBlockShift_int;
static const int32_t UCMP16_kBlockCount = UCMP16_kBlockCount_int;
static const int32_t UCMP16_kBlockBytes = UCMP16_kBlockBytes_int;
static const int32_t UCMP16_kIndexShift = UCMP16_kIndexShift_int;
static const int32_t UCMP16_kIndexCount = UCMP16_kIndexCount_int;
static const uint32_t UCMP16_kBlockMask = UCMP16_kBlockMask_int;
1999-08-16 21:50:52 +00:00
/**
* Sets the array to the invalid memory state.
*/
static CompactShortArray* setToBogus(CompactShortArray* array);
static void touchBlock(CompactShortArray* this_obj,
int32_t i,
int16_t value);
static UBool blockTouched(const CompactShortArray* this_obj,
1999-08-16 21:50:52 +00:00
int32_t i);
/* debug flags*/
/*=======================================================*/
int32_t ucmp16_getkUnicodeCount()
{return UCMP16_kUnicodeCount;}
int32_t ucmp16_getkBlockCount()
{return UCMP16_kBlockCount;}
CompactShortArray* ucmp16_open(int16_t defaultValue)
{
int32_t i;
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL)
return NULL;
this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
if (this_obj->fArray == NULL)
1999-08-16 21:50:52 +00:00
{
uprv_free(this_obj);
return NULL;
1999-08-16 21:50:52 +00:00
}
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
if (this_obj->fIndex == NULL)
1999-08-16 21:50:52 +00:00
{
uprv_free(this_obj->fArray);
uprv_free(this_obj);
return NULL;
1999-08-16 21:50:52 +00:00
}
this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
if (this_obj->fHashes == NULL)
1999-08-16 21:50:52 +00:00
{
uprv_free(this_obj->fArray);
uprv_free(this_obj->fIndex);
uprv_free(this_obj);
return NULL;
1999-08-16 21:50:52 +00:00
}
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->fCount = UCMP16_kUnicodeCount;
this_obj->fCompact = FALSE;
this_obj->fBogus = FALSE;
this_obj->fAlias = FALSE;
this_obj->fIAmOwned = FALSE;
this_obj->fDefaultValue = defaultValue;
this_obj->kBlockShift = UCMP16_kBlockShift;
this_obj->kBlockMask = UCMP16_kBlockMask;
for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
1999-08-16 21:50:52 +00:00
{
this_obj->fArray[i] = defaultValue;
1999-08-16 21:50:52 +00:00
}
for (i = 0; i < UCMP16_kIndexCount; i += 1)
1999-08-16 21:50:52 +00:00
{
this_obj->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
this_obj->fHashes[i] = 0;
1999-08-16 21:50:52 +00:00
}
return this_obj;
1999-08-16 21:50:52 +00:00
}
void ucmp16_initBogus(CompactShortArray *this_obj)
{
if (this_obj == NULL)
return;
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->fCount = UCMP16_kUnicodeCount;
this_obj->fCompact = FALSE;
this_obj->fBogus = TRUE;
this_obj->fArray = NULL;
this_obj->fAlias = FALSE;
this_obj->fIndex = NULL;
this_obj->fHashes = NULL;
this_obj->fIAmOwned = TRUE;
this_obj->fDefaultValue = 0;
}
void ucmp16_init(CompactShortArray *this_obj, int16_t defaultValue)
{
int32_t i;
if (this_obj == NULL)
return;
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->fCount = UCMP16_kUnicodeCount;
this_obj->fCompact = FALSE;
this_obj->fBogus = FALSE;
this_obj->fArray = NULL;
this_obj->fAlias = FALSE;
this_obj->fIndex = NULL;
this_obj->fHashes = NULL;
this_obj->fIAmOwned = TRUE;
this_obj->fDefaultValue = defaultValue;
this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
if (this_obj->fArray == NULL)
{
setToBogus(this_obj);
return;
}
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
if (this_obj->fIndex == NULL)
{
setToBogus(this_obj);
return;
}
this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
if (this_obj->fHashes == NULL)
{
setToBogus(this_obj);
return;
}
this_obj->kBlockShift = UCMP16_kBlockShift;
this_obj->kBlockMask = UCMP16_kBlockMask;
for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
{
this_obj->fArray[i] = defaultValue;
}
for (i = 0; i < UCMP16_kIndexCount; i += 1)
{
this_obj->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
this_obj->fHashes[i] = 0;
}
}
1999-08-16 21:50:52 +00:00
CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue)
{
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL)
return NULL;
ucmp16_initAdopt(this_obj, indexArray, newValues, count, defaultValue);
this_obj->fIAmOwned = FALSE;
return this_obj;
1999-08-16 21:50:52 +00:00
}
CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue,
int32_t blockShift)
{
CompactShortArray* this_obj = ucmp16_openAdopt(indexArray,
newValues,
count,
defaultValue);
if (this_obj) {
this_obj->kBlockShift = blockShift;
this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
}
return this_obj;
1999-08-16 21:50:52 +00:00
}
CompactShortArray* ucmp16_openAlias(uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue)
{
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL)
return NULL;
ucmp16_initAlias(this_obj, indexArray, newValues, count, defaultValue);
this_obj->fIAmOwned = FALSE;
return this_obj;
}
/*=======================================================*/
CompactShortArray* ucmp16_initAdopt(CompactShortArray *this_obj,
uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue)
{
if (this_obj) {
this_obj->fHashes = NULL;
this_obj->fCount = count;
this_obj->fDefaultValue = defaultValue;
this_obj->fBogus = FALSE;
this_obj->fArray = newValues;
this_obj->fIndex = indexArray;
this_obj->fCompact = (UBool)(count < UCMP16_kUnicodeCount);
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->kBlockShift = UCMP16_kBlockShift;
this_obj->kBlockMask = UCMP16_kBlockMask;
this_obj->fAlias = FALSE;
this_obj->fIAmOwned = TRUE;
}
return this_obj;
}
CompactShortArray* ucmp16_initAdoptWithBlockShift(CompactShortArray *this_obj,
uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue,
int32_t blockShift)
{
ucmp16_initAdopt(this_obj, indexArray, newValues, count, defaultValue);
if (this_obj) {
this_obj->kBlockShift = blockShift;
this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
}
return this_obj;
}
CompactShortArray* ucmp16_initAlias(CompactShortArray *this_obj,
uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue)
{
if (this_obj) {
this_obj->fHashes = NULL;
this_obj->fCount = count;
this_obj->fDefaultValue = defaultValue;
this_obj->fBogus = FALSE;
this_obj->fArray = newValues;
this_obj->fIndex = indexArray;
this_obj->fCompact = (UBool)(count < UCMP16_kUnicodeCount);
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->kBlockShift = UCMP16_kBlockShift;
this_obj->kBlockMask = UCMP16_kBlockMask;
this_obj->fAlias = TRUE;
this_obj->fIAmOwned = TRUE;
}
return this_obj;
}
CompactShortArray* ucmp16_initAliasWithBlockShift(CompactShortArray *this_obj,
uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue,
int32_t blockShift)
{
ucmp16_initAlias(this_obj, indexArray, newValues, count, defaultValue);
if (this_obj) {
this_obj->kBlockShift = blockShift;
this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
}
return this_obj;
}
1999-08-16 21:50:52 +00:00
/*=======================================================*/
1999-08-16 21:50:52 +00:00
void ucmp16_close(CompactShortArray* this_obj)
{
if(this_obj != NULL) {
setToBogus(this_obj);
if(!this_obj->fIAmOwned)
{
uprv_free(this_obj);
}
}
1999-08-16 21:50:52 +00:00
}
static CompactShortArray* setToBogus(CompactShortArray* this_obj)
1999-08-16 21:50:52 +00:00
{
if(this_obj != NULL) {
if(!this_obj->fAlias) {
if (this_obj->fArray != NULL) {
uprv_free(this_obj->fArray);
this_obj->fArray = NULL;
}
if (this_obj->fIndex != NULL) {
uprv_free(this_obj->fIndex);
this_obj->fIndex = NULL;
}
}
if (this_obj->fHashes != NULL) {
uprv_free(this_obj->fHashes);
this_obj->fHashes = NULL;
}
this_obj->fCount = 0;
this_obj->fCompact = FALSE;
this_obj->fBogus = TRUE;
}
return this_obj;
1999-08-16 21:50:52 +00:00
}
UBool ucmp16_isBogus(const CompactShortArray* this_obj)
{
return (UBool)(this_obj == NULL || this_obj->fBogus);
}
1999-08-16 21:50:52 +00:00
void ucmp16_expand(CompactShortArray* this_obj)
{
if (this_obj->fCompact)
1999-08-16 21:50:52 +00:00
{
int32_t i;
int16_t *tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
2000-08-02 23:40:16 +00:00
if (tempArray == NULL)
{
setToBogus(this_obj);
return;
}
2000-08-02 23:40:16 +00:00
this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
if (this_obj->fHashes == NULL)
{
setToBogus(this_obj);
return;
1999-08-16 21:50:52 +00:00
}
for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
{
tempArray[i] = ucmp16_get(this_obj, (UChar)i); /* HSYS : How expand?*/
1999-08-16 21:50:52 +00:00
}
for (i = 0; i < (1 << (16 - this_obj->kBlockShift)); i += 1)
{
this_obj->fIndex[i] = (uint16_t)(i<<this_obj->kBlockShift);
}
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCompact = FALSE;
/* Since we don't know if the fIndex is also an alias, we allow tempArray to leak. */
/* this_obj->fAlias = FALSE; */
1999-08-16 21:50:52 +00:00
}
}
void ucmp16_set(CompactShortArray* this_obj,
UChar c,
int16_t value)
{
if (this_obj->fCompact)
1999-08-16 21:50:52 +00:00
{
ucmp16_expand(this_obj);
if (this_obj->fBogus)
return;
1999-08-16 21:50:52 +00:00
}
this_obj->fArray[(int32_t)c] = value;
if (value != this_obj->fDefaultValue)
1999-08-16 21:50:52 +00:00
{
touchBlock(this_obj, c >> this_obj->kBlockShift, value);
1999-08-16 21:50:52 +00:00
}
}
void ucmp16_setRange(CompactShortArray* this_obj,
UChar start,
UChar end,
int16_t value)
{
int32_t i;
if (this_obj->fCompact)
1999-08-16 21:50:52 +00:00
{
ucmp16_expand(this_obj);
if (this_obj->fBogus)
return;
1999-08-16 21:50:52 +00:00
}
if (value != this_obj->fDefaultValue)
1999-08-16 21:50:52 +00:00
{
for (i = start; i <= end; i += 1)
{
this_obj->fArray[i] = value;
touchBlock(this_obj, i >> this_obj->kBlockShift, value);
}
1999-08-16 21:50:52 +00:00
}
else
1999-08-16 21:50:52 +00:00
{
for (i = start; i <= end; i += 1)
this_obj->fArray[i] = value;
1999-08-16 21:50:52 +00:00
}
}
/*=======================================================*/
void ucmp16_compact(CompactShortArray* this_obj)
{
if (!this_obj->fCompact)
1999-08-16 21:50:52 +00:00
{
int32_t limitCompacted = 0;
int32_t i, iBlockStart;
int16_t iUntouched = -1;
for (i = 0, iBlockStart = 0; i < (1 << (16 - this_obj->kBlockShift)); i += 1, iBlockStart += (1 << this_obj->kBlockShift))
1999-08-16 21:50:52 +00:00
{
UBool touched = blockTouched(this_obj, i);
this_obj->fIndex[i] = 0xFFFF;
if (!touched && iUntouched != -1)
{
/* If no values in this_obj block were set, we can just set its
* index to be the same as some other block with no values
* set, assuming we've seen one yet.
*/
this_obj->fIndex[i] = iUntouched;
1999-08-16 21:50:52 +00:00
}
else
1999-08-16 21:50:52 +00:00
{
int32_t j, jBlockStart;
for (j = 0, jBlockStart = 0;
j < limitCompacted;
j += 1, jBlockStart += (1 << this_obj->kBlockShift))
{
if (this_obj->fHashes[i] == this_obj->fHashes[j] &&
arrayRegionMatches(this_obj->fArray,
iBlockStart,
this_obj->fArray,
jBlockStart,
(1 << this_obj->kBlockShift)))
{
this_obj->fIndex[i] = (int16_t)jBlockStart;
1999-08-16 21:50:52 +00:00
}
}
1999-08-16 21:50:52 +00:00
/* TODO: verify this_obj is correct*/
if (this_obj->fIndex[i] == 0xFFFF)
{
/* we didn't match, so copy & update*/
uprv_memcpy(&(this_obj->fArray[jBlockStart]),
&(this_obj->fArray[iBlockStart]),
(1 << this_obj->kBlockShift)*sizeof(int16_t));
this_obj->fIndex[i] = (int16_t)jBlockStart;
this_obj->fHashes[j] = this_obj->fHashes[i];
limitCompacted += 1;
if (!touched)
{
/* If this_obj is the first untouched block we've seen,*/
/* remember its index.*/
iUntouched = (int16_t)jBlockStart;
1999-08-16 21:50:52 +00:00
}
}
}
}
/* we are done compacting, so now make the array shorter*/
/* TODO: uprv_realloc should be used instead.
What if uprv_malloc returns NULL [grhoten] */
{
int32_t newSize = limitCompacted * (1 << this_obj->kBlockShift);
int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);
uprv_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
uprv_free(this_obj->fArray);
this_obj->fArray = result;
this_obj->fCount = newSize;
uprv_free(this_obj->fHashes);
this_obj->fHashes = NULL;
this_obj->fCompact = TRUE;
}
1999-08-16 21:50:52 +00:00
}
}
/**
* Query whether a specified block was "touched", i.e. had a value set.
* Untouched blocks can be skipped when compacting the array
*/
int16_t ucmp16_getDefaultValue(const CompactShortArray* this_obj)
{
return this_obj->fDefaultValue;
1999-08-16 21:50:52 +00:00
}
static void touchBlock(CompactShortArray* this_obj,
1999-08-16 21:50:52 +00:00
int32_t i,
int16_t value)
{
this_obj->fHashes[i] = (this_obj->fHashes[i] + (value << 1)) | 1;
1999-08-16 21:50:52 +00:00
}
static UBool blockTouched(const CompactShortArray* this_obj, int32_t i)
1999-08-16 21:50:52 +00:00
{
return (UBool)(this_obj->fHashes[i] != 0);
1999-08-16 21:50:52 +00:00
}
uint32_t ucmp16_getCount(const CompactShortArray* this_obj)
{
return this_obj->fCount;
}
1999-08-16 21:50:52 +00:00
const int16_t* ucmp16_getArray(const CompactShortArray* this_obj)
{
return this_obj->fArray;
}
const uint16_t* ucmp16_getIndex(const CompactShortArray* this_obj)
{
return this_obj->fIndex;
}
1999-08-16 21:50:52 +00:00
U_CAPI uint32_t U_EXPORT2 ucmp16_flattenMem (const CompactShortArray* array, UMemoryStream *MS)
{
int32_t size = 0;
uprv_mstrm_write32(MS, ICU_UCMP16_VERSION);
size += 4;
uprv_mstrm_write32(MS, array->fCount);
size += 4;
uprv_mstrm_write32(MS, array->kBlockShift);
size += 4;
uprv_mstrm_write32(MS, array->kBlockMask);
size += 4;
uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP16_kIndexCount);
size += sizeof(array->fIndex[0])*UCMP16_kIndexCount;
uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount);
size += sizeof(array->fArray[0])*array->fCount;
while(size%4) /* end padding */
{
uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */
size += 1;
}
return size;
}
1999-08-16 21:50:52 +00:00
/* We use sizeof(*array), etc so that this code can be as portable as
possible between the ucmpX_ family. Check lines marked 'SIZE'.
*/
U_CAPI void U_EXPORT2 ucmp16_initFromData(CompactShortArray *this_obj, const uint8_t **source, UErrorCode *status)
{
uint32_t i;
const uint8_t *oldSource = *source;
if(U_FAILURE(*status))
return;
this_obj->fBogus = FALSE;
this_obj->fStructSize = sizeof(CompactShortArray);
this_obj->fCompact = TRUE;
this_obj->fAlias = TRUE;
this_obj->fIAmOwned = TRUE;
this_obj->fHashes = NULL;
this_obj->fDefaultValue = 0x0000; /* not used */
i = * ((const uint32_t*) *source);
(*source) += 4;
if(i != ICU_UCMP16_VERSION)
{
this_obj->fArray = NULL;
this_obj->fIndex = NULL;
this_obj->fBogus = TRUE;
this_obj->fCount = 0;
*status = U_INVALID_FORMAT_ERROR;
return;
}
this_obj->fCount = * ((const uint32_t*)*source);
(*source) += 4;
this_obj->kBlockShift = * ((const uint32_t*)*source);
(*source) += 4;
this_obj->kBlockMask = * ((const uint32_t*)*source);
(*source) += 4;
this_obj->fIndex = (uint16_t*) *source;
(*source) += sizeof(this_obj->fIndex[0])*UCMP16_kIndexCount;
this_obj->fArray = (int16_t*) *source;
(*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;
/* eat up padding */
while((*source-(oldSource))%4)
(*source)++;
}