1999-08-16 21:50:52 +00:00
|
|
|
/*
|
|
|
|
*****************************************************************************************
|
1999-12-13 22:28:37 +00:00
|
|
|
*
|
2001-03-21 23:22:16 +00:00
|
|
|
* Copyright (C) 1997-2001, International Business Machines
|
1999-12-13 22:28:37 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
1999-08-16 21:50:52 +00:00
|
|
|
*****************************************************************************************
|
|
|
|
*/
|
|
|
|
/*===============================================================================
|
|
|
|
*
|
|
|
|
* File cmpshrta.cpp
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods.
|
|
|
|
* 3/4/97 aliu Tuned performance of CompactIntArray constructor,
|
|
|
|
* 05/07/97 helena Added isBogus()
|
|
|
|
* based on performance data indicating that this was slow.
|
|
|
|
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
|
|
|
|
* 07/30/98 erm Added changes from 07/29/98 code review.
|
|
|
|
*===============================================================================
|
|
|
|
*/
|
|
|
|
#include "ucmp16.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
/*
 * Evaluates to nonzero when the `len` int16_t elements starting at
 * source[sourceStart] are identical to the `len` elements starting at
 * target[targetStart], and to zero otherwise.
 * (The comparison previously tested `!= 0`, i.e. it reported a match exactly
 * when the two regions differed.)
 */
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) \
    (uprv_memcmp(&(source)[(sourceStart)], &(target)[(targetStart)], (len) * sizeof(int16_t)) == 0)
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
/*
 * Internal constants.
 *
 * The *_int macros are plain preprocessor constants; the const objects that
 * follow carry the same values as typed variables.
 */

/* Largest 16-bit code unit and the number of distinct code units. */
#define UCMP16_kMaxUnicode_int   65535
#define UCMP16_kUnicodeCount_int (UCMP16_kMaxUnicode_int + 1)

/* Block geometry: a block holds (1 << UCMP16_kBlockShift_int) values. */
#define UCMP16_kBlockShift_int   7
#define UCMP16_kBlockCount_int   (1 << UCMP16_kBlockShift_int)
#define UCMP16_kBlockBytes_int   (UCMP16_kBlockCount_int * sizeof(int16_t))

/* Index geometry: one index entry per block. */
#define UCMP16_kIndexShift_int   (16 - UCMP16_kBlockShift_int)
#define UCMP16_kIndexCount_int   (1 << UCMP16_kIndexShift_int)

/* Mask selecting the offset of a value inside its block. */
#define UCMP16_kBlockMask_int    (UCMP16_kBlockCount_int - 1)

const int32_t  UCMP16_kMaxUnicode   = UCMP16_kMaxUnicode_int;
const int32_t  UCMP16_kUnicodeCount = UCMP16_kUnicodeCount_int;
const int32_t  UCMP16_kBlockShift   = UCMP16_kBlockShift_int;
const int32_t  UCMP16_kBlockCount   = UCMP16_kBlockCount_int;
const int32_t  UCMP16_kBlockBytes   = UCMP16_kBlockBytes_int;
const int32_t  UCMP16_kIndexShift   = UCMP16_kIndexShift_int;
const int32_t  UCMP16_kIndexCount   = UCMP16_kIndexCount_int;
const uint32_t UCMP16_kBlockMask    = UCMP16_kBlockMask_int;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sets the array to the invalid memory state.
|
|
|
|
*/
|
|
|
|
static CompactShortArray* setToBogus(CompactShortArray* array);
|
|
|
|
static void touchBlock(CompactShortArray* this,
|
|
|
|
int32_t i,
|
|
|
|
int16_t value);
|
2000-05-18 22:08:39 +00:00
|
|
|
static UBool blockTouched(const CompactShortArray* this,
|
1999-08-16 21:50:52 +00:00
|
|
|
int32_t i);
|
|
|
|
|
|
|
|
|
|
|
|
/* debug flags*/
|
|
|
|
/*=======================================================*/
|
|
|
|
|
|
|
|
int32_t ucmp16_getkUnicodeCount()
|
|
|
|
{return UCMP16_kUnicodeCount;}
|
|
|
|
|
|
|
|
int32_t ucmp16_getkBlockCount()
|
|
|
|
{return UCMP16_kBlockCount;}
|
|
|
|
|
|
|
|
int32_t ucmp16_getkIndexCount()
|
|
|
|
{ return UCMP16_kIndexCount;}
|
|
|
|
|
|
|
|
CompactShortArray* ucmp16_open(int16_t defaultValue)
|
|
|
|
{
|
|
|
|
int32_t i;
|
1999-12-28 23:57:50 +00:00
|
|
|
CompactShortArray* this = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this == NULL) return NULL;
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fCount = UCMP16_kUnicodeCount;
|
2001-03-21 23:22:16 +00:00
|
|
|
this->fCompact = FALSE;
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fBogus = FALSE;
|
|
|
|
this->fArray = NULL;
|
|
|
|
this->fIndex = NULL;
|
2001-03-21 23:22:16 +00:00
|
|
|
this->fHashes = NULL;
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fDefaultValue = defaultValue;
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
this->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this->fArray == NULL)
|
|
|
|
{
|
|
|
|
this->fBogus = TRUE;
|
|
|
|
return NULL;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
this->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this->fIndex == NULL)
|
|
|
|
{
|
1999-12-28 23:57:50 +00:00
|
|
|
uprv_free(this->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fArray = NULL;
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fBogus = TRUE;
|
|
|
|
return NULL;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
this->kBlockShift = UCMP16_kBlockShift;
|
|
|
|
this->kBlockMask = UCMP16_kBlockMask;
|
|
|
|
for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
|
|
|
|
{
|
|
|
|
this->fArray[i] = defaultValue;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
this->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this->fHashes == NULL)
|
|
|
|
{
|
1999-12-28 23:57:50 +00:00
|
|
|
uprv_free(this->fArray);
|
|
|
|
uprv_free(this->fIndex);
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fBogus = TRUE;
|
|
|
|
return NULL;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
for (i = 0; i < UCMP16_kIndexCount; i += 1)
|
|
|
|
{
|
|
|
|
this->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
|
|
|
|
this->fHashes[i] = 0;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
|
|
|
CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
|
2001-03-21 23:22:16 +00:00
|
|
|
int16_t *newValues,
|
1999-08-16 21:50:52 +00:00
|
|
|
int32_t count,
|
|
|
|
int16_t defaultValue)
|
|
|
|
{
|
1999-12-28 23:57:50 +00:00
|
|
|
CompactShortArray* this = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this == NULL) return NULL;
|
|
|
|
this->fHashes = NULL;
|
2001-03-21 23:22:16 +00:00
|
|
|
this->fCount = count;
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fDefaultValue = defaultValue;
|
|
|
|
this->fBogus = FALSE;
|
|
|
|
this->fArray = newValues;
|
|
|
|
this->fIndex = indexArray;
|
|
|
|
this->fCompact = count < UCMP16_kUnicodeCount;
|
|
|
|
this->kBlockShift = UCMP16_kBlockShift;
|
|
|
|
this->kBlockMask = UCMP16_kBlockMask;
|
|
|
|
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
|
|
|
CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
|
|
|
|
int16_t *newValues,
|
|
|
|
int32_t count,
|
|
|
|
int16_t defaultValue,
|
|
|
|
int32_t blockShift)
|
|
|
|
{
|
|
|
|
CompactShortArray* this = ucmp16_openAdopt(indexArray,
|
|
|
|
newValues,
|
|
|
|
count,
|
|
|
|
defaultValue);
|
|
|
|
if (this == NULL) return NULL;
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
this->kBlockShift = blockShift;
|
|
|
|
this->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*=======================================================*/
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
/**
 * Releases a CompactShortArray: frees the value, index and hash arrays and
 * then the struct itself. Arrays handed to ucmp16_openAdopt() are freed here
 * as well.
 *
 * @param this array to destroy; NULL is accepted and ignored (previously a
 *             NULL argument was dereferenced).
 */
void ucmp16_close(CompactShortArray* this)
{
    if (this == NULL)
    {
        return;
    }

    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);
    uprv_free(this);
}
|
|
|
|
|
|
|
|
/**
 * Puts the array into the invalid ("bogus") state: frees the value, index
 * and hash arrays, clears the pointers, zeroes the count, clears the compact
 * flag and raises fBogus.
 *
 * @param this array to invalidate.
 * @return the same pointer that was passed in.
 */
CompactShortArray* setToBogus(CompactShortArray* this)
{
    /* Release all owned storage first ... */
    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);

    /* ... then leave no dangling pointers behind. */
    this->fArray = NULL;
    this->fIndex = NULL;
    this->fHashes = NULL;

    this->fCount = 0;
    this->fCompact = FALSE;
    this->fBogus = TRUE;

    return this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a compact array back into its expanded form: a value array with
 * UCMP16_kUnicodeCount entries and an index in which every block maps onto
 * itself. Does nothing when the array is not compact.
 *
 * Fixes over the previous version:
 *  - the per-block hash array is rebuilt. A compact array has no hashes
 *    (ucmp16_compact frees them, ucmp16_openAdopt never creates them), so the
 *    touchBlock() call that follows expansion in ucmp16_set()/ucmp16_setRange()
 *    used to write through a NULL pointer;
 *  - fCount is reset to the expanded size.
 *
 * On allocation failure fBogus is set and the array is otherwise left
 * unchanged.
 */
void ucmp16_expand(CompactShortArray* this)
{
    int32_t i;
    int32_t indexCount;
    int16_t *tempArray;
    int32_t *tempHashes;

    if (!this->fCompact)
    {
        return;
    }

    /* One index entry / one hash per block. */
    indexCount = 1 << (16 - this->kBlockShift);

    tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
    if (tempArray == NULL)
    {
        this->fBogus = TRUE;
        return;
    }

    tempHashes = (int32_t*)uprv_malloc(indexCount * sizeof(int32_t));
    if (tempHashes == NULL)
    {
        uprv_free(tempArray);
        this->fBogus = TRUE;
        return;
    }

    for (i = 0; i < indexCount; i += 1)
    {
        tempHashes[i] = 0;
    }

    /* Read every value through the still-compact index, and fold each
     * non-default value into its block's hash with the same mixing step that
     * touchBlock() uses (unsigned arithmetic, so it cannot overflow). */
    for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
    {
        int16_t value = ucmp16_get(this, (UChar)i); /* HSYS : How expand?*/
        tempArray[i] = value;
        if (value != this->fDefaultValue)
        {
            int32_t block = i >> this->kBlockShift;
            tempHashes[block] = (int32_t)(((uint32_t)tempHashes[block]
                                           + ((uint32_t)(int32_t)value << 1)) | 1u);
        }
    }

    /* Only now may the index be overwritten: block i starts at i << shift. */
    for (i = 0; i < indexCount; i += 1)
    {
        this->fIndex[i] = (uint16_t)(i << this->kBlockShift);
    }

    uprv_free(this->fArray);
    this->fArray = tempArray;

    if (this->fHashes != NULL)
    {
        uprv_free(this->fHashes);
    }
    this->fHashes = tempHashes;

    this->fCount = UCMP16_kUnicodeCount;
    this->fCompact = FALSE;
}
|
|
|
|
|
|
|
|
void ucmp16_set(CompactShortArray* this,
|
|
|
|
UChar c,
|
|
|
|
int16_t value)
|
|
|
|
{
|
|
|
|
if (this->fCompact)
|
|
|
|
{
|
|
|
|
ucmp16_expand(this);
|
|
|
|
if (this->fBogus) return;
|
|
|
|
}
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
this->fArray[(int32_t)c] = value;
|
2001-03-21 23:22:16 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
if (value != this->fDefaultValue)
|
|
|
|
{
|
|
|
|
touchBlock(this, c >> this->kBlockShift, value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-03-21 23:22:16 +00:00
|
|
|
void ucmp16_setRange(CompactShortArray* this,
|
1999-08-16 21:50:52 +00:00
|
|
|
UChar start,
|
|
|
|
UChar end,
|
|
|
|
int16_t value)
|
|
|
|
{
|
|
|
|
int32_t i;
|
|
|
|
if (this->fCompact)
|
|
|
|
{
|
|
|
|
ucmp16_expand(this);
|
|
|
|
if (this->fBogus) return;
|
|
|
|
}
|
|
|
|
if (value != this->fDefaultValue)
|
|
|
|
{
|
|
|
|
for (i = start; i <= end; i += 1)
|
|
|
|
{
|
|
|
|
this->fArray[i] = value;
|
|
|
|
touchBlock(this, i >> this->kBlockShift, value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (i = start; i <= end; i += 1) this->fArray[i] = value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*=======================================================*/
|
|
|
|
/**
 * Compacts the array in place. Blocks are visited in order; each block is
 * either pointed at an already-kept block (the first kept untouched block
 * for untouched blocks, or a kept block with the same hash for which
 * arrayRegionMatches() reports a match) or is moved to the front of the
 * value array as a new kept block. Afterwards the value array is shrunk to
 * the kept blocks, the hash array is released and the array is flagged as
 * compact.
 *
 * Does nothing when the array is already compact, or when it has no hash
 * array (e.g. a full-size array created by ucmp16_openAdopt()); the latter
 * case used to dereference a NULL pointer.
 *
 * Other fixes over the previous version:
 *  - the result of the shrinking allocation is checked. If it fails, the
 *    existing (oversized) buffer is kept: its leading part already holds the
 *    compacted data, so the array is still valid and compact;
 *  - a block is no longer memcpy'd onto itself (overlapping copy);
 *  - block offsets are tracked as int32_t and a "found" target replaces the
 *    0xFFFF index sentinel.
 */
void ucmp16_compact(CompactShortArray* this)
{
    int32_t blockSize;
    int32_t blockCount;
    int32_t limitCompacted = 0;  /* number of blocks kept so far */
    int32_t iUntouched = -1;     /* offset of the first kept untouched block, -1 if none yet */
    int32_t i, iBlockStart;
    int32_t newSize;
    int16_t *result;

    if (this->fCompact || this->fHashes == NULL)
    {
        return;
    }

    blockSize = 1 << this->kBlockShift;
    blockCount = 1 << (16 - this->kBlockShift);

    for (i = 0, iBlockStart = 0; i < blockCount; i += 1, iBlockStart += blockSize)
    {
        UBool touched = blockTouched(this, i);
        int32_t j, jBlockStart;
        int32_t target = -1;     /* offset this block will be indexed at */

        if (!touched && iUntouched != -1)
        {
            /* If no values in this block were set, we can just set its
             * index to be the same as some other block with no values
             * set, assuming we've seen one yet.
             */
            this->fIndex[i] = (uint16_t)iUntouched;
            continue;
        }

        /* Look for a kept block this one can share. fHashes[0..limitCompacted)
         * hold the hashes of the kept blocks. */
        for (j = 0, jBlockStart = 0;
             j < limitCompacted;
             j += 1, jBlockStart += blockSize)
        {
            if (this->fHashes[i] == this->fHashes[j] &&
                arrayRegionMatches(this->fArray,
                                   iBlockStart,
                                   this->fArray,
                                   jBlockStart,
                                   blockSize))
            {
                target = jBlockStart;
                break;
            }
        }

        if (target < 0)
        {
            /* we didn't match, so copy & update*/
            target = limitCompacted * blockSize;
            if (target != iBlockStart)
            {
                /* target < iBlockStart and both are block-aligned: no overlap. */
                uprv_memcpy(&(this->fArray[target]),
                            &(this->fArray[iBlockStart]),
                            blockSize * sizeof(int16_t));
            }
            this->fHashes[limitCompacted] = this->fHashes[i];
            limitCompacted += 1;

            if (!touched)
            {
                /* If this is the first untouched block we've seen,*/
                /* remember its index.*/
                iUntouched = target;
            }
        }

        this->fIndex[i] = (uint16_t)target;
    }

    /* we are done compacting, so now make the array shorter*/
    newSize = limitCompacted * blockSize;
    result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);
    if (result != NULL)
    {
        uprv_memcpy(result, this->fArray, newSize * sizeof(int16_t));
        uprv_free(this->fArray);
        this->fArray = result;
    }
    /* else: keep the existing buffer; only its first newSize entries are used. */

    this->fCount = newSize;
    uprv_free(this->fHashes);
    this->fHashes = NULL;
    this->fCompact = TRUE;
}
|
|
|
|
|
|
|
|
/**
 * Note: this comment documents blockTouched(), which is defined further below.
 * Query whether a specified block was "touched", i.e. had a value set.
 * Untouched blocks can be skipped when compacting the array.
 */
|
|
|
|
|
|
|
|
int16_t ucmp16_getDefaultValue(const CompactShortArray* this)
|
|
|
|
{
|
|
|
|
return this->fDefaultValue;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Marks block `i` as touched by folding `value` into its hash:
 * hash = (hash + 2 * value) | 1. The forced low bit guarantees a nonzero
 * hash, which is what blockTouched() tests for.
 *
 * The arithmetic is done on uint32_t. The previous signed form
 * `(hash + (value << 1)) | 1` left-shifted negative values and could
 * overflow a signed int, both of which are undefined behaviour in C; the
 * unsigned form wraps modulo 2^32 instead.
 *
 * @param this  array whose hash table is updated.
 * @param i     block number.
 * @param value value that was stored in the block.
 */
void touchBlock(CompactShortArray* this,
                int32_t i,
                int16_t value)
{
    /* (int32_t) first so that negative values are sign-extended before the
     * conversion to unsigned. */
    uint32_t mixed = (uint32_t)this->fHashes[i] + ((uint32_t)(int32_t)value << 1);

    this->fHashes[i] = (int32_t)(mixed | 1u);
}
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool blockTouched(const CompactShortArray* this, int32_t i)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
return (this->fHashes[i] != 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const int16_t*
|
|
|
|
ucmp16_getArray(const CompactShortArray* this)
|
|
|
|
{
|
|
|
|
return this->fArray;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint16_t*
|
|
|
|
ucmp16_getIndex(const CompactShortArray* this)
|
|
|
|
{
|
|
|
|
return this->fIndex;
|
|
|
|
}
|
|
|
|
|
2001-03-21 23:22:16 +00:00
|
|
|
uint32_t
|
1999-08-16 21:50:52 +00:00
|
|
|
ucmp16_getCount(const CompactShortArray* this)
|
|
|
|
{
|
|
|
|
return this->fCount;
|
|
|
|
}
|
|
|
|
|