1999-11-22 20:56:34 +00:00
|
|
|
/*============================================================================
|
2001-03-21 20:44:20 +00:00
|
|
|
* Copyright (C) 1997-2001, International Business Machines Corporation
|
1999-11-22 20:56:34 +00:00
|
|
|
* and others. All Rights Reserved.
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
|
|
|
* File cmpshrta.cpp
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods.
|
|
|
|
* 3/4/97 aliu Tuned performance of CompactIntArray constructor,
|
|
|
|
* based on performance data indicating that this_obj was slow.
|
|
|
|
* 05/07/97 helena Added isBogus()
|
|
|
|
* 04/26/99 Madhu Ported to C for C Implementation
|
1999-11-22 20:56:34 +00:00
|
|
|
* 11/12/99 srl macroized ucmp32_get()
|
2000-11-06 23:58:15 +00:00
|
|
|
* 11/07/00 weiv aligned implementation with ucmp8
|
2001-03-21 20:44:20 +00:00
|
|
|
*============================================================================
|
1999-08-16 21:50:52 +00:00
|
|
|
*/
|
|
|
|
#include "ucmp32.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
#include "filestrm.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int32_t ucmp32_findOverlappingPosition(CompactIntArray* this_obj, uint32_t start,
|
|
|
|
const UChar *tempIndex,
|
|
|
|
int32_t tempIndexCount,
|
|
|
|
uint32_t cycle);
|
2000-08-08 23:26:10 +00:00
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
static UBool debugSmall = FALSE;
|
1999-08-16 21:50:52 +00:00
|
|
|
static uint32_t debugSmallLimit = 30000;
|
|
|
|
|
|
|
|
/** debug flags
|
|
|
|
*=======================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
int32_t ucmp32_getkUnicodeCount() { return UCMP32_kUnicodeCount;}
|
|
|
|
int32_t ucmp32_getkBlockCount() { return UCMP32_kBlockCount;}
|
|
|
|
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void ucmp32_streamIn(CompactIntArray* this_obj, FileStream* is)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
int32_t newCount, len;
|
|
|
|
char c;
|
|
|
|
if (!T_FileStream_error(is))
|
|
|
|
{
|
|
|
|
|
|
|
|
T_FileStream_read(is, &newCount, sizeof(newCount));
|
|
|
|
if (this_obj->fCount != newCount)
|
|
|
|
{
|
|
|
|
this_obj->fCount = newCount;
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fArray = 0;
|
1999-12-28 23:39:02 +00:00
|
|
|
this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (!this_obj->fArray) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
T_FileStream_read(is, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount);
|
|
|
|
T_FileStream_read(is, &len, sizeof(len));
|
|
|
|
if (len == 0)
|
|
|
|
{
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fIndex);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fIndex = 0;
|
|
|
|
}
|
|
|
|
else if (len == UCMP32_kIndexCount)
|
|
|
|
{
|
|
|
|
if (this_obj->fIndex == 0)
|
1999-12-28 23:39:02 +00:00
|
|
|
this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (!this_obj->fIndex) {
|
|
|
|
this_obj->fBogus = TRUE;
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fArray = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
T_FileStream_read(is, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* char instead of int8_t for Mac compilation*/
|
|
|
|
T_FileStream_read(is, (char*)&c, sizeof(c));
|
2000-08-08 23:26:10 +00:00
|
|
|
this_obj->fCompact = (UBool)(c != 0);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void ucmp32_streamOut(CompactIntArray* this_obj, FileStream* os)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
char c;
|
|
|
|
if (!T_FileStream_error(os))
|
|
|
|
{
|
|
|
|
if (this_obj->fCount != 0 && this_obj->fArray != 0)
|
|
|
|
{
|
|
|
|
T_FileStream_write(os, &(this_obj->fCount), sizeof(this_obj->fCount));
|
|
|
|
T_FileStream_write(os, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int32_t zero = 0;
|
|
|
|
T_FileStream_write(os, &zero, sizeof(zero));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (this_obj->fIndex == 0)
|
|
|
|
{
|
|
|
|
int32_t len = 0;
|
|
|
|
T_FileStream_write(os, &len, sizeof(len));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int32_t len = UCMP32_kIndexCount;
|
|
|
|
T_FileStream_write(os, &len, sizeof(len));
|
|
|
|
T_FileStream_write(os, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount);
|
|
|
|
}
|
2000-08-08 23:26:10 +00:00
|
|
|
c = (char)(this_obj->fCompact ? 1 : 0); /* char instead of int8_t for Mac compilation*/
|
1999-08-16 21:50:52 +00:00
|
|
|
T_FileStream_write(os, (const char*)&c, sizeof(c));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-05-22 19:50:23 +00:00
|
|
|
U_CAPI void ucmp32_streamMemIn(CompactIntArray* this_obj, UMemoryStream* is)
|
|
|
|
{
|
|
|
|
int32_t newCount, len;
|
|
|
|
char c;
|
|
|
|
if (!uprv_mstrm_error(is))
|
|
|
|
{
|
|
|
|
|
|
|
|
uprv_mstrm_read(is, &newCount, sizeof(newCount));
|
|
|
|
if (this_obj->fCount != newCount)
|
|
|
|
{
|
|
|
|
this_obj->fCount = newCount;
|
|
|
|
uprv_free(this_obj->fArray);
|
|
|
|
this_obj->fArray = 0;
|
|
|
|
this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t));
|
|
|
|
if (!this_obj->fArray) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
uprv_mstrm_read(is, this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount);
|
|
|
|
uprv_mstrm_read(is, &len, sizeof(len));
|
|
|
|
if (len == 0)
|
|
|
|
{
|
|
|
|
uprv_free(this_obj->fIndex);
|
|
|
|
this_obj->fIndex = 0;
|
|
|
|
}
|
|
|
|
else if (len == UCMP32_kIndexCount)
|
|
|
|
{
|
|
|
|
if (this_obj->fIndex == 0)
|
|
|
|
this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
|
|
|
if (!this_obj->fIndex) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
uprv_free(this_obj->fArray);
|
|
|
|
this_obj->fArray = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
uprv_mstrm_read(is, this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* char instead of int8_t for Mac compilation*/
|
|
|
|
uprv_mstrm_read(is, (char*)&c, sizeof(c));
|
2000-08-08 23:26:10 +00:00
|
|
|
this_obj->fCompact = (UBool)(c != 0);
|
2000-05-22 19:50:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI void ucmp32_streamMemOut(CompactIntArray* this_obj, UMemoryStream* os)
|
|
|
|
{
|
|
|
|
char c;
|
|
|
|
if (!uprv_mstrm_error(os))
|
|
|
|
{
|
|
|
|
if (this_obj->fCount != 0 && this_obj->fArray != 0)
|
|
|
|
{
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)&(this_obj->fCount), sizeof(this_obj->fCount));
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)this_obj->fArray, sizeof(*(this_obj->fArray)) * this_obj->fCount);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int32_t zero = 0;
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)&zero, sizeof(zero));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (this_obj->fIndex == 0)
|
|
|
|
{
|
|
|
|
int32_t len = 0;
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)&len, sizeof(len));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int32_t len = UCMP32_kIndexCount;
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)&len, sizeof(len));
|
|
|
|
uprv_mstrm_write(os, (uint8_t *)this_obj->fIndex, sizeof(*(this_obj->fIndex)) * UCMP32_kIndexCount);
|
|
|
|
}
|
2000-08-08 23:26:10 +00:00
|
|
|
c = (char)(this_obj->fCompact ? 1 : 0); /* char instead of int8_t for Mac compilation*/
|
2000-05-24 20:33:09 +00:00
|
|
|
uprv_mstrm_write(os, (uint8_t *)&c, sizeof(c));
|
2000-05-22 19:50:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
CompactIntArray* ucmp32_open(int32_t defaultValue)
|
|
|
|
{
|
|
|
|
uint16_t i;
|
|
|
|
int32_t *p, *p_end;
|
|
|
|
uint16_t *q, *q_end;
|
1999-12-28 23:39:02 +00:00
|
|
|
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this_obj == NULL) return NULL;
|
|
|
|
|
2000-11-06 23:58:15 +00:00
|
|
|
this_obj->fStructSize = sizeof(CompactIntArray);
|
|
|
|
this_obj->fArray = NULL;
|
|
|
|
this_obj->fIndex = NULL;
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fCount = UCMP32_kUnicodeCount;
|
|
|
|
this_obj->fCompact = FALSE;
|
|
|
|
this_obj->fBogus = FALSE;
|
2000-11-06 23:58:15 +00:00
|
|
|
this_obj->fAlias = FALSE;
|
|
|
|
this_obj->fIAmOwned = FALSE;
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
/*set up the index array and the data array.
|
|
|
|
* the index array always points into particular parts of the data array
|
|
|
|
* it is initially set up to point at regular block boundaries
|
|
|
|
* The following example uses blocks of 4 for simplicity
|
|
|
|
* Example: Expanded
|
|
|
|
* INDEX# 0 1 2 3 4
|
|
|
|
* INDEX 0 4 8 12 16 ...
|
|
|
|
* ARRAY abcdeababcedzyabcdea...
|
|
|
|
* | | | | | |...
|
|
|
|
* whenever you set an element in the array, it unpacks to this_obj state
|
|
|
|
* After compression, the index will point to various places in the data array
|
|
|
|
* wherever there is a runs of the same elements as in the original
|
|
|
|
* Example: Compressed
|
|
|
|
* INDEX# 0 1 2 3 4
|
|
|
|
* INDEX 0 4 1 8 2 ...
|
|
|
|
* ARRAY abcdeabazyabc...
|
|
|
|
* If you look at the example, index# 2 in the expanded version points
|
|
|
|
* to data position number 8, which has elements "bced". In the compressed
|
|
|
|
* version, index# 2 points to data position 1, which also has "bced"
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
this_obj->fArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (this_obj->fArray == NULL) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
1999-12-28 23:39:02 +00:00
|
|
|
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (!this_obj->fIndex) {
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fArray = NULL;
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
p = this_obj->fArray;
|
|
|
|
p_end = p + UCMP32_kUnicodeCount;
|
|
|
|
while (p < p_end) *p++ = defaultValue;
|
|
|
|
|
|
|
|
q = this_obj->fIndex;
|
|
|
|
q_end = q + UCMP32_kIndexCount;
|
|
|
|
i = 0;
|
|
|
|
while (q < q_end)
|
|
|
|
{
|
|
|
|
*q++ = i;
|
|
|
|
i += (1 << UCMP32_kBlockShift);
|
|
|
|
}
|
|
|
|
return this_obj;
|
|
|
|
}
|
|
|
|
|
2000-09-15 19:40:20 +00:00
|
|
|
CompactIntArray* ucmp32_openAdopt(uint16_t *indexArray,
|
|
|
|
int32_t *newValues,
|
|
|
|
int32_t count)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
1999-12-28 23:39:02 +00:00
|
|
|
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
2000-09-15 19:40:20 +00:00
|
|
|
|
2000-11-08 22:56:34 +00:00
|
|
|
ucmp32_initAdopt(this_obj, indexArray, newValues, count);
|
|
|
|
this_obj->fIAmOwned = FALSE;
|
|
|
|
return this_obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
CompactIntArray* ucmp32_openAlias(uint16_t *indexArray,
|
|
|
|
int32_t *newValues,
|
|
|
|
int32_t count)
|
|
|
|
{
|
|
|
|
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
|
|
|
|
|
|
|
ucmp32_initAlias(this_obj, indexArray, newValues, count);
|
|
|
|
this_obj->fIAmOwned = FALSE;
|
|
|
|
return this_obj;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2000-12-14 21:15:29 +00:00
|
|
|
CompactIntArray* ucmp32_openFromData( const uint8_t **source,
|
|
|
|
UErrorCode *status)
|
|
|
|
{
|
|
|
|
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
|
|
|
|
|
|
|
ucmp32_initFromData(this_obj, source, status);
|
|
|
|
this_obj->fIAmOwned = FALSE;
|
|
|
|
return this_obj;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
/*=======================================================*/
|
|
|
|
|
2000-09-15 19:40:20 +00:00
|
|
|
CompactIntArray* ucmp32_initAdopt(CompactIntArray* this_obj,
|
|
|
|
uint16_t *indexArray,
|
|
|
|
int32_t *newValues,
|
|
|
|
int32_t count)
|
|
|
|
{
|
|
|
|
if (this_obj) {
|
|
|
|
this_obj->fCount = count;
|
|
|
|
this_obj->fBogus = FALSE;
|
2000-11-06 23:58:15 +00:00
|
|
|
this_obj->fStructSize = sizeof(CompactIntArray);
|
|
|
|
|
2000-09-15 19:40:20 +00:00
|
|
|
this_obj->fArray = newValues;
|
|
|
|
this_obj->fIndex = indexArray;
|
|
|
|
this_obj->fCompact = (UBool)((count < UCMP32_kUnicodeCount) ? TRUE : FALSE);
|
2000-11-06 23:58:15 +00:00
|
|
|
this_obj->fAlias = FALSE;
|
2000-11-08 22:56:34 +00:00
|
|
|
this_obj->fIAmOwned = TRUE;
|
2000-09-15 19:40:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return this_obj;
|
|
|
|
}
|
|
|
|
|
2000-11-08 22:56:34 +00:00
|
|
|
CompactIntArray* ucmp32_initAlias(CompactIntArray* this_obj,
|
|
|
|
uint16_t *indexArray,
|
|
|
|
int32_t *newValues,
|
|
|
|
int32_t count)
|
|
|
|
{
|
|
|
|
if (this_obj) {
|
|
|
|
this_obj->fCount = count;
|
|
|
|
this_obj->fBogus = FALSE;
|
|
|
|
this_obj->fStructSize = sizeof(CompactIntArray);
|
|
|
|
|
|
|
|
this_obj->fArray = newValues;
|
|
|
|
this_obj->fIndex = indexArray;
|
|
|
|
this_obj->fCompact = (UBool)((count < UCMP32_kUnicodeCount) ? TRUE : FALSE);
|
|
|
|
this_obj->fAlias = TRUE;
|
|
|
|
this_obj->fIAmOwned = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
return this_obj;
|
|
|
|
}
|
2000-09-15 19:40:20 +00:00
|
|
|
/*=======================================================*/
|
|
|
|
|
|
|
|
void ucmp32_close(CompactIntArray* this_obj)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
1999-12-07 00:35:36 +00:00
|
|
|
if(this_obj != NULL) {
|
2000-11-06 23:58:15 +00:00
|
|
|
if(!this_obj->fAlias) {
|
|
|
|
if(this_obj->fArray != NULL) {
|
|
|
|
uprv_free(this_obj->fArray);
|
|
|
|
}
|
|
|
|
if(this_obj->fIndex != NULL) {
|
|
|
|
uprv_free(this_obj->fIndex);
|
|
|
|
}
|
1999-12-07 00:35:36 +00:00
|
|
|
}
|
2000-11-06 23:58:15 +00:00
|
|
|
if(!this_obj->fIAmOwned) { /* Called if 'init' was called instead of 'open'. */
|
|
|
|
uprv_free(this_obj);
|
1999-12-07 00:35:36 +00:00
|
|
|
}
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool ucmp32_isBogus(const CompactIntArray* this_obj)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-08-08 23:26:10 +00:00
|
|
|
return (UBool)(this_obj == NULL || this_obj->fBogus);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void ucmp32_expand(CompactIntArray* this_obj) {
|
|
|
|
/* can optimize later.
|
|
|
|
* if we have to expand, then walk through the blocks instead of using Get
|
|
|
|
* this_obj code unpacks the array by copying the blocks to the normalized position.
|
|
|
|
* Example: Compressed
|
|
|
|
* INDEX# 0 1 2 3 4
|
|
|
|
* INDEX 0 4 1 8 2 ...
|
|
|
|
* ARRAY abcdeabazyabc...
|
|
|
|
* turns into
|
|
|
|
* Example: Expanded
|
|
|
|
* INDEX# 0 1 2 3 4
|
|
|
|
* INDEX 0 4 8 12 16 ...
|
|
|
|
* ARRAY abcdeababcedzyabcdea...
|
|
|
|
*/
|
|
|
|
int32_t i;
|
|
|
|
int32_t* tempArray;
|
|
|
|
if (this_obj->fCompact) {
|
1999-12-28 23:39:02 +00:00
|
|
|
tempArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (tempArray == NULL) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (i = 0; i < UCMP32_kUnicodeCount; ++i) {
|
|
|
|
tempArray[i] = ucmp32_get(this_obj, (UChar)i); /* HSYS : How expand?*/
|
|
|
|
}
|
|
|
|
for (i = 0; i < UCMP32_kIndexCount; ++i) {
|
|
|
|
this_obj->fIndex[i] = (uint16_t)(i<<UCMP32_kBlockShift);
|
|
|
|
}
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fArray = tempArray;
|
|
|
|
this_obj->fCompact = FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t ucmp32_getCount(const CompactIntArray* this_obj)
|
|
|
|
{
|
|
|
|
return this_obj->fCount;
|
|
|
|
}
|
|
|
|
|
|
|
|
const int32_t* ucmp32_getArray(const CompactIntArray* this_obj)
|
|
|
|
{
|
|
|
|
return this_obj->fArray;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint16_t* ucmp32_getIndex(const CompactIntArray* this_obj)
|
|
|
|
{
|
|
|
|
return this_obj->fIndex;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ucmp32_set(CompactIntArray* this_obj, UChar c, int32_t value)
|
|
|
|
{
|
|
|
|
if (this_obj->fCompact == TRUE) {
|
|
|
|
ucmp32_expand(this_obj);
|
|
|
|
if (this_obj->fBogus) return;
|
|
|
|
}
|
|
|
|
this_obj->fArray[(int32_t)c] = value;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ucmp32_setRange(CompactIntArray* this_obj, UChar start, UChar end, int32_t value)
|
|
|
|
{
|
|
|
|
int32_t i;
|
|
|
|
if (this_obj->fCompact == TRUE) {
|
|
|
|
ucmp32_expand(this_obj);
|
|
|
|
if (this_obj->fBogus) return;
|
|
|
|
|
|
|
|
}
|
|
|
|
for (i = start; i <= end; ++i) {
|
|
|
|
this_obj->fArray[i] = value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*=======================================================
|
|
|
|
* this_obj->fArray: an array to be overlapped
|
|
|
|
* start and count: specify the block to be overlapped
|
|
|
|
* tempIndex: the overlapped array (actually indices back into inputContents)
|
|
|
|
* inputHash: an index of hashes for tempIndex, where
|
|
|
|
* inputHash[i] = XOR of values from i-count+1 to i
|
|
|
|
*/
|
|
|
|
|
2000-08-11 01:27:17 +00:00
|
|
|
static int32_t ucmp32_findOverlappingPosition(CompactIntArray* this_obj,
|
1999-08-16 21:50:52 +00:00
|
|
|
uint32_t start,
|
|
|
|
const UChar* tempIndex,
|
|
|
|
int32_t tempIndexCount,
|
|
|
|
uint32_t cycle) {
|
|
|
|
/* this_obj is a utility routine for finding blocks that overlap.
|
|
|
|
* IMPORTANT: the cycle number is very important. Small cycles take a lot
|
|
|
|
* longer to work. In some cases, they may be able to get better compaction.
|
|
|
|
*/
|
|
|
|
int32_t i;
|
|
|
|
int32_t j;
|
|
|
|
int32_t currentCount;
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < tempIndexCount; i += cycle) {
|
|
|
|
currentCount = UCMP32_kBlockCount;
|
|
|
|
if (i + UCMP32_kBlockCount > tempIndexCount) {
|
|
|
|
currentCount = tempIndexCount - i;
|
|
|
|
}
|
|
|
|
for (j = 0; j < currentCount; ++j) {
|
|
|
|
if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]]) break;
|
|
|
|
}
|
|
|
|
if (j == currentCount) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
/*=======================================================*/
|
|
|
|
|
|
|
|
void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) {
|
|
|
|
/* this_obj actually does the compaction.
|
|
|
|
* it walks throught the contents of the expanded array, finding the
|
|
|
|
* first block in the data that matches the contents of the current index.
|
|
|
|
* As it works, it keeps an updated pointer to the last position,
|
|
|
|
* so that it knows how big to make the final array
|
|
|
|
* If the matching succeeds, then the index will point into the data
|
|
|
|
* at some earlier position.
|
|
|
|
* If the matching fails, then last position pointer will be bumped,
|
|
|
|
* and the index will point to that last block of data.
|
|
|
|
*/
|
|
|
|
UChar* tempIndex;
|
|
|
|
int32_t tempIndexCount;
|
|
|
|
int32_t* tempArray;
|
|
|
|
int32_t iBlock, iIndex;
|
|
|
|
int32_t newCount, firstPosition;
|
|
|
|
uint32_t block;
|
|
|
|
if (!this_obj->fCompact) {
|
|
|
|
|
|
|
|
/* fix cycle, must be 0 < cycle <= blockcount*/
|
|
|
|
if (cycle < 0) cycle = 1;
|
|
|
|
else if (cycle > UCMP32_kBlockCount)
|
|
|
|
cycle = UCMP32_kBlockCount;
|
|
|
|
|
|
|
|
/* make temp storage, larger than we need*/
|
1999-12-28 23:39:02 +00:00
|
|
|
tempIndex =(UChar*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (tempIndex == NULL) {
|
|
|
|
this_obj->fBogus = TRUE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* set up first block.*/
|
|
|
|
tempIndexCount = UCMP32_kBlockCount;
|
|
|
|
for (iIndex = 0; iIndex < UCMP32_kBlockCount; ++iIndex) {
|
|
|
|
tempIndex[iIndex] = (uint16_t)iIndex;
|
|
|
|
}; /* endfor (iIndex = 0; .....)*/
|
|
|
|
this_obj->fIndex[0] = 0;
|
|
|
|
|
|
|
|
/* for each successive block, find out its first position in the compacted array*/
|
|
|
|
for (iBlock = 1; iBlock < UCMP32_kIndexCount; ++iBlock) {
|
|
|
|
|
|
|
|
block = iBlock<<UCMP32_kBlockShift;
|
|
|
|
if (debugSmall) if (block > debugSmallLimit) break;
|
|
|
|
firstPosition = ucmp32_findOverlappingPosition(this_obj, block, tempIndex, tempIndexCount, cycle);
|
|
|
|
|
|
|
|
/* if not contained in the current list, copy the remainder
|
|
|
|
* invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
|
|
|
|
* we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
|
|
|
|
*/
|
|
|
|
newCount = firstPosition + UCMP32_kBlockCount;
|
|
|
|
if (newCount > tempIndexCount) {
|
|
|
|
for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex) {
|
|
|
|
tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
|
|
|
|
} /* endfor (iIndex = tempIndexCount....)*/
|
|
|
|
tempIndexCount = newCount;
|
|
|
|
} /*endif (newCount > tempIndexCount)*/
|
|
|
|
this_obj->fIndex[iBlock] = (uint16_t)firstPosition;
|
|
|
|
} /* endfor (iBlock = 1.....)*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* now allocate and copy the items into the array*/
|
1999-12-28 23:39:02 +00:00
|
|
|
tempArray = (int32_t*)uprv_malloc(tempIndexCount * sizeof(uint32_t));
|
1999-08-16 21:50:52 +00:00
|
|
|
if (tempArray == NULL) {
|
|
|
|
this_obj->fBogus = TRUE;
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(tempIndex);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
|
|
|
|
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
|
|
|
|
}
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(this_obj->fArray);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fArray = tempArray;
|
|
|
|
this_obj->fCount = tempIndexCount;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* free up temp storage*/
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_free(tempIndex);
|
1999-08-16 21:50:52 +00:00
|
|
|
this_obj->fCompact = TRUE;
|
|
|
|
|
|
|
|
#ifdef _DEBUG
|
|
|
|
/*the following line is useful for specific debugging purposes*/
|
|
|
|
/*fprintf(stderr, "Compacted to %ld with cycle %d\n", fCount, cycle);*/
|
|
|
|
#endif
|
|
|
|
} /* endif (!this_obj->fCompact)*/
|
|
|
|
}
|
|
|
|
|
2000-11-07 16:32:42 +00:00
|
|
|
U_CAPI uint32_t U_EXPORT2 ucmp32_flattenMem (const CompactIntArray* array, UMemoryStream *MS)
|
2000-11-06 23:58:15 +00:00
|
|
|
{
|
|
|
|
int32_t size = 0;
|
|
|
|
|
|
|
|
uprv_mstrm_write32(MS, ICU_UCMP32_VERSION);
|
|
|
|
size += 4;
|
|
|
|
|
|
|
|
uprv_mstrm_write32(MS, array->fCount);
|
|
|
|
size += 4;
|
|
|
|
|
|
|
|
uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP32_kIndexCount);
|
|
|
|
size += sizeof(array->fIndex[0])*UCMP32_kIndexCount;
|
|
|
|
|
|
|
|
uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount);
|
|
|
|
size += sizeof(array->fArray[0])*array->fCount;
|
|
|
|
|
|
|
|
while(size%4) /* end padding */
|
|
|
|
{
|
|
|
|
uprv_mstrm_writePadding(MS, 1); /* Pad total so far to even size */
|
|
|
|
size += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI void U_EXPORT2 ucmp32_initFromData(CompactIntArray *this_obj, const uint8_t **source, UErrorCode *status)
|
|
|
|
{
|
|
|
|
uint32_t i;
|
|
|
|
const uint8_t *oldSource = *source;
|
|
|
|
|
|
|
|
if(U_FAILURE(*status))
|
|
|
|
return;
|
|
|
|
|
|
|
|
this_obj->fArray = NULL;
|
|
|
|
this_obj->fIndex = NULL;
|
|
|
|
this_obj->fBogus = FALSE;
|
|
|
|
this_obj->fStructSize = sizeof(CompactIntArray);
|
|
|
|
this_obj->fCompact = TRUE;
|
|
|
|
this_obj->fAlias = TRUE;
|
|
|
|
this_obj->fIAmOwned = TRUE;
|
|
|
|
|
|
|
|
i = * ((const uint32_t*) *source);
|
|
|
|
(*source) += 4;
|
|
|
|
|
|
|
|
if(i != ICU_UCMP32_VERSION)
|
|
|
|
{
|
|
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
this_obj->fCount = * ((const uint32_t*)*source);
|
|
|
|
(*source) += 4;
|
|
|
|
|
|
|
|
this_obj->fIndex = (uint16_t*) *source;
|
|
|
|
(*source) += sizeof(this_obj->fIndex[0])*UCMP32_kIndexCount;
|
|
|
|
|
|
|
|
this_obj->fArray = (int32_t*) *source;
|
|
|
|
(*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;
|
|
|
|
|
|
|
|
/* eat up padding */
|
|
|
|
while((*source-(oldSource))%4)
|
|
|
|
(*source)++;
|
|
|
|
}
|