1999-08-16 21:50:52 +00:00
|
|
|
/*
|
|
|
|
********************************************************************
|
|
|
|
* COPYRIGHT:
|
2001-03-21 20:44:20 +00:00
|
|
|
* Copyright (c) 1996-2001, International Business Machines Corporation and
|
1999-11-22 20:56:34 +00:00
|
|
|
* others. All Rights Reserved.
|
1999-08-16 21:50:52 +00:00
|
|
|
********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef UCMP8_H
|
|
|
|
#define UCMP8_H
|
|
|
|
|
2000-04-19 23:05:27 +00:00
|
|
|
/*
 * 32-bit version stamp.
 * Bump this whenever the internal structure changes.
 */
#define ICU_UCMP8_VERSION 0x01260000
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-11-06 23:58:15 +00:00
|
|
|
#include "umemstrm.h"
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/utypes.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/*====================================
 * class CompactByteArray
 * Provides a compact way to store information that is indexed by Unicode values,
 * such as character properties, types, keyboard values, etc.
 * The ATypes are used by value, so they should be small: integers or pointers.
 *====================================
 */
|
|
|
|
|
1999-10-18 23:44:20 +00:00
|
|
|
/* Takes no arguments and returns an int32_t.
 * NOTE(review): presumably returns UCMP8_kUnicodeCount; the implementation is
 * not visible in this header -- confirm. */
U_CAPI int32_t U_EXPORT2 ucmp8_getkUnicodeCount(void);
|
|
|
|
/* Takes no arguments and returns an int32_t.
 * NOTE(review): presumably returns UCMP8_kBlockCount; the implementation is
 * not visible in this header -- confirm. */
U_CAPI int32_t U_EXPORT2 ucmp8_getkBlockCount(void);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
1999-10-18 23:44:20 +00:00
|
|
|
typedef struct CompactByteArray {
|
1999-12-04 02:31:40 +00:00
|
|
|
uint32_t fStructSize;
|
1999-08-16 21:50:52 +00:00
|
|
|
int8_t* fArray;
|
|
|
|
uint16_t* fIndex;
|
|
|
|
int32_t fCount;
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool fCompact;
|
|
|
|
UBool fBogus;
|
|
|
|
UBool fAlias;
|
|
|
|
UBool fIAmOwned; /* don't free CBA on close */
|
1999-08-16 21:50:52 +00:00
|
|
|
} CompactByteArray;
|
|
|
|
|
1999-11-22 20:56:34 +00:00
|
|
|
/* Number of elements in a fully expanded array (one per 16-bit code unit). */
#define UCMP8_kUnicodeCount 65536
/* Number of low bits of a code unit that select an entry inside a block. */
#define UCMP8_kBlockShift 7
/* Number of values per block (128). */
#define UCMP8_kBlockCount (1<<UCMP8_kBlockShift)
/* Number of high bits of a code unit that select a block (9). */
#define UCMP8_kIndexShift (16-UCMP8_kBlockShift)
/* Number of blocks, i.e. entries in the index array (512). */
#define UCMP8_kIndexCount (1<<UCMP8_kIndexShift)
/* Mask that extracts the within-block offset from a code unit (0x7F). */
#define UCMP8_kBlockMask (UCMP8_kBlockCount-1)
|
|
|
|
|
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Construct an empty CompactByteArray with uprv_malloc(). Do not call any of the
|
|
|
|
* ucmp8_init*() functions after using this function. They will cause a memory
|
|
|
|
* leak.
|
|
|
|
*
|
|
|
|
* @param defaultValue the default value for all characters not explicitly in the array
|
|
|
|
* @see ucmp8_init
|
|
|
|
* @see ucmp8_initBogus
|
|
|
|
* @return The initialized array.
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_open(int8_t defaultValue);
|
2000-04-19 23:05:27 +00:00
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Construct a CompactByteArray from a pre-computed index and values array. The values
|
|
|
|
* will be adopted by the CompactByteArray. Memory is allocated with uprv_malloc.
|
|
|
|
* Note: for speed, the compact method will only re-use blocks in the values array
|
|
|
|
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
|
|
|
* may re-use blocks at any position in the values array. The indexArray and newValues
|
|
|
|
* will be uprv_free'd when ucmp16_close() is called.
|
|
|
|
*
|
|
|
|
* @param indexArray the index array to be adopted
|
|
|
|
* @param newValues the value array to be adopted
|
|
|
|
* @param count the number of entries in the value array
|
|
|
|
* @see compact
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAdopt(uint16_t* indexArray,
|
1999-08-16 21:50:52 +00:00
|
|
|
int8_t* newValues,
|
|
|
|
int32_t count);
|
2000-12-13 19:48:01 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Construct a CompactByteArray from a pre-computed index and values array. The values
|
|
|
|
* will be aliased by the CompactByteArray. Memory is allocated with uprv_malloc.
|
|
|
|
* Note: for speed, the compact method will only re-use blocks in the values array
|
|
|
|
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
|
|
|
* may re-use blocks at any position in the values array.
|
|
|
|
*
|
|
|
|
* @param indexArray the index array to be adopted
|
|
|
|
* @param newValues the value array to be adopted
|
|
|
|
* @param count the number of entries in the value array
|
|
|
|
* @see compact
|
|
|
|
*/
|
1999-12-10 21:56:55 +00:00
|
|
|
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAlias(uint16_t* indexArray,
|
|
|
|
int8_t* newValues,
|
|
|
|
int32_t count);
|
2000-09-15 19:40:20 +00:00
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize an empty CompactByteArray. Do not call this function
|
|
|
|
* if you created the array with ucmp8_open() because it will cause a memory
|
|
|
|
* leak.
|
|
|
|
*
|
|
|
|
* @param defaultValue the default value for all characters not explicitly in the array
|
|
|
|
* @param array An uninitialized CompactByteArray
|
|
|
|
* @see ucmp8_open
|
|
|
|
*/
|
|
|
|
U_CAPI void U_EXPORT2 ucmp8_init(CompactByteArray* array, int8_t defaultValue);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize an empty CompactByteArray to the bogus value. Do not call this
|
|
|
|
* function if you created the array with ucmp8_open() because it will cause
|
|
|
|
* a memory leak.
|
|
|
|
*
|
|
|
|
* @param array An uninitialized CompactByteArray
|
|
|
|
* @see ucmp8_open
|
|
|
|
* @see ucmp8_isBogus
|
|
|
|
*/
|
|
|
|
U_CAPI void U_EXPORT2 ucmp8_initBogus(CompactByteArray* array);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize a CompactByteArray from a pre-computed index and values array. The values
|
|
|
|
* will be adopted by the CompactByteArray. Memory is allocated with uprv_malloc.
|
|
|
|
* Note: for speed, the compact method will only re-use blocks in the values array
|
|
|
|
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
|
|
|
* may re-use blocks at any position in the values array. The indexArray and newValues
|
|
|
|
* will be uprv_free'd when ucmp16_close() is called.
|
|
|
|
*
|
|
|
|
* @param indexArray the index array to be adopted
|
|
|
|
* @param newValues the value array to be adopted
|
|
|
|
* @param count the number of entries in the value array
|
|
|
|
* @see compact
|
|
|
|
*/
|
2000-09-15 19:40:20 +00:00
|
|
|
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_initAdopt(CompactByteArray *this_obj,
|
|
|
|
uint16_t* indexArray,
|
|
|
|
int8_t* newValues,
|
|
|
|
int32_t count);
|
2000-12-13 19:48:01 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize a CompactByteArray from a pre-computed index and values array. The values
|
|
|
|
* will be aliased by the CompactByteArray. Memory is allocated with uprv_malloc.
|
|
|
|
* Note: for speed, the compact method will only re-use blocks in the values array
|
|
|
|
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
|
|
|
* may re-use blocks at any position in the values array.
|
|
|
|
*
|
|
|
|
* @param indexArray the index array to be adopted
|
|
|
|
* @param newValues the value array to be adopted
|
|
|
|
* @param count the number of entries in the value array
|
|
|
|
* @see compact
|
|
|
|
*/
|
2000-09-15 19:40:20 +00:00
|
|
|
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_initAlias(CompactByteArray *this_obj,
|
|
|
|
uint16_t* indexArray,
|
|
|
|
int8_t* newValues,
|
|
|
|
int32_t count);
|
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Free up any allocated memory associated with this compact array.
|
|
|
|
* The memory that is uprv_free'd depends on how the array was initialized
|
|
|
|
* or opened.
|
|
|
|
*
|
|
|
|
* @param array The compact array to close
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void U_EXPORT2 ucmp8_close(CompactByteArray* array);
|
2000-12-13 19:48:01 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns TRUE if the creation of the compact array fails.
|
|
|
|
*/
|
2000-11-04 01:16:21 +00:00
|
|
|
U_CAPI UBool U_EXPORT2 ucmp8_isBogus(const CompactByteArray* array);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
 * Get the mapped value of a Unicode character.
 *
 * The high bits of index select an entry of fIndex, which gives the start of
 * a block in fArray; the low bits of index select the entry inside that block.
 *
 * This is a macro: both arguments are evaluated more than once, so do not
 * pass expressions with side effects. Arguments are fully parenthesized, so
 * expressions such as &cba or (a | b) expand correctly.
 *
 * @param array pointer to the CompactByteArray to read from
 * @param index the character to get the mapped value with
 * @return the mapped value of the given character
 */
#define ucmp8_get(array, index) \
    ((array)->fArray[((array)->fIndex[(index) >> UCMP8_kBlockShift] & 0xFFFF) + ((index) & UCMP8_kBlockMask)])

/** Same lookup as ucmp8_get(), with the result cast to uint8_t. */
#define ucmp8_getu(array, index) ((uint8_t)ucmp8_get(array, index))
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Set a new value for a Unicode character.
|
|
|
|
* Set automatically expands the array if it is compacted.
|
|
|
|
*
|
|
|
|
* @param character the character to set the mapped value with
|
|
|
|
* @param value the new mapped value
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void U_EXPORT2 ucmp8_set(CompactByteArray* array,
|
2000-04-14 05:22:29 +00:00
|
|
|
UChar character,
|
1999-08-16 21:50:52 +00:00
|
|
|
int8_t value);
|
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Set new values for a range of Unicode character.
|
|
|
|
*
|
|
|
|
* @param start the starting offset of the range
|
|
|
|
* @param end the ending offset of the range
|
|
|
|
* @param value the new mapped value
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void U_EXPORT2 ucmp8_setRange(CompactByteArray* array,
|
1999-08-16 21:50:52 +00:00
|
|
|
UChar start,
|
|
|
|
UChar end,
|
|
|
|
int8_t value);
|
|
|
|
|
1999-10-18 23:44:20 +00:00
|
|
|
/* Read-only accessor: takes a const array and returns an int32_t.
 * NOTE(review): presumably returns array->fCount -- confirm against the implementation. */
U_CAPI int32_t U_EXPORT2 ucmp8_getCount(const CompactByteArray* array);
|
|
|
|
/* Read-only accessor: takes a const array and returns a const int8_t pointer.
 * NOTE(review): presumably returns array->fArray -- confirm against the implementation. */
U_CAPI const int8_t* U_EXPORT2 ucmp8_getArray(const CompactByteArray* array);
|
|
|
|
/* Read-only accessor: takes a const array and returns a const uint16_t pointer.
 * NOTE(review): presumably returns array->fIndex -- confirm against the implementation. */
U_CAPI const uint16_t* U_EXPORT2 ucmp8_getIndex(const CompactByteArray* array);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/**
|
|
|
|
* Compact the array.
|
|
|
|
* The value of cycle determines how large the overlap can be.
|
|
|
|
* A cycle of 1 is the most compacted, but takes the most time to do.
|
|
|
|
* If values stored in the array tend to repeat in cycles of, say, 16,
|
|
|
|
* then using that will be faster than cycle = 1, and get almost the
|
|
|
|
* same compression.
|
|
|
|
*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void U_EXPORT2 ucmp8_compact(CompactByteArray* array,
|
1999-08-16 21:50:52 +00:00
|
|
|
uint32_t cycle);
|
|
|
|
|
2000-12-13 19:48:01 +00:00
|
|
|
/** Expanded takes the array back to a 65536 element array*/
|
1999-10-18 23:44:20 +00:00
|
|
|
U_CAPI void U_EXPORT2 ucmp8_expand(CompactByteArray* array);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-04-19 23:05:27 +00:00
|
|
|
/** (more) INTERNAL USE ONLY **/
|
2000-11-07 16:32:42 +00:00
|
|
|
U_CAPI uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, UMemoryStream *MS);
|
2000-04-19 23:05:27 +00:00
|
|
|
/* initializes an existing CBA from memory. Will cause ucmp8_close() to not deallocate anything. */
|
|
|
|
U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray* array, const uint8_t **source, UErrorCode *status);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|