scuffed-code/icu4c/source/common/ucmpe32.h
Vladimir Weinstein ffa911c85b ICU-1043 fixes for the surrogate handling
X-SVN-Rev: 6720
2001-11-10 00:13:03 +00:00

207 lines
6.4 KiB
C

/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucmpe32.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001aug03
* created by: Vladimir Weinstein
*
* This is basically a rip-off of trie developed by Markus for
* normalization data, but using a reduced ucmp interface
* Interface is implemented as much as required by the collation
* framework.
* This table is slow on data addition, but should support surrogates
* nicely.
*/
/**
* NOTE: This array is specifically implemented to support surrogates
* in the collation framework. Its interface is minimal and its usage model
* is far from flexible. Use at your own risk outside of collation.
* Risk is also present in the collation framework, but there is hardly
* anything you can do about it, save reimplementing the framework.
*/
#ifndef UCMPE32_H
#define UCMPE32_H
/* NOTE(review): looks like a packed version tag for this data structure; the
 * field layout of the value is not visible in this header - confirm against
 * the implementation before relying on it. */
#define ICU_UCMPE32_VERSION 0x01260000
/* NOTE(review): presumably the initial capacity (in entries) of the stage 2
 * table of a freshly opened array - confirm against ucmpe32_open. */
#define INIT_UCMPE32_STAGE2_SIZE 60000
#include "unicode/utypes.h"
#include "filestrm.h"
#include "umemstrm.h"
/*
 * Trie constants.
 *
 * A code point is split into a stage 1 index (its high bits) and an offset
 * inside a stage 2 block (its low _UCMPE32_TRIE_SHIFT bits).
 */
enum {
    /*
     * Number of low code point bits that select an entry inside a
     * stage 2 block.
     * Must be <=10: above 10, a lead surrogate's block is smaller than
     * a stage 2 block.
     */
    _UCMPE32_TRIE_SHIFT = 5,

    /* number of entries in one stage 2 block */
    _UCMPE32_STAGE_2_BLOCK_COUNT = 1 << _UCMPE32_TRIE_SHIFT,

    /* mask extracting the offset of a code point inside its stage 2 block */
    _UCMPE32_STAGE_2_MASK = _UCMPE32_STAGE_2_BLOCK_COUNT - 1,

    /* number of stage 1 entries covering the 16-bit (BMP) range */
    _UCMPE32_STAGE_1_BMP_COUNT = (1 << (16 - _UCMPE32_TRIE_SHIFT)),

    /* 10 bits per surrogate, minus the bits consumed by a stage 2 block */
    _UCMPE32_SURROGATE_BLOCK_BITS = 10 - _UCMPE32_TRIE_SHIFT,

    /* number of stage 2 blocks spanned by 1024 consecutive code points */
    _UCMPE32_SURROGATE_BLOCK_COUNT = (1 << _UCMPE32_SURROGATE_BLOCK_BITS),

    /* 16 bits for the index to UChars and other extra data */
    _UCMPE32_EXTRA_SHIFT = 16,

    /* start of surrogate specials after shift */
    _UCMPE32_EXTRA_INDEX_TOP = 0xfc00,

    _UCMPE32_EXTRA_SURROGATE_MASK = 0x3ff,

    /* hangul etc. */
    _UCMPE32_EXTRA_SURROGATE_TOP = 0x3f0
};

/*
 * Number of stage 1 entries needed for the whole code point range
 * (0..0x10ffff). Kept as a macro rather than an enum constant because
 * the value may be >0xffff and may not work as an enum.
 */
#define _UCMPE32_STAGE_1_MAX_COUNT (0x110000 >> _UCMPE32_TRIE_SHIFT)
/**
* Two-stage lookup table mapping code points to 32-bit values.
* A lookup (see ucmpe32_get) reads stage1 at (code point >> _UCMPE32_TRIE_SHIFT)
* and uses the result as the base offset of a block in stage2.
* Field order and types are part of the binary layout; do not reorder.
*/
typedef struct CompactEIntArray{
uint32_t fStructSize; /* NOTE(review): presumably the size of this struct in bytes - confirm */
UBool fCompact; /* NOTE(review): presumably TRUE once ucmpe32_compact has run - confirm */
UBool fAlias; /* NOTE(review): presumably TRUE when the tables point into memory not owned by this object - confirm */
UBool fBogus; /* NOTE(review): presumably marks an unusable object (e.g. after a failed allocation) - confirm */
UBool fInitPhase; /* NOTE(review): meaning not visible in this header - confirm in the implementation */
uint16_t *stage1; /* indexed by (code point >> _UCMPE32_TRIE_SHIFT); each entry is an offset into stage2 */
int32_t *stage2; /* the mapped values, addressed as stage1 entry + (code point & _UCMPE32_STAGE_2_MASK) */
int32_t stage1Top; /* NOTE(review): presumably the number of stage1 entries in use - confirm */
int32_t stage2Top; /* NOTE(review): presumably the number of stage2 entries in use - confirm */
int32_t stage2DefaultTop; /* NOTE(review): meaning not visible in this header - confirm in the implementation */
int32_t fDefaultValue; /* NOTE(review): presumably the defaultValue passed to ucmpe32_open - confirm */
int32_t fSurrogateValue; /* NOTE(review): presumably the surrogateValue passed to ucmpe32_open - confirm */
int32_t fLeadSurrogateValue; /* NOTE(review): presumably the leadSurrogateValue passed to ucmpe32_open - confirm */
int32_t stage2Size; /* NOTE(review): presumably the allocated capacity of stage2, in entries - confirm */
} CompactEIntArray;
/**
* Construct an empty CompactEIntArray.
*
* @param defaultValue the default value for all characters not explicitly in the array
* @param surrogateValue NOTE(review): presumably the value associated with
*        surrogate code units - confirm against the implementation
* @param leadSurrogateValue NOTE(review): presumably the value associated with
*        lead surrogates - confirm against the implementation
* @param status pointer to an ICU error code; NOTE(review): presumably set on
*        failure (e.g. out of memory) - confirm against the implementation
* @return the new array; NOTE(review): presumably to be released with
*         ucmpe32_close - confirm
*/
U_CAPI CompactEIntArray* U_EXPORT2
ucmpe32_open(int32_t defaultValue, int32_t surrogateValue, int32_t leadSurrogateValue, UErrorCode *status);
/**
* Opens a compacted read-only array from
* a block in memory.
*
* @param source address of a pointer to the flattened data;
*        NOTE(review): passed by address, so presumably advanced past the
*        consumed bytes - confirm against the implementation
* @param status pointer to an ICU error code; NOTE(review): presumably set
*        when the data cannot be used - confirm
* @return the opened array
*/
U_CAPI CompactEIntArray* U_EXPORT2
ucmpe32_openFromData( const uint8_t **source, UErrorCode *status);
/**
* Clones an array. The original can be either compacted or expanded.
*
* @param orig the array to clone
* @param status pointer to an ICU error code; NOTE(review): presumably set on
*        failure - confirm against the implementation
* @return the clone; NOTE(review): presumably to be released with
*         ucmpe32_close - confirm
*/
U_CAPI CompactEIntArray* U_EXPORT2
ucmpe32_clone(CompactEIntArray* orig, UErrorCode *status);
/**
* Free up any allocated memory associated with this compact array.
* The memory that is uprv_free'd depends on how the array was initialized
* or opened.
* The array must not be used after this call.
*
* @param array The compact array to close
*/
U_CAPI void U_EXPORT2 ucmpe32_close(CompactEIntArray* array);
/**
* Get the mapped value of a Unicode character.
*
* Two-stage lookup: stage1 is indexed with the high bits of the character
* (index >> _UCMPE32_TRIE_SHIFT) and yields the base offset of a block in
* stage2; the low bits (index & _UCMPE32_STAGE_2_MASK) select the entry
* inside that block.
*
* This is a macro: this_obj and index are each evaluated more than once, so
* do not pass expressions with side effects. Both arguments are fully
* parenthesized in the expansion, so compound expressions (e.g. "&array" or
* "base | offset") are grouped as the caller wrote them.
* No bounds checking is performed.
*
* @param this_obj pointer to the array to look up in
* @param index the character to get the mapped value with
* @return the mapped value of the given character
*/
#define ucmpe32_get(this_obj, index) \
    ((this_obj)->stage2[(this_obj)->stage1[(index) >> _UCMPE32_TRIE_SHIFT] + \
                        ((index) & _UCMPE32_STAGE_2_MASK)])
/**
* Get the mapped value of a code point.
* NOTE(review): presumably the function counterpart of the ucmpe32_get macro
* for full 32-bit code points - confirm against the implementation.
* NOTE(review): unlike the other declarations in this header, this one has
* no U_EXPORT2 - confirm whether that is intentional.
*
* @param this_obj the array to look up in
* @param code the code point to get the mapped value for
* @return the mapped value of the given code point
*/
U_CAPI
int32_t
ucmpe32_get32(CompactEIntArray* this_obj, UChar32 code);
/**
* Get the mapped value of a confirmed surrogate. First value already comes
* from the trie and is combined with the following value in order to get
* the value. THIS CAN BE ONLY USED ON A COMPACTED TRIE. You will get wrong
* results if you try it on the expanded one
* NO ERROR CHECKING IS PERFORMED! PREPARE YOUR DATA CAREFULLY!
*
* The lookup index is built from bits 10..19 of leadValue32 (mask 0xffc00)
* and the low 10 bits of trail (mask 0x3ff), then passed to ucmpe32_get.
*
* This is a macro: arguments may be evaluated more than once, so do not pass
* expressions with side effects. All arguments are parenthesized in the
* expansion, so compound expressions (e.g. "a | b") are masked as a whole.
*
* @param this_obj pointer to the (compacted) array to look up in
* @param leadValue32 the mapping of the leading surrogate.
* @param trail the trailing surrogate
* @return the mapped value of the given character
*/
#define ucmpe32_getSurrogate(this_obj, leadValue32, trail) \
    ucmpe32_get((this_obj), \
                (((leadValue32) & 0xffc00) | ((trail) & 0x3ff)))
/**
* This is a slow function that takes lead and trail surrogate and gets
* the mapping regardless of the compaction status.
*
* @param array the array to look up in
* @param lead leading surrogate unit
* @param trail trailing surrogate unit
* @return the mapped value of the supplementary character formed by the pair
*/
U_CAPI int32_t U_EXPORT2
ucmpe32_getSurrogateEx(CompactEIntArray *array, UChar lead, UChar trail);
/**
* Set a new value for a Unicode character.
* Do not set if the array is compacted - nothing will happen.
*
* @param array the array to modify
* @param character the character to set the mapped value with
* @param value the new mapped value
*/
U_CAPI void U_EXPORT2 ucmpe32_set32(CompactEIntArray *array,
UChar32 character,
int32_t value);
/**
* Sets an inclusive range of characters to the same value.
* NOTE(review): unlike most other declarations in this header, this one has
* no U_EXPORT2 - confirm whether that is intentional.
*
* @param this_obj the array to modify
* @param start start of the range (inclusive)
* @param end end of the range (inclusive)
* @param value value to be set
*/
U_CAPI void
ucmpe32_setRange32(CompactEIntArray* this_obj, UChar32 start, UChar32 end, int32_t value);
/**
* Compatibility alias for ucmpe32_set32(): forwards all three arguments,
* casting the character to UChar32.
*/
#define ucmpe32_set(array, character, value) \
    ucmpe32_set32((array), (UChar32)(character), (value))
/**
* Set a new value for a surrogate character.
* Do not set if the array is compacted - nothing will happen.
* Set automatically expands the array if it is compacted.
* NOTE(review): the two sentences above contradict each other (no-op versus
* automatic expansion on a compacted array) - check the implementation and
* keep only the one that is true.
* Alternatively you can put the surrogate code point together
* yourself and use set32.
*
* @param this_obj the array to modify
* @param lead leading surrogate unit
* @param trail trailing surrogate unit
* @param value the new mapped value
*/
U_CAPI void U_EXPORT2
ucmpe32_setSurrogate(CompactEIntArray* this_obj, UChar lead,
UChar trail, int32_t value);
/**
* Compacts the array.
* This folds the surrogates and compacts the array.
* No setting will succeed after the array is compacted.
* The array has to be compacted in order to be flattened.
*
* @param this_object the array to compact
*/
U_CAPI void U_EXPORT2
ucmpe32_compact(CompactEIntArray* this_object);
/**
* Flattens the array to a memory stream.
* The array has to be compacted beforehand.
*
* @param this_object the (compacted) array to flatten
* @param MS memory stream to flatten to
* @return number of bytes written.
*/
U_CAPI uint32_t U_EXPORT2
ucmpe32_flattenMem(const CompactEIntArray* this_object, UMemoryStream *MS);
#endif