ICU-2594 description of collator and inverse uca images.
X-SVN-Rev: 12451
This commit is contained in:
parent
58a83101b4
commit
d06542ba39
@ -36,12 +36,61 @@
|
|||||||
#if !UCONFIG_NO_COLLATION
|
#if !UCONFIG_NO_COLLATION
|
||||||
|
|
||||||
#include "unicode/ucol.h"
|
#include "unicode/ucol.h"
|
||||||
/*#include "ucmpe32.h"*/
|
|
||||||
#include "utrie.h"
|
#include "utrie.h"
|
||||||
#include "unicode/ures.h"
|
#include "unicode/ures.h"
|
||||||
#include "unicode/udata.h"
|
#include "unicode/udata.h"
|
||||||
#include "unicode/uiter.h"
|
#include "unicode/uiter.h"
|
||||||
|
|
||||||
|
/* This is the internal header file which contains important declarations for
|
||||||
|
* the collation framework.
|
||||||
|
* Ready to use collators are stored as binary images. Both UCA and tailorings
|
||||||
|
* share the same binary format. Individual files (currently only UCA) have a
|
||||||
|
* udata header in front of the image and should be opened using udata_open.
|
||||||
|
* Tailoring images are currently stored inside resource bundles and are initialized
|
||||||
|
* through the ucol_open API.
|
||||||
|
* Here is the format of the binary collation image:
|
||||||
|
* int32_t size; - image size in bytes
|
||||||
|
* Offsets to interesting data. All offsets are in bytes.
|
||||||
|
* To get the address, add the offset to the header address and cast properly.
|
||||||
|
* Offsets are in ascending order if non-zero.
|
||||||
|
* uint32_t options; - offset to default collator options (UColOptionSet *), 1 signed 3-bit value, followed by 7 unsigned 32-bit values, followed by 64 reserved bytes (could be considered 16 32-bit values)
|
||||||
|
* uint32_t UCAConsts; - only used in UCA image - structure which holds values for indirect positioning and implicit ranges
|
||||||
|
* uint32_t contractionUCACombos; - only used in UCA image - list of UCA contractions
|
||||||
|
* uint32_t unusedReserved1; - reserved for future use
|
||||||
|
* uint32_t mappingPosition; - offset to UTrie (const uint8_t *mappingPosition)
|
||||||
|
* uint32_t expansion; - offset to expansion table (uint32_t *expansion)
|
||||||
|
* uint32_t contractionIndex; - offset to contraction table (UChar *contractionIndex)
|
||||||
|
* uint32_t contractionCEs; - offset to resulting contraction CEs (uint32_t *contractionCEs)
|
||||||
|
* uint32_t contractionSize; - size of contraction table (both Index and CEs)
|
||||||
|
* uint32_t endExpansionCE; - offset to array of last collation element in expansion (uint32_t *)
|
||||||
|
* uint32_t expansionCESize; - offset to array of maximum expansion sizes (uint8_t *)
|
||||||
|
* int32_t endExpansionCECount; - size of endExpansionCE
|
||||||
|
* uint32_t unsafeCP; - hash table of unsafe code points (uint8_t *)
|
||||||
|
* uint32_t contrEndCP; - hash table of final code points in contractions (uint8_t *)
|
||||||
|
* int32_t CEcount; - currently unused
|
||||||
|
* UBool jamoSpecial; - Jamo special indicator (uint8_t)
|
||||||
|
* uint8_t padding[3]; - padding 3 uint8_t
|
||||||
|
* UVersionInfo version; - version 4 uint8_t
|
||||||
|
* UVersionInfo UCAVersion; - version 4 uint8_t
|
||||||
|
* UVersionInfo UCDVersion; - version 4 uint8_t
|
||||||
|
* char charsetName[32]; - currently unused 32 uint8_t
|
||||||
|
* uint8_t reserved[56]; - currently unused 64 uint8_t
|
||||||
|
* This header is followed by data addressed by offsets in the header.
|
||||||
|
*
|
||||||
|
* Inverse UCA is used for constructing collators from rules. It is always an individual file
|
||||||
|
* and always has a UDataInfo header.
|
||||||
|
* Here is the structure:
|
||||||
|
*
|
||||||
|
* uint32_t byteSize; - size of inverse UCA image in bytes
|
||||||
|
* uint32_t tableSize; - size of inverse table: (number of inverse elements + 2)*3
|
||||||
|
* uint32_t contsSize; - size of continuation table (number of UChars in table)
|
||||||
|
* uint32_t table; - offset to inverse table (uint32_t *)
|
||||||
|
* uint32_t conts; - offset to continuation table (uint16_t *)
|
||||||
|
* UVersionInfo UCAVersion; - version of the UCA, read from file 4 uint8_t
|
||||||
|
* uint8_t padding[8]; - padding 8 uint8_t
|
||||||
|
* Header is followed by the table and continuation table.
|
||||||
|
*/
|
||||||
|
|
||||||
/* UDataInfo for UCA mapping table */
|
/* UDataInfo for UCA mapping table */
|
||||||
static const UDataInfo ucaDataInfo={
|
static const UDataInfo ucaDataInfo={
|
||||||
sizeof(UDataInfo),
|
sizeof(UDataInfo),
|
||||||
|
Loading…
Reference in New Issue
Block a user