ICU-2594 description of collator and inverse uca images.
X-SVN-Rev: 12451
This commit is contained in:
parent
58a83101b4
commit
d06542ba39
@ -36,12 +36,61 @@
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "unicode/ucol.h"
|
||||
/*#include "ucmpe32.h"*/
|
||||
#include "utrie.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uiter.h"
|
||||
|
||||
/* This is the internal header file which contains important declarations for
|
||||
* the collation framework.
|
||||
* Ready to use collators are stored as binary images. Both UCA and tailorings
|
||||
* share the same binary format. Individual files (currently only UCA) have a
|
||||
* udata header in front of the image and should be opened using udata_open.
|
||||
* Tailoring images are currently stored inside resource bundles and are initialized
|
||||
* through the ucol_open API.
|
||||
* Here is the format of the binary collation image.
|
||||
* int32_t size; - image size in bytes
|
||||
* Offsets to interesting data. All offsets are in bytes.
|
||||
* To get the address, add the offset to the header address and cast properly.
|
||||
* Offsets are in ascending order if non-zero.
|
||||
* uint32_t options; - offset to default collator options (UColOptionSet *), 1 signed 32-bit value, followed by 7 unsigned 32-bit values, followed by 64 reserved bytes (could be considered 16 32-bit values)
|
||||
* uint32_t UCAConsts; - only used in UCA image - structure which holds values for indirect positioning and implicit ranges
|
||||
* uint32_t contractionUCACombos; - only used in UCA image - list of UCA contractions
|
||||
* uint32_t unusedReserved1; - reserved for future use
|
||||
* uint32_t mappingPosition; - offset to UTrie (const uint8_t *mappingPosition)
|
||||
* uint32_t expansion; - offset to expansion table (uint32_t *expansion)
|
||||
* uint32_t contractionIndex; - offset to contraction table (UChar *contractionIndex)
|
||||
* uint32_t contractionCEs; - offset to resulting contraction CEs (uint32_t *contractionCEs)
|
||||
* uint32_t contractionSize; - size of contraction table (both Index and CEs)
|
||||
* uint32_t endExpansionCE; - offset to array of last collation element in expansion (uint32_t *)
|
||||
* uint32_t expansionCESize; - offset to array of maximum expansion sizes (uint8_t *)
|
||||
* int32_t endExpansionCECount; - size of endExpansionCE
|
||||
* uint32_t unsafeCP; - hash table of unsafe code points (uint8_t *)
|
||||
* uint32_t contrEndCP; - hash table of final code points in contractions (uint8_t *)
|
||||
* int32_t CEcount; - currently unused
|
||||
* UBool jamoSpecial; - Jamo special indicator (uint8_t)
|
||||
* uint8_t padding[3]; - padding 3 uint8_t
|
||||
* UVersionInfo version; - version 4 uint8_t
|
||||
* UVersionInfo UCAVersion; - version 4 uint8_t
|
||||
* UVersionInfo UCDVersion; - version 4 uint8_t
|
||||
* char charsetName[32]; - currently unused 32 uint8_t
|
||||
* uint8_t reserved[56]; - currently unused 56 uint8_t
|
||||
* This header is followed by data addressed by offsets in the header.
|
||||
*
|
||||
* Inverse UCA is used for constructing collators from rules. It is always an individual file
|
||||
* and always has a UDataInfo header.
|
||||
* Here is the structure:
|
||||
*
|
||||
* uint32_t byteSize; - size of inverse UCA image in bytes
|
||||
* uint32_t tableSize; - size of inverse table: (number of inverse elements + 2)*3
|
||||
* uint32_t contsSize; - size of continuation table (number of UChars in table)
|
||||
* uint32_t table; - offset to inverse table (uint32_t *)
|
||||
* uint32_t conts; - offset to continuation table (uint16_t *)
|
||||
* UVersionInfo UCAVersion; - version of the UCA, read from file 4 uint8_t
|
||||
* uint8_t padding[8]; - padding 8 uint8_t
|
||||
* Header is followed by the table and continuation table.
|
||||
*/
|
||||
|
||||
/* UDataInfo for UCA mapping table */
|
||||
static const UDataInfo ucaDataInfo={
|
||||
sizeof(UDataInfo),
|
||||
|
Loading…
Reference in New Issue
Block a user