ICU-2594 description of collator and inverse uca images.

X-SVN-Rev: 12451
This commit is contained in:
Vladimir Weinstein 2003-06-11 05:36:29 +00:00
parent 58a83101b4
commit d06542ba39

View File

@ -36,12 +36,61 @@
#if !UCONFIG_NO_COLLATION #if !UCONFIG_NO_COLLATION
#include "unicode/ucol.h" #include "unicode/ucol.h"
/*#include "ucmpe32.h"*/
#include "utrie.h" #include "utrie.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/udata.h" #include "unicode/udata.h"
#include "unicode/uiter.h" #include "unicode/uiter.h"
/* This is the internal header file which contains important declarations for
* the collation framework.
* Ready-to-use collators are stored as binary images. Both UCA and tailorings
* share the same binary format. Individual files (currently only UCA) have a
* udata header in front of the image and should be opened using udata_open.
* Tailoring images are currently stored inside resource bundles and are initialized
* through the ucol_open API.
* Here is the format of the binary collation image:
* int32_t size; - image size in bytes
* Offsets to interesting data. All offsets are in bytes.
* To get an address, add the offset to the header address and cast properly.
* Offsets are in ascending order if non-zero.
* uint32_t options; - offset to default collator options (UColOptionSet *), 1 signed 32-bit value, followed by 7 unsigned 32-bit values, followed by 64 reserved bytes (could be considered 16 32-bit values)
* uint32_t UCAConsts; - only used in UCA image - structure which holds values for indirect positioning and implicit ranges
* uint32_t contractionUCACombos; - only used in UCA image - list of UCA contractions
* uint32_t unusedReserved1; - reserved for future use
* uint32_t mappingPosition; - offset to UTrie (const uint8_t *mappingPosition)
* uint32_t expansion; - offset to expansion table (uint32_t *expansion)
* uint32_t contractionIndex; - offset to contraction table (UChar *contractionIndex)
* uint32_t contractionCEs; - offset to resulting contraction CEs (uint32_t *contractionCEs)
* uint32_t contractionSize; - size of contraction table (both Index and CEs)
* uint32_t endExpansionCE; - offset to array of last collation element in expansion (uint32_t *)
* uint32_t expansionCESize; - array of maximum expansion sizes (uint8_t *)
* int32_t endExpansionCECount; - size of endExpansionCE
* uint32_t unsafeCP; - hash table of unsafe code points (uint8_t *)
* uint32_t contrEndCP; - hash table of final code points in contractions (uint8_t *)
* int32_t CEcount; - currently unused
* UBool jamoSpecial; - Jamo special indicator (uint8_t)
* uint8_t padding[3]; - padding 3 uint8_t
* UVersionInfo version; - version 4 uint8_t
* UVersionInfo UCAVersion; - version 4 uint8_t
* UVersionInfo UCDVersion; - version 4 uint8_t
* char charsetName[32]; - currently unused 32 uint8_t
* uint8_t reserved[56]; - currently unused 56 uint8_t
* This header is followed by data addressed by offsets in the header.
*
* Inverse UCA is used for constructing collators from rules. It is always an individual file
* and always has a UDataInfo header.
* Here is the structure:
*
* uint32_t byteSize; - size of inverse UCA image in bytes
* uint32_t tableSize; - size of inverse table ((number of inverse elements + 2)*3)
* uint32_t contsSize; - size of continuation table (number of UChars in table)
* uint32_t table; - offset to inverse table (uint32_t *)
* uint32_t conts; - offset to continuation table (uint16_t *)
* UVersionInfo UCAVersion; - version of the UCA, read from file 4 uint8_t
* uint8_t padding[8]; - padding 8 uint8_t
* Header is followed by the table and continuation table.
*/
/* UDataInfo for UCA mapping table */ /* UDataInfo for UCA mapping table */
static const UDataInfo ucaDataInfo={ static const UDataInfo ucaDataInfo={
sizeof(UDataInfo), sizeof(UDataInfo),