2001-02-26 10:28:56 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2011-07-06 04:03:35 +00:00
|
|
|
* Copyright (C) 2000-2011, International Business Machines
|
2001-02-26 10:28:56 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
2001-03-08 17:40:42 +00:00
|
|
|
* file name: ucol_elm.h
|
2001-02-26 10:28:56 +00:00
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created 02/22/2001
|
|
|
|
* created by: Vladimir Weinstein
|
|
|
|
*
|
|
|
|
* This program reads the Fractional UCA table and generates
|
|
|
|
* internal format for UCA table as well as inverse UCA table.
|
|
|
|
* It then writes binary files containing the data: ucadata.dat
|
|
|
|
* & invuca.dat
|
|
|
|
*/
|
|
|
|
#ifndef UCOL_UCAELEMS_H
|
|
|
|
#define UCOL_UCAELEMS_H
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2010-10-12 23:59:00 +00:00
|
|
|
#include "unicode/uniset.h"
|
2007-11-30 04:29:20 +00:00
|
|
|
#include "ucol_tok.h"
|
2002-09-20 01:54:48 +00:00
|
|
|
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
|
|
|
2001-03-08 17:40:42 +00:00
|
|
|
#include "ucol_imp.h"
|
2001-02-26 10:28:56 +00:00
|
|
|
|
2001-05-16 17:09:31 +00:00
|
|
|
#ifdef UCOL_DEBUG
|
2002-02-28 01:42:40 +00:00
|
|
|
#include "cmemory.h"
|
2001-05-16 17:09:31 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#endif
|
|
|
|
|
2005-10-12 01:06:22 +00:00
|
|
|
U_CDECL_BEGIN
|
2001-10-20 01:09:31 +00:00
|
|
|
|
2006-10-12 02:25:36 +00:00
|
|
|
/* This is the maximum trie capacity for the mapping trie.
|
|
|
|
Due to current limitations in genuca and the design of UTrie,
|
|
|
|
this number can't be more than 256K.
|
|
|
|
As of Unicode 5, it currently could safely go to 128K without
|
|
|
|
a problem. Normally, less than 32K are tailored.
|
|
|
|
*/
|
2006-11-22 23:14:11 +00:00
|
|
|
#define UCOL_ELM_TRIE_CAPACITY 0x40000 /* 0x40000 == 256K, the upper bound named in the comment above */
|
2006-10-12 02:25:36 +00:00
|
|
|
|
2007-11-30 04:29:20 +00:00
|
|
|
/* This is the maximum capacity for the temporary combining class
|
|
|
|
* table. The table will be compacted after scanning all the
|
|
|
|
* Unicode codepoints.
|
|
|
|
*/
|
|
|
|
#define UCOL_MAX_CM_TAB 0x10000 /* 0x10000 == 65536 (64K) */
|
|
|
|
|
|
|
|
|
2001-02-26 10:28:56 +00:00
|
|
|
/*
 * Table of expansion collation elements (CEs): a pointer to 32-bit CE
 * values plus two signed 32-bit bookkeeping fields.
 *
 * NOTE(review): position/size presumably mean "entries used" and
 * "entries allocated" -- confirm against the implementation file.
 */
typedef struct {
    uint32_t *CEs;       /* CE storage */
    int32_t   position;  /* presumably the next free index in CEs -- TODO confirm */
    int32_t   size;      /* presumably the capacity of CEs -- TODO confirm */
} ExpansionTable;
|
|
|
|
|
|
|
|
typedef struct {
|
2001-09-27 23:18:14 +00:00
|
|
|
UChar prefixChars[128];
|
|
|
|
UChar *prefix;
|
|
|
|
uint32_t prefixSize;
|
2001-02-26 10:28:56 +00:00
|
|
|
UChar uchars[128];
|
|
|
|
UChar *cPoints;
|
2001-03-22 21:16:20 +00:00
|
|
|
uint32_t cSize; /* Number of characters in sequence - for contraction */
|
|
|
|
uint32_t noOfCEs; /* Number of collation elements */
|
2001-02-26 10:28:56 +00:00
|
|
|
uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */
|
|
|
|
uint32_t mapCE; /* This is the value element maps in original table */
|
2001-03-22 21:16:20 +00:00
|
|
|
uint32_t sizePrim[128];
|
|
|
|
uint32_t sizeSec[128];
|
|
|
|
uint32_t sizeTer[128];
|
2001-02-26 10:28:56 +00:00
|
|
|
UBool caseBit;
|
|
|
|
UBool isThai;
|
|
|
|
} UCAElements;
|
|
|
|
|
2001-06-12 15:53:34 +00:00
|
|
|
typedef struct {
|
|
|
|
uint32_t *endExpansionCE;
|
|
|
|
UBool *isV;
|
2005-08-23 18:44:52 +00:00
|
|
|
int32_t position;
|
|
|
|
int32_t size;
|
2001-06-12 15:53:34 +00:00
|
|
|
uint8_t maxLSize;
|
|
|
|
uint8_t maxVSize;
|
|
|
|
uint8_t maxTSize;
|
|
|
|
} MaxJamoExpansionTable;
|
|
|
|
|
2001-02-26 10:28:56 +00:00
|
|
|
/*
 * Bookkeeping for maximum expansion sizes: an array of 32-bit CEs and an
 * array of 8-bit sizes, plus two signed 32-bit bookkeeping fields.
 *
 * NOTE(review): the two arrays are presumably parallel (one size per
 * ending CE), with position/size as used/allocated counts -- confirm
 * against the implementation file.
 */
typedef struct {
    uint32_t *endExpansionCE;
    uint8_t  *expansionCESize;
    int32_t   position;
    int32_t   size;
} MaxExpansionTable;
|
|
|
|
|
2007-11-30 04:29:20 +00:00
|
|
|
typedef struct {
|
|
|
|
uint16_t index[256]; /* index of cPoints by combining class 0-255. */
|
|
|
|
UChar *cPoints; /* code point array of all combining marks */
|
|
|
|
uint32_t size; /* total number of combining marks */
|
|
|
|
} CombinClassTable;
|
|
|
|
|
2001-03-02 01:14:03 +00:00
|
|
|
typedef struct {
|
2001-12-19 07:00:45 +00:00
|
|
|
/*CompactEIntArray *mapping; */
|
|
|
|
UNewTrie *mapping;
|
2001-06-12 15:53:34 +00:00
|
|
|
ExpansionTable *expansions;
|
|
|
|
struct CntTable *contractions;
|
|
|
|
UCATableHeader *image;
|
|
|
|
UColOptionSet *options;
|
|
|
|
MaxExpansionTable *maxExpansions;
|
|
|
|
MaxJamoExpansionTable *maxJamoExpansions;
|
|
|
|
uint8_t *unsafeCP;
|
|
|
|
uint8_t *contrEndCP;
|
|
|
|
const UCollator *UCA;
|
2001-10-02 01:26:13 +00:00
|
|
|
UHashtable *prefixLookup;
|
2007-11-30 04:29:20 +00:00
|
|
|
CombinClassTable *cmLookup; /* combining class lookup for tailoring. */
|
2001-02-26 10:28:56 +00:00
|
|
|
} tempUCATable;
|
|
|
|
|
2007-11-30 04:29:20 +00:00
|
|
|
typedef struct {
|
|
|
|
UChar cp;
|
|
|
|
uint16_t cClass; // combining class
|
|
|
|
}CompData;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
CompData *precomp;
|
|
|
|
int32_t precompLen;
|
|
|
|
UChar *decomp;
|
|
|
|
int32_t decompLen;
|
|
|
|
UChar *comp;
|
|
|
|
int32_t compLen;
|
|
|
|
uint16_t curClass;
|
|
|
|
uint16_t tailoringCM;
|
|
|
|
int32_t cmPos;
|
|
|
|
}tempTailorContext;
|
|
|
|
|
2003-07-24 23:23:19 +00:00
|
|
|
/*
 * Produces a tempUCATable.
 *
 * @param image                 table header stored with / used by the new table
 * @param opts                  collation option set
 * @param UCA                   collator passed as const; presumably the root UCA collator -- TODO confirm
 * @param initTag               CE tag; presumably the initial value for the mapping -- TODO confirm
 * @param supplementaryInitTag  CE tag; presumably the initial value for supplementary code points -- TODO confirm
 * @param status                in/out ICU error code
 * @return pointer to the tempUCATable; failure behaviour is not visible
 *         from this header
 */
U_CAPI tempUCATable * U_EXPORT2
uprv_uca_initTempTable(UCATableHeader *image,
                       UColOptionSet *opts,
                       const UCollator *UCA,
                       UColCETags initTag,
                       UColCETags supplementaryInitTag,
                       UErrorCode *status);
|
2001-02-26 10:28:56 +00:00
|
|
|
/*
 * Closes a tempUCATable.
 *
 * @param t  the table to close; whether NULL is accepted is not visible
 *           from this header
 */
U_CAPI void U_EXPORT2
uprv_uca_closeTempTable(tempUCATable *t);
|
|
|
|
/*
 * Adds one element to a tempUCATable.
 *
 * @param t        the table being built
 * @param element  the element to add (passed non-const)
 * @param status   in/out ICU error code
 * @return a 32-bit value; presumably the CE the element was mapped to --
 *         TODO confirm
 */
U_CAPI uint32_t U_EXPORT2
uprv_uca_addAnElement(tempUCATable *t,
                      UCAElements *element,
                      UErrorCode *status);
|
2001-09-20 20:16:39 +00:00
|
|
|
/*
 * Assembles a UCATableHeader from a tempUCATable.
 *
 * @param t       the temporary table to assemble
 * @param status  in/out ICU error code
 * @return pointer to the assembled table header; ownership of the result
 *         is not visible from this header
 */
U_CAPI UCATableHeader * U_EXPORT2
uprv_uca_assembleTable(tempUCATable *t,
                       UErrorCode *status);
|
2010-10-12 23:59:00 +00:00
|
|
|
|
|
|
|
/*
 * Canonical-closure step over a tempUCATable.
 *
 * @param t       the temporary table
 * @param src     token parser
 * @param closed  a UnicodeSet; presumably receives the code points added
 *                by the closure -- TODO confirm
 * @param status  in/out ICU error code
 * @return a signed 32-bit value; its meaning is not visible from this
 *         header
 */
U_CAPI int32_t U_EXPORT2
uprv_uca_canonicalClosure(tempUCATable *t,
                          UColTokenParser *src,
                          icu::UnicodeSet *closed,
                          UErrorCode *status);
|
2001-02-26 10:28:56 +00:00
|
|
|
|
2005-10-12 01:06:22 +00:00
|
|
|
U_CDECL_END
|
2001-10-20 01:09:31 +00:00
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_COLLATION */
|
|
|
|
|
2001-02-27 18:38:48 +00:00
|
|
|
#endif
|