ICU-7264 parse ScriptExtensions.txt, write uprops.icu formatVersion 7
X-SVN-Rev: 28377
This commit is contained in:
parent
1360486f9e
commit
3bf87d9766
@ -5,5 +5,5 @@
|
||||
# created by: Markus W. Scherer
|
||||
# edited on: 2010jul20
|
||||
# edited by: Stuart G. Gill
|
||||
add_executable(genprops genprops.c props2.c store.c)
|
||||
add_executable(genprops genprops.cpp props2.cpp store.c)
|
||||
target_link_libraries(genprops icuuc icutu)
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2008, International Business Machines
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: genprops.c
|
||||
* file name: genprops.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
@ -40,6 +40,8 @@ U_CDECL_END
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2008, International Business Machines
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -34,13 +34,13 @@ typedef struct {
|
||||
} Props;
|
||||
|
||||
/* global flags */
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
U_CFUNC UBool beVerbose, haveCopyright;
|
||||
|
||||
extern const char *const
|
||||
U_CFUNC const char *const
|
||||
genCategoryNames[];
|
||||
|
||||
/* properties vectors in props2.c */
|
||||
extern UPropsVectors *pv;
|
||||
/* properties vectors in props2.cpp */
|
||||
U_CFUNC UPropsVectors *pv;
|
||||
|
||||
/* prototypes */
|
||||
U_CFUNC void
|
||||
@ -52,28 +52,28 @@ isToken(const char *token, const char *s);
|
||||
U_CFUNC int32_t
|
||||
getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
setUnicodeVersion(const char *v);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
initStore(void);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
exitStore(void);
|
||||
|
||||
extern uint32_t
|
||||
U_CFUNC uint32_t
|
||||
makeProps(Props *p);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
addProps(uint32_t c, uint32_t props);
|
||||
|
||||
extern uint32_t
|
||||
U_CFUNC uint32_t
|
||||
getProps(uint32_t c);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
repeatProps(uint32_t first, uint32_t last, uint32_t props);
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
generateData(const char *dataDir, UBool csource);
|
||||
|
||||
/* props2.cpp */
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Copyright (C) 2002-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: props2.c
|
||||
* file name: props2.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
@ -20,6 +20,7 @@
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
@ -32,11 +33,15 @@
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
static UNewTrie *newTrie;
|
||||
UPropsVectors *pv;
|
||||
|
||||
static UnicodeString *scriptExtensions;
|
||||
|
||||
/* miscellaneous ------------------------------------------------------------ */
|
||||
|
||||
static char *
|
||||
@ -45,7 +50,7 @@ trimTerminateField(char *s, char *limit) {
|
||||
s=(char *)u_skipWhitespace(s);
|
||||
|
||||
/* trim trailing whitespace */
|
||||
while(s<limit && (*(limit-1)==' ' || *(limit-1)=='\t')) {
|
||||
while(s<limit && U_IS_INV_WHITESPACE(*(limit-1))) {
|
||||
--limit;
|
||||
}
|
||||
*limit=0;
|
||||
@ -77,6 +82,11 @@ ageLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void U_CALLCONV
|
||||
scriptExtensionsLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseMultiFieldFile(char *filename, char *basename,
|
||||
const char *ucdFile, const char *suffix,
|
||||
@ -415,12 +425,14 @@ initAdditionalProperties() {
|
||||
fprintf(stderr, "error: upvec_open() failed - %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
scriptExtensions=new UnicodeString;
|
||||
}
|
||||
|
||||
/*
 * Releases the state used for the additional properties:
 * closes the build-time trie and the properties vectors, and deletes the
 * Script_Extensions string.
 * Each pointer is reset to NULL afterwards so that a repeated call or a late
 * access does not operate on an already released object.
 */
U_CFUNC void
exitAdditionalProperties() {
    utrie_close(newTrie);
    newTrie=NULL;

    upvec_close(pv);
    pv=NULL;

    delete scriptExtensions;
    scriptExtensions=NULL;
}
|
||||
|
||||
U_CFUNC void
|
||||
@ -436,22 +448,10 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
|
||||
parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
|
||||
|
||||
/*
|
||||
* UTR 24 says:
|
||||
* Section 2:
|
||||
* "Common - For characters that may be used
|
||||
* within multiple scripts,
|
||||
* or any unassigned code points."
|
||||
*
|
||||
* Section 4:
|
||||
* "The value COMMON is the default value,
|
||||
* given to all code points that are not
|
||||
* explicitly mentioned in the data file."
|
||||
*
|
||||
* COMMON==USCRIPT_COMMON==0 - nothing to do
|
||||
*/
|
||||
parseSingleEnumFile(filename, basename, suffix, &scriptSingleEnum, pErrorCode);
|
||||
|
||||
parseTwoFieldFile(filename, basename, "ScriptExtensions", suffix, scriptExtensionsLineFn, pErrorCode);
|
||||
|
||||
parseSingleEnumFile(filename, basename, suffix, &blockSingleEnum, pErrorCode);
|
||||
|
||||
parseBinariesFile(filename, basename, suffix, &propListBinaries, pErrorCode);
|
||||
@ -567,6 +567,135 @@ ageLineFn(void *context,
|
||||
}
|
||||
}
|
||||
|
||||
/* ScriptExtensions.txt ----------------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
scriptExtensionsLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t start, end;
|
||||
u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: syntax error in ScriptExtensions.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
||||
/* parse list of script codes */
|
||||
UnicodeString codes; // vector of 16-bit UScriptCode values
|
||||
char *s=fields[1][0];
|
||||
for(;;) {
|
||||
// skip whitespace before each token
|
||||
s=(char *)u_skipWhitespace(s);
|
||||
if(*s==0 || *s==';') {
|
||||
break;
|
||||
}
|
||||
// skip non-whitespace, non-terminator characters to find the token limit
|
||||
char *limit=s;
|
||||
char c;
|
||||
do {
|
||||
c=*++limit;
|
||||
} while(!U_IS_INV_WHITESPACE(c) && c!=0 && c!=';');
|
||||
// NUL-terminated this token
|
||||
*limit=0;
|
||||
// convert the token (script property value alias) into a UScriptCode value
|
||||
int32_t value=u_getPropertyValueEnum(UCHAR_SCRIPT, s);
|
||||
if(value<0) {
|
||||
fprintf(stderr, "genprops: syntax error in ScriptExtensions.txt field 1 at %s\n", s);
|
||||
exit(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
// Insertion sort into the list of script codes.
|
||||
for(int32_t i=0;; ++i) {
|
||||
if(i<codes.length()) {
|
||||
if(value<codes[i]) {
|
||||
codes.insert(i, (UChar)value);
|
||||
break;
|
||||
} else if(value==codes[i]) {
|
||||
fprintf(stderr,
|
||||
"genprops: duplicate script code in ScriptExtensions.txt field 1 at %s "
|
||||
"for U+%04lx..U+%04lx\n",
|
||||
s, (long)start, (long)end);
|
||||
exit(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
// continue while value>codes[i]
|
||||
} else {
|
||||
codes.append((UChar)value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(c==0 || c==';') {
|
||||
// the token ended at a terminator
|
||||
break;
|
||||
} else {
|
||||
// the token ended at U_IS_INV_WHITESPACE(c), continue after c
|
||||
s=limit+1;
|
||||
}
|
||||
}
|
||||
int32_t length=codes.length();
|
||||
if(length==0) {
|
||||
fprintf(stderr,
|
||||
"genprops: missing values in ScriptExtensions.txt field 1 "
|
||||
"for U+%04lx..U+%04lx\n",
|
||||
(long)start, (long)end);
|
||||
exit(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
// Set bit 15 on the last script code, for termination.
|
||||
codes.setCharAt(length-1, (UChar)(codes[length-1]|0x8000));
|
||||
// Find this list of codes in the Script_Extensions data so far, or add this list.
|
||||
int32_t index=scriptExtensions->indexOf(codes);
|
||||
if(index<0) {
|
||||
index=scriptExtensions->length();
|
||||
scriptExtensions->append(codes);
|
||||
}
|
||||
// Modify the Script data for each of the start..end code points
|
||||
// to include the Script_Extensions index.
|
||||
do {
|
||||
uint32_t scriptX=upvec_getValue(pv, (UChar32)start, 0)&UPROPS_SCRIPT_X_MASK;
|
||||
// Find the next code point that has a different script value.
|
||||
// We want to add the Script_Extensions index to the code point range start..next-1.
|
||||
UChar32 next;
|
||||
for(next=(UChar32)start+1;
|
||||
next<=(UChar32)end && scriptX==(upvec_getValue(pv, next, 0)&UPROPS_SCRIPT_X_MASK);
|
||||
++next) {}
|
||||
if(scriptX>=UPROPS_SCRIPT_X_WITH_COMMON) {
|
||||
fprintf(stderr,
|
||||
"genprops: ScriptExtensions.txt has values for U+%04lx..U+%04lx "
|
||||
"which overlaps with a range including U+%04lx..U+%04lx\n",
|
||||
(long)start, (long)end, (long)start, (long)(next-1));
|
||||
exit(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
// Encode the (Script, Script_Extensions index) pair.
|
||||
if(scriptX==USCRIPT_COMMON) {
|
||||
scriptX=UPROPS_SCRIPT_X_WITH_COMMON|(uint32_t)index;
|
||||
} else if(scriptX==USCRIPT_INHERITED) {
|
||||
scriptX=UPROPS_SCRIPT_X_WITH_INHERITED|(uint32_t)index;
|
||||
} else {
|
||||
// Store an additional pair of 16-bit units for an unusual main Script code
|
||||
// together with the Script_Extensions index.
|
||||
UnicodeString codeIndexPair;
|
||||
codeIndexPair.append((UChar)scriptX).append((UChar)index);
|
||||
index=scriptExtensions->indexOf(codeIndexPair);
|
||||
if(index<0) {
|
||||
index=scriptExtensions->length();
|
||||
scriptExtensions->append(codeIndexPair);
|
||||
}
|
||||
scriptX=UPROPS_SCRIPT_X_WITH_OTHER|(uint32_t)index;
|
||||
}
|
||||
if(index>UPROPS_SCRIPT_MASK) {
|
||||
fprintf(stderr, "genprops: Script_Extensions indexes overflow bit field\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
// Write the (Script, Script_Extensions index) pair into
|
||||
// the properties vector for start..next-1.
|
||||
upvec_setValue(pv, (UChar32)start, (UChar32)(next-1),
|
||||
0, scriptX, UPROPS_SCRIPT_X_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set Script_Extensions: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
start=next;
|
||||
} while(start<=end);
|
||||
}
|
||||
|
||||
/* DerivedNumericValues.txt ------------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
@ -719,7 +848,36 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
|
||||
fprintf(stderr, "genprops error: unable to serialize trie for additional properties: %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
if(p!=NULL) {
|
||||
|
||||
/* round up scriptExtensions to multiple of 4 bytes */
|
||||
if(scriptExtensions->length()&1) {
|
||||
scriptExtensions->append((UChar)0);
|
||||
}
|
||||
|
||||
/* set indexes */
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
|
||||
indexes[UPROPS_RESERVED_INDEX_7]=
|
||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
|
||||
indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
|
||||
indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
|
||||
|
||||
indexes[UPROPS_MAX_VALUES_INDEX]=
|
||||
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
|
||||
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
||||
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
|
||||
indexes[UPROPS_MAX_VALUES_2_INDEX]=
|
||||
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
|
||||
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
|
||||
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
|
||||
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
|
||||
((int32_t)U_DT_COUNT-1);
|
||||
|
||||
int32_t additionalPropsSize=4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
|
||||
if(p!=NULL && additionalPropsSize<=capacity) {
|
||||
if(beVerbose) {
|
||||
printf("size in bytes of additional props trie:%5u\n", (int)length);
|
||||
}
|
||||
@ -756,7 +914,7 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"genbidi error: deleting lead surrogate code unit values failed - %s\n",
|
||||
"genprops error: deleting lead surrogate code unit values failed - %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
@ -772,47 +930,33 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
|
||||
"};\n\n");
|
||||
|
||||
utrie2_close(trie2);
|
||||
}
|
||||
|
||||
p+=length;
|
||||
capacity-=length;
|
||||
|
||||
/* set indexes */
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
||||
indexes[UPROPS_RESERVED_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
|
||||
|
||||
indexes[UPROPS_MAX_VALUES_INDEX]=
|
||||
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
|
||||
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
||||
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
|
||||
indexes[UPROPS_MAX_VALUES_2_INDEX]=
|
||||
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
|
||||
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
|
||||
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
|
||||
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
|
||||
((int32_t)U_DT_COUNT-1);
|
||||
}
|
||||
|
||||
if(p!=NULL && (pvCount*4)<=capacity) {
|
||||
if(f!=NULL) {
|
||||
usrc_writeArray(f,
|
||||
"static const uint32_t propsVectors[%ld]={\n",
|
||||
pvArray, 32, pvCount,
|
||||
"};\n\n");
|
||||
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
|
||||
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
|
||||
|
||||
usrc_writeArray(f,
|
||||
"static const uint16_t scriptExtensions[%ld]={\n",
|
||||
scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
|
||||
"};\n\n");
|
||||
} else {
|
||||
uprv_memcpy(p, pvArray, pvCount*4);
|
||||
p+=length;
|
||||
length=pvCount*4;
|
||||
uprv_memcpy(p, pvArray, length);
|
||||
|
||||
p+=length;
|
||||
length=scriptExtensions->length()*2;
|
||||
uprv_memcpy(p, scriptExtensions->getBuffer(), length);
|
||||
}
|
||||
if(beVerbose) {
|
||||
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
||||
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
|
||||
printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
|
||||
}
|
||||
}
|
||||
length+=pvCount*4;
|
||||
|
||||
return length;
|
||||
return additionalPropsSize;
|
||||
}
|
@ -41,7 +41,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
|
||||
precedes the actual data. It contains platform properties values and the
|
||||
file format version.
|
||||
|
||||
The following is a description of format version 6 .
|
||||
The following is a description of format version 7.
|
||||
|
||||
Data contents:
|
||||
|
||||
@ -74,8 +74,10 @@ Formally, the file contains the following structures:
|
||||
i4 additionalVectorsIndex; -- 32-bit unit index to the table of properties vectors
|
||||
i5 additionalVectorsColumns; -- number of 32-bit words per properties vector
|
||||
|
||||
i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table
|
||||
i7..i9 reservedIndexes; -- reserved values; 0 for now
|
||||
i6 scriptExtensionsIndex; -- 32-bit unit index to the Script_Extensions data
|
||||
i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data
|
||||
i8 reservedIndex8; -- for now: i7, i8 and i9 have the same values
|
||||
i9 dataTopIndex; -- size of the data file (number of 32-bit units after the header)
|
||||
|
||||
i10 maxValues; -- maximum code values for vector word 0, see uprops.h (new in format version 3.1+)
|
||||
i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2)
|
||||
@ -92,6 +94,20 @@ Formally, the file contains the following structures:
|
||||
AT serialized trie for additional properties (byte size: 4*(i4-i3))
|
||||
PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
|
||||
|
||||
SCX const uint16_t scriptExtensions[2*(i7-i6)];
|
||||
|
||||
SCX contains Script_Extensions lists and (Script code, Script_Extensions index) pairs.
|
||||
A Script_Extensions list is a sequence of UScriptCode values in ascending order,
|
||||
with the last code having bit 15 set for termination.
|
||||
A (Script code, Script_Extensions index) pair is the main UScriptCode (Script value)
|
||||
followed by the index of the Script_Extensions list.
|
||||
If the propsVectors[] column 0 value indicates that there are Script_Extensions,
|
||||
then the UPROPS_SCRIPT_MASK bit field is an index to either a list or a pair in SCX,
|
||||
rather than the Script itself. The high bits in the UPROPS_SCRIPT_X_MASK fields
|
||||
indicate whether the main Script value is Common or Inherited (and the index is to a list)
|
||||
vs. another value (and the index is to a pair).
|
||||
(See UPROPS_SCRIPT_X_WITH_COMMON etc. in uprops.h.)
|
||||
|
||||
Trie lookup and properties:
|
||||
|
||||
In order to condense the data for the 21-bit code space, several properties of
|
||||
@ -206,6 +222,12 @@ Format version 6 became necessary because Unicode 5.2 adds fractions with
|
||||
denominators 9, 10 and 16, and it was easier to redesign the encoding of numeric
|
||||
types and values rather than add another variant to the previous format.
|
||||
|
||||
--- Changes in format version 7 ---
|
||||
|
||||
Unicode 6.0 adds Script_Extensions. For characters with script extensions data,
|
||||
the script code bits are an index into the new Script_Extensions array rather
|
||||
than a script code.
|
||||
|
||||
----------------------------------------------------------------------------- */
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
@ -227,14 +249,14 @@ static UNewTrie *pTrie=NULL;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
setUnicodeVersion(const char *v) {
|
||||
UVersionInfo version;
|
||||
u_versionFromString(version, v);
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
initStore() {
|
||||
pTrie=utrie_open(NULL, NULL, 40000, 0, 0, TRUE);
|
||||
if(pTrie==NULL) {
|
||||
@ -245,7 +267,7 @@ initStore() {
|
||||
initAdditionalProperties();
|
||||
}
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
exitStore() {
|
||||
utrie_close(pTrie);
|
||||
exitAdditionalProperties();
|
||||
@ -253,7 +275,7 @@ exitStore() {
|
||||
|
||||
/* store a character's properties ------------------------------------------- */
|
||||
|
||||
extern uint32_t
|
||||
U_CFUNC uint32_t
|
||||
makeProps(Props *p) {
|
||||
uint32_t den;
|
||||
int32_t type, value, exp, ntv;
|
||||
@ -327,7 +349,7 @@ makeProps(Props *p) {
|
||||
(ntv<<UPROPS_NUMERIC_TYPE_VALUE_SHIFT);
|
||||
}
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
addProps(uint32_t c, uint32_t x) {
|
||||
if(!utrie_set32(pTrie, (UChar32)c, x)) {
|
||||
fprintf(stderr, "error: too many entries for the properties trie\n");
|
||||
@ -335,14 +357,14 @@ addProps(uint32_t c, uint32_t x) {
|
||||
}
|
||||
}
|
||||
|
||||
extern uint32_t
|
||||
U_CFUNC uint32_t
|
||||
getProps(uint32_t c) {
|
||||
return utrie_get32(pTrie, (UChar32)c, NULL);
|
||||
}
|
||||
|
||||
/* areas of same properties ------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
repeatProps(uint32_t first, uint32_t last, uint32_t x) {
|
||||
if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) {
|
||||
fprintf(stderr, "error: too many entries for the properties trie\n");
|
||||
@ -352,7 +374,7 @@ repeatProps(uint32_t first, uint32_t last, uint32_t x) {
|
||||
|
||||
/* generate output data ----------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
U_CFUNC void
|
||||
generateData(const char *dataDir, UBool csource) {
|
||||
static int32_t indexes[UPROPS_INDEX_COUNT]={
|
||||
0, 0, 0, 0,
|
||||
|
Loading…
Reference in New Issue
Block a user