ICU-8972 genprops: merge props2writer.cpp (which used to be props2.cpp) into corepropswriter.cpp (which used to be store.c); avoids clumsy back-and-forth
X-SVN-Rev: 31150
This commit is contained in:
parent
cb626ab6d7
commit
b1e48658b3
@ -5,5 +5,5 @@
|
|||||||
# created by: Markus W. Scherer
|
# created by: Markus W. Scherer
|
||||||
# edited on: 2010jul20
|
# edited on: 2010jul20
|
||||||
# edited by: Stuart G. Gill
|
# edited by: Stuart G. Gill
|
||||||
add_executable(genprops genprops.cpp corepropswriter.cpp props2writer.cpp)
|
add_executable(genprops genprops.cpp corepropswriter.cpp)
|
||||||
target_link_libraries(genprops icuuc icutu)
|
target_link_libraries(genprops icuuc icutu)
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
* file name: corepropswriter.cpp (was store.c)
|
* file name: corepropswriter.cpp (was store.c && props2.cpp)
|
||||||
* encoding: US-ASCII
|
* encoding: US-ASCII
|
||||||
* tab size: 8 (not used)
|
* tab size: 8 (not used)
|
||||||
* indentation:4
|
* indentation:4
|
||||||
@ -21,9 +21,15 @@
|
|||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/udata.h"
|
#include "unicode/udata.h"
|
||||||
|
#include "unicode/uniset.h"
|
||||||
|
#include "unicode/unistr.h"
|
||||||
|
#include "unicode/usetiter.h"
|
||||||
|
#include "unicode/uscript.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
#include "genprops.h"
|
#include "genprops.h"
|
||||||
|
#include "propsvec.h"
|
||||||
|
#include "uassert.h"
|
||||||
#include "unewdata.h"
|
#include "unewdata.h"
|
||||||
#include "uprops.h"
|
#include "uprops.h"
|
||||||
#include "utrie2.h"
|
#include "utrie2.h"
|
||||||
@ -230,6 +236,8 @@ Change from UTrie to UTrie2.
|
|||||||
|
|
||||||
----------------------------------------------------------------------------- */
|
----------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||||
|
|
||||||
U_NAMESPACE_USE
|
U_NAMESPACE_USE
|
||||||
|
|
||||||
/* UDataInfo cf. udata.h */
|
/* UDataInfo cf. udata.h */
|
||||||
@ -247,8 +255,6 @@ static UDataInfo dataInfo={
|
|||||||
{ 6, 0, 0, 0 } /* dataVersion */
|
{ 6, 0, 0, 0 } /* dataVersion */
|
||||||
};
|
};
|
||||||
|
|
||||||
static UTrie2 *pTrie=NULL;
|
|
||||||
|
|
||||||
class CorePropsWriter : public PropsWriter {
|
class CorePropsWriter : public PropsWriter {
|
||||||
public:
|
public:
|
||||||
CorePropsWriter(UErrorCode &errorCode);
|
CorePropsWriter(UErrorCode &errorCode);
|
||||||
@ -259,18 +265,34 @@ public:
|
|||||||
virtual void finalizeData(UErrorCode &errorCode);
|
virtual void finalizeData(UErrorCode &errorCode);
|
||||||
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
||||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void setGcAndNumeric(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
|
||||||
|
|
||||||
|
UTrie2 *pTrie;
|
||||||
|
UTrie2 *props2Trie;
|
||||||
|
UPropsVectors *pv;
|
||||||
|
UnicodeString scriptExtensions;
|
||||||
};
|
};
|
||||||
|
|
||||||
CorePropsWriter::CorePropsWriter(UErrorCode &errorCode) {
|
CorePropsWriter::CorePropsWriter(UErrorCode &errorCode)
|
||||||
|
: pTrie(NULL), props2Trie(NULL), pv(NULL) {
|
||||||
pTrie=utrie2_open(0, 0, &errorCode);
|
pTrie=utrie2_open(0, 0, &errorCode);
|
||||||
if(U_FAILURE(errorCode)) {
|
if(U_FAILURE(errorCode)) {
|
||||||
fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
|
fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
|
||||||
u_errorName(errorCode));
|
u_errorName(errorCode));
|
||||||
}
|
}
|
||||||
|
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
|
||||||
|
if(U_FAILURE(errorCode)) {
|
||||||
|
fprintf(stderr, "genprops error: corepropswriter upvec_open() failed - %s\n",
|
||||||
|
u_errorName(errorCode));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CorePropsWriter::~CorePropsWriter() {
|
CorePropsWriter::~CorePropsWriter() {
|
||||||
utrie2_close(pTrie);
|
utrie2_close(pTrie);
|
||||||
|
utrie2_close(props2Trie);
|
||||||
|
upvec_close(pv);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -361,7 +383,8 @@ encodeNumericValue(UChar32 start, const char *s, UErrorCode &errorCode) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
|
CorePropsWriter::setGcAndNumeric(const UniProps &props, const UnicodeSet &newValues,
|
||||||
|
UErrorCode &errorCode) {
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
if(U_FAILURE(errorCode)) { return; }
|
||||||
UChar32 start=props.start;
|
UChar32 start=props.start;
|
||||||
UChar32 end=props.end;
|
UChar32 end=props.end;
|
||||||
@ -428,6 +451,177 @@ CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UE
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct PropToBinary {
|
||||||
|
int32_t prop; // UProperty
|
||||||
|
int32_t vecWord, vecShift;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const PropToBinary
|
||||||
|
propToBinaries[]={
|
||||||
|
{ UCHAR_WHITE_SPACE, 1, UPROPS_WHITE_SPACE },
|
||||||
|
{ UCHAR_DASH, 1, UPROPS_DASH },
|
||||||
|
// Note: The Hyphen property is stabilized since Unicode 4.0
|
||||||
|
// and deprecated since Unicode 6.0.
|
||||||
|
{ UCHAR_HYPHEN, 1, UPROPS_HYPHEN },
|
||||||
|
{ UCHAR_QUOTATION_MARK, 1, UPROPS_QUOTATION_MARK },
|
||||||
|
{ UCHAR_TERMINAL_PUNCTUATION, 1, UPROPS_TERMINAL_PUNCTUATION },
|
||||||
|
// Note: The Hex_Digit and ASCII_Hex_Digit properties are probably stable enough
|
||||||
|
// so that they could be hardcoded.
|
||||||
|
{ UCHAR_HEX_DIGIT, 1, UPROPS_HEX_DIGIT },
|
||||||
|
{ UCHAR_ASCII_HEX_DIGIT, 1, UPROPS_ASCII_HEX_DIGIT },
|
||||||
|
{ UCHAR_IDEOGRAPHIC, 1, UPROPS_IDEOGRAPHIC },
|
||||||
|
{ UCHAR_DIACRITIC, 1, UPROPS_DIACRITIC },
|
||||||
|
{ UCHAR_EXTENDER, 1, UPROPS_EXTENDER },
|
||||||
|
// Note: The Noncharacter_Code_Point property is probably stable enough
|
||||||
|
// so that it could be hardcoded.
|
||||||
|
{ UCHAR_NONCHARACTER_CODE_POINT, 1, UPROPS_NONCHARACTER_CODE_POINT },
|
||||||
|
// Note: The Grapheme_Link property is deprecated since Unicode 5.0
|
||||||
|
// because it is a "Duplication of ccc=9" (UAX #44).
|
||||||
|
{ UCHAR_GRAPHEME_LINK, 1, UPROPS_GRAPHEME_LINK },
|
||||||
|
{ UCHAR_IDS_BINARY_OPERATOR, 1, UPROPS_IDS_BINARY_OPERATOR },
|
||||||
|
{ UCHAR_IDS_TRINARY_OPERATOR, 1, UPROPS_IDS_TRINARY_OPERATOR },
|
||||||
|
{ UCHAR_RADICAL, 1, UPROPS_RADICAL },
|
||||||
|
{ UCHAR_UNIFIED_IDEOGRAPH, 1, UPROPS_UNIFIED_IDEOGRAPH },
|
||||||
|
{ UCHAR_DEPRECATED, 1, UPROPS_DEPRECATED },
|
||||||
|
{ UCHAR_LOGICAL_ORDER_EXCEPTION, 1, UPROPS_LOGICAL_ORDER_EXCEPTION },
|
||||||
|
{ UCHAR_S_TERM, 1, UPROPS_S_TERM },
|
||||||
|
{ UCHAR_VARIATION_SELECTOR, 1, UPROPS_VARIATION_SELECTOR },
|
||||||
|
// Note: Pattern_Syntax & Pattern_White_Space are available via
|
||||||
|
// the internal PatternProps class and need not be stored here any more.
|
||||||
|
{ UCHAR_PATTERN_SYNTAX, 1, UPROPS_PATTERN_SYNTAX },
|
||||||
|
{ UCHAR_PATTERN_WHITE_SPACE, 1, UPROPS_PATTERN_WHITE_SPACE },
|
||||||
|
{ UCHAR_XID_START, 1, UPROPS_XID_START },
|
||||||
|
{ UCHAR_XID_CONTINUE, 1, UPROPS_XID_CONTINUE },
|
||||||
|
{ UCHAR_MATH, 1, UPROPS_MATH },
|
||||||
|
{ UCHAR_ALPHABETIC, 1, UPROPS_ALPHABETIC },
|
||||||
|
{ UCHAR_GRAPHEME_EXTEND, 1, UPROPS_GRAPHEME_EXTEND },
|
||||||
|
{ UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT },
|
||||||
|
{ UCHAR_ID_START, 1, UPROPS_ID_START },
|
||||||
|
{ UCHAR_ID_CONTINUE, 1, UPROPS_ID_CONTINUE },
|
||||||
|
{ UCHAR_GRAPHEME_BASE, 1, UPROPS_GRAPHEME_BASE },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PropToEnum {
|
||||||
|
int32_t prop; // UProperty
|
||||||
|
int32_t vecWord, vecShift;
|
||||||
|
uint32_t vecMask;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const PropToEnum
|
||||||
|
propToEnums[]={
|
||||||
|
// Use UPROPS_SCRIPT_X_MASK not UPROPS_SCRIPT_MASK:
|
||||||
|
// When writing a Script code, remove Script_Extensions bits as well.
|
||||||
|
// If needed, they will get written again.
|
||||||
|
{ UCHAR_SCRIPT, 0, 0, UPROPS_SCRIPT_X_MASK },
|
||||||
|
{ UCHAR_BLOCK, 0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK },
|
||||||
|
{ UCHAR_EAST_ASIAN_WIDTH, 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
|
||||||
|
{ UCHAR_DECOMPOSITION_TYPE, 2, 0, UPROPS_DT_MASK },
|
||||||
|
{ UCHAR_GRAPHEME_CLUSTER_BREAK, 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
|
||||||
|
{ UCHAR_WORD_BREAK, 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
|
||||||
|
{ UCHAR_SENTENCE_BREAK, 2, UPROPS_SB_SHIFT, UPROPS_SB_MASK },
|
||||||
|
{ UCHAR_LINE_BREAK, 2, UPROPS_LB_SHIFT, UPROPS_LB_MASK },
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues,
|
||||||
|
UErrorCode &errorCode) {
|
||||||
|
setGcAndNumeric(props, newValues, errorCode);
|
||||||
|
if(U_FAILURE(errorCode)) { return; }
|
||||||
|
|
||||||
|
UChar32 start=props.start;
|
||||||
|
UChar32 end=props.end;
|
||||||
|
if(start==0 && end==0x10ffff) {
|
||||||
|
// Also set bits for initialValue and errorValue.
|
||||||
|
end=UPVEC_MAX_CP;
|
||||||
|
}
|
||||||
|
if(newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
|
||||||
|
for(int32_t i=0; i<LENGTHOF(propToBinaries); ++i) {
|
||||||
|
const PropToBinary &p2b=propToBinaries[i];
|
||||||
|
U_ASSERT(p2b.vecShift<32);
|
||||||
|
if(newValues.contains(p2b.prop)) {
|
||||||
|
uint32_t mask=U_MASK(p2b.vecShift);
|
||||||
|
uint32_t value= props.binProps[p2b.prop] ? mask : 0;
|
||||||
|
upvec_setValue(pv, start, end, p2b.vecWord, value, mask, &errorCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(newValues.containsSome(UCHAR_INT_START, UCHAR_INT_LIMIT-1)) {
|
||||||
|
for(int32_t i=0; i<LENGTHOF(propToEnums); ++i) {
|
||||||
|
const PropToEnum &p2e=propToEnums[i];
|
||||||
|
U_ASSERT(p2e.vecShift<32);
|
||||||
|
if(newValues.contains(p2e.prop)) {
|
||||||
|
uint32_t mask=p2e.vecMask;
|
||||||
|
uint32_t value=(uint32_t)(props.getIntProp(p2e.prop)<<p2e.vecShift);
|
||||||
|
U_ASSERT((value&mask)==value);
|
||||||
|
upvec_setValue(pv, start, end, p2e.vecWord, value, mask, &errorCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(newValues.contains(UCHAR_AGE)) {
|
||||||
|
if(props.age[0]>15 || props.age[1]>15 || props.age[2]!=0 || props.age[3]!=0) {
|
||||||
|
char buffer[U_MAX_VERSION_STRING_LENGTH];
|
||||||
|
u_versionToString(props.age, buffer);
|
||||||
|
fprintf(stderr, "genprops error: age %s cannot be encoded\n", buffer);
|
||||||
|
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint32_t version=(props.age[0]<<4)|props.age[1];
|
||||||
|
upvec_setValue(pv, start, end,
|
||||||
|
0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK,
|
||||||
|
&errorCode);
|
||||||
|
}
|
||||||
|
// Write a new (Script, Script_Extensions) value if there are Script_Extensions
|
||||||
|
// and either Script or Script_Extensions are new on the current line.
|
||||||
|
// (If only Script is new, then it just clobbered the relevant bits.)
|
||||||
|
if( !props.scx.isEmpty() &&
|
||||||
|
(newValues.contains(UCHAR_SCRIPT) || newValues.contains(UCHAR_SCRIPT_EXTENSIONS))
|
||||||
|
) {
|
||||||
|
UnicodeString codes; // vector of 16-bit UScriptCode values
|
||||||
|
UnicodeSetIterator iter(props.scx);
|
||||||
|
while(iter.next()) { codes.append((UChar)iter.getCodepoint()); }
|
||||||
|
|
||||||
|
// Set bit 15 on the last script code, for termination.
|
||||||
|
int32_t length=codes.length();
|
||||||
|
codes.setCharAt(length-1, (UChar)(codes[length-1]|0x8000));
|
||||||
|
// Find this list of codes in the Script_Extensions data so far, or add this list.
|
||||||
|
int32_t index=scriptExtensions.indexOf(codes);
|
||||||
|
if(index<0) {
|
||||||
|
index=scriptExtensions.length();
|
||||||
|
scriptExtensions.append(codes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode the (Script, Script_Extensions index) pair.
|
||||||
|
int32_t script=props.getIntProp(UCHAR_SCRIPT);
|
||||||
|
uint32_t scriptX;
|
||||||
|
if(script==USCRIPT_COMMON) {
|
||||||
|
scriptX=UPROPS_SCRIPT_X_WITH_COMMON|(uint32_t)index;
|
||||||
|
} else if(script==USCRIPT_INHERITED) {
|
||||||
|
scriptX=UPROPS_SCRIPT_X_WITH_INHERITED|(uint32_t)index;
|
||||||
|
} else {
|
||||||
|
// Store an additional pair of 16-bit units for an unusual main Script code
|
||||||
|
// together with the Script_Extensions index.
|
||||||
|
UnicodeString codeIndexPair;
|
||||||
|
codeIndexPair.append((UChar)script).append((UChar)index);
|
||||||
|
index=scriptExtensions.indexOf(codeIndexPair);
|
||||||
|
if(index<0) {
|
||||||
|
index=scriptExtensions.length();
|
||||||
|
scriptExtensions.append(codeIndexPair);
|
||||||
|
}
|
||||||
|
scriptX=UPROPS_SCRIPT_X_WITH_OTHER|(uint32_t)index;
|
||||||
|
}
|
||||||
|
if(index>UPROPS_SCRIPT_MASK) {
|
||||||
|
fprintf(stderr, "genprops: Script_Extensions indexes overflow bit field\n");
|
||||||
|
errorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
upvec_setValue(pv, start, end, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
|
||||||
|
}
|
||||||
|
if(U_FAILURE(errorCode)) {
|
||||||
|
fprintf(stderr, "genprops error: unable to set props2 values for %04lX..%04lX: %s\n",
|
||||||
|
(long)start, (long)end, u_errorName(errorCode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t indexes[UPROPS_INDEX_COUNT]={
|
static int32_t indexes[UPROPS_INDEX_COUNT]={
|
||||||
0, 0, 0, 0,
|
0, 0, 0, 0,
|
||||||
0, 0, 0, 0,
|
0, 0, 0, 0,
|
||||||
@ -437,6 +631,9 @@ static int32_t indexes[UPROPS_INDEX_COUNT]={
|
|||||||
|
|
||||||
static uint8_t trieBlock[40000];
|
static uint8_t trieBlock[40000];
|
||||||
static int32_t trieSize;
|
static int32_t trieSize;
|
||||||
|
static uint8_t props2TrieBlock[100000];
|
||||||
|
static int32_t props2TrieSize;
|
||||||
|
|
||||||
static int32_t totalSize;
|
static int32_t totalSize;
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -446,11 +643,40 @@ CorePropsWriter::finalizeData(UErrorCode &errorCode) {
|
|||||||
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
|
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
|
||||||
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
|
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
|
||||||
if(U_FAILURE(errorCode)) {
|
if(U_FAILURE(errorCode)) {
|
||||||
fprintf(stderr, "genprops error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
|
fprintf(stderr,
|
||||||
|
"genprops error: utrie2_freeze(main trie)+utrie2_serialize() "
|
||||||
|
"failed: %s (length %ld)\n",
|
||||||
u_errorName(errorCode), (long)trieSize);
|
u_errorName(errorCode), (long)trieSize);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
props2Trie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
|
||||||
|
if(U_FAILURE(errorCode)) {
|
||||||
|
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
||||||
|
u_errorName(errorCode));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
props2TrieSize=utrie2_serialize(props2Trie,
|
||||||
|
props2TrieBlock, (int32_t)sizeof(props2TrieBlock),
|
||||||
|
&errorCode);
|
||||||
|
if(U_FAILURE(errorCode)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
|
||||||
|
u_errorName(errorCode));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t pvRows;
|
||||||
|
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||||
|
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||||
|
|
||||||
|
/* round up scriptExtensions to multiple of 4 bytes */
|
||||||
|
if(scriptExtensions.length()&1) {
|
||||||
|
scriptExtensions.append((UChar)0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set indexes */
|
||||||
int32_t offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
|
int32_t offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
|
||||||
offset+=trieSize>>2;
|
offset+=trieSize>>2;
|
||||||
indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
|
indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
|
||||||
@ -458,13 +684,34 @@ CorePropsWriter::finalizeData(UErrorCode &errorCode) {
|
|||||||
indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */
|
indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */
|
||||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
|
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
|
||||||
|
|
||||||
|
offset+=props2TrieSize/4;
|
||||||
|
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=offset;
|
||||||
|
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
||||||
|
offset+=pvCount;
|
||||||
|
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=offset;
|
||||||
|
offset+=scriptExtensions.length()/2;
|
||||||
|
indexes[UPROPS_RESERVED_INDEX_7]=offset;
|
||||||
|
indexes[UPROPS_RESERVED_INDEX_8]=offset;
|
||||||
|
indexes[UPROPS_DATA_TOP_INDEX]=offset;
|
||||||
|
totalSize=4*offset;
|
||||||
|
|
||||||
|
indexes[UPROPS_MAX_VALUES_INDEX]=
|
||||||
|
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
|
||||||
|
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
||||||
|
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
|
||||||
|
indexes[UPROPS_MAX_VALUES_2_INDEX]=
|
||||||
|
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
|
||||||
|
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
|
||||||
|
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
|
||||||
|
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
|
||||||
|
((int32_t)U_DT_COUNT-1);
|
||||||
|
|
||||||
if(beVerbose) {
|
if(beVerbose) {
|
||||||
printf("trie size in bytes: %5u\n", (int)trieSize);
|
printf("trie size in bytes: %5u\n", (int)trieSize);
|
||||||
}
|
printf("size in bytes of additional props trie:%5u\n", (int)props2TrieSize);
|
||||||
|
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
||||||
totalSize=4*offset+props2FinalizeData(indexes, errorCode);
|
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
|
||||||
|
printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions.length());
|
||||||
if(beVerbose) {
|
|
||||||
printf("data size: %6ld\n", (long)totalSize);
|
printf("data size: %6ld\n", (long)totalSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -473,6 +720,10 @@ void
|
|||||||
CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
if(U_FAILURE(errorCode)) { return; }
|
||||||
|
|
||||||
|
int32_t pvRows;
|
||||||
|
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||||
|
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||||
|
|
||||||
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
|
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
|
||||||
"icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
|
"icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
|
||||||
if(f==NULL) {
|
if(f==NULL) {
|
||||||
@ -495,7 +746,26 @@ CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
|||||||
pTrie, "propsTrie_index", NULL,
|
pTrie, "propsTrie_index", NULL,
|
||||||
"};\n\n");
|
"};\n\n");
|
||||||
|
|
||||||
props2AppendToCSourceFile(f, errorCode);
|
usrc_writeUTrie2Arrays(f,
|
||||||
|
"static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
|
||||||
|
props2Trie,
|
||||||
|
"\n};\n\n");
|
||||||
|
usrc_writeUTrie2Struct(f,
|
||||||
|
"static const UTrie2 propsVectorsTrie={\n",
|
||||||
|
props2Trie, "propsVectorsTrie_index", NULL,
|
||||||
|
"};\n\n");
|
||||||
|
|
||||||
|
usrc_writeArray(f,
|
||||||
|
"static const uint32_t propsVectors[%ld]={\n",
|
||||||
|
pvArray, 32, pvCount,
|
||||||
|
"};\n\n");
|
||||||
|
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
|
||||||
|
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);
|
||||||
|
|
||||||
|
usrc_writeArray(f,
|
||||||
|
"static const uint16_t scriptExtensions[%ld]={\n",
|
||||||
|
scriptExtensions.getBuffer(), 16, scriptExtensions.length(),
|
||||||
|
"};\n\n");
|
||||||
|
|
||||||
usrc_writeArray(f,
|
usrc_writeArray(f,
|
||||||
"static const int32_t indexes[UPROPS_INDEX_COUNT]={",
|
"static const int32_t indexes[UPROPS_INDEX_COUNT]={",
|
||||||
@ -508,6 +778,10 @@ void
|
|||||||
CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
|
CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
if(U_FAILURE(errorCode)) { return; }
|
||||||
|
|
||||||
|
int32_t pvRows;
|
||||||
|
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||||
|
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||||
|
|
||||||
UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
|
UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
|
||||||
withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||||
if(U_FAILURE(errorCode)) {
|
if(U_FAILURE(errorCode)) {
|
||||||
@ -518,7 +792,9 @@ CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCo
|
|||||||
|
|
||||||
udata_writeBlock(pData, indexes, sizeof(indexes));
|
udata_writeBlock(pData, indexes, sizeof(indexes));
|
||||||
udata_writeBlock(pData, trieBlock, trieSize);
|
udata_writeBlock(pData, trieBlock, trieSize);
|
||||||
props2AppendToBinaryFile(pData, errorCode);
|
udata_writeBlock(pData, props2TrieBlock, props2TrieSize);
|
||||||
|
udata_writeBlock(pData, pvArray, pvCount*4);
|
||||||
|
udata_writeBlock(pData, scriptExtensions.getBuffer(), scriptExtensions.length()*2);
|
||||||
|
|
||||||
long dataLength=udata_finish(pData, &errorCode);
|
long dataLength=udata_finish(pData, &errorCode);
|
||||||
if(U_FAILURE(errorCode)) {
|
if(U_FAILURE(errorCode)) {
|
||||||
|
@ -95,7 +95,6 @@ main(int argc, char* argv[]) {
|
|||||||
/* initialize */
|
/* initialize */
|
||||||
IcuToolErrorCode errorCode("genprops");
|
IcuToolErrorCode errorCode("genprops");
|
||||||
LocalPointer<PropsWriter> corePropsWriter(createCorePropsWriter(errorCode));
|
LocalPointer<PropsWriter> corePropsWriter(createCorePropsWriter(errorCode));
|
||||||
LocalPointer<PropsWriter> props2Writer(createProps2Writer(errorCode));
|
|
||||||
if(errorCode.isFailure()) {
|
if(errorCode.isFailure()) {
|
||||||
fprintf(stderr, "genprops: unable to create PropsWriters - %s\n", errorCode.errorName());
|
fprintf(stderr, "genprops: unable to create PropsWriters - %s\n", errorCode.errorName());
|
||||||
return errorCode.reset();
|
return errorCode.reset();
|
||||||
@ -125,7 +124,6 @@ main(int argc, char* argv[]) {
|
|||||||
if(ppucd.lineHasPropertyValues()) {
|
if(ppucd.lineHasPropertyValues()) {
|
||||||
const UniProps *props=ppucd.getProps(newValues, errorCode);
|
const UniProps *props=ppucd.getProps(newValues, errorCode);
|
||||||
corePropsWriter->setProps(*props, newValues, errorCode);
|
corePropsWriter->setProps(*props, newValues, errorCode);
|
||||||
props2Writer->setProps(*props, newValues, errorCode);
|
|
||||||
} else if(lineType==PreparsedUCD::UNICODE_VERSION_LINE) {
|
} else if(lineType==PreparsedUCD::UNICODE_VERSION_LINE) {
|
||||||
const UVersionInfo &version=ppucd.getUnicodeVersion();
|
const UVersionInfo &version=ppucd.getUnicodeVersion();
|
||||||
corePropsWriter->setUnicodeVersion(version);
|
corePropsWriter->setUnicodeVersion(version);
|
||||||
|
@ -34,19 +34,8 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
|
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
|
||||||
PropsWriter *createProps2Writer(UErrorCode &errorCode);
|
|
||||||
|
|
||||||
/* global flags */
|
/* global flags */
|
||||||
U_CFUNC UBool beVerbose;
|
U_CFUNC UBool beVerbose;
|
||||||
|
|
||||||
/* prototypes */
|
|
||||||
int32_t
|
|
||||||
props2FinalizeData(int32_t indexes[], UErrorCode &errorCode);
|
|
||||||
|
|
||||||
void
|
|
||||||
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode);
|
|
||||||
|
|
||||||
void
|
|
||||||
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,350 +0,0 @@
|
|||||||
/*
|
|
||||||
*******************************************************************************
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002-2011, International Business Machines
|
|
||||||
* Corporation and others. All Rights Reserved.
|
|
||||||
*
|
|
||||||
*******************************************************************************
|
|
||||||
* file name: props2writer.cpp (was props2.cpp)
|
|
||||||
* encoding: US-ASCII
|
|
||||||
* tab size: 8 (not used)
|
|
||||||
* indentation:4
|
|
||||||
*
|
|
||||||
* created on: 2002feb24
|
|
||||||
* created by: Markus W. Scherer
|
|
||||||
*
|
|
||||||
* Parse more Unicode Character Database files and store
|
|
||||||
* additional Unicode character properties in bit set vectors.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include "unicode/utypes.h"
|
|
||||||
#include "unicode/uchar.h"
|
|
||||||
#include "unicode/uniset.h"
|
|
||||||
#include "unicode/unistr.h"
|
|
||||||
#include "unicode/usetiter.h"
|
|
||||||
#include "unicode/uscript.h"
|
|
||||||
#include "cstring.h"
|
|
||||||
#include "genprops.h"
|
|
||||||
#include "propsvec.h"
|
|
||||||
#include "uassert.h"
|
|
||||||
#include "unewdata.h"
|
|
||||||
#include "uprops.h"
|
|
||||||
#include "utrie2.h"
|
|
||||||
#include "writesrc.h"
|
|
||||||
|
|
||||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
|
||||||
|
|
||||||
U_NAMESPACE_USE
|
|
||||||
|
|
||||||
static UTrie2 *newTrie=NULL;
|
|
||||||
static UPropsVectors *pv=NULL;
|
|
||||||
|
|
||||||
static UnicodeString *scriptExtensions=NULL;
|
|
||||||
|
|
||||||
class Props2Writer : public PropsWriter {
|
|
||||||
public:
|
|
||||||
Props2Writer(UErrorCode &errorCode);
|
|
||||||
virtual ~Props2Writer();
|
|
||||||
|
|
||||||
virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
|
|
||||||
};
|
|
||||||
|
|
||||||
Props2Writer::Props2Writer(UErrorCode &errorCode) {
|
|
||||||
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
|
|
||||||
if(U_FAILURE(errorCode)) {
|
|
||||||
fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
|
|
||||||
u_errorName(errorCode));
|
|
||||||
}
|
|
||||||
scriptExtensions=new UnicodeString();
|
|
||||||
}
|
|
||||||
|
|
||||||
Props2Writer::~Props2Writer() {
|
|
||||||
utrie2_close(newTrie);
|
|
||||||
upvec_close(pv);
|
|
||||||
delete scriptExtensions;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PropToBinary {
|
|
||||||
int32_t prop; // UProperty
|
|
||||||
int32_t vecWord, vecShift;
|
|
||||||
};
|
|
||||||
|
|
||||||
static const PropToBinary
|
|
||||||
propToBinaries[]={
|
|
||||||
{ UCHAR_WHITE_SPACE, 1, UPROPS_WHITE_SPACE },
|
|
||||||
{ UCHAR_DASH, 1, UPROPS_DASH },
|
|
||||||
// Note: The Hyphen property is stabilized since Unicode 4.0
|
|
||||||
// and deprecated since Unicode 6.0.
|
|
||||||
{ UCHAR_HYPHEN, 1, UPROPS_HYPHEN },
|
|
||||||
{ UCHAR_QUOTATION_MARK, 1, UPROPS_QUOTATION_MARK },
|
|
||||||
{ UCHAR_TERMINAL_PUNCTUATION, 1, UPROPS_TERMINAL_PUNCTUATION },
|
|
||||||
// Note: The Hex_Digit and ASCII_Hex_Digit properties are probably stable enough
|
|
||||||
// so that they could be hardcoded.
|
|
||||||
{ UCHAR_HEX_DIGIT, 1, UPROPS_HEX_DIGIT },
|
|
||||||
{ UCHAR_ASCII_HEX_DIGIT, 1, UPROPS_ASCII_HEX_DIGIT },
|
|
||||||
{ UCHAR_IDEOGRAPHIC, 1, UPROPS_IDEOGRAPHIC },
|
|
||||||
{ UCHAR_DIACRITIC, 1, UPROPS_DIACRITIC },
|
|
||||||
{ UCHAR_EXTENDER, 1, UPROPS_EXTENDER },
|
|
||||||
// Note: The Noncharacter_Code_Point property is probably stable enough
|
|
||||||
// so that it could be hardcoded.
|
|
||||||
{ UCHAR_NONCHARACTER_CODE_POINT, 1, UPROPS_NONCHARACTER_CODE_POINT },
|
|
||||||
// Note: The Grapheme_Link property is deprecated since Unicode 5.0
|
|
||||||
// because it is a "Duplication of ccc=9" (UAX #44).
|
|
||||||
{ UCHAR_GRAPHEME_LINK, 1, UPROPS_GRAPHEME_LINK },
|
|
||||||
{ UCHAR_IDS_BINARY_OPERATOR, 1, UPROPS_IDS_BINARY_OPERATOR },
|
|
||||||
{ UCHAR_IDS_TRINARY_OPERATOR, 1, UPROPS_IDS_TRINARY_OPERATOR },
|
|
||||||
{ UCHAR_RADICAL, 1, UPROPS_RADICAL },
|
|
||||||
{ UCHAR_UNIFIED_IDEOGRAPH, 1, UPROPS_UNIFIED_IDEOGRAPH },
|
|
||||||
{ UCHAR_DEPRECATED, 1, UPROPS_DEPRECATED },
|
|
||||||
{ UCHAR_LOGICAL_ORDER_EXCEPTION, 1, UPROPS_LOGICAL_ORDER_EXCEPTION },
|
|
||||||
{ UCHAR_S_TERM, 1, UPROPS_S_TERM },
|
|
||||||
{ UCHAR_VARIATION_SELECTOR, 1, UPROPS_VARIATION_SELECTOR },
|
|
||||||
// Note: Pattern_Syntax & Pattern_White_Space are available via
|
|
||||||
// the internal PatternProps class and need not be stored here any more.
|
|
||||||
{ UCHAR_PATTERN_SYNTAX, 1, UPROPS_PATTERN_SYNTAX },
|
|
||||||
{ UCHAR_PATTERN_WHITE_SPACE, 1, UPROPS_PATTERN_WHITE_SPACE },
|
|
||||||
{ UCHAR_XID_START, 1, UPROPS_XID_START },
|
|
||||||
{ UCHAR_XID_CONTINUE, 1, UPROPS_XID_CONTINUE },
|
|
||||||
{ UCHAR_MATH, 1, UPROPS_MATH },
|
|
||||||
{ UCHAR_ALPHABETIC, 1, UPROPS_ALPHABETIC },
|
|
||||||
{ UCHAR_GRAPHEME_EXTEND, 1, UPROPS_GRAPHEME_EXTEND },
|
|
||||||
{ UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT },
|
|
||||||
{ UCHAR_ID_START, 1, UPROPS_ID_START },
|
|
||||||
{ UCHAR_ID_CONTINUE, 1, UPROPS_ID_CONTINUE },
|
|
||||||
{ UCHAR_GRAPHEME_BASE, 1, UPROPS_GRAPHEME_BASE },
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PropToEnum {
|
|
||||||
int32_t prop; // UProperty
|
|
||||||
int32_t vecWord, vecShift;
|
|
||||||
uint32_t vecMask;
|
|
||||||
};
|
|
||||||
|
|
||||||
static const PropToEnum
|
|
||||||
propToEnums[]={
|
|
||||||
// Use UPROPS_SCRIPT_X_MASK not UPROPS_SCRIPT_MASK:
|
|
||||||
// When writing a Script code, remove Script_Extensions bits as well.
|
|
||||||
// If needed, they will get written again.
|
|
||||||
{ UCHAR_SCRIPT, 0, 0, UPROPS_SCRIPT_X_MASK },
|
|
||||||
{ UCHAR_BLOCK, 0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK },
|
|
||||||
{ UCHAR_EAST_ASIAN_WIDTH, 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
|
|
||||||
{ UCHAR_DECOMPOSITION_TYPE, 2, 0, UPROPS_DT_MASK },
|
|
||||||
{ UCHAR_GRAPHEME_CLUSTER_BREAK, 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
|
|
||||||
{ UCHAR_WORD_BREAK, 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
|
|
||||||
{ UCHAR_SENTENCE_BREAK, 2, UPROPS_SB_SHIFT, UPROPS_SB_MASK },
|
|
||||||
{ UCHAR_LINE_BREAK, 2, UPROPS_LB_SHIFT, UPROPS_LB_MASK },
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
Props2Writer::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
|
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
|
||||||
UChar32 start=props.start;
|
|
||||||
UChar32 end=props.end;
|
|
||||||
if(start==0 && end==0x10ffff) {
|
|
||||||
// Also set bits for initialValue and errorValue.
|
|
||||||
end=UPVEC_MAX_CP;
|
|
||||||
}
|
|
||||||
if(newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
|
|
||||||
for(int32_t i=0; i<LENGTHOF(propToBinaries); ++i) {
|
|
||||||
const PropToBinary &p2b=propToBinaries[i];
|
|
||||||
U_ASSERT(p2b.vecShift<32);
|
|
||||||
if(newValues.contains(p2b.prop)) {
|
|
||||||
uint32_t mask=U_MASK(p2b.vecShift);
|
|
||||||
uint32_t value= props.binProps[p2b.prop] ? mask : 0;
|
|
||||||
upvec_setValue(pv, start, end, p2b.vecWord, value, mask, &errorCode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(newValues.containsSome(UCHAR_INT_START, UCHAR_INT_LIMIT-1)) {
|
|
||||||
for(int32_t i=0; i<LENGTHOF(propToEnums); ++i) {
|
|
||||||
const PropToEnum &p2e=propToEnums[i];
|
|
||||||
U_ASSERT(p2e.vecShift<32);
|
|
||||||
if(newValues.contains(p2e.prop)) {
|
|
||||||
uint32_t mask=p2e.vecMask;
|
|
||||||
uint32_t value=(uint32_t)(props.getIntProp(p2e.prop)<<p2e.vecShift);
|
|
||||||
U_ASSERT((value&mask)==value);
|
|
||||||
upvec_setValue(pv, start, end, p2e.vecWord, value, mask, &errorCode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(newValues.contains(UCHAR_AGE)) {
|
|
||||||
if(props.age[0]>15 || props.age[1]>15 || props.age[2]!=0 || props.age[3]!=0) {
|
|
||||||
char buffer[U_MAX_VERSION_STRING_LENGTH];
|
|
||||||
u_versionToString(props.age, buffer);
|
|
||||||
fprintf(stderr, "genprops error: age %s cannot be encoded\n", buffer);
|
|
||||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
uint32_t version=(props.age[0]<<4)|props.age[1];
|
|
||||||
upvec_setValue(pv, start, end,
|
|
||||||
0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK,
|
|
||||||
&errorCode);
|
|
||||||
}
|
|
||||||
// Write a new (Script, Script_Extensions) value if there are Script_Extensions
|
|
||||||
// and either Script or Script_Extensions are new on the current line.
|
|
||||||
// (If only Script is new, then it just clobbered the relevant bits.)
|
|
||||||
if( !props.scx.isEmpty() &&
|
|
||||||
(newValues.contains(UCHAR_SCRIPT) || newValues.contains(UCHAR_SCRIPT_EXTENSIONS))
|
|
||||||
) {
|
|
||||||
UnicodeString codes; // vector of 16-bit UScriptCode values
|
|
||||||
UnicodeSetIterator iter(props.scx);
|
|
||||||
while(iter.next()) { codes.append((UChar)iter.getCodepoint()); }
|
|
||||||
|
|
||||||
// Set bit 15 on the last script code, for termination.
|
|
||||||
int32_t length=codes.length();
|
|
||||||
codes.setCharAt(length-1, (UChar)(codes[length-1]|0x8000));
|
|
||||||
// Find this list of codes in the Script_Extensions data so far, or add this list.
|
|
||||||
int32_t index=scriptExtensions->indexOf(codes);
|
|
||||||
if(index<0) {
|
|
||||||
index=scriptExtensions->length();
|
|
||||||
scriptExtensions->append(codes);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode the (Script, Script_Extensions index) pair.
|
|
||||||
int32_t script=props.getIntProp(UCHAR_SCRIPT);
|
|
||||||
uint32_t scriptX;
|
|
||||||
if(script==USCRIPT_COMMON) {
|
|
||||||
scriptX=UPROPS_SCRIPT_X_WITH_COMMON|(uint32_t)index;
|
|
||||||
} else if(script==USCRIPT_INHERITED) {
|
|
||||||
scriptX=UPROPS_SCRIPT_X_WITH_INHERITED|(uint32_t)index;
|
|
||||||
} else {
|
|
||||||
// Store an additional pair of 16-bit units for an unusual main Script code
|
|
||||||
// together with the Script_Extensions index.
|
|
||||||
UnicodeString codeIndexPair;
|
|
||||||
codeIndexPair.append((UChar)script).append((UChar)index);
|
|
||||||
index=scriptExtensions->indexOf(codeIndexPair);
|
|
||||||
if(index<0) {
|
|
||||||
index=scriptExtensions->length();
|
|
||||||
scriptExtensions->append(codeIndexPair);
|
|
||||||
}
|
|
||||||
scriptX=UPROPS_SCRIPT_X_WITH_OTHER|(uint32_t)index;
|
|
||||||
}
|
|
||||||
if(index>UPROPS_SCRIPT_MASK) {
|
|
||||||
fprintf(stderr, "genprops: Script_Extensions indexes overflow bit field\n");
|
|
||||||
errorCode=U_BUFFER_OVERFLOW_ERROR;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
upvec_setValue(pv, start, end, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
|
|
||||||
}
|
|
||||||
if(U_FAILURE(errorCode)) {
|
|
||||||
fprintf(stderr, "genprops error: unable to set props2 values for %04lX..%04lX: %s\n",
|
|
||||||
(long)start, (long)end, u_errorName(errorCode));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Buffer that receives the serialized additional-properties trie
// (filled by utrie2_serialize() in props2FinalizeData()).
static uint8_t trieBlock[100000];
// Number of bytes of trieBlock[] used by the serialized trie.
static int32_t trieSize;
|
|
||||||
|
|
||||||
int32_t
|
|
||||||
props2FinalizeData(int32_t indexes[UPROPS_INDEX_COUNT], UErrorCode &errorCode) {
|
|
||||||
if(U_FAILURE(errorCode)) { return 0; }
|
|
||||||
|
|
||||||
newTrie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
|
|
||||||
if(U_FAILURE(errorCode)) {
|
|
||||||
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
|
||||||
u_errorName(errorCode));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
trieSize=utrie2_serialize(newTrie, trieBlock, (int32_t)sizeof(trieBlock), &errorCode);
|
|
||||||
if(U_FAILURE(errorCode)) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
|
|
||||||
u_errorName(errorCode));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t pvRows;
|
|
||||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
|
||||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
|
||||||
|
|
||||||
/* round up scriptExtensions to multiple of 4 bytes */
|
|
||||||
if(scriptExtensions->length()&1) {
|
|
||||||
scriptExtensions->append((UChar)0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* set indexes */
|
|
||||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
|
|
||||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieSize/4;
|
|
||||||
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
|
||||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
|
|
||||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
|
|
||||||
indexes[UPROPS_RESERVED_INDEX_7]=
|
|
||||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
|
|
||||||
indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
|
|
||||||
indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
|
|
||||||
|
|
||||||
indexes[UPROPS_MAX_VALUES_INDEX]=
|
|
||||||
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
|
|
||||||
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
|
||||||
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
|
|
||||||
indexes[UPROPS_MAX_VALUES_2_INDEX]=
|
|
||||||
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
|
|
||||||
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
|
|
||||||
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
|
|
||||||
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
|
|
||||||
((int32_t)U_DT_COUNT-1);
|
|
||||||
|
|
||||||
if(beVerbose) {
|
|
||||||
printf("size in bytes of additional props trie:%5u\n", (int)trieSize);
|
|
||||||
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
|
||||||
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
|
|
||||||
printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
|
|
||||||
}
|
|
||||||
|
|
||||||
return 4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode) {
|
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
|
||||||
|
|
||||||
int32_t pvRows;
|
|
||||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
|
||||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
|
||||||
|
|
||||||
usrc_writeUTrie2Arrays(f,
|
|
||||||
"static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
|
|
||||||
newTrie,
|
|
||||||
"\n};\n\n");
|
|
||||||
usrc_writeUTrie2Struct(f,
|
|
||||||
"static const UTrie2 propsVectorsTrie={\n",
|
|
||||||
newTrie, "propsVectorsTrie_index", NULL,
|
|
||||||
"};\n\n");
|
|
||||||
|
|
||||||
usrc_writeArray(f,
|
|
||||||
"static const uint32_t propsVectors[%ld]={\n",
|
|
||||||
pvArray, 32, pvCount,
|
|
||||||
"};\n\n");
|
|
||||||
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
|
|
||||||
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);
|
|
||||||
|
|
||||||
usrc_writeArray(f,
|
|
||||||
"static const uint16_t scriptExtensions[%ld]={\n",
|
|
||||||
scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
|
|
||||||
"};\n\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode) {
|
|
||||||
if(U_FAILURE(errorCode)) { return; }
|
|
||||||
|
|
||||||
int32_t pvRows;
|
|
||||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
|
||||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
|
||||||
|
|
||||||
udata_writeBlock(pData, trieBlock, trieSize);
|
|
||||||
udata_writeBlock(pData, pvArray, pvCount*4);
|
|
||||||
udata_writeBlock(pData, scriptExtensions->getBuffer(), scriptExtensions->length()*2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Creates a new Props2Writer.
 *
 * @param errorCode ICU error code; nothing is created if it already indicates a failure.
 *                  Set to U_MEMORY_ALLOCATION_ERROR if the allocation returned NULL.
 * @return the new writer, or NULL
 *         (NOTE(review): the caller presumably takes ownership -- confirm against the call site)
 */
PropsWriter *
createProps2Writer(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NULL; }
    PropsWriter *pw=new Props2Writer(errorCode);
    if(pw==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return pw;
}
|
|
Loading…
Reference in New Issue
Block a user