ICU-8972 genprops: remove --csource option (always write both C & binary), use argv[1]=path/to/ICU/src/root instead of --destdir; separate finalizing vs. writing data

X-SVN-Rev: 31144
This commit is contained in:
Markus Scherer 2011-12-18 00:37:18 +00:00
parent 2cac672e6e
commit 764caf4347
4 changed files with 323 additions and 325 deletions

View File

@ -249,24 +249,6 @@ static UDataInfo dataInfo={
static UTrie2 *pTrie=NULL;
/* -------------------------------------------------------------------------- */
static void
initStore() {
UErrorCode errorCode=U_ZERO_ERROR;
pTrie=utrie2_open(0, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
}
/* Closes the trie that initStore() stored in pTrie. */
static void
exitStore() {
    utrie2_close(pTrie);
}
/* store a character's properties ------------------------------------------- */
U_CFUNC uint32_t
@ -372,131 +354,30 @@ repeatProps(uint32_t first, uint32_t last, uint32_t x) {
}
}
/* generate output data ----------------------------------------------------- */
/*
 * Freezes and serializes the main properties trie (pTrie), then writes the
 * generated data in exactly one of two forms:
 *  - csource!=0: the C header "uchar_props_data.h" in dataDir
 *  - csource==0: the binary DATA_NAME.DATA_TYPE file in dataDir
 *
 * Serialization and binary-output failures terminate the process via exit().
 * When beVerbose is set, the trie size and total data size are printed.
 */
U_CFUNC void
generateData(const char *dataDir, UBool csource) {
static int32_t indexes[UPROPS_INDEX_COUNT]={
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0
};
/* fixed-capacity staging buffers for the serialized main trie and the additional data */
static uint8_t trieBlock[40000];
static uint8_t additionalProps[120000];
UNewDataMemory *pData;
UErrorCode errorCode=U_ZERO_ERROR;
uint32_t size = 0;
int32_t trieSize, additionalPropsSize, offset;
long dataLength;
/* one error check covers both calls: errorCode is passed through freeze and serialize */
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
u_errorName(errorCode), (long)trieSize);
exit(errorCode);
}
offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
/* round up trie size to 4-alignment */
while(trieSize&3) {
trieBlock[trieSize++]=0;
}
/* offset now counts 32-bit units up to the end of the main trie */
offset+=trieSize>>2;
indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */
indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
if(beVerbose) {
printf("trie size in bytes: %5u\n", (int)trieSize);
}
if(csource) {
/* write .c file for hardcoded data */
FILE *f=usrc_createFromGenerator(dataDir, "uchar_props_data.h",
"icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
/*
 * NOTE(review): when f is NULL this function writes nothing, reports
 * nothing itself, and leaves size at 0 for the verbose output below.
 */
if(f!=NULL) {
fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
"# error This file must be #included from uchar.c only.\n"
"#endif\n\n", f);
/* unused
usrc_writeArray(f,
"static const UVersionInfo formatVersion={",
dataInfo.formatVersion, 8, 4,
"};\n\n");
*/
usrc_writeArray(f,
"static const UVersionInfo dataVersion={",
dataInfo.dataVersion, 8, 4,
"};\n\n");
usrc_writeUTrie2Arrays(f,
"static const uint16_t propsTrie_index[%ld]={\n", NULL,
pTrie,
"\n};\n\n");
usrc_writeUTrie2Struct(f,
"static const UTrie2 propsTrie={\n",
pTrie, "propsTrie_index", NULL,
"};\n\n");
/* writeAdditionalData() also fills in the remaining indexes[], so it must run before indexes[] is written */
additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
size=4*offset+additionalPropsSize; /* total size of data */
usrc_writeArray(f,
"static const int32_t indexes[UPROPS_INDEX_COUNT]={",
indexes, 32, UPROPS_INDEX_COUNT,
"};\n\n");
fclose(f);
}
} else {
/* write the data */
pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops: udata_create(%s, %s.%s) failed - %s\n",
dataDir, DATA_NAME, DATA_TYPE,
u_errorName(errorCode));
exit(errorCode);
}
/* f==NULL: the additional data is staged in additionalProps[] instead of being printed as C source */
additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
size=4*offset+additionalPropsSize; /* total size of data */
/* binary layout: indexes, main trie, additional data */
udata_writeBlock(pData, indexes, sizeof(indexes));
udata_writeBlock(pData, trieBlock, trieSize);
udata_writeBlock(pData, additionalProps, additionalPropsSize);
/* finish up */
dataLength=udata_finish(pData, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
exit(errorCode);
}
/* sanity check: the length reported by udata_finish() must equal the size computed from the indexes */
if(dataLength!=(long)size) {
fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
dataLength, (unsigned long)size);
exit(U_INTERNAL_PROGRAM_ERROR);
}
}
if(beVerbose) {
printf("data size: %6lu\n", (unsigned long)size);
}
}
class CorePropsWriter : public PropsWriter {
public:
CorePropsWriter() { initStore(); }
virtual ~CorePropsWriter() { exitStore(); }
CorePropsWriter(UErrorCode &errorCode);
virtual ~CorePropsWriter();
virtual void setUnicodeVersion(const UVersionInfo version);
virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
virtual void finalizeData(UErrorCode &errorCode);
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
};
/*
 * Opens the file-scope builder trie pTrie.
 * A utrie2_open() failure is logged to stderr and left in errorCode for the
 * caller; the constructor itself does not terminate the process.
 */
CorePropsWriter::CorePropsWriter(UErrorCode &errorCode) {
    pTrie=utrie2_open(0, 0, &errorCode);
    if(U_SUCCESS(errorCode)) {
        return;
    }
    fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
            u_errorName(errorCode));
}
/* Closes the trie that the constructor stored in pTrie. */
CorePropsWriter::~CorePropsWriter() {
    utrie2_close(pTrie);
}
void
CorePropsWriter::setUnicodeVersion(const UVersionInfo version) {
uprv_memcpy(dataInfo.dataVersion, version, 4);
@ -506,10 +387,115 @@ void
CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
}
/*
 * State shared between finalizeData() and the two write functions.
 * finalizeData() fills everything in; writeCSourceFile() and
 * writeBinaryData() only read it.
 */
/* index values emitted at the start of the data; populated by finalizeData() and props2FinalizeData() */
static int32_t indexes[UPROPS_INDEX_COUNT]={
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0
};
/* serialized form of the main properties trie */
static uint8_t trieBlock[40000];
/* number of bytes of trieBlock[] filled in by utrie2_serialize() */
static int32_t trieSize;
/* computed size in bytes of the binary payload; writeBinaryData() compares it with udata_finish() */
static int32_t totalSize;
/*
 * Freezes and serializes the main properties trie into trieBlock[], records
 * the end of that trie in the index slots that depend on it, and then calls
 * props2FinalizeData() for the remaining indexes and the additional-data size.
 * The resulting total byte count is stored in totalSize.
 *
 * Does nothing if errorCode already indicates a failure. A serialization
 * failure is logged to stderr and returned through errorCode.
 */
void
CorePropsWriter::finalizeData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }

    utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
    trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
                u_errorName(errorCode), (long)trieSize);
        return;
    }

    /* Offsets count 32-bit units: the indexes come first, the main trie follows. */
    const int32_t trieLimit=(int32_t)(sizeof(indexes)/4)+(trieSize>>2);

    /*
     * The slots for structures from the old format version 3 are all set to
     * the same offset (empty structures) so that less runtime code has to change.
     */
    indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=trieLimit;
    indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=trieLimit;
    indexes[UPROPS_EXCEPTIONS_INDEX]=trieLimit;
    indexes[UPROPS_PROPS32_INDEX]=trieLimit;

    if(beVerbose) {
        printf("trie size in bytes: %5u\n", (int)trieSize);
    }

    const int32_t additionalSize=props2FinalizeData(indexes, errorCode);
    totalSize=4*trieLimit+additionalSize;

    if(beVerbose) {
        printf("data size: %6ld\n", (long)totalSize);
    }
}
void
CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
"icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
if(f==NULL) {
errorCode=U_FILE_ACCESS_ERROR;
return;
}
fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
"# error This file must be #included from uchar.c only.\n"
"#endif\n\n", f);
usrc_writeArray(f,
"static const UVersionInfo dataVersion={",
dataInfo.dataVersion, 8, 4,
"};\n\n");
usrc_writeUTrie2Arrays(f,
"static const uint16_t propsTrie_index[%ld]={\n", NULL,
pTrie,
"\n};\n\n");
usrc_writeUTrie2Struct(f,
"static const UTrie2 propsTrie={\n",
pTrie, "propsTrie_index", NULL,
"};\n\n");
props2AppendToCSourceFile(f, errorCode);
usrc_writeArray(f,
"static const int32_t indexes[UPROPS_INDEX_COUNT]={",
indexes, 32, UPROPS_INDEX_COUNT,
"};\n\n");
fclose(f);
}
void
CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops: udata_create(%s, uprops.icu) failed - %s\n",
path, u_errorName(errorCode));
return;
}
udata_writeBlock(pData, indexes, sizeof(indexes));
udata_writeBlock(pData, trieBlock, trieSize);
props2AppendToBinaryFile(pData, errorCode);
long dataLength=udata_finish(pData, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
return;
}
if(dataLength!=(long)totalSize) {
fprintf(stderr, "genprops: data length %ld != calculated size %ld\n",
dataLength, (long)totalSize);
errorCode=U_INTERNAL_PROGRAM_ERROR;
}
}
PropsWriter *
createCorePropsWriter(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
PropsWriter *pw=new CorePropsWriter();
PropsWriter *pw=new CorePropsWriter(errorCode);
if(pw==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}

View File

@ -41,18 +41,21 @@
#include "uparse.h"
#include "uprops.h"
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// TODO: remove
#define USE_NEW 1
U_NAMESPACE_USE
UBool beVerbose=FALSE, haveCopyright=TRUE;
UBool beVerbose=FALSE;
PropsWriter::~PropsWriter() {}
void PropsWriter::setUnicodeVersion(const UVersionInfo version) {}
void PropsWriter::setUnicodeVersion(const UVersionInfo) {}
void PropsWriter::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {}
void PropsWriter::finalizeData(UErrorCode &) {}
void PropsWriter::writeCSourceFile(const char *, UErrorCode &) {}
void PropsWriter::writeBinaryData(const char *, UBool, UErrorCode &) {}
/* prototypes --------------------------------------------------------------- */
@ -67,10 +70,8 @@ enum
HELP_QUESTION_MARK,
VERBOSE,
COPYRIGHT,
DESTDIR,
SOURCEDIR,
ICUDATADIR,
CSOURCE
ICUDATADIR
};
/* Keep these values in sync with the above enums */
@ -79,25 +80,22 @@ static UOption options[]={
UOPTION_HELP_QUESTION_MARK,
UOPTION_VERBOSE,
UOPTION_COPYRIGHT,
UOPTION_DESTDIR,
UOPTION_SOURCEDIR,
UOPTION_ICUDATADIR,
UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
UOPTION_ICUDATADIR
};
extern int
main(int argc, char* argv[]) {
char filename[300];
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
const char *srcDir=NULL;
char *basename=NULL;
U_MAIN_INIT_ARGS(argc, argv);
/* preset then read command line options */
options[DESTDIR].value=u_getDataDirectory();
options[SOURCEDIR].value="";
options[ICUDATADIR].value=u_getDataDirectory();
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
/* error handling, printing usage message */
if(argc<0) {
@ -105,42 +103,35 @@ main(int argc, char* argv[]) {
"error in command line argument \"%s\"\n",
argv[-argc]);
}
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
if(argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
/*
* Broken into chucks because the C89 standard says the minimum
* Broken into chunks because the C89 standard says the minimum
* required supported string length is 509 bytes.
*/
fprintf(stderr,
"Usage: %s [-options] [suffix]\n"
"Usage: %s [-options] path/to/ICU/src/root\n"
"\n"
"read the UnicodeData.txt file and other Unicode properties files and\n"
"create a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
"Reads the preparsed UCD file path/to/ICU/src/root/source/data/unidata/ppucd.txt and\n"
"writes source and binary data files with the character properties.\n"
"(UCD=Unicode Character Database)\n"
"\n",
argv[0]);
fprintf(stderr,
"Options:\n"
"\t-h or -? or --help this usage text\n"
"\t-v or --verbose verbose output\n"
"\t-c or --copyright include a copyright notice\n"
"\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
"\t-C or --csource generate a .c source file rather than the .icu binary\n");
"\t-c or --copyright include a copyright notice\n");
fprintf(stderr,
"\t-d or --destdir destination directory, followed by the path\n"
"\t-s or --sourcedir source directory, followed by the path\n"
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
"\t followed by path, defaults to %s\n"
"\tsuffix suffix that is to be appended with a '-'\n"
"\t to the source file basenames before opening;\n"
"\t 'genprops new' will read UnicodeData-new.txt etc.\n",
"\t followed by path, defaults to %s\n",
u_getDataDirectory());
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
return argc<2 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
/* get the options values */
beVerbose=options[VERBOSE].doesOccur;
haveCopyright=options[COPYRIGHT].doesOccur;
srcDir=options[SOURCEDIR].value;
destDir=options[DESTDIR].value;
/* initialize */
IcuToolErrorCode errorCode("genprops");
@ -151,7 +142,16 @@ main(int argc, char* argv[]) {
return errorCode.reset();
}
CharString ppucdPath(srcDir, errorCode);
CharString icuSrcRoot(argv[1], errorCode);
CharString icuSource(icuSrcRoot, errorCode);
icuSource.appendPathPart("source", errorCode);
CharString icuSourceData(icuSource, errorCode);
icuSourceData.appendPathPart("data", errorCode);
CharString ppucdPath(icuSourceData, errorCode);
ppucdPath.appendPathPart("unidata", errorCode);
ppucdPath.appendPathPart("ppucd.txt", errorCode);
PreparsedUCD ppucd(ppucdPath.data(), errorCode);
@ -178,12 +178,6 @@ main(int argc, char* argv[]) {
}
}
if(argc>=2) {
suffix=argv[1];
} else {
suffix=NULL;
}
if (options[ICUDATADIR].doesOccur) {
u_setDataDirectory(options[ICUDATADIR].value);
}
@ -196,19 +190,32 @@ main(int argc, char* argv[]) {
}
/* process UnicodeData.txt */
writeUCDFilename(basename, "UnicodeData", suffix);
writeUCDFilename(basename, "UnicodeData", NULL);
parseDB(filename, errorCode);
/* process additional properties files */
*basename=0;
generateAdditionalProperties(filename, suffix, errorCode);
generateAdditionalProperties(filename, NULL, errorCode);
/* process parsed data */
if(U_SUCCESS(errorCode)) {
/* write the properties data file */
generateData(destDir, options[CSOURCE].doesOccur);
corePropsWriter->finalizeData(errorCode);
if(errorCode.isFailure()) {
fprintf(stderr, "genprops error: failure finalizing the data - %s\n",
errorCode.errorName());
return errorCode.reset();
}
// Write the files with the generated data.
CharString sourceCommon(icuSource, errorCode);
sourceCommon.appendPathPart("common", errorCode);
CharString sourceDataIn(icuSourceData, errorCode);
sourceDataIn.appendPathPart("in", errorCode);
UBool withCopyright=options[COPYRIGHT].doesOccur;
corePropsWriter->writeCSourceFile(sourceCommon.data(), errorCode);
corePropsWriter->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
return errorCode;
}

View File

@ -21,6 +21,7 @@
#include "unicode/uniset.h"
#include "ppucd.h"
#include "propsvec.h"
#include "unewdata.h"
/* file definitions */
#define DATA_NAME "uprops"
@ -31,8 +32,9 @@ public:
virtual ~PropsWriter();
virtual void setUnicodeVersion(const UVersionInfo version);
virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode);
// virtual writeCSourceFile(icusrcroot);
// virtual writeBinaryData(icusrcroot);
virtual void finalizeData(UErrorCode &errorCode);
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
};
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
@ -47,7 +49,7 @@ typedef struct {
} Props;
/* global flags */
U_CFUNC UBool beVerbose, haveCopyright;
U_CFUNC UBool beVerbose;
U_CFUNC const char *const
genCategoryNames[];
@ -74,13 +76,16 @@ getProps(uint32_t c);
U_CFUNC void
repeatProps(uint32_t first, uint32_t last, uint32_t props);
U_CFUNC void
generateData(const char *dataDir, UBool csource);
U_CFUNC void
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode);
U_CFUNC int32_t
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[16]);
int32_t
props2FinalizeData(int32_t indexes[], UErrorCode &errorCode);
void
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode);
void
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode);
#endif

View File

@ -31,6 +31,7 @@
#include "uparse.h"
#include "writesrc.h"
#include "genprops.h"
#include "unewdata.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -38,10 +39,10 @@ U_NAMESPACE_USE
/* data --------------------------------------------------------------------- */
static UTrie2 *newTrie;
static UPropsVectors *pv;
static UTrie2 *newTrie=NULL;
static UPropsVectors *pv=NULL;
static UnicodeString *scriptExtensions;
static UnicodeString *scriptExtensions=NULL;
/* miscellaneous ------------------------------------------------------------ */
@ -96,25 +97,6 @@ numericLineFn(void *context,
/* -------------------------------------------------------------------------- */
static void
initAdditionalProperties() {
UErrorCode errorCode=U_ZERO_ERROR;
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
scriptExtensions=new UnicodeString;
}
/* Releases the additional-properties trie, the vectors builder and the scriptExtensions string. */
static void
exitAdditionalProperties() {
    utrie2_close(newTrie);
    upvec_close(pv);
    delete scriptExtensions;
}
U_CFUNC void
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
char *basename;
@ -127,27 +109,6 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);
parseTwoFieldFile(filename, basename, "ScriptExtensions", suffix, scriptExtensionsLineFn, pErrorCode);
newTrie=upvec_compactToUTrie2WithRowIndexes(pv, pErrorCode);
// TODO: remove
#if 0
const uint32_t *pvArray;
int32_t pvRows;
pvArray=upvec_getArray(pv, &pvRows, NULL);
for(int32_t c=0; c<=0x10ffff; ++c) {
uint16_t ri=utrie2_get32(newTrie, c);
uint32_t v2=pvArray[ri+2];
int32_t dt=v2&UPROPS_DT_MASK;
if(dt!=0) {
printf("%04x %d\n", c, dt);
}
}
#endif
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
u_errorName(*pErrorCode));
exit(*pErrorCode);
}
}
/* ScriptExtensions.txt ----------------------------------------------------- */
@ -418,107 +379,29 @@ numericLineFn(void *context,
}
}
/* data serialization ------------------------------------------------------- */
/*
 * Serializes the additional-properties trie (newTrie) into p, fills in the
 * indexes[] entries that describe the additional data, and then either
 *  - f!=NULL: writes the trie, vectors and scriptExtensions to f as C arrays, or
 *  - f==NULL: appends the vectors and the scriptExtensions after the trie in p.
 *
 * p and capacity describe the caller's output buffer; indexes is read at
 * UPROPS_ADDITIONAL_TRIE_INDEX (which the caller must have set) and written
 * at the other slots assigned below.
 *
 * Returns the additional data size in bytes, computed from indexes[].
 * A utrie2_serialize() failure terminates the process via exit().
 */
U_CFUNC int32_t
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
const uint32_t *pvArray;
int32_t pvRows, pvCount;
int32_t length;
UErrorCode errorCode;
pvArray=upvec_getArray(pv, &pvRows, NULL);
/* total number of 32-bit words across all vector rows */
pvCount=pvRows*UPROPS_VECTOR_WORDS;
errorCode=U_ZERO_ERROR;
length=utrie2_serialize(newTrie, p, capacity, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr,
"genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
u_errorName(errorCode));
exit(errorCode);
}
/* round up scriptExtensions to multiple of 4 bytes */
if(scriptExtensions->length()&1) {
scriptExtensions->append((UChar)0);
}
/* set indexes */
/* each index is the previous one plus the length of the preceding section, in 32-bit units */
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
indexes[UPROPS_RESERVED_INDEX_7]=
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
/* maximum enum values, packed with the corresponding shift/mask constants */
indexes[UPROPS_MAX_VALUES_INDEX]=
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
indexes[UPROPS_MAX_VALUES_2_INDEX]=
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
((int32_t)U_DT_COUNT-1);
int32_t additionalPropsSize=4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
/*
 * NOTE(review): if p is NULL or the data does not fit into capacity, nothing
 * further is written and no error is reported here; the size is still returned.
 */
if(p!=NULL && additionalPropsSize<=capacity) {
if(beVerbose) {
printf("size in bytes of additional props trie:%5u\n", (int)length);
}
if(f!=NULL) {
usrc_writeUTrie2Arrays(f,
"static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
newTrie,
"\n};\n\n");
usrc_writeUTrie2Struct(f,
"static const UTrie2 propsVectorsTrie={\n",
newTrie, "propsVectorsTrie_index", NULL,
"};\n\n");
usrc_writeArray(f,
"static const uint32_t propsVectors[%ld]={\n",
pvArray, 32, pvCount,
"};\n\n");
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
usrc_writeArray(f,
"static const uint16_t scriptExtensions[%ld]={\n",
scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
"};\n\n");
} else {
/* binary form: skip past the serialized trie, then copy the vectors and the scriptExtensions */
p+=length;
length=pvCount*4;
uprv_memcpy(p, pvArray, length);
p+=length;
length=scriptExtensions->length()*2;
uprv_memcpy(p, scriptExtensions->getBuffer(), length);
}
if(beVerbose) {
printf("number of additional props vectors: %5u\n", (int)pvRows);
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
}
}
return additionalPropsSize;
}
class Props2Writer : public PropsWriter {
public:
Props2Writer() { initAdditionalProperties(); }
virtual ~Props2Writer() { exitAdditionalProperties(); }
Props2Writer(UErrorCode &errorCode);
virtual ~Props2Writer();
virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
};
/*
 * Creates the properties-vectors builder pv (UPROPS_VECTOR_WORDS columns)
 * and an empty scriptExtensions string.
 *
 * Failures are returned through errorCode rather than terminating the
 * process: a upvec_open() failure is logged to stderr, and a failed
 * allocation of scriptExtensions sets U_MEMORY_ALLOCATION_ERROR unless an
 * earlier failure is already recorded.
 */
Props2Writer::Props2Writer(UErrorCode &errorCode) {
    pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
                u_errorName(errorCode));
    }

    scriptExtensions=new UnicodeString;
    /*
     * scriptExtensions is dereferenced unconditionally later on, so a failed
     * allocation must be reported here. The NULL test follows the way this
     * file checks the result of new in the factory function.
     */
    if(scriptExtensions==NULL && U_SUCCESS(errorCode)) {
        fprintf(stderr, "genprops error: props2writer out of memory for scriptExtensions\n");
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
/* Releases the additional-properties trie, the vectors builder and the scriptExtensions string. */
Props2Writer::~Props2Writer() {
    utrie2_close(newTrie);
    upvec_close(pv);
    delete scriptExtensions;
}
struct PropToBinary {
int32_t prop; // UProperty
int32_t vecWord, vecShift;
@ -642,10 +525,127 @@ Props2Writer::setProps(const UniProps &props, const UnicodeSet &newValues, UErro
}
}
/* serialized additional-properties trie; filled by props2FinalizeData(), written by props2AppendToBinaryFile() */
static uint8_t trieBlock[100000];
/* number of bytes of trieBlock[] filled in by utrie2_serialize() */
static int32_t trieSize;
/*
 * Compacts the properties vectors into newTrie, serializes that trie into
 * trieBlock[], pads scriptExtensions to an even number of 16-bit units, and
 * fills in the indexes[] slots that describe the additional data.
 * indexes[UPROPS_ADDITIONAL_TRIE_INDEX] must already be set by the caller.
 *
 * Returns the size in bytes of the additional data, or 0 if errorCode
 * indicates a failure on entry or after either build step (logged to stderr).
 */
int32_t
props2FinalizeData(int32_t indexes[UPROPS_INDEX_COUNT], UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return 0;
    }

    newTrie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
                u_errorName(errorCode));
        return 0;
    }

    trieSize=utrie2_serialize(newTrie, trieBlock, (int32_t)sizeof(trieBlock), &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr,
                "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
                u_errorName(errorCode));
        return 0;
    }

    int32_t rowCount;
    const uint32_t *pvArray=upvec_getArray(pv, &rowCount, NULL);
    const int32_t wordCount=rowCount*UPROPS_VECTOR_WORDS;

    // TODO: remove
#if 0
    for(int32_t c=0; c<=0x10ffff; ++c) {
        uint16_t ri=utrie2_get32(newTrie, c);
        uint32_t v2=pvArray[ri+2];
        int32_t dt=v2&UPROPS_DT_MASK;
        if(dt!=0) {
            printf("%04x %d\n", c, dt);
        }
    }
#endif

    /* An odd number of 16-bit units gets one zero unit appended: a multiple of 4 bytes. */
    if((scriptExtensions->length()&1)!=0) {
        scriptExtensions->append((UChar)0);
    }

    /* Each section starts where the previous one ends; offsets are in 32-bit units. */
    const int32_t vectorsStart=indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieSize/4;
    indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=vectorsStart;
    indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;

    const int32_t scxStart=vectorsStart+wordCount;
    indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=scxStart;

    const int32_t dataTop=scxStart+scriptExtensions->length()/2;
    indexes[UPROPS_RESERVED_INDEX_7]=dataTop;
    indexes[UPROPS_RESERVED_INDEX_8]=dataTop;
    indexes[UPROPS_DATA_TOP_INDEX]=dataTop;

    /* Maximum enum values, packed with the corresponding shift/mask constants. */
    indexes[UPROPS_MAX_VALUES_INDEX]=
        (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
        (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
        (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
    indexes[UPROPS_MAX_VALUES_2_INDEX]=
        (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
        (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
        (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
        (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
        ((int32_t)U_DT_COUNT-1);

    if(beVerbose) {
        printf("size in bytes of additional props trie:%5u\n", (int)trieSize);
        printf("number of additional props vectors:    %5u\n", (int)rowCount);
        printf("number of 32-bit words per vector:     %5u\n", UPROPS_VECTOR_WORDS);
        printf("number of 16-bit scriptExtensions:     %5u\n", (int)scriptExtensions->length());
    }

    return 4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
}
/*
 * Appends the additional-properties data to the C source file f: the
 * propsVectorsTrie arrays and struct, the propsVectors array with its count
 * and column constants, and the scriptExtensions array.
 * Does nothing if errorCode already indicates a failure.
 */
void
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }

    int32_t rowCount;
    const uint32_t *vectors=upvec_getArray(pv, &rowCount, NULL);
    const int32_t wordCount=rowCount*UPROPS_VECTOR_WORDS;

    usrc_writeUTrie2Arrays(f,
        "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
        newTrie,
        "\n};\n\n");
    usrc_writeUTrie2Struct(f,
        "static const UTrie2 propsVectorsTrie={\n",
        newTrie, "propsVectorsTrie_index", NULL,
        "};\n\n");

    usrc_writeArray(f,
        "static const uint32_t propsVectors[%ld]={\n",
        vectors, 32, wordCount,
        "};\n\n");
    fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)wordCount);
    fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);

    usrc_writeArray(f,
        "static const uint16_t scriptExtensions[%ld]={\n",
        scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
        "};\n\n");
}
/*
 * Appends the additional-properties data to the binary file being built in
 * pData: the serialized trie, then the properties vectors (4 bytes per
 * word), then the scriptExtensions (2 bytes per unit).
 * Does nothing if errorCode already indicates a failure.
 */
void
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }

    int32_t rowCount;
    const uint32_t *vectors=upvec_getArray(pv, &rowCount, NULL);
    const int32_t wordCount=rowCount*UPROPS_VECTOR_WORDS;

    udata_writeBlock(pData, trieBlock, trieSize);
    udata_writeBlock(pData, vectors, wordCount*4);
    udata_writeBlock(pData, scriptExtensions->getBuffer(), scriptExtensions->length()*2);
}
PropsWriter *
createProps2Writer(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
PropsWriter *pw=new Props2Writer();
PropsWriter *pw=new Props2Writer(errorCode);
if(pw==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}