ICU-8972 genprops: remove --csource option (always write both C & binary), use argv[1]=path/to/ICU/src/root instead of --destdir; separate finalizing vs. writing data
X-SVN-Rev: 31144
This commit is contained in:
parent
2cac672e6e
commit
764caf4347
@ -249,24 +249,6 @@ static UDataInfo dataInfo={
|
||||
|
||||
static UTrie2 *pTrie=NULL;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
initStore() {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
pTrie=utrie2_open(0, 0, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exitStore() {
|
||||
utrie2_close(pTrie);
|
||||
}
|
||||
|
||||
/* store a character's properties ------------------------------------------- */
|
||||
|
||||
U_CFUNC uint32_t
|
||||
@ -372,131 +354,30 @@ repeatProps(uint32_t first, uint32_t last, uint32_t x) {
|
||||
}
|
||||
}
|
||||
|
||||
/* generate output data ----------------------------------------------------- */
|
||||
|
||||
/*
 * Freezes and serializes the main properties trie, fills in the indexes[]
 * offsets, and then writes the data into dataDir:
 * - csource: the C header uchar_props_data.h
 * - otherwise: the binary data file DATA_NAME.DATA_TYPE
 *
 * Exits the process on a serialization error, when the binary file cannot be
 * created or finished, or when the written length differs from the
 * calculated size. A header file that cannot be created is skipped silently.
 */
U_CFUNC void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UPROPS_INDEX_COUNT]={ 0 };
    static uint8_t trieBlock[40000];
    static uint8_t additionalProps[120000];

    UErrorCode status=U_ZERO_ERROR;

    utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &status);
    int32_t trieBytes=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
                u_errorName(status), (long)trieBytes);
        exit(status);
    }

    /* pad the serialized trie with zero bytes up to a multiple of 4 */
    for(; (trieBytes&3)!=0; ++trieBytes) {
        trieBlock[trieBytes]=0;
    }

    /* offsets are counted in 32-bit units: the indexes first, then the trie */
    const int32_t wordOffset=(int32_t)(sizeof(indexes)/4)+(trieBytes>>2);

    /*
     * The structures from the old format version 3 are empty:
     * all of their offsets are the same, so that less runtime code has to change.
     */
    indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=wordOffset;
    indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=wordOffset;
    indexes[UPROPS_EXCEPTIONS_INDEX]=wordOffset;
    indexes[UPROPS_PROPS32_INDEX]=wordOffset;

    if(beVerbose) {
        printf("trie size in bytes: %5u\n", (int)trieBytes);
    }

    uint32_t totalBytes=0;  /* stays 0 if the header file cannot be created */

    if(!csource) {
        /* binary output */
        UNewDataMemory *pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
                                           haveCopyright ? U_COPYRIGHT_STRING : NULL, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "genprops: udata_create(%s, %s.%s) failed - %s\n",
                    dataDir, DATA_NAME, DATA_TYPE,
                    u_errorName(status));
            exit(status);
        }

        int32_t extraBytes=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
        totalBytes=4*wordOffset+extraBytes;

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieBytes);
        udata_writeBlock(pData, additionalProps, extraBytes);

        long writtenBytes=udata_finish(pData, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "genprops: error %d writing the output file\n", status);
            exit(status);
        }

        if(writtenBytes!=(long)totalBytes) {
            fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
                    writtenBytes, (unsigned long)totalBytes);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    } else {
        /* .h file with the hardcoded data */
        FILE *f=usrc_createFromGenerator(dataDir, "uchar_props_data.h",
                                         "icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
        if(f!=NULL) {
            fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
                  "#   error This file must be #included from uchar.c only.\n"
                  "#endif\n\n", f);
            usrc_writeArray(f,
                "static const UVersionInfo dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t propsTrie_index[%ld]={\n", NULL,
                pTrie,
                "\n};\n\n");
            usrc_writeUTrie2Struct(f,
                "static const UTrie2 propsTrie={\n",
                pTrie, "propsTrie_index", NULL,
                "};\n\n");

            /* also completes indexes[], so the indexes array is written last */
            int32_t extraBytes=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
            totalBytes=4*wordOffset+extraBytes;

            usrc_writeArray(f,
                "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
                indexes, 32, UPROPS_INDEX_COUNT,
                "};\n\n");
            fclose(f);
        }
    }

    if(beVerbose) {
        printf("data size:                            %6lu\n", (unsigned long)totalBytes);
    }
}
|
||||
|
||||
class CorePropsWriter : public PropsWriter {
|
||||
public:
|
||||
CorePropsWriter() { initStore(); }
|
||||
virtual ~CorePropsWriter() { exitStore(); }
|
||||
CorePropsWriter(UErrorCode &errorCode);
|
||||
virtual ~CorePropsWriter();
|
||||
|
||||
virtual void setUnicodeVersion(const UVersionInfo version);
|
||||
virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
|
||||
virtual void finalizeData(UErrorCode &errorCode);
|
||||
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
/*
 * Opens the main properties trie and stores it in the file-level pTrie.
 * A failure is reported on stderr and left in errorCode for the caller.
 */
CorePropsWriter::CorePropsWriter(UErrorCode &errorCode) {
    pTrie=utrie2_open(0, 0, &errorCode);
    if(U_SUCCESS(errorCode)) {
        return;
    }

    const char *reason=u_errorName(errorCode);
    fprintf(stderr, "genprops error: corepropswriter utrie2_open() failed - %s\n", reason);
}
|
||||
|
||||
/*
 * Releases the main properties trie.
 * pTrie is a file-level pointer that outlives this object, so it is reset to
 * NULL to keep later code from using or closing a released trie.
 */
CorePropsWriter::~CorePropsWriter() {
    utrie2_close(pTrie);
    pTrie=NULL;
}
|
||||
|
||||
void
|
||||
CorePropsWriter::setUnicodeVersion(const UVersionInfo version) {
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
@ -506,10 +387,115 @@ void
|
||||
CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) {
|
||||
}
|
||||
|
||||
static int32_t indexes[UPROPS_INDEX_COUNT]={
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0
|
||||
};
|
||||
|
||||
static uint8_t trieBlock[40000];
|
||||
static int32_t trieSize;
|
||||
static int32_t totalSize;
|
||||
|
||||
void
|
||||
CorePropsWriter::finalizeData(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
|
||||
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: utrie2_freeze(main trie)+utrie2_serialize() failed: %s (length %ld)\n",
|
||||
u_errorName(errorCode), (long)trieSize);
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
|
||||
offset+=trieSize>>2;
|
||||
indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
|
||||
indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */
|
||||
indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */
|
||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
|
||||
|
||||
if(beVerbose) {
|
||||
printf("trie size in bytes: %5u\n", (int)trieSize);
|
||||
}
|
||||
|
||||
totalSize=4*offset+props2FinalizeData(indexes, errorCode);
|
||||
|
||||
if(beVerbose) {
|
||||
printf("data size: %6ld\n", (long)totalSize);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CorePropsWriter::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
|
||||
"icu/tools/src/unicode/c/genprops/corepropswriter.cpp");
|
||||
if(f==NULL) {
|
||||
errorCode=U_FILE_ACCESS_ERROR;
|
||||
return;
|
||||
}
|
||||
fputs("#ifndef INCLUDED_FROM_UCHAR_C\n"
|
||||
"# error This file must be #included from uchar.c only.\n"
|
||||
"#endif\n\n", f);
|
||||
usrc_writeArray(f,
|
||||
"static const UVersionInfo dataVersion={",
|
||||
dataInfo.dataVersion, 8, 4,
|
||||
"};\n\n");
|
||||
usrc_writeUTrie2Arrays(f,
|
||||
"static const uint16_t propsTrie_index[%ld]={\n", NULL,
|
||||
pTrie,
|
||||
"\n};\n\n");
|
||||
usrc_writeUTrie2Struct(f,
|
||||
"static const UTrie2 propsTrie={\n",
|
||||
pTrie, "propsTrie_index", NULL,
|
||||
"};\n\n");
|
||||
|
||||
props2AppendToCSourceFile(f, errorCode);
|
||||
|
||||
usrc_writeArray(f,
|
||||
"static const int32_t indexes[UPROPS_INDEX_COUNT]={",
|
||||
indexes, 32, UPROPS_INDEX_COUNT,
|
||||
"};\n\n");
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void
|
||||
CorePropsWriter::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
UNewDataMemory *pData=udata_create(path, "icu", "uprops", &dataInfo,
|
||||
withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops: udata_create(%s, uprops.icu) failed - %s\n",
|
||||
path, u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
udata_writeBlock(pData, indexes, sizeof(indexes));
|
||||
udata_writeBlock(pData, trieBlock, trieSize);
|
||||
props2AppendToBinaryFile(pData, errorCode);
|
||||
|
||||
long dataLength=udata_finish(pData, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
if(dataLength!=(long)totalSize) {
|
||||
fprintf(stderr, "genprops: data length %ld != calculated size %ld\n",
|
||||
dataLength, (long)totalSize);
|
||||
errorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
PropsWriter *
|
||||
createCorePropsWriter(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
PropsWriter *pw=new CorePropsWriter();
|
||||
PropsWriter *pw=new CorePropsWriter(errorCode);
|
||||
if(pw==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
@ -41,18 +41,21 @@
|
||||
#include "uparse.h"
|
||||
#include "uprops.h"
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
// TODO: remove
|
||||
#define USE_NEW 1
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
UBool beVerbose=FALSE;
|
||||
|
||||
PropsWriter::~PropsWriter() {}
|
||||
void PropsWriter::setUnicodeVersion(const UVersionInfo version) {}
|
||||
void PropsWriter::setUnicodeVersion(const UVersionInfo) {}
|
||||
void PropsWriter::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {}
|
||||
void PropsWriter::finalizeData(UErrorCode &) {}
|
||||
void PropsWriter::writeCSourceFile(const char *, UErrorCode &) {}
|
||||
void PropsWriter::writeBinaryData(const char *, UBool, UErrorCode &) {}
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
@ -67,10 +70,8 @@ enum
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR,
|
||||
ICUDATADIR,
|
||||
CSOURCE
|
||||
ICUDATADIR
|
||||
};
|
||||
|
||||
/* Keep these values in sync with the above enums */
|
||||
@ -79,25 +80,22 @@ static UOption options[]={
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
UOPTION_ICUDATADIR,
|
||||
UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
|
||||
UOPTION_ICUDATADIR
|
||||
};
|
||||
|
||||
extern int
|
||||
main(int argc, char* argv[]) {
|
||||
char filename[300];
|
||||
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
|
||||
const char *srcDir=NULL;
|
||||
char *basename=NULL;
|
||||
|
||||
U_MAIN_INIT_ARGS(argc, argv);
|
||||
|
||||
/* preset then read command line options */
|
||||
options[DESTDIR].value=u_getDataDirectory();
|
||||
options[SOURCEDIR].value="";
|
||||
options[ICUDATADIR].value=u_getDataDirectory();
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
@ -105,42 +103,35 @@ main(int argc, char* argv[]) {
|
||||
"error in command line argument \"%s\"\n",
|
||||
argv[-argc]);
|
||||
}
|
||||
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
if(argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
/*
|
||||
* Broken into chucks because the C89 standard says the minimum
|
||||
* Broken into chunks because the C89 standard says the minimum
|
||||
* required supported string length is 509 bytes.
|
||||
*/
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-options] [suffix]\n"
|
||||
"Usage: %s [-options] path/to/ICU/src/root\n"
|
||||
"\n"
|
||||
"read the UnicodeData.txt file and other Unicode properties files and\n"
|
||||
"create a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
|
||||
"Reads the preparsed UCD file path/to/ICU/src/root/source/data/unidata/ppucd.txt and\n"
|
||||
"writes source and binary data files with the character properties.\n"
|
||||
"(UCD=Unicode Character Database)\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
"Options:\n"
|
||||
"\t-h or -? or --help this usage text\n"
|
||||
"\t-v or --verbose verbose output\n"
|
||||
"\t-c or --copyright include a copyright notice\n"
|
||||
"\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
|
||||
"\t-C or --csource generate a .c source file rather than the .icu binary\n");
|
||||
"\t-c or --copyright include a copyright notice\n");
|
||||
fprintf(stderr,
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
||||
"\t followed by path, defaults to %s\n"
|
||||
"\tsuffix suffix that is to be appended with a '-'\n"
|
||||
"\t to the source file basenames before opening;\n"
|
||||
"\t 'genprops new' will read UnicodeData-new.txt etc.\n",
|
||||
"\t followed by path, defaults to %s\n",
|
||||
u_getDataDirectory());
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
return argc<2 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
/* get the options values */
|
||||
beVerbose=options[VERBOSE].doesOccur;
|
||||
haveCopyright=options[COPYRIGHT].doesOccur;
|
||||
srcDir=options[SOURCEDIR].value;
|
||||
destDir=options[DESTDIR].value;
|
||||
|
||||
/* initialize */
|
||||
IcuToolErrorCode errorCode("genprops");
|
||||
@ -151,7 +142,16 @@ main(int argc, char* argv[]) {
|
||||
return errorCode.reset();
|
||||
}
|
||||
|
||||
CharString ppucdPath(srcDir, errorCode);
|
||||
CharString icuSrcRoot(argv[1], errorCode);
|
||||
|
||||
CharString icuSource(icuSrcRoot, errorCode);
|
||||
icuSource.appendPathPart("source", errorCode);
|
||||
|
||||
CharString icuSourceData(icuSource, errorCode);
|
||||
icuSourceData.appendPathPart("data", errorCode);
|
||||
|
||||
CharString ppucdPath(icuSourceData, errorCode);
|
||||
ppucdPath.appendPathPart("unidata", errorCode);
|
||||
ppucdPath.appendPathPart("ppucd.txt", errorCode);
|
||||
|
||||
PreparsedUCD ppucd(ppucdPath.data(), errorCode);
|
||||
@ -178,12 +178,6 @@ main(int argc, char* argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
if(argc>=2) {
|
||||
suffix=argv[1];
|
||||
} else {
|
||||
suffix=NULL;
|
||||
}
|
||||
|
||||
if (options[ICUDATADIR].doesOccur) {
|
||||
u_setDataDirectory(options[ICUDATADIR].value);
|
||||
}
|
||||
@ -196,19 +190,32 @@ main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
/* process UnicodeData.txt */
|
||||
writeUCDFilename(basename, "UnicodeData", suffix);
|
||||
writeUCDFilename(basename, "UnicodeData", NULL);
|
||||
parseDB(filename, errorCode);
|
||||
|
||||
/* process additional properties files */
|
||||
*basename=0;
|
||||
generateAdditionalProperties(filename, suffix, errorCode);
|
||||
generateAdditionalProperties(filename, NULL, errorCode);
|
||||
|
||||
/* process parsed data */
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
/* write the properties data file */
|
||||
generateData(destDir, options[CSOURCE].doesOccur);
|
||||
corePropsWriter->finalizeData(errorCode);
|
||||
if(errorCode.isFailure()) {
|
||||
fprintf(stderr, "genprops error: failure finalizing the data - %s\n",
|
||||
errorCode.errorName());
|
||||
return errorCode.reset();
|
||||
}
|
||||
|
||||
// Write the files with the generated data.
|
||||
CharString sourceCommon(icuSource, errorCode);
|
||||
sourceCommon.appendPathPart("common", errorCode);
|
||||
|
||||
CharString sourceDataIn(icuSourceData, errorCode);
|
||||
sourceDataIn.appendPathPart("in", errorCode);
|
||||
|
||||
UBool withCopyright=options[COPYRIGHT].doesOccur;
|
||||
|
||||
corePropsWriter->writeCSourceFile(sourceCommon.data(), errorCode);
|
||||
corePropsWriter->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
|
||||
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "ppucd.h"
|
||||
#include "propsvec.h"
|
||||
#include "unewdata.h"
|
||||
|
||||
/* file definitions */
|
||||
#define DATA_NAME "uprops"
|
||||
@ -31,8 +32,9 @@ public:
|
||||
virtual ~PropsWriter();
|
||||
virtual void setUnicodeVersion(const UVersionInfo version);
|
||||
virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode);
|
||||
// virtual writeCSourceFile(icusrcroot);
|
||||
// virtual writeBinaryData(icusrcroot);
|
||||
virtual void finalizeData(UErrorCode &errorCode);
|
||||
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
|
||||
@ -47,7 +49,7 @@ typedef struct {
|
||||
} Props;
|
||||
|
||||
/* global flags */
|
||||
U_CFUNC UBool beVerbose, haveCopyright;
|
||||
U_CFUNC UBool beVerbose;
|
||||
|
||||
U_CFUNC const char *const
|
||||
genCategoryNames[];
|
||||
@ -74,13 +76,16 @@ getProps(uint32_t c);
|
||||
U_CFUNC void
|
||||
repeatProps(uint32_t first, uint32_t last, uint32_t props);
|
||||
|
||||
U_CFUNC void
|
||||
generateData(const char *dataDir, UBool csource);
|
||||
|
||||
U_CFUNC void
|
||||
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC int32_t
|
||||
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[16]);
|
||||
int32_t
|
||||
props2FinalizeData(int32_t indexes[], UErrorCode &errorCode);
|
||||
|
||||
void
|
||||
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode);
|
||||
|
||||
void
|
||||
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode);
|
||||
|
||||
#endif
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "uparse.h"
|
||||
#include "writesrc.h"
|
||||
#include "genprops.h"
|
||||
#include "unewdata.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
@ -38,10 +39,10 @@ U_NAMESPACE_USE
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
static UTrie2 *newTrie;
|
||||
static UPropsVectors *pv;
|
||||
static UTrie2 *newTrie=NULL;
|
||||
static UPropsVectors *pv=NULL;
|
||||
|
||||
static UnicodeString *scriptExtensions;
|
||||
static UnicodeString *scriptExtensions=NULL;
|
||||
|
||||
/* miscellaneous ------------------------------------------------------------ */
|
||||
|
||||
@ -96,25 +97,6 @@ numericLineFn(void *context,
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
initAdditionalProperties() {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
scriptExtensions=new UnicodeString;
|
||||
}
|
||||
|
||||
static void
|
||||
exitAdditionalProperties() {
|
||||
utrie2_close(newTrie);
|
||||
upvec_close(pv);
|
||||
delete scriptExtensions;
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
|
||||
char *basename;
|
||||
@ -127,27 +109,6 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);
|
||||
|
||||
parseTwoFieldFile(filename, basename, "ScriptExtensions", suffix, scriptExtensionsLineFn, pErrorCode);
|
||||
|
||||
newTrie=upvec_compactToUTrie2WithRowIndexes(pv, pErrorCode);
|
||||
// TODO: remove
|
||||
#if 0
|
||||
const uint32_t *pvArray;
|
||||
int32_t pvRows;
|
||||
pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||
for(int32_t c=0; c<=0x10ffff; ++c) {
|
||||
uint16_t ri=utrie2_get32(newTrie, c);
|
||||
uint32_t v2=pvArray[ri+2];
|
||||
int32_t dt=v2&UPROPS_DT_MASK;
|
||||
if(dt!=0) {
|
||||
printf("%04x %d\n", c, dt);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
||||
u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
/* ScriptExtensions.txt ----------------------------------------------------- */
|
||||
@ -418,107 +379,29 @@ numericLineFn(void *context,
|
||||
}
|
||||
}
|
||||
|
||||
/* data serialization ------------------------------------------------------- */
|
||||
|
||||
/*
 * Serializes the additional-properties trie into p, completes the indexes[]
 * entries that follow UPROPS_ADDITIONAL_TRIE_INDEX, and then either
 * - writes the trie, the properties vectors and scriptExtensions as C arrays
 *   to f (f!=NULL), or
 * - appends the vectors and scriptExtensions after the trie bytes in p (f==NULL).
 *
 * The second step is skipped when p is NULL or the data does not fit into
 * capacity bytes. Exits the process if the trie cannot be serialized.
 *
 * Returns the size of the additional-properties data in bytes.
 */
U_CFUNC int32_t
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
    int32_t pvRows;
    const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
    const int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;

    UErrorCode status=U_ZERO_ERROR;
    const int32_t trieBytes=utrie2_serialize(newTrie, p, capacity, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr,
                "genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
                u_errorName(status));
        exit(status);
    }

    /* an even number of 16-bit units keeps scriptExtensions a multiple of 4 bytes */
    if((scriptExtensions->length()&1)!=0) {
        scriptExtensions->append((UChar)0);
    }

    /* offsets in 32-bit units: trie, then vectors, then scriptExtensions */
    const int32_t vectorsStart=indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieBytes/4;
    const int32_t scxStart=vectorsStart+pvCount;
    const int32_t dataTop=scxStart+scriptExtensions->length()/2;

    indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=vectorsStart;
    indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
    indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=scxStart;
    indexes[UPROPS_RESERVED_INDEX_7]=dataTop;
    indexes[UPROPS_RESERVED_INDEX_8]=dataTop;
    indexes[UPROPS_DATA_TOP_INDEX]=dataTop;

    indexes[UPROPS_MAX_VALUES_INDEX]=
        (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
        (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
        (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
    indexes[UPROPS_MAX_VALUES_2_INDEX]=
        (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
        (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
        (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
        (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
        ((int32_t)U_DT_COUNT-1);

    const int32_t additionalPropsSize=4*(dataTop-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
    if(p==NULL || additionalPropsSize>capacity) {
        /* nothing more is written; the caller still gets the size */
        return additionalPropsSize;
    }

    if(beVerbose) {
        printf("size in bytes of additional props trie:%5u\n", (int)trieBytes);
    }

    if(f==NULL) {
        /* binary output: vectors and scriptExtensions directly follow the trie bytes */
        uint8_t *dest=p+trieBytes;
        const int32_t vectorBytes=pvCount*4;
        uprv_memcpy(dest, pvArray, vectorBytes);

        dest+=vectorBytes;
        const int32_t scxBytes=scriptExtensions->length()*2;
        uprv_memcpy(dest, scriptExtensions->getBuffer(), scxBytes);
    } else {
        usrc_writeUTrie2Arrays(f,
            "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
            newTrie,
            "\n};\n\n");
        usrc_writeUTrie2Struct(f,
            "static const UTrie2 propsVectorsTrie={\n",
            newTrie, "propsVectorsTrie_index", NULL,
            "};\n\n");

        usrc_writeArray(f,
            "static const uint32_t propsVectors[%ld]={\n",
            pvArray, 32, pvCount,
            "};\n\n");
        fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
        fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);

        usrc_writeArray(f,
            "static const uint16_t scriptExtensions[%ld]={\n",
            scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
            "};\n\n");
    }

    if(beVerbose) {
        printf("number of additional props vectors:    %5u\n", (int)pvRows);
        printf("number of 32-bit words per vector:     %5u\n", UPROPS_VECTOR_WORDS);
        printf("number of 16-bit scriptExtensions:     %5u\n", (int)scriptExtensions->length());
    }

    return additionalPropsSize;
}
|
||||
|
||||
class Props2Writer : public PropsWriter {
|
||||
public:
|
||||
Props2Writer() { initAdditionalProperties(); }
|
||||
virtual ~Props2Writer() { exitAdditionalProperties(); }
|
||||
Props2Writer(UErrorCode &errorCode);
|
||||
virtual ~Props2Writer();
|
||||
|
||||
virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
/*
 * Allocates the properties vectors (pv) and the scriptExtensions string
 * used for the additional properties.
 * A failure is reported on stderr and left in errorCode for the caller;
 * an earlier failure code is never overwritten.
 */
Props2Writer::Props2Writer(UErrorCode &errorCode) {
    pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: props2writer upvec_open() failed - %s\n",
                u_errorName(errorCode));
    }
    scriptExtensions=new UnicodeString;
    if(scriptExtensions==NULL && U_SUCCESS(errorCode)) {
        /* later code dereferences scriptExtensions without checking it */
        fprintf(stderr, "genprops error: props2writer out of memory for scriptExtensions\n");
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
/*
 * Releases the additional-properties trie, the properties vectors and the
 * scriptExtensions string.
 * All three are file-level pointers that outlive this object, so each is
 * reset to NULL to keep later code from using released objects.
 */
Props2Writer::~Props2Writer() {
    utrie2_close(newTrie);
    newTrie=NULL;
    upvec_close(pv);
    pv=NULL;
    delete scriptExtensions;
    scriptExtensions=NULL;
}
|
||||
|
||||
struct PropToBinary {
|
||||
int32_t prop; // UProperty
|
||||
int32_t vecWord, vecShift;
|
||||
@ -642,10 +525,127 @@ Props2Writer::setProps(const UniProps &props, const UnicodeSet &newValues, UErro
|
||||
}
|
||||
}
|
||||
|
||||
static uint8_t trieBlock[100000];
|
||||
static int32_t trieSize;
|
||||
|
||||
int32_t
|
||||
props2FinalizeData(int32_t indexes[UPROPS_INDEX_COUNT], UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return 0; }
|
||||
|
||||
newTrie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return 0;
|
||||
}
|
||||
|
||||
trieSize=utrie2_serialize(newTrie, trieBlock, (int32_t)sizeof(trieBlock), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr,
|
||||
"genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t pvRows;
|
||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||
// TODO: remove
|
||||
#if 0
|
||||
for(int32_t c=0; c<=0x10ffff; ++c) {
|
||||
uint16_t ri=utrie2_get32(newTrie, c);
|
||||
uint32_t v2=pvArray[ri+2];
|
||||
int32_t dt=v2&UPROPS_DT_MASK;
|
||||
if(dt!=0) {
|
||||
printf("%04x %d\n", c, dt);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* round up scriptExtensions to multiple of 4 bytes */
|
||||
if(scriptExtensions->length()&1) {
|
||||
scriptExtensions->append((UChar)0);
|
||||
}
|
||||
|
||||
/* set indexes */
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+trieSize/4;
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
|
||||
indexes[UPROPS_RESERVED_INDEX_7]=
|
||||
indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]+scriptExtensions->length()/2;
|
||||
indexes[UPROPS_RESERVED_INDEX_8]=indexes[UPROPS_RESERVED_INDEX_7];
|
||||
indexes[UPROPS_DATA_TOP_INDEX]=indexes[UPROPS_RESERVED_INDEX_8];
|
||||
|
||||
indexes[UPROPS_MAX_VALUES_INDEX]=
|
||||
(((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
|
||||
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
||||
(((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
|
||||
indexes[UPROPS_MAX_VALUES_2_INDEX]=
|
||||
(((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
|
||||
(((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
|
||||
(((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
|
||||
(((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
|
||||
((int32_t)U_DT_COUNT-1);
|
||||
|
||||
if(beVerbose) {
|
||||
printf("size in bytes of additional props trie:%5u\n", (int)trieSize);
|
||||
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
||||
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
|
||||
printf("number of 16-bit scriptExtensions: %5u\n", (int)scriptExtensions->length());
|
||||
}
|
||||
|
||||
return 4*(indexes[UPROPS_DATA_TOP_INDEX]-indexes[UPROPS_ADDITIONAL_TRIE_INDEX]);
|
||||
}
|
||||
|
||||
void
|
||||
props2AppendToCSourceFile(FILE *f, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
int32_t pvRows;
|
||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||
|
||||
usrc_writeUTrie2Arrays(f,
|
||||
"static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
|
||||
newTrie,
|
||||
"\n};\n\n");
|
||||
usrc_writeUTrie2Struct(f,
|
||||
"static const UTrie2 propsVectorsTrie={\n",
|
||||
newTrie, "propsVectorsTrie_index", NULL,
|
||||
"};\n\n");
|
||||
|
||||
usrc_writeArray(f,
|
||||
"static const uint32_t propsVectors[%ld]={\n",
|
||||
pvArray, 32, pvCount,
|
||||
"};\n\n");
|
||||
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
|
||||
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)UPROPS_VECTOR_WORDS);
|
||||
|
||||
usrc_writeArray(f,
|
||||
"static const uint16_t scriptExtensions[%ld]={\n",
|
||||
scriptExtensions->getBuffer(), 16, scriptExtensions->length(),
|
||||
"};\n\n");
|
||||
}
|
||||
|
||||
void
|
||||
props2AppendToBinaryFile(UNewDataMemory *pData, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
int32_t pvRows;
|
||||
const uint32_t *pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||
|
||||
udata_writeBlock(pData, trieBlock, trieSize);
|
||||
udata_writeBlock(pData, pvArray, pvCount*4);
|
||||
udata_writeBlock(pData, scriptExtensions->getBuffer(), scriptExtensions->length()*2);
|
||||
}
|
||||
|
||||
PropsWriter *
|
||||
createProps2Writer(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
PropsWriter *pw=new Props2Writer();
|
||||
PropsWriter *pw=new Props2Writer(errorCode);
|
||||
if(pw==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user