ICU-8972 move genpname/genpname.cpp to genprops/pnameswriter.cpp; use on-the-fly property names data for ppucd parsing: avoids having to rebuild ICU & tools before generating properties data
X-SVN-Rev: 31153
This commit is contained in:
parent
7a22393787
commit
0b9ba091db
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2010, International Business Machines
|
||||
# Copyright (C) 2010-2011, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# created on: 2010jun03
|
||||
@ -21,7 +21,6 @@ add_subdirectory(genbidi)
|
||||
add_subdirectory(gencase)
|
||||
add_subdirectory(gennames)
|
||||
add_subdirectory(gennorm)
|
||||
add_subdirectory(genpname)
|
||||
add_subdirectory(genprops)
|
||||
add_subdirectory(genuca)
|
||||
add_subdirectory(genuts46)
|
||||
|
@ -5,5 +5,5 @@
|
||||
# created by: Markus W. Scherer
|
||||
# edited on: 2010jul20
|
||||
# edited by: Stuart G. Gill
|
||||
add_executable(genprops genprops.cpp corepropswriter.cpp)
|
||||
add_executable(genprops genprops.cpp corepropswriter.cpp pnameswriter.cpp)
|
||||
target_link_libraries(genprops icuuc icutu)
|
||||
|
@ -707,6 +707,7 @@ CorePropsWriter::finalizeData(UErrorCode &errorCode) {
|
||||
((int32_t)U_DT_COUNT-1);
|
||||
|
||||
if(beVerbose) {
|
||||
puts("* uprops.icu stats *");
|
||||
printf("trie size in bytes: %5u\n", (int)trieSize);
|
||||
printf("size in bytes of additional props trie:%5u\n", (int)props2TrieSize);
|
||||
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
||||
|
@ -35,6 +35,7 @@ U_NAMESPACE_USE
|
||||
|
||||
UBool beVerbose=FALSE;
|
||||
|
||||
PropsWriter::PropsWriter() {}
|
||||
PropsWriter::~PropsWriter() {}
|
||||
void PropsWriter::setUnicodeVersion(const UVersionInfo) {}
|
||||
void PropsWriter::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {}
|
||||
@ -94,6 +95,7 @@ main(int argc, char* argv[]) {
|
||||
|
||||
/* initialize */
|
||||
IcuToolErrorCode errorCode("genprops");
|
||||
LocalPointer<PNamesWriter> pnamesWriter(createPNamesWriter(errorCode));
|
||||
LocalPointer<PropsWriter> corePropsWriter(createCorePropsWriter(errorCode));
|
||||
if(errorCode.isFailure()) {
|
||||
fprintf(stderr, "genprops: unable to create PropsWriters - %s\n", errorCode.errorName());
|
||||
@ -118,6 +120,16 @@ main(int argc, char* argv[]) {
|
||||
ppucdPath.data(), errorCode.errorName());
|
||||
return errorCode.reset();
|
||||
}
|
||||
|
||||
// The PNamesWriter uses preparsed pnames_data.h.
|
||||
pnamesWriter->finalizeData(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops: PNamesWriter::finalizeData() failed - %s\n",
|
||||
errorCode.errorName());
|
||||
return errorCode.reset();
|
||||
}
|
||||
ppucd.setPropertyNames(pnamesWriter->getPropertyNames());
|
||||
|
||||
PreparsedUCD::LineType lineType;
|
||||
UnicodeSet newValues;
|
||||
while((lineType=ppucd.readLine(errorCode))!=PreparsedUCD::NO_LINE) {
|
||||
@ -152,6 +164,8 @@ main(int argc, char* argv[]) {
|
||||
|
||||
UBool withCopyright=options[COPYRIGHT].doesOccur;
|
||||
|
||||
pnamesWriter->writeCSourceFile(sourceCommon.data(), errorCode);
|
||||
pnamesWriter->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
|
||||
corePropsWriter->writeCSourceFile(sourceCommon.data(), errorCode);
|
||||
corePropsWriter->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
class PropsWriter {
|
||||
public:
|
||||
PropsWriter();
|
||||
virtual ~PropsWriter();
|
||||
virtual void setUnicodeVersion(const UVersionInfo version);
|
||||
virtual void setProps(const icu::UniProps &props, const icu::UnicodeSet &newValues, UErrorCode &errorCode);
|
||||
@ -33,7 +34,13 @@ public:
|
||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
// A PropsWriter that can additionally hand out a PropertyNames lookup object.
// Abstract: getPropertyNames() must be provided by a concrete subclass.
class PNamesWriter : public PropsWriter {
|
||||
public:
|
||||
// Returns a PropertyNames object for resolving property and value names.
virtual const PropertyNames *getPropertyNames() = 0;
|
||||
};
|
||||
|
||||
PropsWriter *createCorePropsWriter(UErrorCode &errorCode);
|
||||
PNamesWriter *createPNamesWriter(UErrorCode &errorCode);
|
||||
|
||||
/* global flags */
|
||||
U_CFUNC UBool beVerbose;
|
||||
|
@ -6,21 +6,23 @@
|
||||
* Date Name Description
|
||||
* 10/11/02 aliu Creation.
|
||||
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
|
||||
* 2011dec18 Markus Scherer Moved genpname/genpname.cpp to genprops/pnameswriter.cpp.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/bytestriebuilder.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uclean.h"
|
||||
#include "cmemory.h"
|
||||
#include "charstr.h"
|
||||
#include "cstring.h"
|
||||
#include "denseranges.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "genprops.h"
|
||||
#include "propname.h"
|
||||
#include "toolutil.h"
|
||||
#include "uinvchar.h"
|
||||
#include "unewdata.h"
|
||||
#include "uvectr32.h"
|
||||
#include "writesrc.h"
|
||||
|
||||
@ -132,7 +134,7 @@ Property::Property(int32_t _enumValue,
|
||||
}
|
||||
|
||||
// *** Include the data header ***
|
||||
#include "data.h"
|
||||
#include "pnames_data.h"
|
||||
|
||||
/* return a list of unique names, not including "", for this property
|
||||
* @param stringIndices array of at least MAX_NAMES_PER_GROUP
|
||||
@ -174,13 +176,32 @@ int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
|
||||
// END DATA
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
class Builder {
|
||||
class PNamesWriterImpl;
|
||||
|
||||
class PNamesPropertyNames : public PropertyNames {
|
||||
public:
|
||||
Builder(UErrorCode &errorCode) : valueMaps(errorCode), btb(errorCode), maxNameLength(0) {}
|
||||
PNamesPropertyNames(const PNamesWriterImpl &pnwi)
|
||||
: impl(pnwi), valueMaps(NULL), bytesTries(NULL) {}
|
||||
void init();
|
||||
virtual int32_t getPropertyEnum(const char *name) const;
|
||||
virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
|
||||
private:
|
||||
int32_t findProperty(int32_t property) const;
|
||||
UBool containsName(BytesTrie &trie, const char *name) const;
|
||||
int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) const;
|
||||
|
||||
void build() {
|
||||
IcuToolErrorCode errorCode("genpname Builder::build()");
|
||||
const PNamesWriterImpl &impl;
|
||||
const int32_t *valueMaps;
|
||||
const uint8_t *bytesTries;
|
||||
};
|
||||
|
||||
class PNamesWriterImpl : public PNamesWriter {
|
||||
public:
|
||||
PNamesWriterImpl(UErrorCode &errorCode)
|
||||
: valueMaps(errorCode), btb(errorCode), maxNameLength(0),
|
||||
pnames(*this) {}
|
||||
|
||||
virtual void finalizeData(UErrorCode &errorCode) {
|
||||
// Build main property aliases value map at value map offset 0,
|
||||
// so that we need not store another offset for it.
|
||||
UVector32 propEnums(errorCode);
|
||||
@ -265,8 +286,19 @@ public:
|
||||
for(i=PropNameData::IX_RESERVED7; i<PropNameData::IX_COUNT; ++i) {
|
||||
indexes[i]=0;
|
||||
}
|
||||
|
||||
if(beVerbose) {
|
||||
puts("* pnames.icu stats *");
|
||||
printf("length of all value maps: %6ld\n", (long)valueMaps.size());
|
||||
printf("length of all BytesTries: %6ld\n", (long)bytesTries.length());
|
||||
printf("length of all name groups: %6ld\n", (long)nameGroups.length());
|
||||
printf("length of pnames.icu data: %6ld\n", (long)indexes[PropNameData::IX_TOTAL_SIZE]);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||
|
||||
int32_t writeNameGroup(const Alias &alias, UErrorCode &errorCode) {
|
||||
int32_t nameOffset=nameGroups.length();
|
||||
// Count how many aliases this group has.
|
||||
@ -403,12 +435,18 @@ public:
|
||||
return bytesTrieOffset;
|
||||
}
|
||||
|
||||
// Returns the embedded PNamesPropertyNames object.
// init() is called on every request so that the lookup object picks up
// the writer's current valueMaps and bytesTries buffers before it is used.
virtual const PropertyNames *getPropertyNames() {
|
||||
pnames.init();
|
||||
return &pnames;
|
||||
}
|
||||
|
||||
int32_t indexes[PropNameData::IX_COUNT];
|
||||
UVector32 valueMaps;
|
||||
BytesTrieBuilder btb;
|
||||
CharString bytesTries;
|
||||
CharString nameGroups;
|
||||
int32_t maxNameLength;
|
||||
PNamesPropertyNames pnames;
|
||||
};
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
@ -426,29 +464,38 @@ static const UDataInfo dataInfo = {
|
||||
{ VERSION_0, VERSION_1, VERSION_2, VERSION_3 } /* Unicode version */
|
||||
};
|
||||
|
||||
static void writeDataFile(const char *destdir, const Builder& builder, UBool useCopyright) {
|
||||
IcuToolErrorCode errorCode("genpname writeDataFile()");
|
||||
UNewDataMemory *pdata=udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
|
||||
useCopyright ? U_COPYRIGHT_STRING : 0, errorCode);
|
||||
errorCode.assertSuccess();
|
||||
void
|
||||
PNamesWriterImpl::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
UNewDataMemory *pdata=udata_create(path, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
|
||||
withCopyright ? U_COPYRIGHT_STRING : 0, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops: udata_create(%s, pnames.icu) failed - %s\n",
|
||||
path, u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
udata_writeBlock(pdata, builder.indexes, PropNameData::IX_COUNT*4);
|
||||
udata_writeBlock(pdata, builder.valueMaps.getBuffer(), builder.valueMaps.size()*4);
|
||||
udata_writeBlock(pdata, builder.bytesTries.data(), builder.bytesTries.length());
|
||||
udata_writeBlock(pdata, builder.nameGroups.data(), builder.nameGroups.length());
|
||||
udata_writeBlock(pdata, indexes, PropNameData::IX_COUNT*4);
|
||||
udata_writeBlock(pdata, valueMaps.getBuffer(), valueMaps.size()*4);
|
||||
udata_writeBlock(pdata, bytesTries.data(), bytesTries.length());
|
||||
udata_writeBlock(pdata, nameGroups.data(), nameGroups.length());
|
||||
|
||||
int32_t dataLength=(int32_t)udata_finish(pdata, errorCode);
|
||||
if(dataLength!=builder.indexes[PropNameData::IX_TOTAL_SIZE]) {
|
||||
int32_t dataLength=(int32_t)udata_finish(pdata, &errorCode);
|
||||
if(dataLength!=indexes[PropNameData::IX_TOTAL_SIZE]) {
|
||||
fprintf(stderr,
|
||||
"udata_finish(pnames.icu) reports %ld bytes written but should be %ld\n",
|
||||
(long)dataLength, (long)builder.indexes[PropNameData::IX_TOTAL_SIZE]);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
(long)dataLength, (long)indexes[PropNameData::IX_TOTAL_SIZE]);
|
||||
errorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
static void writeCSourceFile(const char *destdir, const Builder& builder) {
|
||||
FILE *f=usrc_create(destdir, "propname_data.h");
|
||||
void
|
||||
PNamesWriterImpl::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
FILE *f=usrc_createFromGenerator(path, "propname_data.h",
|
||||
"icu/tools/src/unicode/c/genprops/pnameswriter.cpp");
|
||||
if(f==NULL) {
|
||||
errorCode=U_FILE_ACCESS_ERROR;
|
||||
return; // usrc_create() reported an error.
|
||||
}
|
||||
|
||||
@ -459,17 +506,17 @@ static void writeCSourceFile(const char *destdir, const Builder& builder) {
|
||||
fputs("U_NAMESPACE_BEGIN\n\n", f);
|
||||
|
||||
usrc_writeArray(f, "const int32_t PropNameData::indexes[%ld]={",
|
||||
builder.indexes, 32, PropNameData::IX_COUNT,
|
||||
indexes, 32, PropNameData::IX_COUNT,
|
||||
"};\n\n");
|
||||
usrc_writeArray(f, "const int32_t PropNameData::valueMaps[%ld]={\n",
|
||||
builder.valueMaps.getBuffer(), 32, builder.valueMaps.size(),
|
||||
valueMaps.getBuffer(), 32, valueMaps.size(),
|
||||
"\n};\n\n");
|
||||
usrc_writeArray(f, "const uint8_t PropNameData::bytesTries[%ld]={\n",
|
||||
builder.bytesTries.data(), 8, builder.bytesTries.length(),
|
||||
bytesTries.data(), 8, bytesTries.length(),
|
||||
"\n};\n\n");
|
||||
usrc_writeArrayOfMostlyInvChars(
|
||||
f, "const char PropNameData::nameGroups[%ld]={\n",
|
||||
builder.nameGroups.data(), builder.nameGroups.length(),
|
||||
nameGroups.data(), nameGroups.length(),
|
||||
"\n};\n\n");
|
||||
|
||||
fputs("U_NAMESPACE_END\n", f);
|
||||
@ -477,68 +524,87 @@ static void writeCSourceFile(const char *destdir, const Builder& builder) {
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
enum {
|
||||
HELP_H,
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
CSOURCE
|
||||
};
|
||||
|
||||
/* Keep these values in sync with the above enums */
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
|
||||
};
|
||||
|
||||
extern int main(int argc, char *argv[]) {
|
||||
U_MAIN_INIT_ARGS(argc, argv);
|
||||
|
||||
/* preset then read command line options */
|
||||
options[DESTDIR].value=u_getDataDirectory();
|
||||
argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
|
||||
PNamesWriter *
|
||||
createPNamesWriter(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
PNamesWriter *pw=new PNamesWriterImpl(errorCode);
|
||||
if(pw==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if(argc!=1 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-options]\n"
|
||||
"\tCreates " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
"Options:\n"
|
||||
"\t-h or -? or --help this usage text\n"
|
||||
"\t-v or --verbose turn on verbose output\n"
|
||||
"\t-c or --copyright include a copyright notice\n"
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-C or --csource generate a .h source file rather than the .icu binary\n");
|
||||
return argc!=1 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
IcuToolErrorCode errorCode("genpname main() Builder()");
|
||||
Builder builder(errorCode);
|
||||
errorCode.assertSuccess();
|
||||
builder.build();
|
||||
if(options[VERBOSE].doesOccur) {
|
||||
printf("length of all value maps: %6ld\n", (long)builder.valueMaps.size());
|
||||
printf("length of all BytesTries: %6ld\n", (long)builder.bytesTries.length());
|
||||
printf("length of all name groups: %6ld\n", (long)builder.nameGroups.length());
|
||||
printf("length of pnames.icu data: %6ld\n", (long)builder.indexes[PropNameData::IX_TOTAL_SIZE]);
|
||||
}
|
||||
|
||||
if(options[CSOURCE].doesOccur) {
|
||||
writeCSourceFile(options[DESTDIR].value, builder);
|
||||
} else {
|
||||
writeDataFile(options[DESTDIR].value, builder, options[COPYRIGHT].doesOccur);
|
||||
}
|
||||
|
||||
return 0; // success
|
||||
return pw;
|
||||
}
|
||||
|
||||
// Note: The following is a partial copy of runtime propname.cpp code.
|
||||
// Consider changing that into a semi-public API to avoid duplication.
|
||||
|
||||
// Points this lookup object at the buffers currently held by the
// PNamesWriterImpl: the serialized BytesTries bytes and the int32_t value maps.
// The lookup functions below read only through these two cached pointers.
void PNamesPropertyNames::init() {
    bytesTries=reinterpret_cast<const uint8_t *>(impl.bytesTries.data());
    valueMaps=impl.valueMaps.getBuffer();
}
|
||||
|
||||
// Locates the entry for a property enum in the main value map.
//
// As read here, valueMaps[0] holds the number of ranges. Each range consists
// of its start and limit followed by (limit-start) two-word entries.
//
// @param property the property enum value to look up
// @return the valueMaps index of the property's two-word entry,
//         or 0 if the property is not covered by any range
int32_t PNamesPropertyNames::findProperty(int32_t property) const {
    int32_t rangesLeft=valueMaps[0];
    int32_t index=1;  // first word after the range count
    while(rangesLeft>0) {
        const int32_t rangeStart=valueMaps[index];
        const int32_t rangeLimit=valueMaps[index+1];
        index+=2;  // now at the first entry of this range
        if(property<rangeStart) {
            // Below this range: stop searching.
            return 0;
        }
        if(property<rangeLimit) {
            return index+(property-rangeStart)*2;
        }
        // Jump over every two-word entry that belongs to this range.
        index+=(rangeLimit-rangeStart)*2;
        --rangesLeft;
    }
    return 0;
}
|
||||
|
||||
// Walks the trie with a loosely matched name: each character is lowercased
// via uprv_invCharToLowercaseAscii(), and '-', '_', space and the characters
// 0x09..0x0d are skipped.
//
// @param trie the BytesTrie to traverse; it is advanced by this call, so the
//             caller can read the matched value from it afterwards
// @param name the NUL-terminated name to look up; NULL yields FALSE
// @return TRUE if the trie has a value after consuming the whole name
UBool PNamesPropertyNames::containsName(BytesTrie &trie, const char *name) const {
    if(name==NULL) {
        return FALSE;
    }
    UStringTrieResult state=USTRINGTRIE_NO_VALUE;
    for(const char *p=name; *p!=0; ++p) {
        const char lower=uprv_invCharToLowercaseAscii(*p);
        // Delimiters '-', '_', and ASCII White_Space do not take part in matching.
        const UBool isDelimiter=
            lower==0x2d || lower==0x5f || lower==0x20 || (0x09<=lower && lower<=0x0d);
        if(isDelimiter) {
            continue;
        }
        // The trie cannot continue but the name has another significant character.
        if(!USTRINGTRIE_HAS_NEXT(state)) {
            return FALSE;
        }
        state=trie.next((uint8_t)lower);
    }
    return USTRINGTRIE_HAS_VALUE(state);
}
|
||||
|
||||
// Looks up an alias in the BytesTrie that starts at the given offset within
// the bytesTries buffer.
//
// @param bytesTrieOffset offset of the trie inside bytesTries
// @param alias the name to look up, matched loosely by containsName()
// @return the value stored in the trie for the alias,
//         or UCHAR_INVALID_CODE if the alias is not found
int32_t PNamesPropertyNames::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) const {
    BytesTrie trie(bytesTries+bytesTrieOffset);
    if(!containsName(trie, alias)) {
        return UCHAR_INVALID_CODE;
    }
    // containsName() left the trie positioned on the matching value.
    return trie.getValue();
}
|
||||
|
||||
// Resolves a property alias by searching the BytesTrie at offset 0.
//
// @param alias the property name to look up
// @return the result of getPropertyOrValueEnum() for trie offset 0,
//         i.e. UCHAR_INVALID_CODE when the alias is not found
int32_t
PNamesPropertyNames::getPropertyEnum(const char *alias) const {
    const int32_t propertyTrieOffset=0;
    return getPropertyOrValueEnum(propertyTrieOffset, alias);
}
|
||||
|
||||
// Resolves a value alias for the given property.
//
// @param property the property enum whose values are searched
// @param alias the value name to look up
// @return the value found for the alias, or UCHAR_INVALID_CODE if the
//         property is unknown, has no named values, or the alias is not found
int32_t
PNamesPropertyNames::getPropertyValueEnum(int32_t property, const char *alias) const {
    const int32_t propertyEntry=findProperty(property);
    if(propertyEntry==0) {
        return UCHAR_INVALID_CODE;  // Not a known property.
    }
    // The second word of the property's entry is the offset of its value map.
    const int32_t valueMapOffset=valueMaps[propertyEntry+1];
    if(valueMapOffset==0) {
        return UCHAR_INVALID_CODE;  // The property does not have named values.
    }
    // The first word of the value map is the offset of its BytesTrie.
    return getPropertyOrValueEnum(valueMaps[valueMapOffset], alias);
}
|
Loading…
Reference in New Issue
Block a user