2002-10-30 17:58:20 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2011-01-05 21:06:55 +00:00
|
|
|
* Copyright (C) 2002-2011, International Business Machines
|
2002-10-30 17:58:20 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 10/11/02 aliu Creation.
|
2010-12-31 18:06:45 +00:00
|
|
|
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
|
2002-10-30 17:58:20 +00:00
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2011-03-03 22:02:58 +00:00
|
|
|
#include "unicode/bytestriebuilder.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
#include "unicode/putil.h"
|
2003-08-14 18:08:42 +00:00
|
|
|
#include "unicode/uclean.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
#include "cmemory.h"
|
2010-12-31 18:06:45 +00:00
|
|
|
#include "charstr.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
#include "cstring.h"
|
2010-12-31 18:06:45 +00:00
|
|
|
#include "denseranges.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
#include "unewdata.h"
|
|
|
|
#include "uoptions.h"
|
|
|
|
#include "propname.h"
|
2010-12-31 18:06:45 +00:00
|
|
|
#include "toolutil.h"
|
|
|
|
#include "uvectr32.h"
|
|
|
|
#include "writesrc.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
|
2004-11-12 00:26:54 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
|
2011-01-05 21:06:55 +00:00
|
|
|
// We test for ASCII delimiters and White_Space, and build ASCII string BytesTries.
|
2010-12-31 18:06:45 +00:00
|
|
|
#if U_CHARSET_FAMILY!=U_ASCII_FAMILY
|
|
|
|
# error This builder requires U_CHARSET_FAMILY==U_ASCII_FAMILY.
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
2006-09-04 16:28:24 +00:00
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
U_NAMESPACE_USE
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// BEGIN DATA
|
|
|
|
//
|
|
|
|
// This is the raw data to be output. We define the data structure,
|
|
|
|
// then include a machine-generated header that contains the actual
|
|
|
|
// data.
|
|
|
|
|
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#include "unicode/uscript.h"
|
2004-04-07 00:28:39 +00:00
|
|
|
#include "unicode/unorm.h"
|
2010-01-26 00:55:35 +00:00
|
|
|
#include "unicode/unorm2.h"
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
class AliasName {
|
|
|
|
public:
|
|
|
|
const char* str;
|
|
|
|
int32_t index;
|
2010-12-31 18:06:45 +00:00
|
|
|
char normalized[64];
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
AliasName(const char* str, int32_t index);
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
int compare(const AliasName& other) const {
|
|
|
|
return uprv_strcmp(normalized, other.normalized);
|
|
|
|
}
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
UBool operator==(const AliasName& other) const {
|
|
|
|
return compare(other) == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
UBool operator!=(const AliasName& other) const {
|
|
|
|
return compare(other) != 0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Stores the alias string and its index, and builds the normalized form:
 * the delimiters '-', '_' and ASCII White_Space are dropped, and every other
 * character is passed through uprv_tolower().
 *
 * If the normalized form (including its terminating NUL) does not fit into
 * normalized[], an error is printed and the tool exits with
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * @param _str   the alias; the pointer is stored as-is
 * @param _index the index stored with the alias
 */
AliasName::AliasName(const char* _str,
                     int32_t _index) :
    str(_str),
    index(_index)
{
    // Build the normalized form of the alias.
    // Every kept character is counted, but only those that fit are stored,
    // so that a too-long alias is reported below instead of writing past
    // the end of normalized[].
    const int32_t capacity=LENGTHOF(normalized);
    int32_t length=0;
    for(const char *s=str; *s!=0; ++s) {
        char c=*s;
        // Ignore delimiters '-', '_', and ASCII White_Space.
        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
            continue;
        }
        if(length<capacity-1) {  // Leave room for the terminating NUL.
            normalized[length]=uprv_tolower(c);
        }
        ++length;
    }
    if(length>=capacity) {
        fprintf(stderr,
                "Error: Property (value) alias '%s' results in "
                "too-long normalized string (length %d)\n",
                str, (int)length);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    normalized[length]=0;
}
|
|
|
|
|
|
|
|
/**
 * An enum value paired with the start of its group of names in NAME_GROUP.
 */
class Alias {
public:
    int32_t enumValue;       // Numeric value of the property or property value.
    int32_t nameGroupIndex;  // Index into NAME_GROUP where this alias's names begin.

    /** Stores both values unchanged. */
    Alias(int32_t anEnumValue, int32_t aNameGroupIndex) :
        enumValue(anEnumValue),
        nameGroupIndex(aNameGroupIndex)
    {}

    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
|
|
|
|
|
|
|
|
class Property : public Alias {
|
|
|
|
public:
|
|
|
|
int32_t valueCount;
|
|
|
|
const Alias* valueList;
|
|
|
|
|
|
|
|
Property(int32_t enumValue,
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t nameGroupIndex,
|
|
|
|
int32_t valueCount,
|
|
|
|
const Alias* valueList);
|
2002-10-30 17:58:20 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
Property::Property(int32_t _enumValue,
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t _nameGroupIndex,
|
|
|
|
int32_t _valueCount,
|
|
|
|
const Alias* _valueList) :
|
2002-10-30 17:58:20 +00:00
|
|
|
Alias(_enumValue, _nameGroupIndex),
|
|
|
|
valueCount(_valueCount),
|
|
|
|
valueList(_valueList)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// *** Include the data header ***
|
|
|
|
#include "data.h"
|
|
|
|
|
|
|
|
/* return a list of unique names, not including "", for this property
|
|
|
|
* @param stringIndices array of at least MAX_NAMES_PER_GROUP
|
|
|
|
* elements, will be filled with indices into STRING_TABLE
|
|
|
|
* @return number of indices, >= 1
|
|
|
|
*/
|
|
|
|
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
|
|
|
|
int32_t count = 0;
|
|
|
|
int32_t i = nameGroupIndex;
|
|
|
|
UBool done = FALSE;
|
|
|
|
while (!done) {
|
|
|
|
int32_t j = NAME_GROUP[i++];
|
|
|
|
if (j < 0) {
|
|
|
|
done = TRUE;
|
|
|
|
j = -j;
|
|
|
|
}
|
|
|
|
if (j == 0) continue; // omit "" entries
|
|
|
|
UBool dupe = FALSE;
|
|
|
|
for (int32_t k=0; k<count; ++k) {
|
|
|
|
if (stringIndices[k] == j) {
|
|
|
|
dupe = TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// also do a string check for things like "age|Age"
|
|
|
|
if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
|
|
|
|
//printf("Found dupe %s|%s\n",
|
|
|
|
// STRING_TABLE[stringIndices[k]].str,
|
|
|
|
// STRING_TABLE[j].str);
|
|
|
|
dupe = TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (dupe) continue; // omit duplicates
|
|
|
|
stringIndices[count++] = j;
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
// END DATA
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
class Builder {
|
2002-10-30 17:58:20 +00:00
|
|
|
public:
|
2011-01-27 21:42:08 +00:00
|
|
|
// Starts with maxNameLength 0; errorCode is forwarded to the valueMaps and btb constructors.
Builder(UErrorCode &errorCode)
        : valueMaps(errorCode),
          btb(errorCode),
          maxNameLength(0) {
}
|
2002-10-30 17:58:20 +00:00
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
void build() {
|
|
|
|
IcuToolErrorCode errorCode("genpname Builder::build()");
|
2002-10-30 17:58:20 +00:00
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
// Build main property aliases value map at value map offset 0,
|
|
|
|
// so that we need not store another offset for it.
|
|
|
|
UVector32 propEnums(errorCode);
|
|
|
|
int32_t propIndex;
|
|
|
|
for(propIndex=0; propIndex<PROPERTY_COUNT; ++propIndex) {
|
|
|
|
propEnums.sortedInsert(PROPERTY[propIndex].enumValue, errorCode);
|
|
|
|
}
|
|
|
|
int32_t ranges[10][2];
|
|
|
|
int32_t numPropRanges=uprv_makeDenseRanges(propEnums.getBuffer(), PROPERTY_COUNT, 0x100,
|
|
|
|
ranges, LENGTHOF(ranges));
|
|
|
|
valueMaps.addElement(numPropRanges, errorCode);
|
|
|
|
int32_t i, j;
|
|
|
|
for(i=0; i<numPropRanges; ++i) {
|
|
|
|
valueMaps.addElement(ranges[i][0], errorCode);
|
|
|
|
valueMaps.addElement(ranges[i][1]+1, errorCode);
|
|
|
|
for(j=ranges[i][0]; j<=ranges[i][1]; ++j) {
|
|
|
|
// Reserve two slots per property for the name group offset and the value-map offset.
|
|
|
|
valueMaps.addElement(0, errorCode);
|
|
|
|
valueMaps.addElement(0, errorCode);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:06:55 +00:00
|
|
|
// Build the properties trie first, at BytesTrie offset 0,
|
2010-12-31 18:06:45 +00:00
|
|
|
// so that we need not store another offset for it.
|
2011-01-05 21:06:55 +00:00
|
|
|
buildAliasesBytesTrie(PROPERTY, PROPERTY_COUNT, errorCode);
|
2010-12-31 18:06:45 +00:00
|
|
|
|
|
|
|
// Build the name group for the first property, at nameGroups offset 0.
|
|
|
|
// Name groups for *value* aliases must not start at offset 0
|
|
|
|
// because that is a missing-value marker for sparse value ranges.
|
|
|
|
setPropertyInt(PROPERTY[0].enumValue, 0,
|
|
|
|
writeNameGroup(PROPERTY[0], errorCode));
|
|
|
|
|
|
|
|
// Build the known-repeated binary properties once.
|
|
|
|
int32_t binPropsValueMapOffset=valueMaps.size();
|
2011-01-05 21:06:55 +00:00
|
|
|
int32_t bytesTrieOffset=buildAliasesBytesTrie(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
|
|
|
valueMaps.addElement(bytesTrieOffset, errorCode);
|
2010-12-31 18:06:45 +00:00
|
|
|
buildValueMap(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
|
|
|
|
|
|
|
// Build the known-repeated canonical combining class properties once.
|
|
|
|
int32_t cccValueMapOffset=valueMaps.size();
|
2011-01-05 21:06:55 +00:00
|
|
|
bytesTrieOffset=buildAliasesBytesTrie(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
|
|
|
valueMaps.addElement(bytesTrieOffset, errorCode);
|
2010-12-31 18:06:45 +00:00
|
|
|
buildValueMap(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
|
|
|
|
|
|
|
// Build the rest of the data.
|
|
|
|
for(propIndex=0; propIndex<PROPERTY_COUNT; ++propIndex) {
|
|
|
|
if(propIndex>0) {
|
|
|
|
// writeNameGroup(PROPERTY[0], ...) already done
|
|
|
|
setPropertyInt(PROPERTY[propIndex].enumValue, 0,
|
|
|
|
writeNameGroup(PROPERTY[propIndex], errorCode));
|
|
|
|
}
|
|
|
|
int32_t valueCount=PROPERTY[propIndex].valueCount;
|
|
|
|
if(valueCount>0) {
|
|
|
|
int32_t valueMapOffset;
|
|
|
|
const Alias *valueList=PROPERTY[propIndex].valueList;
|
|
|
|
if(valueList==VALUES_binprop) {
|
|
|
|
valueMapOffset=binPropsValueMapOffset;
|
|
|
|
} else if(valueList==VALUES_ccc || valueList==VALUES_lccc || valueList==VALUES_tccc) {
|
|
|
|
valueMapOffset=cccValueMapOffset;
|
|
|
|
} else {
|
|
|
|
valueMapOffset=valueMaps.size();
|
2011-01-05 21:06:55 +00:00
|
|
|
bytesTrieOffset=buildAliasesBytesTrie(valueList, valueCount, errorCode);
|
|
|
|
valueMaps.addElement(bytesTrieOffset, errorCode);
|
2010-12-31 18:06:45 +00:00
|
|
|
buildValueMap(valueList, valueCount, errorCode);
|
|
|
|
}
|
|
|
|
setPropertyInt(PROPERTY[propIndex].enumValue, 1, valueMapOffset);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
// Write the indexes.
|
|
|
|
int32_t offset=(int32_t)sizeof(indexes);
|
|
|
|
indexes[PropNameData::IX_VALUE_MAPS_OFFSET]=offset;
|
|
|
|
offset+=valueMaps.size()*4;
|
|
|
|
indexes[PropNameData::IX_BYTE_TRIES_OFFSET]=offset;
|
2011-01-05 21:06:55 +00:00
|
|
|
offset+=bytesTries.length();
|
2010-12-31 18:06:45 +00:00
|
|
|
indexes[PropNameData::IX_NAME_GROUPS_OFFSET]=offset;
|
|
|
|
offset+=nameGroups.length();
|
|
|
|
for(i=PropNameData::IX_RESERVED3_OFFSET; i<=PropNameData::IX_TOTAL_SIZE; ++i) {
|
|
|
|
indexes[i]=offset;
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2010-12-31 18:06:45 +00:00
|
|
|
indexes[PropNameData::IX_MAX_NAME_LENGTH]=maxNameLength;
|
|
|
|
for(i=PropNameData::IX_RESERVED7; i<PropNameData::IX_COUNT; ++i) {
|
|
|
|
indexes[i]=0;
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t writeNameGroup(const Alias &alias, UErrorCode &errorCode) {
|
|
|
|
int32_t nameOffset=nameGroups.length();
|
|
|
|
// Count how many aliases this group has.
|
|
|
|
int32_t i=alias.nameGroupIndex;
|
|
|
|
int32_t nameIndex;
|
|
|
|
do { nameIndex=NAME_GROUP[i++]; } while(nameIndex>=0);
|
|
|
|
int32_t count=i-alias.nameGroupIndex;
|
|
|
|
// The first byte tells us how many aliases there are.
|
|
|
|
// We use only values 0..0x1f in the first byte because when we write
|
|
|
|
// the name groups as an invariant-character string into a source file,
|
|
|
|
// those values (C0 control codes) are written as numbers rather than as characters.
|
|
|
|
if(count>=0x20) {
|
|
|
|
fprintf(stderr, "Error: Too many aliases in the group with index %d\n",
|
|
|
|
(int)alias.nameGroupIndex);
|
|
|
|
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2010-12-31 18:06:45 +00:00
|
|
|
nameGroups.append((char)count, errorCode);
|
|
|
|
// There is at least a short name (sometimes empty) and a long name. (count>=2)
|
|
|
|
// Note: Sometimes the short and long names are the same.
|
|
|
|
// In such a case, we could set a flag and omit the duplicate,
|
|
|
|
// but that would save only about 1.35% of total data size (Unicode 6.0/ICU 4.6)
|
|
|
|
// which is not worth the trouble.
|
|
|
|
i=alias.nameGroupIndex;
|
|
|
|
int32_t n;
|
|
|
|
do {
|
|
|
|
nameIndex=n=NAME_GROUP[i++];
|
|
|
|
if(nameIndex<0) {
|
|
|
|
nameIndex=-nameIndex;
|
|
|
|
}
|
|
|
|
const char *s=STRING_TABLE[nameIndex].str;
|
|
|
|
int32_t sLength=uprv_strlen(s)+1;
|
|
|
|
if(sLength>maxNameLength) {
|
|
|
|
maxNameLength=sLength;
|
|
|
|
}
|
|
|
|
nameGroups.append(s, sLength, errorCode); // including NUL
|
|
|
|
} while(n>=0);
|
|
|
|
return nameOffset;
|
|
|
|
}
|
|
|
|
|
|
|
|
void buildValueMap(const Alias aliases[], int32_t length, UErrorCode &errorCode) {
|
|
|
|
UVector32 sortedValues(errorCode);
|
|
|
|
UVector32 nameOffsets(errorCode); // Parallel to aliases[].
|
|
|
|
int32_t i;
|
|
|
|
for(i=0; i<length; ++i) {
|
|
|
|
sortedValues.sortedInsert(aliases[i].enumValue, errorCode);
|
|
|
|
nameOffsets.addElement(writeNameGroup(aliases[i], errorCode), errorCode);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t ranges[10][2];
|
|
|
|
int32_t numRanges=uprv_makeDenseRanges(sortedValues.getBuffer(), length, 0xe0,
|
|
|
|
ranges, LENGTHOF(ranges));
|
|
|
|
if(numRanges>0) {
|
|
|
|
valueMaps.addElement(numRanges, errorCode);
|
|
|
|
for(i=0; i<numRanges; ++i) {
|
|
|
|
valueMaps.addElement(ranges[i][0], errorCode);
|
|
|
|
valueMaps.addElement(ranges[i][1]+1, errorCode);
|
|
|
|
for(int32_t j=ranges[i][0]; j<=ranges[i][1]; ++j) {
|
|
|
|
// The range might not be completely dense, so j might not have an entry,
|
|
|
|
// in which case we write a nameOffset of 0.
|
|
|
|
// Real nameOffsets for property values are never 0.
|
|
|
|
// (The first name group is for the first property name.)
|
|
|
|
int32_t aliasIndex=aliasesIndexOf(aliases, length, j);
|
|
|
|
int32_t nameOffset= aliasIndex>=0 ? nameOffsets.elementAti(aliasIndex) : 0;
|
|
|
|
valueMaps.addElement(nameOffset, errorCode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// No dense ranges.
|
|
|
|
valueMaps.addElement(0x10+length, errorCode);
|
|
|
|
for(i=0; i<length; ++i) {
|
|
|
|
valueMaps.addElement(sortedValues.elementAti(i), errorCode);
|
|
|
|
}
|
|
|
|
for(i=0; i<length; ++i) {
|
|
|
|
valueMaps.addElement(
|
|
|
|
nameOffsets.elementAti(
|
|
|
|
aliasesIndexOf(aliases, length,
|
|
|
|
sortedValues.elementAti(i))), errorCode);
|
|
|
|
}
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
static int32_t aliasesIndexOf(const Alias aliases[], int32_t length, int32_t value) {
|
|
|
|
for(int32_t i=0;; ++i) {
|
|
|
|
if(aliases[i].enumValue==value) {
|
|
|
|
return i;
|
|
|
|
}
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2010-12-31 18:06:45 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void setPropertyInt(int32_t prop, int32_t subIndex, int32_t value) {
|
|
|
|
// Assume that prop is in the valueMaps.elementAti(0) ranges.
|
|
|
|
int32_t index=1;
|
|
|
|
for(;;) {
|
|
|
|
int32_t rangeStart=valueMaps.elementAti(index);
|
|
|
|
int32_t rangeLimit=valueMaps.elementAti(index+1);
|
|
|
|
index+=2;
|
|
|
|
if(rangeStart<=prop && prop<rangeLimit) {
|
|
|
|
valueMaps.setElementAt(value, index+2*(prop-rangeStart)+subIndex);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
index+=2*(rangeLimit-rangeStart);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:06:55 +00:00
|
|
|
void addAliasToBytesTrie(const Alias &alias, UErrorCode &errorCode) {
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t names[MAX_NAMES_PER_GROUP];
|
|
|
|
int32_t numNames=alias.getUniqueNames(names);
|
|
|
|
for(int32_t i=0; i<numNames; ++i) {
|
|
|
|
// printf("* adding %s: 0x%lx\n", STRING_TABLE[names[i]].normalized, (long)alias.enumValue);
|
|
|
|
btb.add(STRING_TABLE[names[i]].normalized, alias.enumValue, errorCode);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:06:55 +00:00
|
|
|
int32_t buildAliasesBytesTrie(const Alias aliases[], int32_t length, UErrorCode &errorCode) {
|
2010-12-31 18:06:45 +00:00
|
|
|
btb.clear();
|
|
|
|
for(int32_t i=0; i<length; ++i) {
|
2011-01-05 21:06:55 +00:00
|
|
|
addAliasToBytesTrie(aliases[i], errorCode);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2011-01-05 21:06:55 +00:00
|
|
|
int32_t bytesTrieOffset=bytesTries.length();
|
2011-01-27 21:42:08 +00:00
|
|
|
bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
|
2011-01-05 21:06:55 +00:00
|
|
|
return bytesTrieOffset;
|
2010-12-31 18:06:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Overload for Property. Property is-an Alias, but when we iterate through
|
|
|
|
// the array we need to increment by the right object size.
|
2011-01-05 21:06:55 +00:00
|
|
|
int32_t buildAliasesBytesTrie(const Property aliases[], int32_t length,
|
|
|
|
UErrorCode &errorCode) {
|
2010-12-31 18:06:45 +00:00
|
|
|
btb.clear();
|
|
|
|
for(int32_t i=0; i<length; ++i) {
|
2011-01-05 21:06:55 +00:00
|
|
|
addAliasToBytesTrie(aliases[i], errorCode);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2011-01-05 21:06:55 +00:00
|
|
|
int32_t bytesTrieOffset=bytesTries.length();
|
2011-01-27 21:42:08 +00:00
|
|
|
bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
|
2011-01-05 21:06:55 +00:00
|
|
|
return bytesTrieOffset;
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t indexes[PropNameData::IX_COUNT];
|
|
|
|
UVector32 valueMaps;
|
2011-01-05 21:06:55 +00:00
|
|
|
BytesTrieBuilder btb;
|
|
|
|
CharString bytesTries;
|
2010-12-31 18:06:45 +00:00
|
|
|
CharString nameGroups;
|
|
|
|
int32_t maxNameLength;
|
|
|
|
};
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
/* UDataInfo cf. udata.h */
|
2010-12-31 18:06:45 +00:00
|
|
|
static const UDataInfo dataInfo = {
    sizeof(UDataInfo),  /* size */
    0,                  /* reservedWord */

    U_IS_BIG_ENDIAN,    /* isBigEndian */
    U_CHARSET_FAMILY,   /* charsetFamily */
    sizeof(UChar),      /* sizeofUChar */
    0,                  /* reservedByte */

    { PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3 },  /* dataFormat */
    { 2, 0, 0, 0 },                                          /* formatVersion */
    { VERSION_0, VERSION_1, VERSION_2, VERSION_3 }           /* Unicode version */
};
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
static void writeDataFile(const char *destdir, const Builder& builder, UBool useCopyright) {
|
|
|
|
IcuToolErrorCode errorCode("genpname writeDataFile()");
|
|
|
|
UNewDataMemory *pdata=udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
|
|
|
|
useCopyright ? U_COPYRIGHT_STRING : 0, errorCode);
|
|
|
|
errorCode.assertSuccess();
|
2002-10-30 17:58:20 +00:00
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
udata_writeBlock(pdata, builder.indexes, PropNameData::IX_COUNT*4);
|
|
|
|
udata_writeBlock(pdata, builder.valueMaps.getBuffer(), builder.valueMaps.size()*4);
|
2011-01-05 21:06:55 +00:00
|
|
|
udata_writeBlock(pdata, builder.bytesTries.data(), builder.bytesTries.length());
|
2010-12-31 18:06:45 +00:00
|
|
|
udata_writeBlock(pdata, builder.nameGroups.data(), builder.nameGroups.length());
|
2002-10-30 17:58:20 +00:00
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
int32_t dataLength=(int32_t)udata_finish(pdata, errorCode);
|
|
|
|
if(dataLength!=builder.indexes[PropNameData::IX_TOTAL_SIZE]) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"udata_finish(pnames.icu) reports %ld bytes written but should be %ld\n",
|
|
|
|
(long)dataLength, (long)builder.indexes[PropNameData::IX_TOTAL_SIZE]);
|
|
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void writeCSourceFile(const char *destdir, const Builder& builder) {
|
|
|
|
FILE *f=usrc_create(destdir, "propname_data.h");
|
|
|
|
if(f==NULL) {
|
|
|
|
return; // usrc_create() reported an error.
|
|
|
|
}
|
|
|
|
|
|
|
|
fputs("#ifndef INCLUDED_FROM_PROPNAME_CPP\n"
|
|
|
|
"# error This file must be #included from propname.cpp only.\n"
|
|
|
|
"#endif\n\n", f);
|
|
|
|
|
2010-12-31 23:53:55 +00:00
|
|
|
fputs("U_NAMESPACE_BEGIN\n\n", f);
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
usrc_writeArray(f, "const int32_t PropNameData::indexes[%ld]={",
|
|
|
|
builder.indexes, 32, PropNameData::IX_COUNT,
|
|
|
|
"};\n\n");
|
|
|
|
usrc_writeArray(f, "const int32_t PropNameData::valueMaps[%ld]={\n",
|
|
|
|
builder.valueMaps.getBuffer(), 32, builder.valueMaps.size(),
|
|
|
|
"\n};\n\n");
|
2011-01-05 21:06:55 +00:00
|
|
|
usrc_writeArray(f, "const uint8_t PropNameData::bytesTries[%ld]={\n",
|
|
|
|
builder.bytesTries.data(), 8, builder.bytesTries.length(),
|
2010-12-31 18:06:45 +00:00
|
|
|
"\n};\n\n");
|
|
|
|
usrc_writeArrayOfMostlyInvChars(
|
|
|
|
f, "const char PropNameData::nameGroups[%ld]={\n",
|
|
|
|
builder.nameGroups.data(), builder.nameGroups.length(),
|
2010-12-31 23:53:55 +00:00
|
|
|
"\n};\n\n");
|
|
|
|
|
|
|
|
fputs("U_NAMESPACE_END\n", f);
|
2010-12-31 18:06:45 +00:00
|
|
|
|
|
|
|
fclose(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
enum {
|
|
|
|
HELP_H,
|
|
|
|
HELP_QUESTION_MARK,
|
|
|
|
VERBOSE,
|
|
|
|
COPYRIGHT,
|
|
|
|
DESTDIR,
|
|
|
|
CSOURCE
|
2002-10-30 17:58:20 +00:00
|
|
|
};
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
/* Keep these values in sync with the above enums */
|
2002-10-30 17:58:20 +00:00
|
|
|
static UOption options[]={
|
|
|
|
UOPTION_HELP_H,
|
|
|
|
UOPTION_HELP_QUESTION_MARK,
|
2010-12-31 18:06:45 +00:00
|
|
|
UOPTION_VERBOSE,
|
2002-10-30 17:58:20 +00:00
|
|
|
UOPTION_COPYRIGHT,
|
|
|
|
UOPTION_DESTDIR,
|
2010-12-31 18:06:45 +00:00
|
|
|
UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
|
2002-10-30 17:58:20 +00:00
|
|
|
};
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
extern int main(int argc, char *argv[]) {
|
|
|
|
U_MAIN_INIT_ARGS(argc, argv);
|
2003-08-14 21:34:54 +00:00
|
|
|
|
2002-10-30 17:58:20 +00:00
|
|
|
/* preset then read command line options */
|
2010-12-31 18:06:45 +00:00
|
|
|
options[DESTDIR].value=u_getDataDirectory();
|
|
|
|
argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
|
2002-10-30 17:58:20 +00:00
|
|
|
|
|
|
|
/* error handling, printing usage message */
|
2010-12-31 18:06:45 +00:00
|
|
|
if(argc<0) {
|
|
|
|
fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
2010-12-31 18:06:45 +00:00
|
|
|
if(argc!=1 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Usage: %s [-options]\n"
|
|
|
|
"\tCreates " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
|
|
|
|
"\n",
|
|
|
|
argv[0]);
|
2002-10-30 17:58:20 +00:00
|
|
|
fprintf(stderr,
|
2010-12-31 18:06:45 +00:00
|
|
|
"Options:\n"
|
2002-10-30 17:58:20 +00:00
|
|
|
"\t-h or -? or --help this usage text\n"
|
|
|
|
"\t-v or --verbose turn on verbose output\n"
|
|
|
|
"\t-c or --copyright include a copyright notice\n"
|
|
|
|
"\t-d or --destdir destination directory, followed by the path\n"
|
2010-12-31 18:06:45 +00:00
|
|
|
"\t-C or --csource generate a .h source file rather than the .icu binary\n");
|
|
|
|
return argc!=1 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
IcuToolErrorCode errorCode("genpname main() Builder()");
|
|
|
|
Builder builder(errorCode);
|
|
|
|
errorCode.assertSuccess();
|
|
|
|
builder.build();
|
|
|
|
if(options[VERBOSE].doesOccur) {
|
|
|
|
printf("length of all value maps: %6ld\n", (long)builder.valueMaps.size());
|
2011-01-05 21:06:55 +00:00
|
|
|
printf("length of all BytesTries: %6ld\n", (long)builder.bytesTries.length());
|
2010-12-31 18:06:45 +00:00
|
|
|
printf("length of all name groups: %6ld\n", (long)builder.nameGroups.length());
|
|
|
|
printf("length of pnames.icu data: %6ld\n", (long)builder.indexes[PropNameData::IX_TOTAL_SIZE]);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
2010-12-31 18:06:45 +00:00
|
|
|
if(options[CSOURCE].doesOccur) {
|
|
|
|
writeCSourceFile(options[DESTDIR].value, builder);
|
|
|
|
} else {
|
|
|
|
writeDataFile(options[DESTDIR].value, builder, options[COPYRIGHT].doesOccur);
|
2002-10-30 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0; // success
|
|
|
|
}
|