scuffed-code/tools/unicode/c/genpname/genpname.cpp
2010-04-09 23:36:05 +00:00

1227 lines
37 KiB
C++

/*
**********************************************************************
* Copyright (C) 2002-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 10/11/02 aliu Creation.
**********************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uclean.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "uarrsort.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uprops.h"
#include "propname.h"
#include "uassert.h"
#include <stdio.h>
U_NAMESPACE_USE
// TODO: Clean up and comment this code.
//----------------------------------------------------------------------
// BEGIN DATA
//
// This is the raw data to be output. We define the data structure,
// then include a machine-generated header that contains the actual
// data.
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/unorm.h"
#include "unicode/unorm2.h"
/**
 * One entry of the name string table: the name text plus an integer index
 * stored with it. Ordering and equality are whatever
 * uprv_comparePropertyNames() reports for the two name strings.
 */
class AliasName {
public:
    const char* str;
    int32_t     index;

    AliasName(const char* str, int32_t index);

    /** Three-way comparison of this name with other's name. */
    int compare(const AliasName& other) const;

    /** Names are equal when compare() yields zero. */
    UBool operator==(const AliasName& other) const {
        return 0 == compare(other);
    }

    /** Names differ when compare() yields non-zero. */
    UBool operator!=(const AliasName& other) const {
        return 0 != compare(other);
    }
};

AliasName::AliasName(const char* aStr, int32_t anIndex)
    : str(aStr), index(anIndex) {}

int AliasName::compare(const AliasName& other) const {
    const char* const mine   = str;
    const char* const theirs = other.str;
    return uprv_comparePropertyNames(mine, theirs);
}
/**
 * Pairs an enum value with the position in NAME_GROUP where the list of
 * names for that value begins.
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    // Implemented further down, once data.h has supplied NAME_GROUP and
    // STRING_TABLE.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t theEnumValue, int32_t theNameGroupIndex)
    : enumValue(theEnumValue), nameGroupIndex(theNameGroupIndex) {}
/**
 * An Alias that additionally carries a list of value aliases
 * (valueList, valueCount entries). A valueCount of zero means the
 * property has no value list.
 */
class Property : public Alias {
public:
    int32_t      valueCount;
    const Alias* valueList;

    Property(int32_t enumValue,
             int32_t nameGroupIndex,
             int32_t valueCount,
             const Alias* valueList);
};

Property::Property(int32_t theEnumValue,
                   int32_t theNameGroupIndex,
                   int32_t theValueCount,
                   const Alias* theValueList)
    : Alias(theEnumValue, theNameGroupIndex),
      valueCount(theValueCount),
      valueList(theValueList) {}
// *** Include the data header ***
#include "data.h"
/* return a list of unique names, not including "", for this property
* @param stringIndices array of at least MAX_NAMES_PER_GROUP
* elements, will be filled with indices into STRING_TABLE
* @return number of indices, >= 1
*/
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
int32_t count = 0;
int32_t i = nameGroupIndex;
UBool done = FALSE;
while (!done) {
int32_t j = NAME_GROUP[i++];
if (j < 0) {
done = TRUE;
j = -j;
}
if (j == 0) continue; // omit "" entries
UBool dupe = FALSE;
for (int32_t k=0; k<count; ++k) {
if (stringIndices[k] == j) {
dupe = TRUE;
break;
}
// also do a string check for things like "age|Age"
if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
//printf("Found dupe %s|%s\n",
// STRING_TABLE[stringIndices[k]].str,
// STRING_TABLE[j].str);
dupe = TRUE;
break;
}
}
if (dupe) continue; // omit duplicates
stringIndices[count++] = j;
}
return count;
}
// END DATA
//----------------------------------------------------------------------
// Allocate (uninitialized) room for `count` objects of `type` via
// uprv_malloc(). The count argument and the whole expansion are
// parenthesized so that expressions such as MALLOC(T, a + b) size the
// buffer correctly and the result can be used inside larger expressions.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
/**
 * Report a fatal error on stderr and terminate the process with
 * exit status 1. Does not return.
 */
void die(const char* msg) {
    static const int kFailureStatus = 1;
    fprintf(stderr, "Error: %s\n", msg);
    exit(kFailureStatus);
}
//----------------------------------------------------------------------
/**
 * Abstract read-only sequence of Alias objects: indexed access plus a
 * length. Concrete subclasses wrap arrays of Alias or of Alias
 * subclasses.
 */
class AliasList {
public:
    virtual ~AliasList() {}

    /** Number of elements in the list. */
    virtual int32_t count() const = 0;

    /** Element at position i. */
    virtual const Alias& operator[](int32_t i) const = 0;
};
/**
* A single array.
*/
class AliasArrayList : public AliasList {
const Alias* a;
int32_t n;
public:
AliasArrayList(const Alias* _a, int32_t _n) {
a = _a;
n = _n;
}
virtual const Alias& operator[](int32_t i) const {
return a[i];
}
virtual int32_t count() const {
return n;
}
};
/**
* A single array.
*/
class PropertyArrayList : public AliasList {
const Property* a;
int32_t n;
public:
PropertyArrayList(const Property* _a, int32_t _n) {
a = _a;
n = _n;
}
virtual const Alias& operator[](int32_t i) const {
return a[i];
}
virtual int32_t count() const {
return n;
}
};
//----------------------------------------------------------------------
/**
 * One row of a name index: associates a name, identified by its index
 * into the string table, with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    /** @param a name index  @param b enum value */
    NameToEnumEntry(int32_t a, int32_t b)
        : nameIndex(a), enumValue(b) {}
};
// uprv_sortArray() comparator for NameToEnumEntry: orders entries by the
// names they refer to, using AliasName::compare() on the STRING_TABLE
// entries. The context argument is unused.
U_CFUNC int32_t
compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
    NameToEnumEntry* left  = (NameToEnumEntry*)e1;
    NameToEnumEntry* right = (NameToEnumEntry*)e2;
    const AliasName& leftName  = STRING_TABLE[left->nameIndex];
    const AliasName& rightName = STRING_TABLE[right->nameIndex];
    return leftName.compare(rightName);
}
//----------------------------------------------------------------------
/**
 * One row of an enum index: associates an enum value with a name group,
 * identified by its starting index.
 */
class EnumToNameGroupEntry {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    /** @param a enum value  @param b name group index */
    EnumToNameGroupEntry(int32_t a, int32_t b)
        : enumValue(a), nameGroupIndex(b) {}

    /**
     * Returns TRUE when the `count` entries beginning with this one have
     * consecutive enum values (this, this+1, this+2, ...).
     * WARNING: this object must be an element of an array holding at
     * least `count` entries from here on; the neighbours are read
     * directly through `this`.
     */
    UBool isContiguous(int32_t count) const {
        int32_t step = 1;
        while (step < count) {
            if (this[step].enumValue != (enumValue + step)) {
                return FALSE;
            }
            ++step;
        }
        return TRUE;
    }
};
// uprv_sortArray() comparator for EnumToNameGroupEntry: orders entries by
// enum value. Returns -1, 0 or +1. Explicit comparisons are used instead
// of subtracting the two values, because the difference of two int32_t
// values can overflow. The context argument is unused.
U_CFUNC int32_t
compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
    const int32_t left  = ((const EnumToNameGroupEntry*)e1)->enumValue;
    const int32_t right = ((const EnumToNameGroupEntry*)e2)->enumValue;
    if (left < right) {
        return -1;
    }
    return (left > right) ? 1 : 0;
}
//----------------------------------------------------------------------
/**
 * One row of the map from an enumerated property (enumValue) to the two
 * indices built for its values: enum => name group (enumToName) and
 * name => enum (nameToEnum), each with its element count.
 */
class EnumToValueEntry {
public:
    int32_t enumValue;
    EnumToNameGroupEntry* enumToName;
    int32_t enumToName_count;
    NameToEnumEntry* nameToEnum;
    int32_t nameToEnum_count;

    /**
     * Returns TRUE when the `count` entries beginning with this one have
     * consecutive enum values (this, this+1, this+2, ...).
     * WARNING: this object must be an element of an array holding at
     * least `count` entries from here on; the neighbours are read
     * directly through `this`.
     */
    UBool isContiguous(int32_t count) const {
        int32_t step = 1;
        while (step < count) {
            if (this[step].enumValue != (enumValue + step)) {
                return FALSE;
            }
            ++step;
        }
        return TRUE;
    }
};
// uprv_sortArray() comparator for EnumToValueEntry: orders entries by
// enum value. Returns -1, 0 or +1. Explicit comparisons are used instead
// of subtracting the two values, because the difference of two int32_t
// values can overflow. The context argument is unused.
U_CFUNC int32_t
compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
    const int32_t left  = ((const EnumToValueEntry*)e1)->enumValue;
    const int32_t right = ((const EnumToValueEntry*)e2)->enumValue;
    if (left < right) {
        return -1;
    }
    return (left > right) ? 1 : 0;
}
//----------------------------------------------------------------------
// BEGIN Builder
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: the argument is evaluated twice.
#define IS_VALID_OFFSET(x) (((x) >= 0) && ((x) <= MAX_OFFSET))
// Assembles the in-memory image that createData() returns.
//
// Usage, as driven by genpname::MMain(): buildTopLevelProperties(),
// buildValues(), buildStringPool(), then fixup(), then createData().
// The build*() calls store string-pool and name-group *indices* in the
// tables; fixup() assigns every section its byte offset and rewrites those
// indices into offsets.
//
// Naming convention: each section `foo` has members foo, foo_size and
// foo_offset. The COMPUTE_OFFSET and APPEND macros paste these suffixes
// onto the section name, so the member names must not be changed
// independently. The numbered comments (0:, 2:, ... 99:) give the order
// in which createData() emits the sections after the header.
class Builder {
// header:
PropertyAliases header;
// 0:
NonContiguousEnumToOffset* enumToName;
int32_t enumToName_size;
Offset enumToName_offset;
// 1: (deleted)
// 2:
NameToEnum* nameToEnum;
int32_t nameToEnum_size;
Offset nameToEnum_offset;
// 3:
NonContiguousEnumToOffset* enumToValue;
int32_t enumToValue_size;
Offset enumToValue_offset;
// 4:
ValueMap* valueMap;
int32_t valueMap_size;
int32_t valueMap_count;
Offset valueMap_offset;
// for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
// NULL and one is not. valueEnumToName_size[i] is the size of
// the non-NULL one. i=0..valueMap_count-1
// 5a:
EnumToOffset** valueEnumToName;
// 5b:
NonContiguousEnumToOffset** valueNCEnumToName;
int32_t* valueEnumToName_size;
Offset* valueEnumToName_offset;
// 6:
// arrays of valueMap_count pointers, sizes, & offsets
NameToEnum** valueNameToEnum;
int32_t* valueNameToEnum_size;
Offset* valueNameToEnum_offset;
// 98:
Offset* nameGroupPool;
int32_t nameGroupPool_count;
int32_t nameGroupPool_size;
Offset nameGroupPool_offset;
// 99:
char* stringPool;
int32_t stringPool_count;
int32_t stringPool_size;
Offset stringPool_offset;
Offset* stringPool_offsetArray; // relative to stringPool
int32_t total_size; // size of everything
int32_t debug; // debug level; computeOffsets() prints the layout when > 0
public:
Builder(int32_t debugLevel);
~Builder();
// Build sections 0 (enum => name group) and 2 (name => enum) for the
// top-level properties.
void buildTopLevelProperties(const NameToEnumEntry* propName,
int32_t propNameCount,
const EnumToNameGroupEntry* propEnum,
int32_t propEnumCount);
// Build sections 3, 4, 5 and 6 from the per-property value indices.
void buildValues(const EnumToValueEntry* e2v,
int32_t count);
// Build sections 98 (name group pool) and 99 (string pool).
void buildStringPool(const AliasName* propertyNames,
int32_t propertyNameCount,
const int32_t* nameGroupIndices,
int32_t nameGroupIndicesCount);
// Compute all section offsets and convert stored indices to offsets.
void fixup();
// Return a newly allocated copy of the complete image; its size in bytes
// is stored in length.
int8_t* createData(int32_t& length) const;
private:
// Table builders. Each returns a newly allocated, zero-filled table and
// stores its aligned byte size in `size`.
static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
int32_t count,
int32_t& size);
static NonContiguousEnumToOffset*
buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
int32_t count,
int32_t& size);
static NonContiguousEnumToOffset*
buildNCEnumToValue(const EnumToValueEntry* e2v,
int32_t count,
int32_t& size);
static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
int32_t count,
int32_t& size);
// Index => offset conversion and the individual fixup passes run by fixup().
Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
void fixupNameToEnum(NameToEnum* n);
void fixupEnumToNameGroup(EnumToOffset* e2ng);
void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
void computeOffsets();
void fixupStringPoolOffsets();
void fixupNameGroupPoolOffsets();
void fixupMiscellaneousOffsets();
// Round a up to a multiple of sizeof(int32_t).
static int32_t align(int32_t a);
// Zero-fill size bytes starting at p.
static void erase(void* p, int32_t size);
};
/**
 * Create an empty builder.
 *
 * Every member is given a defined starting value: pointers are null so
 * the destructor can free them unconditionally, and all sizes, counts and
 * offsets start at zero so that no indeterminate value is read if a
 * build step has not been run yet.
 *
 * @param debugLevel verbosity; values > 0 make computeOffsets() print
 *                   the section layout
 */
Builder::Builder(int32_t debugLevel) {
    debug = debugLevel;

    // header is plain data; start it out as all zero bytes
    erase(&header, sizeof(header));

    // 0:
    enumToName = 0;
    enumToName_size = 0;
    enumToName_offset = 0;
    // 2:
    nameToEnum = 0;
    nameToEnum_size = 0;
    nameToEnum_offset = 0;
    // 3:
    enumToValue = 0;
    enumToValue_size = 0;
    enumToValue_offset = 0;
    // 4:
    valueMap = 0;
    valueMap_size = 0;
    valueMap_count = 0;
    valueMap_offset = 0;
    // 5:
    valueEnumToName = 0;
    valueNCEnumToName = 0;
    valueEnumToName_size = 0;
    valueEnumToName_offset = 0;
    // 6:
    valueNameToEnum = 0;
    valueNameToEnum_size = 0;
    valueNameToEnum_offset = 0;
    // 98:
    nameGroupPool = 0;
    nameGroupPool_count = 0;
    nameGroupPool_size = 0;
    nameGroupPool_offset = 0;
    // 99:
    stringPool = 0;
    stringPool_count = 0;
    stringPool_size = 0;
    stringPool_offset = 0;
    stringPool_offsetArray = 0;

    total_size = 0;
}
/**
 * Release every table the builder allocated. The per-property tables
 * (one trio per valueMap_count entry) are freed before the arrays that
 * hold their pointers.
 */
Builder::~Builder() {
    // per-property tables
    for (int32_t idx = 0; idx < valueMap_count; ++idx) {
        uprv_free(valueEnumToName[idx]);
        uprv_free(valueNCEnumToName[idx]);
        uprv_free(valueNameToEnum[idx]);
    }

    // arrays describing the per-property tables
    uprv_free(valueEnumToName);
    uprv_free(valueNCEnumToName);
    uprv_free(valueEnumToName_size);
    uprv_free(valueEnumToName_offset);
    uprv_free(valueNameToEnum);
    uprv_free(valueNameToEnum_size);
    uprv_free(valueNameToEnum_offset);

    // top-level tables
    uprv_free(enumToName);
    uprv_free(nameToEnum);
    uprv_free(enumToValue);
    uprv_free(valueMap);

    // pools
    uprv_free(nameGroupPool);
    uprv_free(stringPool);
    uprv_free(stringPool_offsetArray);
}
/**
 * Round a up to the next multiple of sizeof(int32_t); values that are
 * already a multiple are returned unchanged. a must be non-negative.
 */
int32_t Builder::align(int32_t a) {
    U_ASSERT(a >= 0);
    // remainder is computed in unsigned (size_t) arithmetic
    const int32_t remainder = a % sizeof(int32_t);
    if (remainder != 0) {
        a += sizeof(int32_t) - remainder;
    }
    return a;
}
void Builder::erase(void* p, int32_t size) {
U_ASSERT(size >= 0);
int8_t* q = (int8_t*) p;
while (size--) {
*q++ = 0;
}
}
/**
 * Build an EnumToOffset table for a run of entries whose enum values are
 * contiguous.
 *
 * The offset array is filled with name-group *indices*; they are turned
 * into real offsets later by fixupEnumToNameGroup().
 *
 * @param e2ng  first of count entries with consecutive enum values
 * @param count number of entries
 * @param size  receives the aligned byte size of the returned table
 * @return newly allocated table (free with uprv_free); the tool is
 *         terminated via die() if the allocation fails
 */
EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
                                         int32_t count,
                                         int32_t& size) {
    U_ASSERT(e2ng->isContiguous(count));
    size = align(EnumToOffset::getSize(count));
    EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
    if (result == NULL) {
        die("Out of memory allocating EnumToOffset table");
    }
    erase(result, size);
    result->enumStart = e2ng->enumValue;
    result->enumLimit = e2ng->enumValue + count;
    Offset* p = result->getOffsetArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to NGI index values
        // fix them up to NGI offset values
        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    }
    return result;
}
/**
 * Build a NonContiguousEnumToOffset table mapping enum values to name
 * groups, for entries whose enum values are not contiguous.
 *
 * The offset array is filled with name-group *indices*; they are turned
 * into real offsets later by fixupNCEnumToNameGroup().
 *
 * @param e2ng  array of count entries
 * @param count number of entries
 * @param size  receives the aligned byte size of the returned table
 * @return newly allocated table (free with uprv_free); the tool is
 *         terminated via die() if the allocation fails
 */
NonContiguousEnumToOffset*
Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
                                int32_t count,
                                int32_t& size) {
    U_ASSERT(!e2ng->isContiguous(count));
    size = align(NonContiguousEnumToOffset::getSize(count));
    NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
    if (nc == NULL) {
        die("Out of memory allocating NonContiguousEnumToOffset table");
    }
    erase(nc, size);
    nc->count = count;
    EnumValue* e = nc->getEnumArray();
    Offset* p = nc->getOffsetArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to NGI index values
        // fix them up to NGI offset values
        e[i] = e2ng[i].enumValue;
        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    }
    return nc;
}
/**
 * Build the NonContiguousEnumToOffset table that maps property enum
 * values to their ValueMap records.
 *
 * Only the enum array is filled in here; the offset array stays zero
 * until fixupMiscellaneousOffsets() assigns the ValueMap offsets.
 *
 * @param e2v   array of count entries
 * @param count number of entries
 * @param size  receives the aligned byte size of the returned table
 * @return newly allocated table (free with uprv_free); the tool is
 *         terminated via die() if the allocation fails
 */
NonContiguousEnumToOffset*
Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
                            int32_t count,
                            int32_t& size) {
    U_ASSERT(!e2v->isContiguous(count));
    size = align(NonContiguousEnumToOffset::getSize(count));
    NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
    if (result == NULL) {
        die("Out of memory allocating enum-to-value table");
    }
    erase(result, size);
    result->count = count;
    EnumValue* e = result->getEnumArray();
    for (int32_t i=0; i<count; ++i) {
        e[i] = e2v[i].enumValue;
        // offset must be set later
    }
    return result;
}
/**
 * Convert a string pool index into an offset. computeOffsets() must have
 * run first.
 *
 * Index 0 (the "" string) always maps to offset 0. A positive index maps
 * to stringPool_offset plus that string's position inside the pool; an
 * index at or beyond stringPool_count is fatal. A negative index is fatal
 * unless allowNegative is true, in which case the result is the negated
 * offset of the corresponding positive index.
 */
Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
    if (index == 0) {
        // Index 0 is ""; we turn this into an Offset of zero
        return 0;
    }

    if (index > 0) {
        if (index >= stringPool_count) {
            die("String pool index too large");
        }
        Offset result = stringPool_offset + stringPool_offsetArray[index];
        U_ASSERT(result >= 0 && result < total_size);
        return result;
    }

    // index < 0
    if (!allowNegative) {
        die("Negative string pool index");
        return 0; // never executed; make compiler happy
    }
    return -Builder::stringIndexToOffset(-index);
}
/**
 * Build a NameToEnum table from a name index.
 *
 * The name array is filled with string pool *indices*; they are turned
 * into real offsets later by fixupNameToEnum().
 *
 * @param nameToEnum array of count entries
 * @param count      number of entries
 * @param size       receives the aligned byte size of the returned table
 * @return newly allocated table (free with uprv_free); the tool is
 *         terminated via die() if the allocation fails
 */
NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
                                     int32_t count,
                                     int32_t& size) {
    size = align(NameToEnum::getSize(count));
    NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
    if (n2e == NULL) {
        die("Out of memory allocating NameToEnum table");
    }
    erase(n2e, size);
    n2e->count = count;
    Offset* p = n2e->getNameArray();
    EnumValue* e = n2e->getEnumArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to SP index values
        // fix them up to SP offset values
        U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
        p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
        e[i] = nameToEnum[i].enumValue;
    }
    return n2e;
}
void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
int32_t propNameCount,
const EnumToNameGroupEntry* propEnum,
int32_t propEnumCount) {
enumToName = buildNCEnumToNameGroup(propEnum,
propEnumCount,
enumToName_size);
nameToEnum = buildNameToEnum(propName,
propNameCount,
nameToEnum_size);
}
void Builder::buildValues(const EnumToValueEntry* e2v,
int32_t count) {
int32_t i;
U_ASSERT(!e2v->isContiguous(count));
valueMap_count = count;
enumToValue = buildNCEnumToValue(e2v, count,
enumToValue_size);
valueMap_size = align(count * sizeof(ValueMap));
valueMap = (ValueMap*) uprv_malloc(valueMap_size);
erase(valueMap, valueMap_size);
valueEnumToName = MALLOC(EnumToOffset*, count);
valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
valueEnumToName_size = MALLOC(int32_t, count);
valueEnumToName_offset = MALLOC(Offset, count);
valueNameToEnum = MALLOC(NameToEnum*, count);
valueNameToEnum_size = MALLOC(int32_t, count);
valueNameToEnum_offset = MALLOC(Offset, count);
for (i=0; i<count; ++i) {
UBool isContiguous =
e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
valueEnumToName[i] = 0;
valueNCEnumToName[i] = 0;
if (isContiguous) {
valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
e2v[i].enumToName_count,
valueEnumToName_size[i]);
} else {
valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
e2v[i].enumToName_count,
valueEnumToName_size[i]);
}
valueNameToEnum[i] =
buildNameToEnum(e2v[i].nameToEnum,
e2v[i].nameToEnum_count,
valueNameToEnum_size[i]);
}
}
void Builder::buildStringPool(const AliasName* propertyNames,
int32_t propertyNameCount,
const int32_t* nameGroupIndices,
int32_t nameGroupIndicesCount) {
int32_t i;
nameGroupPool_count = nameGroupIndicesCount;
nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
nameGroupPool = MALLOC(Offset, nameGroupPool_count);
for (i=0; i<nameGroupPool_count; ++i) {
// Some indices are negative.
int32_t a = nameGroupIndices[i];
if (a < 0) a = -a;
U_ASSERT(IS_VALID_OFFSET(a));
nameGroupPool[i] = (Offset) nameGroupIndices[i];
}
stringPool_count = propertyNameCount;
stringPool_size = 0;
// first string must be "" -- we skip it
U_ASSERT(*propertyNames[0].str == 0);
for (i=1 /*sic*/; i<propertyNameCount; ++i) {
stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
}
stringPool = MALLOC(char, stringPool_size);
stringPool_offsetArray = MALLOC(Offset, stringPool_count);
Offset soFar = 0;
char* p = stringPool;
stringPool_offsetArray[0] = -1; // we don't use this entry
for (i=1 /*sic*/; i<propertyNameCount; ++i) {
const char* str = propertyNames[i].str;
int32_t len = (int32_t)uprv_strlen(str);
uprv_strcpy(p, str);
p += len;
*p++ = 0;
stringPool_offsetArray[i] = soFar;
soFar += (Offset)(len+1);
}
U_ASSERT(soFar == stringPool_size);
U_ASSERT(p == (stringPool + stringPool_size));
}
// Compile-time check that PropertyAliases is a POD (plain old data; see
// the C++ standard). Under the C++ rules this file is written against, a
// union member may not have non-trivial constructors or destructors, so
// the following union will _fail to compile_ if PropertyAliases is _not_
// a POD. The type is never instantiated; only its declaration matters.
// (Note: We used to use the offsetof macro to check this, but that's not
// quite right, so that test is commented out -- see the commented-out
// U_ASSERT in Builder::computeOffsets().)
typedef union {
int32_t i;
PropertyAliases p;
} PropertyAliasesPODTest;
// Assign a byte offset to every section of the output image, in the order
// in which createData() writes them: header, enumToName (0), nameToEnum (2),
// enumToValue (3), valueMap (4), then for each value map its enum=>name
// table (5) followed by its name=>enum table (6), then nameGroupPool (98)
// and stringPool (99). Stores the grand total in total_size.
// When debug > 0 the layout is printed to stdout.
void Builder::computeOffsets() {
int32_t i;
// running offset; the first section starts right after the header
Offset off = sizeof(header);
if (debug>0) {
printf("header \t offset=%4d size=%5d\n", 0, off);
}
// PropertyAliases must have no v-table and must be
// padded (if necessary) to the next 32-bit boundary.
//U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
// COMPUTE_OFFSET2(foo,type): record the current offset in foo_offset,
// assert that it is aligned for `type` and that the section end is still
// a valid Offset, then advance `off` by foo_size. The member names are
// formed by token pasting, so `foo` must be a section name of Builder.
// COMPUTE_OFFSET(foo) is the same with int32_t alignment.
#define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
#define COMPUTE_OFFSET2(foo,type) \
if (debug>0)\
printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
foo##_offset = off;\
U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
U_ASSERT(foo##_offset % sizeof(type) == 0);\
off = (Offset) (off + foo##_size);
COMPUTE_OFFSET(enumToName); // 0:
COMPUTE_OFFSET(nameToEnum); // 2:
COMPUTE_OFFSET(enumToValue); // 3:
COMPUTE_OFFSET(valueMap); // 4:
// Per-property tables: enum=>name first, then name=>enum, for each i.
for (i=0; i<valueMap_count; ++i) {
if (debug>0) {
printf(" enumToName[%d]\t offset=%4d size=%5d\n",
(int)i, off, (int)valueEnumToName_size[i]);
}
// shared by the contiguous (5a) and non-contiguous (5b) form
valueEnumToName_offset[i] = off; // 5:
U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
off = (Offset) (off + valueEnumToName_size[i]);
if (debug>0) {
printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
(int)i, off, (int)valueNameToEnum_size[i]);
}
valueNameToEnum_offset[i] = off; // 6:
U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
off = (Offset) (off + valueNameToEnum_size[i]);
}
// These last two chunks have weaker alignment needs
COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
COMPUTE_OFFSET2(stringPool,char); // 99:
total_size = off;
if (debug>0) printf("total size=%5d\n\n", (int)total_size);
U_ASSERT(total_size <= (MAX_OFFSET+1));
}
/**
 * Rewrite the name array of n in place, replacing each string pool index
 * with the corresponding offset (negative indices are not allowed here).
 */
void Builder::fixupNameToEnum(NameToEnum* n) {
    Offset* names = n->getNameArray();
    int32_t pos = 0;
    while (pos < n->count) {
        names[pos] = stringIndexToOffset(names[pos]);
        ++pos;
    }
}
void Builder::fixupStringPoolOffsets() {
int32_t i;
// 2:
fixupNameToEnum(nameToEnum);
// 6:
for (i=0; i<valueMap_count; ++i) {
fixupNameToEnum(valueNameToEnum[i]);
}
// 98:
for (i=0; i<nameGroupPool_count; ++i) {
nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
}
}
/**
 * Rewrite the offset array of a contiguous enum => name group table in
 * place: each stored name group index becomes the byte offset of that
 * element within the name group pool section.
 */
void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
    Offset* slot = e2ng->getOffsetArray();
    // one slot per enum value in [enumStart, enumLimit)
    for (EnumValue value = e2ng->enumStart; value < e2ng->enumLimit; ++value, ++slot) {
        *slot = nameGroupPool_offset + sizeof(Offset) * (*slot);
    }
}
/**
 * Rewrite the offset array of a non-contiguous enum => name group table
 * in place: each stored name group index becomes the byte offset of that
 * element within the name group pool section. The enum array is left
 * untouched.
 */
void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
    Offset* offsets = e2ng->getOffsetArray();
    int32_t pos = 0;
    while (pos < e2ng->count) {
        offsets[pos] = nameGroupPool_offset + sizeof(Offset) * offsets[pos];
        ++pos;
    }
}
void Builder::fixupNameGroupPoolOffsets() {
int32_t i;
// 0:
fixupNCEnumToNameGroup(enumToName);
// 1: (deleted)
// 5:
for (i=0; i<valueMap_count; ++i) {
// 5a:
if (valueEnumToName[i] != 0) {
fixupEnumToNameGroup(valueEnumToName[i]);
}
// 5b:
if (valueNCEnumToName[i] != 0) {
fixupNCEnumToNameGroup(valueNCEnumToName[i]);
}
}
}
void Builder::fixupMiscellaneousOffsets() {
int32_t i;
// header:
erase(&header, sizeof(header));
header.enumToName_offset = enumToName_offset;
header.nameToEnum_offset = nameToEnum_offset;
header.enumToValue_offset = enumToValue_offset;
// header meta-info used by Java:
U_ASSERT(total_size > 0 && total_size < 0x7FFF);
header.total_size = (int16_t) total_size;
header.valueMap_offset = valueMap_offset;
header.valueMap_count = (int16_t) valueMap_count;
header.nameGroupPool_offset = nameGroupPool_offset;
header.nameGroupPool_count = (int16_t) nameGroupPool_count;
header.stringPool_offset = stringPool_offset;
header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
U_ASSERT(valueMap_count <= 0x7FFF);
U_ASSERT(nameGroupPool_count <= 0x7FFF);
U_ASSERT(stringPool_count <= 0x7FFF);
// 3:
Offset* p = enumToValue->getOffsetArray();
/*EnumValue* e = enumToValue->getEnumArray();*/
U_ASSERT(valueMap_count == enumToValue->count);
for (i=0; i<valueMap_count; ++i) {
p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
}
// 4:
for (i=0; i<valueMap_count; ++i) {
ValueMap& v = valueMap[i];
v.enumToName_offset = v.ncEnumToName_offset = 0;
if (valueEnumToName[i] != 0) {
v.enumToName_offset = valueEnumToName_offset[i];
}
if (valueNCEnumToName[i] != 0) {
v.ncEnumToName_offset = valueEnumToName_offset[i];
}
v.nameToEnum_offset = valueNameToEnum_offset[i];
}
}
/**
 * Run all fixup passes. Offsets must be computed first, because the
 * later passes turn stored indices into offsets relative to the section
 * positions.
 */
void Builder::fixup() {
    computeOffsets();               // where does each section live?

    fixupStringPoolOffsets();       // string indices     -> offsets
    fixupNameGroupPoolOffsets();    // name group indices -> offsets
    fixupMiscellaneousOffsets();    // header and value map records
}
int8_t* Builder::createData(int32_t& length) const {
length = total_size;
int8_t* result = MALLOC(int8_t, length);
int8_t* p = result;
int8_t* limit = result + length;
#define APPEND2(x, size) \
U_ASSERT((p+size)<=limit); \
uprv_memcpy(p, x, size); \
p += size
#define APPEND(x) APPEND2(x, x##_size)
APPEND2(&header, sizeof(header));
APPEND(enumToName);
APPEND(nameToEnum);
APPEND(enumToValue);
APPEND(valueMap);
for (int32_t i=0; i<valueMap_count; ++i) {
U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
(valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
if (valueEnumToName[i] != 0) {
APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
}
if (valueNCEnumToName[i] != 0) {
APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
}
APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
}
APPEND(nameGroupPool);
APPEND(stringPool);
if (p != limit) {
fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
exit(1);
}
return result;
}
// END Builder
//----------------------------------------------------------------------
/* UDataInfo cf. udata.h */
/* Descriptor passed to udata_create() by writeDataFile().
 * NOTE(review): the field names in the comments below follow the
 * UDataInfo declaration in unicode/udata.h -- confirm against that header. */
static UDataInfo dataInfo = {
sizeof(UDataInfo), /* size */
0, /* reservedWord */
U_IS_BIG_ENDIAN, /* isBigEndian */
U_CHARSET_FAMILY, /* charsetFamily */
sizeof(UChar), /* sizeofUChar */
0, /* reservedByte */
{PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3}, /* dataFormat signature */
{PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
{VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
};
// The tool itself: parses the command line, builds the name/enum indices
// from the tables in data.h, lays them out with a Builder, and writes the
// resulting data file.
class genpname {
// command-line options
UBool useCopyright; // -c / --copyright: include a copyright notice in the output
UBool verbose; // -v / --verbose: report the output file and its size
int32_t debug; // -D / --debug: 0..9, debugging output when > 0
public:
// Real entry point, called from main(); returns the process exit code.
int MMain(int argc, char *argv[]);
private:
// Build a name => enum index, sorted by name; the number of entries is
// stored in nameIndexCount.
NameToEnumEntry* createNameIndex(const AliasList& list,
int32_t& nameIndexCount);
// Build an enum => name group index with list.count() entries, sorted by
// enum value.
EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
// Write the builder's image to the data file in destdir; returns the
// number of bytes written.
int32_t writeDataFile(const char *destdir, const Builder&);
};
int main(int argc, char *argv[]) {
UErrorCode status = U_ZERO_ERROR;
u_init(&status);
if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
// Note: u_init() will try to open ICU property data.
// failures here are expected when building ICU from scratch.
// ignore them.
fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
u_errorName(status));
exit(1);
}
genpname app;
U_MAIN_INIT_ARGS(argc, argv);
int retVal = app.MMain(argc, argv);
u_cleanup();
return retVal;
}
// Command-line option table. genpname::MMain() refers to the entries by
// position (options[0] .. options[5]), so the order here must stay in sync
// with the indices used there.
static UOption options[]={
UOPTION_HELP_H, // [0] -h: print usage
UOPTION_HELP_QUESTION_MARK, // [1] -?: print usage
UOPTION_COPYRIGHT, // [2] -c: sets useCopyright
UOPTION_DESTDIR, // [3] -d: destination directory (preset to the ICU data directory)
UOPTION_VERBOSE, // [4] -v: sets verbose
UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG), // [5] -D n: debug level, first character of the argument
};
/**
 * Build the name => enum index for a list of aliases.
 *
 * @param list           aliases to index
 * @param nameIndexCount receives the number of entries in the result
 * @return newly allocated array (release with uprv_free), sorted by name.
 *         The tool is terminated via die() if memory cannot be
 *         allocated, if sorting fails, or if two different aliases
 *         share a name.
 */
NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
                                           int32_t& nameIndexCount) {
    // Build name => enum map
    // This is an n->1 map. There are typically multiple names
    // mapping to one enum. The name index is sorted in order of the name,
    // as defined by AliasName::compare(), i.e. uprv_comparePropertyNames().
    int32_t i, j;
    int32_t count = list.count();
    // compute upper limit on number of names in the index
    int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
    NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
    if (nameIndex == NULL) {
        die("Out of memory allocating name index");
    }
    nameIndexCount = 0;
    int32_t names[MAX_NAMES_PER_GROUP];
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        int32_t n = p.getUniqueNames(names);
        for (j=0; j<n; ++j) {
            U_ASSERT(nameIndexCount < nameIndexCapacity);
            nameIndex[nameIndexCount++] =
                NameToEnumEntry(names[j], p.enumValue);
        }
    }
    /*
     * use a stable sort to ensure consistent results between
     * genpname.cpp and the propname.cpp swapping code
     */
    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
                   compareNameToEnumEntry, NULL, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        // an unsorted index would produce a corrupt data file
        die("Unable to sort name index");
    }
    if (debug>1) {
        printf("Alias names: %d\n", (int)nameIndexCount);
        for (i=0; i<nameIndexCount; ++i) {
            printf("%s => %d\n",
                   STRING_TABLE[nameIndex[i].nameIndex].str,
                   (int)nameIndex[i].enumValue);
        }
        printf("\n");
    }
    // make sure there are no duplicates. for a sorted list we need
    // only compare adjacent items. Alias.getUniqueNames() has
    // already eliminated duplicate names for a single property, which
    // does occur, so we're checking for duplicate names between two
    // properties, which should never occur.
    UBool ok = TRUE;
    for (i=1; i<nameIndexCount; ++i) {
        if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
            STRING_TABLE[nameIndex[i].nameIndex]) {
            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
                   STRING_TABLE[nameIndex[i-1].nameIndex].str,
                   STRING_TABLE[nameIndex[i].nameIndex].str);
            ok = FALSE;
        }
    }
    if (!ok) {
        die("Two or more duplicate names in property list");
    }
    return nameIndex;
}
/**
 * Build the enum => name group index for a list of aliases.
 *
 * @param list aliases to index
 * @return newly allocated array of list.count() entries (release with
 *         uprv_free), sorted by enum value. The tool is terminated via
 *         die() if memory cannot be allocated or if sorting fails.
 */
EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
    // Build the enum => name map
    // This is a 1->n map. Each enum maps to 1 or more names. To
    // accomplish this the index entry points to an element of the
    // NAME_GROUP array. This is the short name (which may be empty).
    // From there, subsequent elements of NAME_GROUP are alternate
    // names for this enum, up to and including the first one that is
    // negative (negate for actual index).
    int32_t i, j, k;
    int32_t count = list.count();
    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
    if (enumIndex == NULL) {
        die("Out of memory allocating enum index");
    }
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
    }
    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        // an unsorted index would produce a corrupt data file
        die("Unable to sort enum index");
    }
    if (debug>1) {
        printf("Property enums: %d\n", (int)count);
        for (i=0; i<count; ++i) {
            printf("%d => %d: ",
                   (int)enumIndex[i].enumValue,
                   (int)enumIndex[i].nameGroupIndex);
            // list every name of the group; a negative entry is the last
            UBool done = FALSE;
            for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
                k = NAME_GROUP[j];
                if (k < 0) {
                    k = -k;
                    done = TRUE;
                }
                printf("\"%s\"", STRING_TABLE[k].str);
                if (!done) printf(", ");
            }
            printf("\n");
        }
        printf("\n");
    }
    return enumIndex;
}
int genpname::MMain(int argc, char* argv[])
{
int32_t i, j;
UErrorCode status = U_ZERO_ERROR;
u_init(&status);
if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
status = U_ZERO_ERROR;
}
/* preset then read command line options */
options[3].value=u_getDataDirectory();
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
/* error handling, printing usage message */
if (argc<0) {
fprintf(stderr,
"error in command line argument \"%s\"\n",
argv[-argc]);
}
debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
debug < 0 || debug > 9) {
fprintf(stderr,
"usage: %s [-options]\n"
"\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
"options:\n"
"\t-h or -? or --help this usage text\n"
"\t-v or --verbose turn on verbose output\n"
"\t-c or --copyright include a copyright notice\n"
"\t-d or --destdir destination directory, followed by the path\n"
"\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
argv[0]);
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
/* get the options values */
useCopyright=options[2].doesOccur;
verbose = options[4].doesOccur;
// ------------------------------------------------------------
// Do not sort the string table, instead keep it in data.h order.
// This simplifies data swapping and testing thereof because the string
// table itself need not be sorted during swapping.
// The NameToEnum sorter sorts each such map's string offsets instead.
if (debug>1) {
printf("String pool: %d\n", (int)STRING_COUNT);
for (i=0; i<STRING_COUNT; ++i) {
if (i != 0) {
printf(", ");
}
printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
}
printf("\n\n");
}
// ------------------------------------------------------------
// Create top-level property indices
PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
int32_t propNameCount;
NameToEnumEntry* propName = createNameIndex(props, propNameCount);
EnumToNameGroupEntry* propEnum = createEnumIndex(props);
// ------------------------------------------------------------
// Create indices for the value list for each enumerated property
// This will have more entries than we need...
EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
int32_t enumToValue_count = 0;
for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
if (PROPERTY[i].valueCount == 0) continue;
AliasArrayList values(PROPERTY[i].valueList,
PROPERTY[i].valueCount);
enumToValue[j].enumValue = PROPERTY[i].enumValue;
enumToValue[j].enumToName = createEnumIndex(values);
enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
enumToValue[j].nameToEnum = createNameIndex(values,
enumToValue[j].nameToEnum_count);
++j;
}
enumToValue_count = j;
uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
compareEnumToValueEntry, NULL, FALSE, &status);
// ------------------------------------------------------------
// Build PropertyAliases layout in memory
Builder builder(debug);
builder.buildTopLevelProperties(propName,
propNameCount,
propEnum,
PROPERTY_COUNT);
builder.buildValues(enumToValue,
enumToValue_count);
builder.buildStringPool(STRING_TABLE,
STRING_COUNT,
NAME_GROUP,
NAME_GROUP_COUNT);
builder.fixup();
////////////////////////////////////////////////////////////
// Write the output file
////////////////////////////////////////////////////////////
int32_t wlen = writeDataFile(options[3].value, builder);
if (verbose) {
fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
}
return 0; // success
}
int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
int32_t length;
int8_t* data = builder.createData(length);
UNewDataMemory *pdata;
UErrorCode status = U_ZERO_ERROR;
pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
useCopyright ? U_COPYRIGHT_STRING : 0, &status);
if (U_FAILURE(status)) {
die("Unable to create data memory");
}
udata_writeBlock(pdata, data, length);
int32_t dataLength = (int32_t) udata_finish(pdata, &status);
if (U_FAILURE(status)) {
die("Error writing output file");
}
if (dataLength != length) {
die("Written file doesn't match expected size");
}
return dataLength;
}
//eof