/*
**********************************************************************
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Date        Name        Description
* 10/11/02    aliu        Creation.
* 2010nov19   Markus Scherer  Rewrite for formatVersion 2.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/putil.h"
#include "unicode/uclean.h"
#include "cmemory.h"
#include "charstr.h"
#include "cstring.h"
#include "denseranges.h"
#include "unewdata.h"
#include "uoptions.h"
#include "propname.h"
#include "toolutil.h"
#include "uvectr32.h"
#include "writesrc.h"

// NOTE(review): the header name of this #include was missing in the text
// under review; <stdio.h> is assumed because fprintf()/stderr are used
// below -- confirm against the upstream file.
#include <stdio.h>

// We test for ASCII delimiters and White_Space, and build ASCII string BytesTries.
#if U_CHARSET_FAMILY!=U_ASCII_FAMILY
#   error This builder requires U_CHARSET_FAMILY==U_ASCII_FAMILY.
#endif

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

U_NAMESPACE_USE

//----------------------------------------------------------------------
// BEGIN DATA
//
// This is the raw data to be output.  We define the data structure,
// then include a machine-generated header that contains the actual
// data.

#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/unorm.h"
#include "unicode/unorm2.h"

/**
 * One alias string plus the normalized form used to compare aliases.
 * The normalized form is the alias with '-', '_' and ASCII white space
 * removed and the remaining characters passed through uprv_tolower().
 */
class AliasName {
public:
    const char* str;        // the alias as given; not copied
    int32_t     index;      // caller-supplied index (presumably into STRING_TABLE -- confirm)

    char normalized[64];    // NUL-terminated normalized form of str

    /**
     * Stores the arguments and builds the normalized form.
     * Prints an error and calls exit(U_BUFFER_OVERFLOW_ERROR) if the
     * normalized form, including its NUL, does not fit into normalized[].
     */
    AliasName(const char* str, int32_t index);

    /** uprv_strcmp() of the two normalized forms. */
    int compare(const AliasName& other) const {
        return uprv_strcmp(normalized, other.normalized);
    }

    /** TRUE if both aliases have the same normalized form. */
    UBool operator==(const AliasName& other) const {
        return compare(other) == 0;
    }

    /** TRUE if the normalized forms differ. */
    UBool operator!=(const AliasName& other) const {
        return compare(other) != 0;
    }
};

AliasName::AliasName(const char* _str, int32_t _index) :
    str(_str),
    index(_index)
{
    // Build the normalized form of the alias.
    const char *s=str;
    char c;
    int32_t i=0;
    while((c=*s++)!=0) {
        // Ignore delimiters '-', '_', and ASCII White_Space.
        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
            continue;
        }
        // Store only while inside the buffer, but keep counting so that
        // the error message below can report the full normalized length.
        // (Checking only after the loop would write past normalized[].)
        if(i<LENGTHOF(normalized)) {
            normalized[i]=uprv_tolower(c);
        }
        ++i;
    }
    if(i>=LENGTHOF(normalized)) {
        // No room left for the terminating NUL.
        fprintf(stderr,
                "Error: Property (value) alias '%s' results in "
                "too-long normalized string (length %d)\n",
                str, (int)i);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    normalized[i]=0;
}

/**
 * An enum value together with the position in NAME_GROUP at which the
 * list of its names starts (see getUniqueNames()).
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;     // start index of this alias's names in NAME_GROUP

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t anEnumValue, int32_t aNameGroupIndex) :
    enumValue(anEnumValue),
    nameGroupIndex(aNameGroupIndex)
{
}

/**
 * An Alias that additionally carries a list of value aliases.
 */
class Property : public Alias {
public:
    int32_t valueCount;         // number of entries in valueList
    const Alias* valueList;     // not owned; only the pointer is stored

    Property(int32_t enumValue, int32_t nameGroupIndex,
             int32_t valueCount, const Alias* valueList);
};

Property::Property(int32_t _enumValue, int32_t _nameGroupIndex,
                   int32_t _valueCount, const Alias* _valueList) :
    Alias(_enumValue, _nameGroupIndex),
    valueCount(_valueCount),
    valueList(_valueList)
{
}

// *** Include the data header ***
#include "data.h"

/* return a list of unique names, not including "", for this property
 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
 * elements, will be filled with indices into STRING_TABLE
 * @return number of indices, >= 1
 */
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
    int32_t count = 0;
    int32_t i = nameGroupIndex;
    UBool done = FALSE;
    while (!done) {
        int32_t j = NAME_GROUP[i++];
        if (j < 0) {
            // A negative entry is the last one of the group;
            // its absolute value is the actual index.
            done = TRUE;
            j = -j;
        }
        if (j == 0) continue; // omit "" entries
        UBool dupe = FALSE;
        // NOTE(review): the text under review is damaged from the next line
        // on -- the loop condition and everything up to a later "...>0) {"
        // are missing. The line is kept exactly as found, including its
        // trailing comment, which continues on the following line of the file.
        for (int32_t k=0; k0) { // writeNameGroup(PROPERTY[0], ...) 
already done setPropertyInt(PROPERTY[propIndex].enumValue, 0, writeNameGroup(PROPERTY[propIndex], errorCode)); } int32_t valueCount=PROPERTY[propIndex].valueCount; if(valueCount>0) { int32_t valueMapOffset; const Alias *valueList=PROPERTY[propIndex].valueList; if(valueList==VALUES_binprop) { valueMapOffset=binPropsValueMapOffset; } else if(valueList==VALUES_ccc || valueList==VALUES_lccc || valueList==VALUES_tccc) { valueMapOffset=cccValueMapOffset; } else { valueMapOffset=valueMaps.size(); bytesTrieOffset=buildAliasesBytesTrie(valueList, valueCount, errorCode); valueMaps.addElement(bytesTrieOffset, errorCode); buildValueMap(valueList, valueCount, errorCode); } setPropertyInt(PROPERTY[propIndex].enumValue, 1, valueMapOffset); } } // Write the indexes. int32_t offset=(int32_t)sizeof(indexes); indexes[PropNameData::IX_VALUE_MAPS_OFFSET]=offset; offset+=valueMaps.size()*4; indexes[PropNameData::IX_BYTE_TRIES_OFFSET]=offset; offset+=bytesTries.length(); indexes[PropNameData::IX_NAME_GROUPS_OFFSET]=offset; offset+=nameGroups.length(); for(i=PropNameData::IX_RESERVED3_OFFSET; i<=PropNameData::IX_TOTAL_SIZE; ++i) { indexes[i]=offset; } indexes[PropNameData::IX_MAX_NAME_LENGTH]=maxNameLength; for(i=PropNameData::IX_RESERVED7; i=0); int32_t count=i-alias.nameGroupIndex; // The first byte tells us how many aliases there are. // We use only values 0..0x1f in the first byte because when we write // the name groups as an invariant-character string into a source file, // those values (C0 control codes) are written as numbers rather than as characters. if(count>=0x20) { fprintf(stderr, "Error: Too many aliases in the group with index %d\n", (int)alias.nameGroupIndex); exit(U_INDEX_OUTOFBOUNDS_ERROR); } nameGroups.append((char)count, errorCode); // There is at least a short name (sometimes empty) and a long name. (count>=2) // Note: Sometimes the short and long names are the same. 
// In such a case, we could set a flag and omit the duplicate, // but that would save only about 1.35% of total data size (Unicode 6.0/ICU 4.6) // which is not worth the trouble. i=alias.nameGroupIndex; int32_t n; do { nameIndex=n=NAME_GROUP[i++]; if(nameIndex<0) { nameIndex=-nameIndex; } const char *s=STRING_TABLE[nameIndex].str; int32_t sLength=uprv_strlen(s)+1; if(sLength>maxNameLength) { maxNameLength=sLength; } nameGroups.append(s, sLength, errorCode); // including NUL } while(n>=0); return nameOffset; } void buildValueMap(const Alias aliases[], int32_t length, UErrorCode &errorCode) { UVector32 sortedValues(errorCode); UVector32 nameOffsets(errorCode); // Parallel to aliases[]. int32_t i; for(i=0; i0) { valueMaps.addElement(numRanges, errorCode); for(i=0; i=0 ? nameOffsets.elementAti(aliasIndex) : 0; valueMaps.addElement(nameOffset, errorCode); } } } else { // No dense ranges. valueMaps.addElement(0x10+length, errorCode); for(i=0; i