/*
*******************************************************************************
*
*   Copyright (C) 2002, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  props2.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb24
*   created by: Markus W. Scherer
*
*   Parse more Unicode Character Database files and store
*   additional Unicode character properties in bit set vectors.
*/

/*
 * NOTE(review): the first #include directive below has no header name.
 * Presumably a system header in angle brackets was lost when this text was
 * extracted (this file calls fprintf() and exit()) - restore it from the
 * pristine source before building.
 */
#include
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "cstring.h"
#include "cmemory.h"
#include "utrie.h"
#include "uprops.h"
#include "propsvec.h"
#include "uparse.h"
#include "genprops.h"

/* Single-bit mask of type uint32_t with only bit n set. */
#define FLAG(n) ((uint32_t)1<<(n))

/* data --------------------------------------------------------------------- */

/* NOTE(review): trie is not referenced in the part of the file reviewed here. */
static UNewTrie *trie;

/*
 * Properties vector handle: assigned from upvec_open() in
 * initAdditionalProperties() and passed as the first argument to the
 * upvec_setValue() calls in this file.
 */
static uint32_t *pv;

/* NOTE(review): pvCount is not referenced in the part of the file reviewed here. */
static int32_t pvCount;

/*
 * Pending range [prevStart, prevLimit) and its value, kept between calls of
 * eaWidthLineFn() so that a range starting exactly at prevLimit with the same
 * value extends the pending range instead of being stored separately.
 */
static uint32_t prevStart=0, prevLimit=0, prevValue=0;

/* prototypes --------------------------------------------------------------- */

/*
 * Called from generateAdditionalProperties() once per UCD file with the file
 * name stem (for example "DerivedAge") and the line callback to use.
 * NOTE(review): its definition is not in the part of the file reviewed here;
 * presumably it builds the path at basename and parses the file - confirm.
 */
static void parseTwoFieldFile(char *filename, char *basename, const char *ucdFile, const char *suffix, UParseLineFn *lineFn, UErrorCode *pErrorCode);

/*
 * Line callbacks with the common UParseLineFn-style signature; each one is
 * written for one UCD file and is handed to parseTwoFieldFile() as lineFn.
 */
static void ageLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);
static void scriptsLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);
static void blocksLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);
static void propListLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);
static void derivedPropListLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);
static void eaWidthLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode);

/* -------------------------------------------------------------------------- */

/*
 * Create the properties vector with upvec_open() and store it in the
 * file-scope variable pv, which the other functions in this file write to.
 */
U_CFUNC void initAdditionalProperties() {
pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); /* the result is stored without any check */
}

/*
 * Currently a no-op: the entire body is disabled with #if 0 and marked for
 * removal, so start, limit and value are ignored.
 *
 * The disabled code would call upvec_setValue() on pv for start..limit with
 * value and a full 0xffffffff mask (4th argument 2 - presumably the vector
 * column index, confirm against propsvec.h), and exit the process on failure.
 */
U_CFUNC void setMainProperties(uint32_t start, uint32_t limit, uint32_t value) {
#if 0 /* ### TODO: remove this function */
    UErrorCode errorCode=U_ZERO_ERROR;

    if(!upvec_setValue(pv, start, limit, 2, value, 0xffffffff, &errorCode)) {
        fprintf(stderr, "genprops: unable to set main properties: %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
#endif
}

/*
 * Parse the additional UCD files and store their properties in pv.
 *
 * filename    path buffer; basename is set to point at its terminating NUL
 *             and both are handed to parseTwoFieldFile() (presumably so that
 *             the UCD file name can be appended there - confirm)
 * suffix      passed through unchanged to parseTwoFieldFile()
 * pErrorCode  ICU error code, passed to every parse/set call
 *
 * Files are processed in this order: DerivedAge, Scripts, Blocks, PropList,
 * DerivedCoreProperties. Before Scripts is parsed, every code point
 * 0..0x10ffff is preset to USCRIPT_COMMON. If that preset fails, an error is
 * printed to stderr and the process exits with the error code.
 */
U_CFUNC void generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
    char *basename;
    UErrorCode errorCode;

    /* basename points at the end of the string currently held in filename */
    basename=filename+uprv_strlen(filename);

    /* process various UCD .txt files */
    parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);

    /*
     * UTR 24 says:
     * Section 2:
     *   "Common - For characters that may be used
     *             within multiple scripts,
     *             or any unassigned code points."
     *
     * Section 4:
     *   "The value COMMON is the default value,
     *    given to all code points that are not
     *    explicitly mentioned in the data file."
     */
    if(!upvec_setValue(pv, 0, 0x110000, 0, (uint32_t)USCRIPT_COMMON, UPROPS_SCRIPT_MASK, pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set script code: %s\n", u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    parseTwoFieldFile(filename, basename, "Scripts", suffix, scriptsLineFn, pErrorCode);

    parseTwoFieldFile(filename, basename, "Blocks", suffix, blocksLineFn, pErrorCode);

    parseTwoFieldFile(filename, basename, "PropList", suffix, propListLineFn, pErrorCode);

    parseTwoFieldFile(filename, basename, "DerivedCoreProperties", suffix, derivedPropListLineFn, pErrorCode);

    /*
     * Preset East Asian Width defaults:
     * N for all
     * A for Private Use
     * W for plane 2
     */
    errorCode=U_ZERO_ERROR;
    /*
     * NOTE(review): the statement below is damaged. The text that followed the
     * less-than sign after U_EA_NEUTRAL is missing, and what comes next uses
     * names (end, and further on s, value, version, fields) that are not
     * declared in this function. Presumably the rest of this function and the
     * beginning of ageLineFn() were lost when this text was extracted.
     * Restore from the pristine source; do not try to repair it by hand.
     */
    if( !upvec_setValue(pv, 0, 0x110000, 0, (uint32_t)(U_EA_NEUTRAL<15 || (*end!='.'
&& *end!=' ' && *end!='\t' && *end!=0)) {
        /*
         * NOTE(review): this is the tail of a condition whose beginning is not
         * intact in this copy of the file. Judging by the message, the code
         * here validates the version number in field 1 of DerivedAge.txt, so
         * it is presumably part of ageLineFn() - confirm against the pristine
         * source.
         */
        fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    /* move the number parsed so far into bits 7..4; the minor number is OR-ed into the low bits below */
    version=value<<4;

    /* parse minor version number */
    if(*end=='.') {
        s=(char *)u_skipWhitespace(end+1);
        value=(uint32_t)uprv_strtoul(s, &end, 10);
        /* reject: no digits consumed, a value above 15, or trailing text other than space/tab/NUL */
        if(s==end || value>15 || (*end!=' ' && *end!='\t' && *end!=0)) {
            fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }
        version|=value;
    }

    /*
     * NOTE(review): the statement below is damaged. The text that followed the
     * less-than sign after "version" is missing, and the code that comes next
     * uses i and UPROPS_XID_START and reports a "derived binary property"
     * error. Presumably the end of the function above, several complete
     * functions, and the start of derivedPropListLineFn() were lost when this
     * text was extracted. Restore from the pristine source.
     */
    if(!upvec_setValue(pv, start, limit, 0, version<=0) {
        /* the same single bit is passed as both value and mask arguments */
        uint32_t flag=FLAG(UPROPS_XID_START+i);
        if(!upvec_setValue(pv, start, limit, 1, flag, flag, pErrorCode)) {
            fprintf(stderr, "genprops error: unable to set derived binary property: %s\n", u_errorName(*pErrorCode));
            exit(*pErrorCode);
        }
    }
}

/* East Asian Width --------------------------------------------------------- */

/*
 * Width names as they are looked up by eaWidthLineFn(); the array index of a
 * name is the numeric value that eaWidthLineFn() works with.
 *
 * keep this list in sync with UEAWidthCode in uprops.h or uchar.h
 */
static const char *const eaNames[U_EA_TOP]={
    "N",        /* Non-East Asian Neutral, default for unassigned code points */
    "A",        /* Ambiguous, default for Private Use code points */
    "H",        /* Half-width */
    "F",        /* Full-width */
    "Na",       /* Narrow */
    "W"         /* Wide, default for plane 2 */
};

/*
 * Line callback for EastAsianWidth.txt (per its error messages).
 *
 * fields[0][0]  code point or code point range, parsed with
 *               u_parseCodePointRange()
 * fields[1][0]  width name, looked up in eaNames
 * pErrorCode    ICU error code; on a syntax error or an unknown width name a
 *               message is printed to stderr and the process exits
 *
 * context and fieldCount are not used in the part of the body visible here.
 *
 * Consecutive lines are merged: a range that starts exactly at prevLimit and
 * has the same width as prevValue only extends the pending range; otherwise
 * the pending range is passed to upvec_setValue().
 * NOTE(review): the function is cut off in this copy of the file, so the rest
 * of the else branch is not documented here.
 */
static void eaWidthLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode) {
    uint32_t start, limit;
    int32_t i;

    u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in EastAsianWidth.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* presumably converts an inclusive range end into an exclusive limit - confirm against uparse.h */
    ++limit;

    /* look up the width name from field 1; a negative index is treated as "unknown name" */
    i=getTokenIndex(eaNames, U_EA_TOP, fields[1][0]);
    if(i<0) {
        fprintf(stderr, "genprops error: unknown width name \"%s\" in EastAsianWidth.txt\n", fields[1][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* collect maximum ranges */
    if(prevLimit==start && (uint32_t)i==prevValue) {
        /* adjacent range with the same width: just extend the pending range */
        prevLimit=limit;
    } else {
        /*
         * NOTE(review): the source text ends in the middle of the expression
         * below (after the less-than sign following prevValue); the remainder
         * of this call and of the function is not available in this copy.
         */
        if(!upvec_setValue(pv, prevStart, prevLimit, 0, (uint32_t)(prevValue<