/* ******************************************************************************* * * Copyright (C) 2002, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: props2.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2002feb24 * created by: Markus W. Scherer * * Parse more Unicode Character Database files and store * additional Unicode character properties in bit set vectors. */ #include #include "unicode/utypes.h" #include "unicode/uchar.h" #include "unicode/uscript.h" #include "cstring.h" #include "cmemory.h" #include "utrie.h" #include "uprops.h" #include "propsvec.h" #include "uparse.h" #include "genprops.h" /* data --------------------------------------------------------------------- */ static UNewTrie *trie; static uint32_t *pv; static int32_t pvCount; /* prototypes --------------------------------------------------------------- */ static void parseTwoFieldFile(char *filename, char *basename, const char *ucdFile, const char *suffix, UParseLineFn *lineFn, UErrorCode *pErrorCode); static void ageLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); static void parseScripts(const char *filename, UErrorCode *pErrorCode); static void blocksLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); static void propListLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); /* -------------------------------------------------------------------------- */ U_CFUNC void generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) { char *basename; basename=filename+uprv_strlen(filename); pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); /* process various UCD .txt files */ parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode); writeUCDFilename(basename, "Scripts", suffix); parseScripts(filename, pErrorCode); parseTwoFieldFile(filename, basename, "Blocks", suffix, blocksLineFn, pErrorCode); parseTwoFieldFile(filename, basename, "PropList", suffix, propListLineFn, pErrorCode); trie=utrie_open(NULL, NULL, 50000, 0, FALSE); if(trie==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; upvec_close(pv); return; } pvCount=upvec_toTrie(pv, trie, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n", u_errorName(*pErrorCode)); exit(*pErrorCode); } } static void parseTwoFieldFile(char *filename, char *basename, const char *ucdFile, const char *suffix, UParseLineFn *lineFn, UErrorCode *pErrorCode) { char *fields[2][2]; writeUCDFilename(basename, ucdFile, suffix); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; } u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode); } /* DerivedAge.txt ----------------------------------------------------------- */ static void ageLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode) { char *s, *end; uint32_t value, start, limit, version; u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields[0][0]); exit(*pErrorCode); } ++limit; /* parse version number */ s=(char *)u_skipWhitespace(fields[1][0]); value=(uint32_t)uprv_strtoul(s, &end, 10); if(s==end || value==0 || value>15 || (*end!='.' && *end!=' ' && *end!='\t' && *end!=0)) { fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]); *pErrorCode=U_PARSE_ERROR; exit(U_PARSE_ERROR); } version=value<<4; /* parse minor version number */ if(*end=='.') { s=(char *)u_skipWhitespace(end+1); value=(uint32_t)uprv_strtoul(s, &end, 10); if(s==end || value>15 || (*end!=' ' && *end!='\t' && *end!=0)) { fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]); *pErrorCode=U_PARSE_ERROR; exit(U_PARSE_ERROR); } version|=value; } if(!upvec_setValue(pv, start, limit, 0, version<