/*
*******************************************************************************
*
*   Copyright (C) 1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  genprops.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999dec08
*   created by: Markus W. Scherer
*
*   This program reads the Unicode character database text file,
*   parses it, and extracts most of the properties for each character.
*   It then writes a binary file containing the properties
*   that is designed to be used directly for random-access to
*   the properties of each Unicode character.
*/

/*
 * NOTE(review): this copy of the file is damaged and will not compile as is.
 * Several spans of text are missing; every gap seen so far begins where a
 * less-than sign would be expected and ends where a greater-than sign would
 * be expected, which suggests a markup stripper ate them. Known gaps are
 * flagged individually below. Restore the file from the upstream revision
 * rather than patching the fragments by hand.
 */

/*
 * NOTE(review): the first two include directives have lost their header
 * names. The code below calls fprintf, uses stderr and calls exit, so they
 * were presumably the standard I/O and standard library headers - confirm
 * against upstream.
 */
#include
#include
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "genprops.h"

/*
 * Global option flags with their initial values. The usage text in main
 * describes a -v option as verbose output and a -c option as including a
 * copyright notice; the code that would set these flags from the command
 * line is not present in this copy.
 * Each declaration carries an initializer, so it is a definition even though
 * it is spelled with extern.
 */
extern bool_t beVerbose=FALSE, haveCopyright=TRUE;

/* general categories */
/*
 * Two-letter general category abbreviations. Slot 0 is NULL, and the lookup
 * loop in the parsing code below starts at index 1, so the NULL slot is never
 * handed to the string comparison. The matching index is stored as the
 * general category value of the character.
 */
extern const char *const
genCategoryNames[U_CHAR_CATEGORY_COUNT]={
    NULL,
    "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me",
    "Mc", "Nd", "Nl", "No",
    "Zs", "Zl", "Zp",
    "Cc", "Cf", "Co", "Cs",
    "Pd", "Ps", "Pe", "Pc", "Po",
    "Sm", "Sc", "Sk", "So",
    "Pi", "Pf",
    "Cn"
};

/*
 * BiDi category abbreviations. The lookup loop in the parsing code below
 * starts at index 0 and stores the matching index as the bidi value.
 */
extern const char *const
bidiNames[U_CHAR_DIRECTION_COUNT]={
    "L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S",
    "WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
};

/* prototypes --------------------------------------------------------------- */

/*
 * NOTE(review): of these four functions only fragments survive in this copy:
 * a middle section that is presumably the body of parseDB, and the tail of
 * checkLineIndex. No definition of init or getField is visible.
 */
static void
init(void);

static void
parseDB(FileStream *in);

static int16_t
getField(char *line, int16_t start, int16_t limit);

static void
checkLineIndex(uint32_t code, int16_t limit, int16_t length);

/* -------------------------------------------------------------------------- */

/*
 * Program entry point.
 *
 * With no command line arguments (argc of 1 or less) a usage message is
 * written to stderr. The visible code does not exit after printing it;
 * control falls through to the argument loop that follows.
 *
 * NOTE(review): the usage synopsis lists a -1 option that the option list
 * underneath it never explains.
 *
 * NOTE(review): the string literal below contains a raw line break between
 * the words file and and. That break is inside the literal, so it is kept
 * exactly as found; it is most likely another artifact of the damage.
 */
extern int
main(int argc, char *argv[]) {
    FileStream *in;
    char *arg, *filename=NULL;
    int i;

    if(argc<=1) {
        fprintf(stderr,
            "usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n"
            "\tread the UnicodeData.txt file 
and \n"
            "\tcreate a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
            "\toptions:\n"
            "\t\t-v[+|-] verbose output\n"
            "\t\t-c[+|-] do (not) include a copyright notice\n"
            "\t\tfilename absolute path/filename for the\n"
            "\t\t\tUnicode database text file (default: standard input)\n",
            argv[0]);
    }

    /*
     * NOTE(review): a large span is missing inside the next statement. It
     * opens as the argument loop of main (index starting at 1) and, with no
     * separator, continues as the condition of a later loop that trims a text
     * line. Lost in between: the rest of main, all of init, and the opening
     * of what is presumably parseDB, including the declarations of line,
     * length, p, end, start, limit, value and hasNumericValue and the code
     * that reads a line.
     *
     * What survives of the second loop: while the last character of line is
     * a CR or an LF, length is decremented and that character is overwritten
     * with 0, which strips the line ending.
     */
    for(i=1; i0 && (line[length-1]=='\r' || line[length-1]=='\n')) {
            line[--length]=0;
        }

        /* reset the properties */
        uprv_memset(&p, 0, sizeof(p));
        hasNumericValue=FALSE;

        /* get the character code, field 0 */
        /*
         * Parsed as hexadecimal from the start of the line. limit becomes the
         * offset of the first unparsed character. It is an error if no digit
         * was consumed or if the number is not followed by a semicolon.
         */
        p.code=uprv_strtoul(line, &end, 16);
        limit=end-line;
        if(limit<1 || *end!=';') {
            fprintf(stderr, "genprops: syntax error in field 0 at code 0x%lx\n", p.code);
            exit(U_PARSE_ERROR);
        }

        /* skip character name, field 1 */
        /*
         * limit is advanced past the separator, checked against the line
         * length, and then replaced by the result of getField.
         * NOTE(review): getField is not visible here; from the way its result
         * is used below (the character at that index is overwritten with 0
         * and the next field starts one past it) it presumably returns the
         * index of the separator that ends the field - confirm.
         */
        checkLineIndex(p.code, ++limit, length);
        limit=getField(line, limit, length);

        /* get general category, field 2 */
        /*
         * The field is terminated in place so it can be compared as a C
         * string against each table entry. The loop has no condition of its
         * own: it ends on a match, or reports an unknown category and exits
         * once the index reaches U_CHAR_CATEGORY_COUNT.
         */
        start=limit+1;
        checkLineIndex(p.code, start, length);
        limit=getField(line, start, length);
        line[limit]=0;
        for(i=1;;) {
            if(uprv_strcmp(line+start, genCategoryNames[i])==0) {
                p.generalCategory=(uint8_t)i;
                break;
            }
            if(++i==U_CHAR_CATEGORY_COUNT) {
                fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", line+start, p.code);
                exit(U_PARSE_ERROR);
            }
        }

        /* get canonical combining class, field 3 */
        /*
         * Parsed as decimal and narrowed to uint8_t without a range check.
         * It is an error if no digit was consumed (start not below limit) or
         * if the number is not followed by a semicolon.
         */
        start=limit+1;
        checkLineIndex(p.code, start, length);
        p.canonicalCombining=(uint8_t)uprv_strtoul(line+start, &end, 10);
        limit=end-line;
        if(start>=limit || *end!=';') {
            fprintf(stderr, "genprops: syntax error in field 3 at code 0x%lx\n", p.code);
            exit(U_PARSE_ERROR);
        }

        /* get BiDi category, field 4 */
        /*
         * Same lookup scheme as for field 2, but against bidiNames and
         * starting at index 0.
         */
        start=limit+1;
        checkLineIndex(p.code, start, length);
        limit=getField(line, start, length);
        line[limit]=0;
        for(i=0;;) {
            if(uprv_strcmp(line+start, bidiNames[i])==0) {
                p.bidi=(uint8_t)i;
                break;
            }
            if(++i==U_CHAR_DIRECTION_COUNT) {
                fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", line+start, p.code);
                exit(U_PARSE_ERROR);
            }
        }

        /* character decomposition mapping, field 5 */
        /* ### skip for now */
        checkLineIndex(p.code, ++limit, length);
        limit=getField(line, limit, length);

        /* decimal digit value, field 6 */
        /*
         * Parsed as decimal into value. Unlike fields 0 and 3, the only check
         * made here is that the parse stopped at a semicolon, so an empty
         * field passes.
         */
        start=limit+1;
        checkLineIndex(p.code, start, length);
        value=uprv_strtoul(line+start, &end, 10);
        if(*end!=';') {
            fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", p.code);
            exit(U_PARSE_ERROR);
        }
        limit=end-line;

        /*
         * NOTE(review): text is missing inside the next condition. It begins
         * as a test on start (presumably whether field 6 was non-empty) and
         * resumes in the handling of field 8, as the error message further
         * down shows. Lost in between: whatever is done with value, all of
         * field 7, and the start of field 8.
         *
         * What survives: when the parse position is at a slash, the number
         * after it is parsed as decimal and stored as the denominator; the
         * field must then end at a semicolon.
         */
        if(start0 && *end=='/') {
            p.denominator=uprv_strtoul(end+1, &end, 10);
        }
        if(*end!=';') {
            fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n", p.code);
            exit(U_PARSE_ERROR);
        }
        limit=end-line;

    /*
     * NOTE(review): text is missing inside the next condition as well. It
     * begins as another test on start in the field parsing code and resumes
     * as the bounds test of checkLineIndex. Lost in between: the rest of the
     * field handling, the end of the parsing function, all of getField, and
     * the header of checkLineIndex.
     *
     * What survives is the tail of checkLineIndex(code, limit, length): it
     * reports too few fields for the given code point on stderr and exits
     * with U_PARSE_ERROR. The surviving comparison against length suggests
     * the test is whether the index has reached the end of the line -
     * confirm against upstream.
     *
     * NOTE(review): per the prototype, code is a uint32_t, but it is printed
     * with a conversion that expects unsigned long; a cast is needed on
     * platforms where those two types differ.
     */
    if(start=length) {
        fprintf(stderr, "genprops: too few fields at code 0x%lx\n", code);
        exit(U_PARSE_ERROR);
    }
}