From faa1bf56cbd9f7810c84d9e8953ad1592ff9d0a3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 1 Mar 2002 01:58:49 +0000 Subject: [PATCH] ICU-1721 store numericType, parse Scripts.txt, some cleanup and more common code X-SVN-Rev: 7826 --- icu4c/source/tools/genprops/genprops.c | 78 ++++++++++++++-------- icu4c/source/tools/genprops/genprops.h | 7 +- icu4c/source/tools/genprops/props2.c | 89 +++++++++++++++++++++++++- icu4c/source/tools/genprops/store.c | 74 +++++++++++++-------- 4 files changed, 192 insertions(+), 56 deletions(-) diff --git a/icu4c/source/tools/genprops/genprops.c b/icu4c/source/tools/genprops/genprops.c index fde615c0a1..845ccda644 100644 --- a/icu4c/source/tools/genprops/genprops.c +++ b/icu4c/source/tools/genprops/genprops.c @@ -175,6 +175,34 @@ writeUCDFilename(char *basename, const char *filename, const char *suffix) { uprv_strcpy(basename+length, ".txt"); } +U_CFUNC int32_t +getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { + const char *t, *z; + int32_t i, j; + + s=u_skipWhitespace(s); + for(i=0; i=0) { + p.generalCategory=(uint8_t)i; + } else { + fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", + fields[2][0], (unsigned long)p.code); + *pErrorCode=U_PARSE_ERROR; + exit(U_PARSE_ERROR); } /* get canonical combining class, field 3 */ @@ -508,18 +532,14 @@ unicodeDataLineFn(void *context, } /* get BiDi category, field 4 */ - *fields[4][1]=0; - for(i=0;;) { - if(uprv_strcmp(fields[4][0], bidiNames[i])==0) { - p.bidi=(uint8_t)i; - break; - } - if(++i==U_CHAR_DIRECTION_COUNT) { - fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", - fields[4][0], (unsigned long)p.code); - *pErrorCode=U_PARSE_ERROR; - exit(U_PARSE_ERROR); - } + i=getTokenIndex(bidiNames, U_CHAR_DIRECTION_COUNT, fields[4][0]); + if(i>=0) { + p.bidi=(uint8_t)i; + } else { + fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", + fields[4][0], (unsigned long)p.code); + *pErrorCode=U_PARSE_ERROR; + exit(U_PARSE_ERROR); } /* decimal digit value, field 6 */ @@ -532,6 +552,7 @@ unicodeDataLineFn(void *context, exit(U_PARSE_ERROR); } p.decimalDigitValue=(int16_t)value; + p.numericType=1; } /* digit value, field 7 */ @@ -544,6 +565,9 @@ unicodeDataLineFn(void *context, exit(U_PARSE_ERROR); } p.digitValue=(int16_t)value; + if(p.numericType==0) { + p.numericType=2; + } } /* numeric value, field 8 */ @@ -582,7 +606,9 @@ unicodeDataLineFn(void *context, } else { p.numericValue=(int32_t)value; } - p.hasNumericValue=TRUE; + if(p.numericType==0) { + p.numericType=3; + } } /* get Mirrored flag, field 9 */ diff --git a/icu4c/source/tools/genprops/genprops.h b/icu4c/source/tools/genprops/genprops.h index 2cec055525..51b5381da5 100644 --- a/icu4c/source/tools/genprops/genprops.h +++ b/icu4c/source/tools/genprops/genprops.h @@ -42,9 +42,9 @@ typedef struct { typedef struct { uint32_t code, lowerCase, upperCase, titleCase, mirrorMapping; int16_t decimalDigitValue, digitValue; /* -1: no value */ - int32_t numericValue; /* see hasNumericValue */ + int32_t numericValue; /* see numericType */ uint32_t denominator; /* 0: no value */ - uint8_t generalCategory, canonicalCombining, bidi, isMirrored, hasNumericValue; + uint8_t generalCategory, canonicalCombining, bidi, isMirrored, numericType; SpecialCasing *specialCasing; CaseFolding *caseFolding; } Props; @@ -63,6 +63,9 @@ genCategoryNames[]; U_CFUNC void writeUCDFilename(char *basename, const char *filename, const char *suffix); +U_CFUNC int32_t +getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s); + extern void setUnicodeVersion(const char *v); diff --git a/icu4c/source/tools/genprops/props2.c b/icu4c/source/tools/genprops/props2.c index dfe3ca1173..643b1e5e72 100644 --- a/icu4c/source/tools/genprops/props2.c +++ b/icu4c/source/tools/genprops/props2.c @@ -19,6 +19,7 @@ #include #include "unicode/utypes.h" +#include "unicode/uscript.h" #include "cstring.h" #include "cmemory.h" #include "utrie.h" @@ -38,6 +39,9 @@ static int32_t pvCount; static void parseAge(const char *filename, uint32_t *pv, UErrorCode *pErrorCode); +static void +parseScripts(const char *filename, uint32_t *pv, UErrorCode *pErrorCode); + /* -------------------------------------------------------------------------- */ U_CFUNC void @@ -48,10 +52,13 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); - /* process DerivedAge.txt */ + /* process various UCD .txt files */ writeUCDFilename(basename, "DerivedAge", suffix); parseAge(filename, pv, pErrorCode); + writeUCDFilename(basename, "Scripts", suffix); + parseScripts(filename, pv, pErrorCode); + trie=utrie_open(NULL, NULL, 50000, 0, FALSE); if(trie==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; @@ -66,6 +73,8 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr } } +/* DerivedAge.txt ----------------------------------------------------------- */ + static void ageLineFn(void *context, char *fields[][2], int32_t fieldCount, @@ -120,6 +129,80 @@ parseAge(const char *filename, uint32_t *pv, UErrorCode *pErrorCode) { u_parseDelimitedFile(filename, ';', fields, 2, ageLineFn, pv, pErrorCode); } +/* Scripts.txt -------------------------------------------------------------- */ + +static void +scriptsLineFn(void *context, + char *fields[][2], int32_t fieldCount, + UErrorCode *pErrorCode) { + uint32_t *pv=(uint32_t *)context; + char *s, *end; + uint32_t start, limit; + UScriptCode script; + + u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + fprintf(stderr, "genprops: syntax error in Scripts.txt field 0 at %s\n", fields[0][0]); + exit(*pErrorCode); + } + ++limit; + + /* parse script name */ + s=(char *)u_skipWhitespace(fields[1][0]); + + /* trim trailing whitespace */ + end=fields[1][1]; + while(sgeneralCategory==U_DECIMAL_DIGIT_NUMBER) { /* verify that all numeric fields contain the same value */ if(p->decimalDigitValue!=-1 && p->digitValue==p->decimalDigitValue && - p->hasNumericValue && p->numericValue==p->decimalDigitValue && + p->numericType==1 && p->numericValue==p->decimalDigitValue && p->denominator==0 ) { value=p->decimalDigitValue; @@ -422,15 +447,13 @@ makeProps(Props *p) { } ++count; } else if(p->generalCategory==U_LETTER_NUMBER || p->generalCategory==U_OTHER_NUMBER) { - /* verify that only the numeric value field itself contains a value */ - if(p->decimalDigitValue==-1 && p->digitValue==-1 && p->hasNumericValue) { + if(p->numericType==3) { value=p->numericValue; } else { x=EXCEPTION_BIT; } ++count; - } else if(p->decimalDigitValue!=-1 || p->digitValue!=-1 || p->hasNumericValue) { - /* verify that only numeric categories have numeric values */ + } else if(p->numericType!=0) { x=EXCEPTION_BIT; ++count; } @@ -506,7 +529,7 @@ makeProps(Props *p) { (uint32_t)p->decimalDigitValue<<16| (uint16_t)p->digitValue; } - if(p->hasNumericValue) { + if(p->numericType==3) { if(p->denominator==0) { first|=0x10; exceptions[value+length++]=(uint32_t)p->numericValue; @@ -593,6 +616,7 @@ makeProps(Props *p) { (uint32_t)p->generalCategory | (uint32_t)p->bidi<isMirrored<numericType<code<=0x9f) {