ICU-130 32-bit exception values, add mirror mappings, overhaul...
X-SVN-Rev: 1141
This commit is contained in:
parent
7af09219eb
commit
34e9e8fc9f
@ -24,18 +24,190 @@
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "filestrm.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
#include "genprops.h"
|
||||
#include "unicode/putil.h"
|
||||
|
||||
extern bool_t beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
/* general categories */
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
init(void);
|
||||
|
||||
static void
|
||||
parseMirror(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 }
|
||||
};
|
||||
|
||||
extern int
|
||||
main(int argc, const char *argv[]) {
|
||||
char filename[300];
|
||||
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
|
||||
char *basename=NULL;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
/* preset then read command line options */
|
||||
options[4].value=u_getDataDirectory();
|
||||
options[5].value="";
|
||||
options[6].value="3.0.0";
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
fprintf(stderr,
|
||||
"error in command line argument \"%s\"\n",
|
||||
argv[-argc]);
|
||||
}
|
||||
if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-options] [suffix]\n"
|
||||
"\tread the UnicodeData.txt file and other Unicode properties files and\n"
|
||||
"\tcreate a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
|
||||
"\toptions:\n"
|
||||
"\t\t-h or -? or --help this usage text\n"
|
||||
"\t\t-v or --verbose verbose output\n"
|
||||
"\t\t-c or --copyright include a copyright notice\n"
|
||||
"\t\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
|
||||
"\t\tsuffix suffix that is to be appended with a '-'\n"
|
||||
"\t\t to the source file basenames before opening;\n"
|
||||
"\t\t 'genprops new' will read UnicodeData-new.txt etc.\n",
|
||||
argv[0]);
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
/* get the options values */
|
||||
beVerbose=options[2].doesOccur;
|
||||
haveCopyright=options[3].doesOccur;
|
||||
srcDir=options[5].value;
|
||||
destDir=options[4].value;
|
||||
|
||||
if(argc>=2) {
|
||||
suffix=argv[1];
|
||||
} else {
|
||||
suffix=NULL;
|
||||
}
|
||||
|
||||
setUnicodeVersion(options[6].value);
|
||||
|
||||
/* prepare the filename beginning with the source dir */
|
||||
uprv_strcpy(filename, srcDir);
|
||||
basename=filename+uprv_strlen(filename);
|
||||
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
||||
*basename=U_FILE_SEP_CHAR;
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
init();
|
||||
initStore();
|
||||
|
||||
/* process Mirror.txt */
|
||||
if(suffix==NULL) {
|
||||
uprv_strcpy(basename, "Mirror.txt");
|
||||
} else {
|
||||
uprv_strcpy(basename, "Mirror");
|
||||
basename[6]='-';
|
||||
uprv_strcpy(basename+7, suffix);
|
||||
uprv_strcat(basename+7, ".txt");
|
||||
}
|
||||
parseMirror(filename, &errorCode);
|
||||
|
||||
/* process UnicodeData.txt */
|
||||
if(suffix==NULL) {
|
||||
uprv_strcpy(basename, "UnicodeData.txt");
|
||||
} else {
|
||||
uprv_strcpy(basename, "UnicodeData");
|
||||
basename[11]='-';
|
||||
uprv_strcpy(basename+12, suffix);
|
||||
uprv_strcat(basename+12, ".txt");
|
||||
}
|
||||
parseDB(filename, &errorCode);
|
||||
|
||||
/* process parsed data */
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
repeatProps();
|
||||
compactProps();
|
||||
compactStage3();
|
||||
compactStage2();
|
||||
|
||||
/* write the properties data file */
|
||||
generateData(destDir);
|
||||
}
|
||||
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
static void
|
||||
init(void) {
|
||||
}
|
||||
|
||||
/* parser for Mirror.txt ---------------------------------------------------- */
|
||||
|
||||
#define MAX_MIRROR_COUNT 2000
|
||||
|
||||
static uint32_t mirrorMappings[MAX_MIRROR_COUNT][2];
|
||||
static int32_t mirrorCount=0;
|
||||
|
||||
static void
|
||||
MirrorCode(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
char *end;
|
||||
|
||||
mirrorMappings[mirrorCount][fieldNr]=uprv_strtoul(start, &end, 16);
|
||||
if((end-start)<1 || end!=limit) {
|
||||
fprintf(stderr, "genprops: syntax error in Mirror.txt field %d at %s\n", fieldNr, start);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
MirrorFinish(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
if(++mirrorCount==MAX_MIRROR_COUNT) {
|
||||
fprintf(stderr, "genprops: too many mirror mappings\n");
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static UParseFieldFn *mirrorFields[4]={
|
||||
NULL,
|
||||
MirrorCode,
|
||||
MirrorCode,
|
||||
MirrorFinish
|
||||
};
|
||||
|
||||
static void
|
||||
parseMirror(const char *filename, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', mirrorFields, 2, NULL, pErrorCode);
|
||||
}
|
||||
|
||||
/* parser for UnicodeData.txt ----------------------------------------------- */
|
||||
|
||||
#define NO_NUMERIC_VALUE ((uint32_t)15821005)
|
||||
|
||||
/* general categories */
|
||||
extern const char *const
|
||||
genCategoryNames[U_CHAR_CATEGORY_COUNT]={
|
||||
NULL,
|
||||
@ -55,327 +227,229 @@ bidiNames[U_CHAR_DIRECTION_COUNT]={
|
||||
"WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
|
||||
};
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
/* control code properties */
|
||||
static const struct {
|
||||
uint32_t code;
|
||||
uint8_t generalCategory;
|
||||
} controlProps[]={
|
||||
/* TAB */ 0x9, U_SPACE_SEPARATOR,
|
||||
/* VT */ 0xb, U_SPACE_SEPARATOR,
|
||||
/* LF */ 0xa, U_PARAGRAPH_SEPARATOR,
|
||||
/* FF */ 0xc, U_LINE_SEPARATOR,
|
||||
/* CR */ 0xd, U_PARAGRAPH_SEPARATOR,
|
||||
/* FS */ 0x1c, U_PARAGRAPH_SEPARATOR,
|
||||
/* GS */ 0x1d, U_PARAGRAPH_SEPARATOR,
|
||||
/* RS */ 0x1e, U_PARAGRAPH_SEPARATOR,
|
||||
/* US */ 0x1f, U_SPACE_SEPARATOR,
|
||||
/* NL */ 0x85, U_PARAGRAPH_SEPARATOR
|
||||
};
|
||||
|
||||
static void
|
||||
init(void);
|
||||
UnicodeDataInit(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
|
||||
static void
|
||||
parseDB(FileStream *in);
|
||||
|
||||
static int16_t
|
||||
getField(char *line, int16_t start, int16_t limit);
|
||||
|
||||
static void
|
||||
checkLineIndex(uint32_t code, int16_t limit, int16_t length);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
extern int
|
||||
main(int argc, char *argv[]) {
|
||||
FileStream *in;
|
||||
const char *destdir = 0;
|
||||
char *arg, *filename=NULL;
|
||||
int i;
|
||||
|
||||
if(argc<=1) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n"
|
||||
"\tread the UnicodeData.txt file and \n"
|
||||
"\tcreate a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
|
||||
"\toptions:\n"
|
||||
"\t\t-v[+|-] verbose output\n"
|
||||
"\t\t-c[+|-] do (not) include a copyright notice\n"
|
||||
"\t\tfilename absolute path/filename for the\n"
|
||||
"\t\t\tUnicode database text file (default: standard input)\n",
|
||||
argv[0]);
|
||||
}
|
||||
|
||||
for(i=1; i<argc; ++i) {
|
||||
arg=argv[i];
|
||||
if(arg[0]=='-') {
|
||||
switch(arg[1]) {
|
||||
case 'v':
|
||||
beVerbose= arg[2]=='+';
|
||||
break;
|
||||
case 'c':
|
||||
haveCopyright= arg[2]=='+';
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
filename=arg;
|
||||
}
|
||||
}
|
||||
|
||||
if(filename==NULL) {
|
||||
in=T_FileStream_stdin();
|
||||
} else {
|
||||
in=T_FileStream_open(filename, "r");
|
||||
if(in==NULL) {
|
||||
fprintf(stderr, "genprops: unable to open input file %s\n", filename);
|
||||
exit(U_FILE_ACCESS_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
if (!destdir) {
|
||||
destdir = u_getDataDirectory();
|
||||
}
|
||||
|
||||
init();
|
||||
initStore();
|
||||
parseDB(in);
|
||||
repeatProps();
|
||||
compactProps();
|
||||
compactStage3();
|
||||
compactStage2();
|
||||
generateData(destdir);
|
||||
|
||||
if(in!=T_FileStream_stdin()) {
|
||||
T_FileStream_close(in);
|
||||
}
|
||||
|
||||
return 0;
|
||||
/* reset the properties */
|
||||
uprv_memset(p, 0, sizeof(Props));
|
||||
p->numericValue=NO_NUMERIC_VALUE;
|
||||
}
|
||||
|
||||
static void
|
||||
init(void) {
|
||||
}
|
||||
|
||||
/* parsing ------------------------------------------------------------------ */
|
||||
|
||||
static void
|
||||
parseDB(FileStream *in) {
|
||||
char line[300];
|
||||
UnicodeDataCode(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
char *end;
|
||||
Props p;
|
||||
uint32_t value;
|
||||
int16_t start, limit, length, i;
|
||||
bool_t hasNumericValue;
|
||||
|
||||
while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
|
||||
length=uprv_strlen(line);
|
||||
|
||||
/* remove trailing newline characters */
|
||||
while(length>0 && (line[length-1]=='\r' || line[length-1]=='\n')) {
|
||||
line[--length]=0;
|
||||
}
|
||||
|
||||
/* reset the properties */
|
||||
uprv_memset(&p, 0, sizeof(p));
|
||||
hasNumericValue=FALSE;
|
||||
|
||||
/* get the character code, field 0 */
|
||||
p.code=uprv_strtoul(line, &end, 16);
|
||||
limit=end-line;
|
||||
if(limit<1 || *end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 0 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* skip character name, field 1 */
|
||||
checkLineIndex(p.code, ++limit, length);
|
||||
limit=getField(line, limit, length);
|
||||
|
||||
/* get general category, field 2 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
limit=getField(line, start, length);
|
||||
line[limit]=0;
|
||||
for(i=1;;) {
|
||||
if(uprv_strcmp(line+start, genCategoryNames[i])==0) {
|
||||
p.generalCategory=(uint8_t)i;
|
||||
break;
|
||||
}
|
||||
if(++i==U_CHAR_CATEGORY_COUNT) {
|
||||
fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", line+start, p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* get canonical combining class, field 3 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
p.canonicalCombining=(uint8_t)uprv_strtoul(line+start, &end, 10);
|
||||
limit=end-line;
|
||||
if(start>=limit || *end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 3 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get BiDi category, field 4 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
limit=getField(line, start, length);
|
||||
line[limit]=0;
|
||||
for(i=0;;) {
|
||||
if(uprv_strcmp(line+start, bidiNames[i])==0) {
|
||||
p.bidi=(uint8_t)i;
|
||||
break;
|
||||
}
|
||||
if(++i==U_CHAR_DIRECTION_COUNT) {
|
||||
fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", line+start, p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* character decomposition mapping, field 5 */
|
||||
/* ### skip for now */
|
||||
checkLineIndex(p.code, ++limit, length);
|
||||
limit=getField(line, limit, length);
|
||||
|
||||
/* decimal digit value, field 6 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
value=uprv_strtoul(line+start, &end, 10);
|
||||
if(*end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
limit=end-line;
|
||||
if(start<limit) {
|
||||
p.numericValue=value;
|
||||
hasNumericValue=TRUE;
|
||||
}
|
||||
|
||||
/* digit value, field 7 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
value=uprv_strtoul(line+start, &end, 10);
|
||||
if(*end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 7 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
limit=end-line;
|
||||
if(start<limit) {
|
||||
if(hasNumericValue) {
|
||||
if(p.numericValue!=value) {
|
||||
fprintf(stderr, "genprops: more than one numeric value at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
} else {
|
||||
p.numericValue=value;
|
||||
hasNumericValue=TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* numeric value, field 8 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
value=uprv_strtoul(line+start, &end, 10);
|
||||
if(value>0 && *end=='/') {
|
||||
p.denominator=uprv_strtoul(end+1, &end, 10);
|
||||
}
|
||||
if(*end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
limit=end-line;
|
||||
if(start<limit) {
|
||||
if(hasNumericValue) {
|
||||
if(p.numericValue!=value) {
|
||||
fprintf(stderr, "genprops: more than one numeric value at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
} else {
|
||||
p.numericValue=value;
|
||||
hasNumericValue=TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* get Mirrored flag, field 9 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
limit=getField(line, start, length);
|
||||
if(line[start]=='Y') {
|
||||
p.isMirrored=1;
|
||||
} else if(limit-start!=1 || line[start]!='N') {
|
||||
fprintf(stderr, "genprops: syntax error in field 9 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* skip Unicode 1.0 character name, field 10 */
|
||||
checkLineIndex(p.code, ++limit, length);
|
||||
limit=getField(line, limit, length);
|
||||
|
||||
/* skip comment, field 11 */
|
||||
checkLineIndex(p.code, ++limit, length);
|
||||
limit=getField(line, limit, length);
|
||||
|
||||
/* get uppercase mapping, field 12 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
p.upperCase=uprv_strtoul(line+start, &end, 16);
|
||||
limit=end-line;
|
||||
if(*end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 12 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get lowercase mapping, field 13 */
|
||||
start=limit+1;
|
||||
checkLineIndex(p.code, start, length);
|
||||
p.lowerCase=uprv_strtoul(line+start, &end, 16);
|
||||
limit=end-line;
|
||||
if(*end!=';') {
|
||||
fprintf(stderr, "genprops: syntax error in field 13 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get titlecase mapping, field 14 */
|
||||
start=limit+1;
|
||||
if(start<length) {
|
||||
/* this is the last field */
|
||||
p.titleCase=uprv_strtoul(line+start, &end, 16);
|
||||
if(*end!=';' && *end!=0) {
|
||||
fprintf(stderr, "genprops: syntax error in field 14 at code 0x%lx\n", p.code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* debug output */
|
||||
if(beVerbose) {
|
||||
printf(
|
||||
"0x%06lx "
|
||||
"%s(%2d) "
|
||||
"comb=%3d "
|
||||
"bidi=%3s(%2d) "
|
||||
"num=%7d/%7d "
|
||||
"mirr=%d "
|
||||
"u%06lx l%06lx t%06lx"
|
||||
"\n",
|
||||
p.code,
|
||||
genCategoryNames[p.generalCategory], p.generalCategory,
|
||||
p.canonicalCombining,
|
||||
bidiNames[p.bidi], p.bidi,
|
||||
p.numericValue, p.denominator,
|
||||
p.isMirrored,
|
||||
p.upperCase, p.lowerCase, p.titleCase);
|
||||
}
|
||||
#endif
|
||||
|
||||
addProps(&p);
|
||||
}
|
||||
}
|
||||
|
||||
static int16_t
|
||||
getField(char *line, int16_t start, int16_t limit) {
|
||||
while(start<limit && line[start]!=';') {
|
||||
++start;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
static void
|
||||
checkLineIndex(uint32_t code, int16_t index, int16_t length) {
|
||||
if(index>=length) {
|
||||
fprintf(stderr, "genprops: too few fields at code 0x%lx\n", code);
|
||||
/* get the character code, field 0 */
|
||||
p->code=uprv_strtoul(start, &end, 16);
|
||||
if((end-start)<1 || end!=limit) {
|
||||
fprintf(stderr, "genprops: syntax error in field 0 at %s\n", start);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataCategory(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
int i;
|
||||
char c;
|
||||
|
||||
/* get general category, field 2 */
|
||||
c=*limit;
|
||||
*limit=0;
|
||||
for(i=1;;) {
|
||||
if(uprv_strcmp(start, genCategoryNames[i])==0) {
|
||||
p->generalCategory=(uint8_t)i;
|
||||
break;
|
||||
}
|
||||
if(++i==U_CHAR_CATEGORY_COUNT) {
|
||||
fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", start, p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
*limit=c;
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataCombining(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
char *end;
|
||||
|
||||
/* get canonical combining class, field 3 */
|
||||
p->canonicalCombining=(uint8_t)uprv_strtoul(start, &end, 10);
|
||||
if(start>=end || end!=limit) {
|
||||
fprintf(stderr, "genprops: syntax error in field 3 at code 0x%lx\n", p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataBiDi(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
int i;
|
||||
char c;
|
||||
|
||||
/* get BiDi category, field 4 */
|
||||
c=*limit;
|
||||
*limit=0;
|
||||
for(i=0;;) {
|
||||
if(uprv_strcmp(start, bidiNames[i])==0) {
|
||||
p->bidi=(uint8_t)i;
|
||||
break;
|
||||
}
|
||||
if(++i==U_CHAR_DIRECTION_COUNT) {
|
||||
fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", start, p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
*limit=c;
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataNumeric(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
uint32_t value;
|
||||
char *end;
|
||||
|
||||
/* decimal digit value, field 6 */
|
||||
/* digit value, field 7 */
|
||||
/* numeric value, field 8 */
|
||||
value=uprv_strtoul(start, &end, 10);
|
||||
if(fieldNr==8 && value>0 && *end=='/') {
|
||||
/* field 8 may contain a fractional value, get the denominator */
|
||||
p->denominator=uprv_strtoul(end+1, &end, 10);
|
||||
}
|
||||
if(end!=limit) {
|
||||
fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(start<end) {
|
||||
if(p->numericValue!=NO_NUMERIC_VALUE && p->numericValue!=value) {
|
||||
fprintf(stderr, "genprops: more than one numeric value at code 0x%lx\n", p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
p->numericValue=value;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataMirrored(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
|
||||
/* get Mirrored flag, field 9 */
|
||||
if(*start=='Y') {
|
||||
p->isMirrored=1;
|
||||
} else if(limit-start!=1 || *start!='N') {
|
||||
fprintf(stderr, "genprops: syntax error in field 9 at code 0x%lx\n", p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataCase(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
Props *p=(Props *)context;
|
||||
char *end;
|
||||
uint32_t mapping;
|
||||
|
||||
/* get uppercase mapping, field 12 */
|
||||
/* get lowercase mapping, field 13 */
|
||||
/* get titlecase mapping, field 14 */
|
||||
mapping=uprv_strtoul(start, &end, 16);
|
||||
if(end!=limit) {
|
||||
fprintf(stderr, "genprops: syntax error in field %d at code 0x%lx\n", fieldNr, p->code);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
switch(fieldNr) {
|
||||
case 12:
|
||||
p->upperCase=mapping;
|
||||
break;
|
||||
case 13:
|
||||
p->lowerCase=mapping;
|
||||
break;
|
||||
case 14:
|
||||
p->titleCase=mapping;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
UnicodeDataFinish(void *context, char *start, char *limit, int32_t fieldNr, UErrorCode *pErrorCode) {
|
||||
static int32_t mirrorIndex=0;
|
||||
Props *p=(Props *)context;
|
||||
int16_t i;
|
||||
|
||||
if(p->numericValue==NO_NUMERIC_VALUE) {
|
||||
p->numericValue=0;
|
||||
}
|
||||
|
||||
/* override properties for some common control characters */
|
||||
if(p->generalCategory==U_CONTROL_CHAR) {
|
||||
for(i=0; i<sizeof(controlProps)/sizeof(controlProps[0]); ++i) {
|
||||
if(controlProps[i].code==p->code) {
|
||||
p->generalCategory=controlProps[i].generalCategory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* set additional properties from previously parsed files */
|
||||
if(mirrorIndex<mirrorCount && p->code==mirrorMappings[mirrorIndex][0]) {
|
||||
p->mirrorMapping=mirrorMappings[mirrorIndex++][1];
|
||||
}
|
||||
|
||||
addProps(p);
|
||||
}
|
||||
|
||||
static UParseFieldFn *unicodeDBFields[17]={
|
||||
UnicodeDataInit,
|
||||
|
||||
UnicodeDataCode,
|
||||
NULL, /* 1: character name */
|
||||
UnicodeDataCategory,
|
||||
UnicodeDataCombining,
|
||||
UnicodeDataBiDi,
|
||||
NULL, /* 5: character decomposition mapping */
|
||||
UnicodeDataNumeric,
|
||||
UnicodeDataNumeric,
|
||||
UnicodeDataNumeric,
|
||||
UnicodeDataMirrored,
|
||||
NULL, /* 10: Unicode 1.0 character name */
|
||||
NULL, /* 11: comment */
|
||||
UnicodeDataCase,
|
||||
UnicodeDataCase,
|
||||
UnicodeDataCase,
|
||||
|
||||
UnicodeDataFinish
|
||||
};
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode) {
|
||||
Props p;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', unicodeDBFields, 15, &p, pErrorCode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
|
@ -25,8 +25,8 @@
|
||||
|
||||
/* character properties */
|
||||
typedef struct {
|
||||
uint32_t code, lowerCase, upperCase, titleCase;
|
||||
uint32_t decomp[16];
|
||||
uint32_t code, lowerCase, upperCase, titleCase, mirrorMapping;
|
||||
/* ### uint32_t decomp[16]; */
|
||||
uint32_t numericValue, denominator;
|
||||
uint8_t generalCategory, canonicalCombining, bidi, isMirrored;
|
||||
} Props;
|
||||
@ -42,6 +42,9 @@ extern const char *const
|
||||
genCategoryNames[];
|
||||
|
||||
/* prototypes */
|
||||
extern void
|
||||
setUnicodeVersion(const char *v);
|
||||
|
||||
extern void
|
||||
initStore(void);
|
||||
|
||||
|
@ -47,7 +47,7 @@ The following is a description of format version 1.0 .
|
||||
Data contents:
|
||||
|
||||
The contents is a parsed, binary form of several Unicode character
|
||||
database files, mose prominently UnicodeData.txt.
|
||||
database files, most prominently UnicodeData.txt.
|
||||
|
||||
Any Unicode code point from 0 to 0x10ffff can be looked up to get
|
||||
the properties, if any, for that code point. This means that the input
|
||||
@ -72,7 +72,7 @@ Formally, the file contains the following structures:
|
||||
A1 const uint16_t STAGE_3_BITS(=4);
|
||||
(STAGE_1_BITS(=11) not stored, implicitly=21-(STAGE_2_BITS+STAGE_3_BITS))
|
||||
A2 const uint16_t exceptionsIndex; -- 32-bit unit index
|
||||
A3 const uint16_t ucharsIndex; -- 32-bit unit index
|
||||
A3 const uint16_t reservedIndex;
|
||||
A4 const uint16_t reservedIndex;
|
||||
A5 const uint16_t reservedIndex;
|
||||
A6 const uint16_t reservedIndex;
|
||||
@ -84,10 +84,7 @@ Formally, the file contains the following structures:
|
||||
(possible 1*uint16_t for padding to 4-alignment)
|
||||
|
||||
P const uint32_t props32[variable size];
|
||||
E const uint16_t exceptions[variable size];
|
||||
(possible 1*uint16_t for padding to 4-alignment)
|
||||
|
||||
U const UChar uchars[variable size];
|
||||
E const uint32_t exceptions[variable size];
|
||||
|
||||
3-stage lookup and properties:
|
||||
|
||||
@ -124,8 +121,7 @@ arrive at an index into the props32[] table containing the character
|
||||
properties for c.
|
||||
For some characters, not all of the properties can be efficiently encoded
|
||||
using 32 bits. For them, the 32-bit word contains an index into the exceptions[]
|
||||
array. Some exception entries, in turn, may contain indexes into the uchars[]
|
||||
array of Unicode strings, especially for non-1:1 case mappings.
|
||||
array.
|
||||
|
||||
The first stage consumes the 11 most significant bits of the 21-bit code point
|
||||
and results in an index into the second stage:
|
||||
@ -142,28 +138,27 @@ specific value, which itself is only an index into the props32[] table:
|
||||
|
||||
uint16_t i=p16[i3+(c&0xf)];
|
||||
|
||||
Note that the bit numbers and shifts actually depend on the STAGE_2/3_BITS
|
||||
in p16[0..1].
|
||||
|
||||
There is finally the 32-bit encoded set of properties for c:
|
||||
|
||||
uint32_t props=p32[i];
|
||||
|
||||
For some characters, this contains an index into the exceptions array:
|
||||
|
||||
if(props&0x20) {
|
||||
uint16_t e=(uint16_t)(props>>20);
|
||||
if(props&EXCEPTION_BIT) {
|
||||
uint16_t e=(uint16_t)(props>>VALUE_SHIFT);
|
||||
...
|
||||
}
|
||||
|
||||
The exception values are a variable number of uint16_t starting at
|
||||
The exception values are a variable number of uint32_t starting at
|
||||
|
||||
const uint16_t *pe=p16+2*exceptionsIndex+e;
|
||||
const uint32_t *pe=p32+exceptionsIndex+e;
|
||||
|
||||
The first uint16_t there contains flags about what values actually follow it.
|
||||
Some of those may be indexes for case mappings or similar and point to strings
|
||||
(zero-terminated) in the uchars[] array:
|
||||
|
||||
...
|
||||
uint16_t u=pe[index depends on pe[0]];
|
||||
const UChar *pu=(const UChar *)(p32+ucharsIndex)+u;
|
||||
The first uint32_t there contains flags about what values actually follow it.
|
||||
Some of the exception values are UChar32 code points for the case mappings,
|
||||
others are numeric values etc.
|
||||
|
||||
32-bit properties sets:
|
||||
|
||||
@ -171,9 +166,9 @@ Each 32-bit properties word contains:
|
||||
|
||||
0.. 4 general category
|
||||
5 has exception values
|
||||
6.. 9 BiDi category (the 5 explicit codes stored as one)
|
||||
10 is mirrored
|
||||
11..19 reserved
|
||||
6..10 BiDi category
|
||||
11 is mirrored
|
||||
12..19 reserved
|
||||
20..31 value according to bits 0..5:
|
||||
if(has exception) {
|
||||
exception index;
|
||||
@ -181,52 +176,82 @@ Each 32-bit properties word contains:
|
||||
case Ll: delta to uppercase; -- same as titlecase
|
||||
case Lu: delta to lowercase; -- titlecase is same as c
|
||||
case Lt: delta to lowercase; -- uppercase is same as c
|
||||
case Mn: canonical category;
|
||||
case Mn: combining class;
|
||||
case N*: numeric value;
|
||||
default: *;
|
||||
default:
|
||||
if(is mirrored) {
|
||||
delta to mirror
|
||||
} else {
|
||||
0
|
||||
};
|
||||
}
|
||||
|
||||
Exception values:
|
||||
|
||||
The first uint16_t word of exception values for a code point contains flags
|
||||
that indicate which values follow:
|
||||
In the first uint32_t exception word for a code point,
|
||||
bits
|
||||
31..24 reserved
|
||||
23..16 combining class
|
||||
15..0 flags that indicate which values follow:
|
||||
|
||||
bit
|
||||
0 has uppercase mapping
|
||||
1 has lowercase mapping
|
||||
2 has titlecase mapping
|
||||
3 has canonical category
|
||||
4 has numeric value (numerator)
|
||||
5 has denominator value
|
||||
3 has numeric value (numerator)
|
||||
4 has denominator value
|
||||
5 has a mirror-image Unicode code point
|
||||
|
||||
According to the flags in this word, one or more uint16_t words follow it
|
||||
According to the flags in this word, one or more uint32_t words follow it
|
||||
in the sequence of the bit flags in the flags word; if a flag is not set,
|
||||
then the value is missing or 0:
|
||||
|
||||
For the case mappings, one uint16_t word each is an index into uchars[],
|
||||
pointing to a zero-terminated UChar string for the case mapping.
|
||||
For the case mappings and the mirror-image Unicode code point,
|
||||
one uint32_t or UChar32 each is the code point.
|
||||
|
||||
For the canonical category, the lower 8 bits of a uint16_t word give the
|
||||
category value directly. The upper 8 bits are currently reserved.
|
||||
|
||||
For the numeric/numerator value, a uint16_t word contains the value directly,
|
||||
For the numeric/numerator value, an int32_t word contains the value directly,
|
||||
except for when there is no numerator but a denominator, then the numerator
|
||||
is 1.
|
||||
|
||||
For the denominator value, a uint16_t word contains the value directly.
|
||||
For the denominator value, a uint32_t word contains the value directly.
|
||||
|
||||
Example:
|
||||
U+2160, ROMAN NUMERAL ONE, needs an exception because it has a lowercase
|
||||
mapping and a numeric value.
|
||||
Its exception values would be stored as 3 uint16_t words:
|
||||
Its exception values would be stored as 3 uint32_t words:
|
||||
|
||||
- flags=0x12 (see above)
|
||||
- lowercase index into uchars[]
|
||||
- flags=0x0a (see above) with combining class 0
|
||||
- lowercase mapping 0x2170
|
||||
- numeric value=1
|
||||
|
||||
----------------------------------------------------------------------------- */
|
||||
|
||||
/* ### finding an exception value */
|
||||
#define HAVE_EXCEPTION_VALUE(flags, index) ((flags)&(1<<(index)))
|
||||
|
||||
/* number of set bits (1-bits) in an integer value 0..31 */
|
||||
static uint8_t flagsOffset[32]={
|
||||
0, 1, 1, 2, 1, 2, 2, 3,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
2, 3, 3, 4, 3, 4, 4, 5
|
||||
};
|
||||
|
||||
#define GET_EXCEPTION_OFFSET(flags, index, offset) { \
|
||||
if((index)>=5) { \
|
||||
(offset)+=flagsOffset[(flags)&0x1f]; \
|
||||
(flags)>>=5; \
|
||||
(index)-=5; \
|
||||
} \
|
||||
(offset)+=flagsOffset[(flags)&((1<<(index))-1)]; \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
static const UDataInfo dataInfo={
|
||||
static UDataInfo dataInfo={
|
||||
sizeof(UDataInfo),
|
||||
0,
|
||||
|
||||
@ -262,6 +287,19 @@ enum {
|
||||
MAX_STAGE_2_COUNT=MAX_PROPS_COUNT
|
||||
};
|
||||
|
||||
/* definitions for the properties words */
|
||||
enum {
|
||||
EXCEPTION_SHIFT=5,
|
||||
BIDI_SHIFT,
|
||||
MIRROR_SHIFT=BIDI_SHIFT+5,
|
||||
VALUE_SHIFT=20,
|
||||
|
||||
EXCEPTION_BIT=1UL<<EXCEPTION_SHIFT,
|
||||
VALUE_BITS=32-VALUE_SHIFT,
|
||||
MAX_VALUE=(1UL<<(VALUE_BITS-1))-1,
|
||||
MIN_VALUE=-(MAX_VALUE+1)
|
||||
};
|
||||
|
||||
static uint16_t stage1[STAGE_1_BLOCK], stage2[MAX_STAGE_2_COUNT],
|
||||
stage3[MAX_PROPS_COUNT], map[MAX_PROPS_COUNT];
|
||||
|
||||
@ -273,16 +311,14 @@ static uint32_t props[MAX_PROPS_COUNT], props32[MAX_PROPS_COUNT];
|
||||
static uint16_t propsTop=STAGE_3_BLOCK; /* the first props[] are always empty */
|
||||
|
||||
/* exceptions values */
|
||||
static uint16_t exceptions[MAX_EXCEPTIONS_COUNT+20];
|
||||
static uint32_t exceptions[MAX_EXCEPTIONS_COUNT+20];
|
||||
static uint16_t exceptionsTop=0;
|
||||
|
||||
/* Unicode characters, e.g. for special casing or decomposition */
|
||||
|
||||
static UChar uchars[MAX_UCHAR_COUNT+20];
|
||||
static uint16_t ucharsTop=0;
|
||||
|
||||
/* statistics */
|
||||
|
||||
static uint16_t exceptionsCount=0;
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
@ -320,6 +356,38 @@ addUChars(const UChar *s, uint16_t length);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* ### this must become public in putil.c */
|
||||
static void
|
||||
__versionFromString(UVersionInfo versionArray, const char *versionString) {
|
||||
char *end;
|
||||
uint16_t part=0;
|
||||
|
||||
if(versionArray==NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(versionString!=NULL) {
|
||||
for(;;) {
|
||||
versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
|
||||
if(*end!=U_VERSION_DELIMITER || ++part==U_MAX_VERSION_LENGTH) {
|
||||
break;
|
||||
}
|
||||
versionString=end+1;
|
||||
}
|
||||
}
|
||||
|
||||
while(part<U_MAX_VERSION_LENGTH) {
|
||||
versionArray[part++]=0;
|
||||
}
|
||||
}
|
||||
|
||||
extern void
|
||||
setUnicodeVersion(const char *v) {
|
||||
UVersionInfo version;
|
||||
__versionFromString(version, v);
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
extern void
|
||||
initStore() {
|
||||
uprv_memset(stage1, 0, sizeof(stage1));
|
||||
@ -334,12 +402,6 @@ initStore() {
|
||||
|
||||
extern void
|
||||
addProps(Props *p) {
|
||||
/* map the explicit BiDi codes to one single value */
|
||||
static const uint8_t bidiMap[U_CHAR_DIRECTION_COUNT]={
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 15, 15, 11, 15, 15, 15, 12, 13
|
||||
};
|
||||
|
||||
uint32_t x;
|
||||
int32_t value;
|
||||
uint16_t count;
|
||||
@ -394,7 +456,7 @@ addProps(Props *p) {
|
||||
if(!(isMn || isNumber)) {
|
||||
value=(int32_t)p->code-(int32_t)p->upperCase;
|
||||
} else {
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
@ -403,7 +465,7 @@ addProps(Props *p) {
|
||||
if(!(isMn || isNumber)) {
|
||||
value=(int32_t)p->lowerCase-(int32_t)p->code;
|
||||
} else {
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
@ -412,7 +474,7 @@ addProps(Props *p) {
|
||||
if(!(isMn || isNumber)) {
|
||||
value=(int32_t)p->code-(int32_t)p->titleCase;
|
||||
} else {
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
@ -421,7 +483,7 @@ addProps(Props *p) {
|
||||
if(isMn) {
|
||||
value=p->canonicalCombining;
|
||||
} else {
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
@ -430,7 +492,7 @@ addProps(Props *p) {
|
||||
if(isNumber) {
|
||||
value=p->numericValue;
|
||||
} else {
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
@ -439,9 +501,15 @@ addProps(Props *p) {
|
||||
value=p->denominator;
|
||||
++count;
|
||||
}
|
||||
if(p->isMirrored) {
|
||||
if(p->mirrorMapping!=0) {
|
||||
value=(int32_t)p->mirrorMapping-(int32_t)p->code;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
|
||||
/* handle exceptions */
|
||||
if(count>1 || x!=0 || value<-2048 || 2047<value) {
|
||||
if(count>1 || x!=0 || value<MIN_VALUE || MAX_VALUE<value) {
|
||||
/* this code point needs exception values */
|
||||
if(DO_DEBUG_OUT /* ### beVerbose */) {
|
||||
if(x!=0) {
|
||||
@ -454,18 +522,58 @@ addProps(Props *p) {
|
||||
}
|
||||
|
||||
++exceptionsCount;
|
||||
x=1<<5;
|
||||
x=EXCEPTION_BIT;
|
||||
|
||||
/* ### allocate and create exception values */
|
||||
value=-exceptionsCount;
|
||||
/* allocate and create exception values */
|
||||
value=exceptionsTop;
|
||||
if(value>=4096) {
|
||||
fprintf(stderr, "genprops: out of exceptions memory\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
} else {
|
||||
uint32_t first=(uint32_t)p->canonicalCombining<<16;
|
||||
uint16_t length=1;
|
||||
|
||||
if(p->upperCase!=0) {
|
||||
first|=1;
|
||||
exceptions[value+length++]=p->upperCase;
|
||||
}
|
||||
if(p->lowerCase!=0) {
|
||||
first|=2;
|
||||
exceptions[value+length++]=p->lowerCase;
|
||||
}
|
||||
if(p->upperCase!=p->titleCase) {
|
||||
first|=4;
|
||||
exceptions[value+length++]=p->titleCase;
|
||||
}
|
||||
if(p->denominator==0) {
|
||||
if(p->numericValue!=0) {
|
||||
first|=8;
|
||||
exceptions[value+length++]=p->numericValue;
|
||||
}
|
||||
} else {
|
||||
if(p->numericValue!=1) {
|
||||
first|=8;
|
||||
exceptions[value+length++]=p->numericValue;
|
||||
}
|
||||
first|=0x10;
|
||||
exceptions[value+length++]=p->denominator;
|
||||
}
|
||||
if(p->isMirrored) {
|
||||
first|=0x20;
|
||||
exceptions[value+length++]=p->mirrorMapping;
|
||||
}
|
||||
|
||||
exceptions[value]=first;
|
||||
exceptionsTop+=length;
|
||||
}
|
||||
}
|
||||
|
||||
/* put together the 32-bit word of encoded properties */
|
||||
x|=
|
||||
p->generalCategory |
|
||||
bidiMap[p->bidi]<<6UL |
|
||||
p->isMirrored<<10UL |
|
||||
(uint32_t)value<<20;
|
||||
(uint32_t)p->generalCategory |
|
||||
(uint32_t)p->bidi<<BIDI_SHIFT |
|
||||
(uint32_t)p->isMirrored<<MIRROR_SHIFT |
|
||||
(uint32_t)value<<VALUE_SHIFT;
|
||||
|
||||
setProps(p->code, x, &count, &count, &count);
|
||||
|
||||
@ -911,17 +1019,15 @@ generateData(const char *dataDir) {
|
||||
}
|
||||
|
||||
indexes[2]=offset+=propsTop; /* uint32_t offset to exceptions[] */
|
||||
indexes[3]=offset+=(exceptionsTop+1)/2; /* uint32_t offset to uchars[], include padding */
|
||||
|
||||
size=4*offset+ucharsTop*U_SIZEOF_UCHAR; /* total size of data */
|
||||
size=4*(offset+exceptionsTop); /* total size of data */
|
||||
|
||||
if(beVerbose) {
|
||||
printf("number of stage 2 entries: %5u\n", stage2Top);
|
||||
printf("number of stage 3 entries: %5u\n", stage3Top);
|
||||
printf("number of unique properties values: %5u\n", propsTop);
|
||||
printf("number of code points with exceptions: %5u\n", exceptionsCount);
|
||||
printf("size in bytes of exceptions: %5u\n", 2*exceptionsTop);
|
||||
printf("size in bytes of Uchars: %5u\n", ucharsTop*U_SIZEOF_UCHAR);
|
||||
printf("size in bytes of exceptions: %5u\n", 4*exceptionsTop);
|
||||
printf("data size: %6lu\n", size);
|
||||
}
|
||||
|
||||
@ -939,9 +1045,7 @@ generateData(const char *dataDir) {
|
||||
udata_writeBlock(pData, stage3, 2*stage3Top);
|
||||
udata_writePadding(pData, (stage2Top+stage3Top)&1);
|
||||
udata_writeBlock(pData, props32, 4*propsTop);
|
||||
udata_writeBlock(pData, exceptions, 2*exceptionsTop);
|
||||
udata_writePadding(pData, exceptionsTop&1);
|
||||
udata_writeBlock(pData, uchars, ucharsTop*U_SIZEOF_UCHAR);
|
||||
udata_writeBlock(pData, exceptions, 4*exceptionsTop);
|
||||
|
||||
/* finish up */
|
||||
dataLength=udata_finish(pData, &errorCode);
|
||||
|
Loading…
Reference in New Issue
Block a user