2004-12-31 13:28:06 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2011-12-03 00:20:31 +00:00
|
|
|
* Copyright (C) 2004-2011, International Business Machines
|
2004-12-31 13:28:06 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: genbidi.c
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2004dec30
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* This program reads several of the Unicode character database text files,
|
|
|
|
* parses them, and extracts the bidi/shaping properties for each character.
|
|
|
|
* It then writes a binary file containing the properties
|
|
|
|
* that is designed to be used directly for random-access to
|
|
|
|
* the properties of each Unicode character.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#include "unicode/putil.h"
|
|
|
|
#include "unicode/uclean.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
#include "cstring.h"
|
|
|
|
#include "uarrsort.h"
|
|
|
|
#include "unewdata.h"
|
|
|
|
#include "uoptions.h"
|
|
|
|
#include "uparse.h"
|
|
|
|
#include "propsvec.h"
|
|
|
|
#include "ubidi_props.h"
|
|
|
|
#include "genbidi.h"
|
|
|
|
|
|
|
|
/* Number of elements of an array; the argument must be an actual array, not a pointer. */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
|
|
|
|
|
|
|
/* data --------------------------------------------------------------------- */
|
|
|
|
|
2008-10-23 06:00:59 +00:00
|
|
|
/* Properties vectors filled in by the line functions in this file; created in main() via upvec_open(2, ...). */
UPropsVectors *pv;
|
2004-12-31 13:28:06 +00:00
|
|
|
|
|
|
|
/* Command-line flags; main() overwrites both from the --verbose and --copyright options. */
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
|
|
|
|
|
|
|
/* prototypes --------------------------------------------------------------- */
|
|
|
|
|
|
|
|
static UBool
|
|
|
|
isToken(const char *token, const char *s);
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseDB(const char *filename, UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
/* miscellaneous ------------------------------------------------------------ */
|
|
|
|
|
|
|
|
/* TODO: more common code, move functions to uparse.h|c */
|
|
|
|
|
|
|
|
/*
 * Trims a field in place.
 *
 * s      start of the field
 * limit  one past the last character of the field; a NUL is written at or
 *        before this position, so the byte must be writable
 *
 * Leading white space is skipped with u_skipWhitespace(); trailing spaces and
 * tabs are cut off by writing the terminating NUL after the last other
 * character. Returns the start of the trimmed, NUL-terminated field.
 */
static char *
trimTerminateField(char *s, char *limit) {
    char *first=(char *)u_skipWhitespace(s);
    char *stop=limit;

    /* walk backwards over trailing spaces and tabs, but never past the start */
    while(first<stop) {
        char last=stop[-1];
        if(last!=' ' && last!='\t') {
            break;
        }
        --stop;
    }
    *stop=0;

    return first;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseTwoFieldFile(char *filename, char *basename,
|
|
|
|
const char *ucdFile, const char *suffix,
|
|
|
|
UParseLineFn *lineFn,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
char *fields[2][2];
|
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
writeUCDFilename(basename, ucdFile, suffix);
|
|
|
|
|
|
|
|
u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
bidiClassLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
/* parse files with single enumerated properties ---------------------------- */
|
|
|
|
|
|
|
|
/* TODO: more common code, move functions to uparse.h|c */
|
|
|
|
|
|
|
|
struct SingleEnum {
|
|
|
|
const char *ucdFile, *propName;
|
|
|
|
UProperty prop;
|
|
|
|
int32_t vecWord, vecShift;
|
|
|
|
uint32_t vecMask;
|
|
|
|
};
|
|
|
|
typedef struct SingleEnum SingleEnum;
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseSingleEnumFile(char *filename, char *basename, const char *suffix,
|
|
|
|
const SingleEnum *sen,
|
|
|
|
UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
static const SingleEnum jtSingleEnum={
|
|
|
|
"DerivedJoiningType", "joining type",
|
|
|
|
UCHAR_JOINING_TYPE,
|
|
|
|
0, UBIDI_JT_SHIFT, UBIDI_JT_MASK
|
|
|
|
};
|
|
|
|
|
|
|
|
static const SingleEnum jgSingleEnum={
|
|
|
|
"DerivedJoiningGroup", "joining group",
|
|
|
|
UCHAR_JOINING_GROUP,
|
2005-01-01 18:00:17 +00:00
|
|
|
1, 0, 0xff /* column 1 bits 7..0 */
|
2004-12-31 13:28:06 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
singleEnumLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
const SingleEnum *sen;
|
|
|
|
char *s;
|
2008-10-22 19:50:07 +00:00
|
|
|
uint32_t start, end, uv;
|
2004-12-31 13:28:06 +00:00
|
|
|
int32_t value;
|
|
|
|
|
|
|
|
sen=(const SingleEnum *)context;
|
|
|
|
|
2008-10-22 19:50:07 +00:00
|
|
|
u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
|
2004-12-31 13:28:06 +00:00
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse property alias */
|
|
|
|
s=trimTerminateField(fields[1][0], fields[1][1]);
|
|
|
|
value=u_getPropertyValueEnum(sen->prop, s);
|
|
|
|
if(value<0) {
|
|
|
|
if(sen->prop==UCHAR_BLOCK) {
|
|
|
|
if(isToken("Greek", s)) {
|
|
|
|
value=UBLOCK_GREEK; /* Unicode 3.2 renames this to "Greek and Coptic" */
|
|
|
|
} else if(isToken("Combining Marks for Symbols", s)) {
|
|
|
|
value=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
|
|
|
|
} else if(isToken("Private Use", s)) {
|
|
|
|
value=UBLOCK_PRIVATE_USE; /* Unicode 3.2 renames this to "Private Use Area" */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(value<0) {
|
|
|
|
fprintf(stderr, "genbidi error: unknown %s name in %s.txt field 1 at %s\n",
|
|
|
|
sen->propName, sen->ucdFile, s);
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
uv=(uint32_t)(value<<sen->vecShift);
|
|
|
|
if((uv&sen->vecMask)!=uv) {
|
|
|
|
fprintf(stderr, "genbidi error: %s value overflow (0x%x) at %s\n",
|
|
|
|
sen->propName, (int)uv, s);
|
|
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
|
|
}
|
|
|
|
|
2008-10-23 06:00:59 +00:00
|
|
|
upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
|
|
|
|
sen->propName, u_errorName(*pErrorCode));
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseSingleEnumFile(char *filename, char *basename, const char *suffix,
|
|
|
|
const SingleEnum *sen,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
char *fields[2][2];
|
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
writeUCDFilename(basename, sen->ucdFile, suffix);
|
|
|
|
|
|
|
|
u_parseDelimitedFile(filename, ';', fields, 2, singleEnumLineFn, (void *)sen, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "error parsing %s.txt: %s\n", sen->ucdFile, u_errorName(*pErrorCode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse files with multiple binary properties ------------------------------ */
|
|
|
|
|
|
|
|
/* TODO: more common code, move functions to uparse.h|c */
|
|
|
|
|
|
|
|
/* TODO: similar to genbidi/props2.c but not the same; same as in gencase/gencase.c */
|
|
|
|
|
|
|
|
/* One binary property and the bits that represent it in the properties vectors. */
typedef struct Binary {
    const char *propName;   /* property name as it appears in field 1 of the UCD file */
    int32_t vecWord;        /* column passed to upvec_setValue() */
    uint32_t vecValue;      /* value passed to upvec_setValue() when the property applies */
    uint32_t vecMask;       /* mask passed to upvec_setValue(); must not be 0 */
} Binary;
|
|
|
|
|
|
|
|
struct Binaries {
|
|
|
|
const char *ucdFile;
|
|
|
|
const Binary *binaries;
|
|
|
|
int32_t binariesCount;
|
|
|
|
};
|
|
|
|
typedef struct Binaries Binaries;
|
|
|
|
|
|
|
|
static const Binary
|
|
|
|
propListNames[]={
|
|
|
|
{ "Bidi_Control", 0, U_MASK(UBIDI_BIDI_CONTROL_SHIFT), U_MASK(UBIDI_BIDI_CONTROL_SHIFT) },
|
|
|
|
{ "Join_Control", 0, U_MASK(UBIDI_JOIN_CONTROL_SHIFT), U_MASK(UBIDI_JOIN_CONTROL_SHIFT) }
|
|
|
|
};
|
|
|
|
|
|
|
|
static const Binaries
|
|
|
|
propListBinaries={
|
|
|
|
"PropList", propListNames, LENGTHOF(propListNames)
|
|
|
|
};
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
binariesLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
const Binaries *bin;
|
|
|
|
char *s;
|
2008-10-22 19:50:07 +00:00
|
|
|
uint32_t start, end;
|
2004-12-31 13:28:06 +00:00
|
|
|
int32_t i;
|
|
|
|
|
|
|
|
bin=(const Binaries *)context;
|
|
|
|
|
2008-10-22 19:50:07 +00:00
|
|
|
u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
|
2004-12-31 13:28:06 +00:00
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse binary property name */
|
|
|
|
s=(char *)u_skipWhitespace(fields[1][0]);
|
|
|
|
for(i=0;; ++i) {
|
|
|
|
if(i==bin->binariesCount) {
|
|
|
|
/* ignore unrecognized properties */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if(isToken(bin->binaries[i].propName, s)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(bin->binaries[i].vecMask==0) {
|
|
|
|
fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n",
|
|
|
|
(int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
|
|
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
|
|
}
|
|
|
|
|
2008-10-23 06:00:59 +00:00
|
|
|
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
|
|
|
|
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseBinariesFile(char *filename, char *basename, const char *suffix,
|
|
|
|
const Binaries *bin,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
char *fields[2][2];
|
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
writeUCDFilename(basename, bin->ucdFile, suffix);
|
|
|
|
|
|
|
|
u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
|
|
|
|
/* Indexes into options[]; the order here must match the order of the entries there. */
enum {
    HELP_H,                 /* -h */
    HELP_QUESTION_MARK,     /* -? */
    VERBOSE,                /* -v */
    COPYRIGHT,              /* -c */
    DESTDIR,                /* -d */
    SOURCEDIR,              /* -s */
    UNICODE_VERSION,        /* -u */
    ICUDATADIR,             /* -i */
    CSOURCE                 /* -C */
};
|
|
|
|
|
|
|
|
/* Keep these values in sync with the above enums */
|
|
|
|
static UOption options[]={
|
|
|
|
UOPTION_HELP_H,
|
|
|
|
UOPTION_HELP_QUESTION_MARK,
|
|
|
|
UOPTION_VERBOSE,
|
|
|
|
UOPTION_COPYRIGHT,
|
|
|
|
UOPTION_DESTDIR,
|
|
|
|
UOPTION_SOURCEDIR,
|
2005-04-28 23:51:52 +00:00
|
|
|
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
|
|
|
|
UOPTION_ICUDATADIR,
|
|
|
|
UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
|
2004-12-31 13:28:06 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
extern int
|
|
|
|
main(int argc, char* argv[]) {
|
|
|
|
char filename[300];
|
|
|
|
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
|
|
|
|
char *basename=NULL;
|
|
|
|
UErrorCode errorCode=U_ZERO_ERROR;
|
|
|
|
|
|
|
|
U_MAIN_INIT_ARGS(argc, argv);
|
|
|
|
|
|
|
|
/* preset then read command line options */
|
|
|
|
options[DESTDIR].value=u_getDataDirectory();
|
|
|
|
options[SOURCEDIR].value="";
|
|
|
|
options[UNICODE_VERSION].value="";
|
|
|
|
options[ICUDATADIR].value=u_getDataDirectory();
|
|
|
|
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
|
|
|
|
|
|
|
/* error handling, printing usage message */
|
|
|
|
if(argc<0) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"error in command line argument \"%s\"\n",
|
|
|
|
argv[-argc]);
|
|
|
|
}
|
|
|
|
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
|
|
|
/*
|
|
|
|
* Broken into chucks because the C89 standard says the minimum
|
|
|
|
* required supported string length is 509 bytes.
|
|
|
|
*/
|
|
|
|
fprintf(stderr,
|
|
|
|
"Usage: %s [-options] [suffix]\n"
|
|
|
|
"\n"
|
|
|
|
"read the UnicodeData.txt file and other Unicode properties files and\n"
|
|
|
|
"create a binary file " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE " with the bidi/shaping properties\n"
|
|
|
|
"\n",
|
|
|
|
argv[0]);
|
|
|
|
fprintf(stderr,
|
|
|
|
"Options:\n"
|
|
|
|
"\t-h or -? or --help this usage text\n"
|
|
|
|
"\t-v or --verbose verbose output\n"
|
|
|
|
"\t-c or --copyright include a copyright notice\n"
|
2005-04-29 23:35:00 +00:00
|
|
|
"\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
|
|
|
|
"\t-C or --csource generate a .c source file rather than the .icu binary\n");
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr,
|
|
|
|
"\t-d or --destdir destination directory, followed by the path\n"
|
|
|
|
"\t-s or --sourcedir source directory, followed by the path\n"
|
|
|
|
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
|
|
|
"\t followed by path, defaults to %s\n"
|
|
|
|
"\tsuffix suffix that is to be appended with a '-'\n"
|
|
|
|
"\t to the source file basenames before opening;\n"
|
|
|
|
"\t 'genbidi new' will read UnicodeData-new.txt etc.\n",
|
|
|
|
u_getDataDirectory());
|
|
|
|
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get the options values */
|
|
|
|
beVerbose=options[VERBOSE].doesOccur;
|
|
|
|
haveCopyright=options[COPYRIGHT].doesOccur;
|
|
|
|
srcDir=options[SOURCEDIR].value;
|
|
|
|
destDir=options[DESTDIR].value;
|
|
|
|
|
|
|
|
if(argc>=2) {
|
|
|
|
suffix=argv[1];
|
|
|
|
} else {
|
|
|
|
suffix=NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(options[UNICODE_VERSION].doesOccur) {
|
|
|
|
setUnicodeVersion(options[UNICODE_VERSION].value);
|
|
|
|
}
|
|
|
|
/* else use the default dataVersion in store.c */
|
|
|
|
|
|
|
|
if (options[ICUDATADIR].doesOccur) {
|
|
|
|
u_setDataDirectory(options[ICUDATADIR].value);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* prepare the filename beginning with the source dir */
|
|
|
|
uprv_strcpy(filename, srcDir);
|
|
|
|
basename=filename+uprv_strlen(filename);
|
|
|
|
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
|
|
|
*basename++=U_FILE_SEP_CHAR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize */
|
2008-10-23 06:00:59 +00:00
|
|
|
pv=upvec_open(2, &errorCode);
|
2004-12-31 13:28:06 +00:00
|
|
|
|
|
|
|
/* process BidiMirroring.txt */
|
|
|
|
writeUCDFilename(basename, "BidiMirroring", suffix);
|
|
|
|
parseBidiMirroring(filename, &errorCode);
|
|
|
|
|
|
|
|
/* process additional properties files */
|
|
|
|
*basename=0;
|
|
|
|
|
|
|
|
parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);
|
|
|
|
|
|
|
|
parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, &errorCode);
|
|
|
|
|
|
|
|
parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, &errorCode);
|
|
|
|
|
|
|
|
/* process UnicodeData.txt */
|
|
|
|
writeUCDFilename(basename, "UnicodeData", suffix);
|
|
|
|
parseDB(filename, &errorCode);
|
|
|
|
|
|
|
|
/* set proper bidi class for unassigned code points (Cn) */
|
|
|
|
parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, &errorCode);
|
|
|
|
|
|
|
|
/* process parsed data */
|
|
|
|
if(U_SUCCESS(errorCode)) {
|
|
|
|
/* write the properties data file */
|
2005-04-28 23:51:52 +00:00
|
|
|
generateData(destDir, options[CSOURCE].doesOccur);
|
2004-12-31 13:28:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
u_cleanup();
|
|
|
|
return errorCode;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Writes "<filename>[-<suffix>].txt" to basename.
 *
 * basename  destination; the caller must provide enough room, no length is checked
 * filename  base name of the UCD file
 * suffix    optional suffix that is appended after a '-'; NULL for none
 */
U_CFUNC void
writeUCDFilename(char *basename, const char *filename, const char *suffix) {
    char *out=basename;

    uprv_strcpy(out, filename);
    out+=(int32_t)uprv_strlen(filename);

    if(suffix!=NULL) {
        *out++='-';
        uprv_strcpy(out, suffix);
        out+=(int32_t)uprv_strlen(suffix);
    }

    uprv_strcpy(out, ".txt");
}
|
|
|
|
|
|
|
|
/* TODO: move to toolutil */
|
|
|
|
static UBool
|
|
|
|
isToken(const char *token, const char *s) {
|
|
|
|
const char *z;
|
|
|
|
int32_t j;
|
|
|
|
|
|
|
|
s=u_skipWhitespace(s);
|
|
|
|
for(j=0;; ++j) {
|
|
|
|
if(token[j]!=0) {
|
|
|
|
if(s[j]!=token[j]) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
z=u_skipWhitespace(s+j);
|
|
|
|
if(*z==';' || *z==0) {
|
|
|
|
return TRUE;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parser for BidiMirroring.txt --------------------------------------------- */
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
mirrorLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode) {
|
2011-12-03 00:20:31 +00:00
|
|
|
const char *s;
|
2004-12-31 13:28:06 +00:00
|
|
|
char *end;
|
|
|
|
UChar32 src, mirror;
|
|
|
|
|
2011-12-03 00:20:31 +00:00
|
|
|
/* ignore "<code point>" which is on the @missing line */
|
|
|
|
s=u_skipWhitespace(fields[1][0]);
|
|
|
|
if(0==uprv_strncmp(s, "<code point>", 12)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2004-12-31 13:28:06 +00:00
|
|
|
src=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
|
|
|
|
if(end<=fields[0][0] || end!=fields[0][1]) {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]);
|
|
|
|
*pErrorCode=U_PARSE_ERROR;
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
|
2011-12-03 00:20:31 +00:00
|
|
|
mirror=(UChar32)uprv_strtoul(s, &end, 16);
|
|
|
|
if(end<=s || end!=fields[1][1]) {
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]);
|
|
|
|
*pErrorCode=U_PARSE_ERROR;
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
addMirror(src, mirror);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) {
|
|
|
|
char *fields[2][2];
|
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parser for UnicodeData.txt ----------------------------------------------- */
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
unicodeDataLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
char *end;
|
|
|
|
UErrorCode errorCode;
|
|
|
|
UChar32 c;
|
|
|
|
|
|
|
|
errorCode=U_ZERO_ERROR;
|
|
|
|
|
|
|
|
/* get the character code, field 0 */
|
|
|
|
c=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
|
|
|
|
if(end<=fields[0][0] || end!=fields[0][1]) {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in field 0 at %s\n", fields[0][0]);
|
|
|
|
*pErrorCode=U_PARSE_ERROR;
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get Mirrored flag, field 9 */
|
|
|
|
if(*fields[9][0]=='Y') {
|
2008-10-23 06:00:59 +00:00
|
|
|
upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
|
|
|
|
(long)c, u_errorName(errorCode));
|
|
|
|
exit(errorCode);
|
|
|
|
}
|
|
|
|
} else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in field 9 at U+%04lx\n",
|
|
|
|
(long)c);
|
|
|
|
*pErrorCode=U_PARSE_ERROR;
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parseDB(const char *filename, UErrorCode *pErrorCode) {
|
|
|
|
/* default Bidi classes for unassigned code points */
|
|
|
|
static const UChar32 defaultBidi[][3]={ /* { start, end, class } */
|
|
|
|
/* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */
|
|
|
|
{ 0x0590, 0x05FF, U_RIGHT_TO_LEFT },
|
|
|
|
{ 0x07C0, 0x08FF, U_RIGHT_TO_LEFT },
|
|
|
|
{ 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT },
|
|
|
|
{ 0x10800, 0x10FFF, U_RIGHT_TO_LEFT },
|
|
|
|
|
|
|
|
/* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */
|
|
|
|
{ 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC },
|
|
|
|
{ 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC },
|
|
|
|
{ 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC },
|
|
|
|
{ 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC }
|
|
|
|
|
|
|
|
/* L otherwise */
|
|
|
|
};
|
|
|
|
|
|
|
|
char *fields[15][2];
|
|
|
|
UChar32 start, end;
|
|
|
|
int32_t i;
|
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set default Bidi classes for unassigned code points.
|
|
|
|
* See the documentation for Bidi_Class in UCD.html in the Unicode data.
|
|
|
|
* http://www.unicode.org/Public/
|
2006-03-03 20:59:01 +00:00
|
|
|
*
|
|
|
|
* Starting with Unicode 5.0, DerivedBidiClass.txt should (re)set
|
|
|
|
* the Bidi_Class values for all code points including unassigned ones
|
|
|
|
* and including L values for these.
|
|
|
|
* This code becomes unnecesary but harmless. Leave it for now in case
|
|
|
|
* someone uses genbidi on pre-Unicode 5.0 data.
|
2004-12-31 13:28:06 +00:00
|
|
|
*/
|
|
|
|
for(i=0; i<LENGTHOF(defaultBidi); ++i) {
|
|
|
|
start=defaultBidi[i][0];
|
|
|
|
end=defaultBidi[i][1];
|
2008-10-23 06:00:59 +00:00
|
|
|
upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2004-12-31 13:28:06 +00:00
|
|
|
fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
|
|
|
|
(long)start, (long)end, u_errorName(*pErrorCode));
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
|
|
|
|
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* DerivedBidiClass.txt ----------------------------------------------------- */
|
|
|
|
|
|
|
|
static void U_CALLCONV
|
|
|
|
bidiClassLineFn(void *context,
|
|
|
|
char *fields[][2], int32_t fieldCount,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
char *s;
|
2008-10-22 19:50:07 +00:00
|
|
|
uint32_t start, end, value;
|
2004-12-31 13:28:06 +00:00
|
|
|
|
|
|
|
/* get the code point range */
|
2008-10-22 19:50:07 +00:00
|
|
|
u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
|
2004-12-31 13:28:06 +00:00
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
fprintf(stderr, "genbidi: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
|
|
|
|
exit(*pErrorCode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse bidi class */
|
|
|
|
s=trimTerminateField(fields[1][0], fields[1][1]);
|
|
|
|
value=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s);
|
|
|
|
if((int32_t)value<0) {
|
|
|
|
fprintf(stderr, "genbidi error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
|
|
|
|
exit(U_PARSE_ERROR);
|
|
|
|
}
|
|
|
|
|
2008-10-23 06:00:59 +00:00
|
|
|
upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2006-03-03 20:59:01 +00:00
|
|
|
fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
|
2008-10-22 19:50:07 +00:00
|
|
|
(int)start, (int)end, u_errorName(*pErrorCode));
|
2006-03-03 20:59:01 +00:00
|
|
|
exit(*pErrorCode);
|
2004-12-31 13:28:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hey, Emacs, please set the following:
|
|
|
|
*
|
|
|
|
* Local Variables:
|
|
|
|
* indent-tabs-mode: nil
|
|
|
|
* End:
|
|
|
|
*
|
|
|
|
*/
|