ICU-4078 gennorm now needs other properties files - but must not depend on unorm.icu...

X-SVN-Rev: 16317
This commit is contained in:
Markus Scherer 2004-09-13 23:40:06 +00:00
parent a52e1cd824
commit 5320060c1a
2 changed files with 62 additions and 5 deletions

View File

@ -23,11 +23,13 @@
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/uclean.h"
#include "unicode/udata.h"
#include "unicode/uset.h"
#include "cmemory.h"
#include "cstring.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uparse.h"
@ -53,6 +55,17 @@ parseDB(const char *filename, UErrorCode *pErrorCode);
/* -------------------------------------------------------------------------- */
/*
 * Symbolic indexes into the options[] array.
 * The order of these constants must stay in sync with the order of the
 * entries in the options[] initializer.
 */
enum {
    HELP_H=0,           /* -h */
    HELP_QUESTION_MARK, /* -? */
    VERBOSE,
    COPYRIGHT,
    DESTDIR,
    SOURCEDIR,
    UNICODE_VERSION,    /* the "unicode" (-u) option */
    ICUDATADIR
};
static UOption options[]={
UOPTION_HELP_H,
UOPTION_HELP_QUESTION_MARK,
@ -60,7 +73,8 @@ static UOption options[]={
UOPTION_COPYRIGHT,
UOPTION_DESTDIR,
UOPTION_SOURCEDIR,
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 }
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
UOPTION_ICUDATADIR
};
extern int
@ -78,6 +92,7 @@ main(int argc, char* argv[]) {
options[4].value=u_getDataDirectory();
options[5].value="";
options[6].value="3.0.0";
options[ICUDATADIR].value=u_getDataDirectory();
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
/* error handling, printing usage message */
@ -137,6 +152,28 @@ main(int argc, char* argv[]) {
setUnicodeVersion(options[6].value);
if (options[ICUDATADIR].doesOccur) {
u_setDataDirectory(options[ICUDATADIR].value);
}
/*
* Verify that we can work with properties
* but don't call u_init() because that needs unorm.icu which we are just
* going to build here.
*/
{
    /*
     * Build a set from a property-based pattern and probe one code point
     * (U+F900) that the tool expects to find in it. If either step fails,
     * property data is not usable and the tool cannot continue.
     */
    U_STRING_DECL(ideo, "[:Ideographic:]", 15);
    USet *set;
    U_STRING_INIT(ideo, "[:Ideographic:]", 15);
    set=uset_openPattern(ideo, -1, &errorCode);
    if(U_SUCCESS(errorCode) && !uset_contains(set, 0xf900)) {
        /*
         * The pattern was parsed but the set does not contain the expected
         * code point. errorCode is still U_ZERO_ERROR at this point, which
         * would make exit(errorCode) report success (status 0) and print a
         * misleading error name, so turn it into a real failure code.
         */
        errorCode=U_MISSING_RESOURCE_ERROR;
    }
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gennorm is unable to work with properties (uprops.icu): %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    uset_close(set);
}
/* prepare the filename beginning with the source dir */
uprv_strcpy(filename, srcDir);
basename=filename+uprv_strlen(filename);

View File

@ -110,6 +110,9 @@ static Norm *norms;
*/
static uint32_t haveSeenFlags[256];
/* set of characters with NFD_QC=No (i.e., those with canonical decompositions) */
static USet *nfdQCNoSet;
/* see addCombiningCP() for details */
static uint32_t combiningCPs[2000];
@ -177,6 +180,9 @@ init() {
/* reset all "have seen" flags */
uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags));
/* open an empty set: uset_open(start, end) with start>end contains no code points */
nfdQCNoSet=uset_open(1, 0);
/* allocate extra data memory for UTF-16 decomposition strings and other values */
extraMem=utm_open("gennorm extra 16-bit memory", _NORM_EXTRA_INDEX_TOP, _NORM_EXTRA_INDEX_TOP, 2);
/* initialize the extraMem counter for the top of FNC strings */
@ -813,6 +819,10 @@ setQCFlags(uint32_t code, uint8_t qcFlags) {
indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=(uint16_t)code;
}
}
if(qcFlags&_NORM_QC_NFD) {
uset_add(nfdQCNoSet, (UChar32)code);
}
}
extern void
@ -1726,7 +1736,7 @@ generateData(const char *dataDir) {
#else
U_STRING_DECL(nxCJKCompatPattern, "[[:Ideographic:]&[:NFD_QC=No:]]", 31);
U_STRING_DECL(nxCJKCompatPattern, "[:Ideographic:]", 15);
U_STRING_DECL(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
USet *set;
int32_t normTrieSize, fcdTrieSize, auxTrieSize;
@ -1765,7 +1775,14 @@ generateData(const char *dataDir) {
canonStartSetsTop+=canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH];
/* create the normalization exclusion sets */
U_STRING_INIT(nxCJKCompatPattern, "[[:Ideographic:]&[:NFD_QC=No:]]", 31);
/*
* nxCJKCompatPattern should be [[:Ideographic:]&[:NFD_QC=No:]]
* but we cannot use NFD_QC from the pattern because that would require
* unorm.icu which we are just going to generate.
* Therefore we have manually collected nfdQCNoSet and intersect Ideographic
* with that.
*/
U_STRING_INIT(nxCJKCompatPattern, "[:Ideographic:]", 15);
U_STRING_INIT(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
canonStartSets[_NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET]=canonStartSetsTop;
@ -1774,6 +1791,7 @@ generateData(const char *dataDir) {
fprintf(stderr, "error: uset_openPattern([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
exit(errorCode);
}
uset_retainAll(set, nfdQCNoSet);
canonStartSetsTop+=uset_serialize(set, canonStartSets+canonStartSetsTop, LENGTHOF(canonStartSets)-canonStartSetsTop, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "error: uset_serialize([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
@ -1902,6 +1920,8 @@ cleanUpData(void) {
utrie_close(fcdTrie);
utrie_close(auxTrie);
uset_close(nfdQCNoSet);
uprv_free(normTrie);
uprv_free(norm32Trie);
uprv_free(fcdTrie);