ICU-4078 gennorm now needs other properties files - but must not depend on unorm.icu...
X-SVN-Rev: 16317
This commit is contained in:
parent
a52e1cd824
commit
5320060c1a
@ -23,11 +23,13 @@
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "unicode/uclean.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
@ -53,6 +55,17 @@ parseDB(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Symbolic indexes into the options[] array used for command-line parsing,
 * e.g. options[ICUDATADIR]. The enumerators must stay in the same order as
 * the entries of options[] (UNICODE_VERSION is index 6, ICUDATADIR is index 7).
 */
enum {
|
||||
HELP_H,
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR,
|
||||
UNICODE_VERSION,
|
||||
ICUDATADIR
|
||||
};
|
||||
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
@ -60,7 +73,8 @@ static UOption options[]={
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 }
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
|
||||
UOPTION_ICUDATADIR
|
||||
};
|
||||
|
||||
extern int
|
||||
@ -78,6 +92,7 @@ main(int argc, char* argv[]) {
|
||||
options[4].value=u_getDataDirectory();
|
||||
options[5].value="";
|
||||
options[6].value="3.0.0";
|
||||
options[ICUDATADIR].value=u_getDataDirectory();
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
@ -137,6 +152,28 @@ main(int argc, char* argv[]) {
|
||||
|
||||
setUnicodeVersion(options[6].value);
|
||||
|
||||
if (options[ICUDATADIR].doesOccur) {
|
||||
u_setDataDirectory(options[ICUDATADIR].value);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that we can work with properties
|
||||
* but don't call u_init() because that needs unorm.icu which we are just
|
||||
* going to build here.
|
||||
*/
|
||||
{
|
||||
U_STRING_DECL(ideo, "[:Ideographic:]", 15);
|
||||
USet *set;
|
||||
|
||||
U_STRING_INIT(ideo, "[:Ideographic:]", 15);
|
||||
set=uset_openPattern(ideo, -1, &errorCode);
|
||||
if(U_FAILURE(errorCode) || !uset_contains(set, 0xf900)) {
|
||||
fprintf(stderr, "gennorm is unable to work with properties (uprops.icu): %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
uset_close(set);
|
||||
}
|
||||
|
||||
/* prepare the filename beginning with the source dir */
|
||||
uprv_strcpy(filename, srcDir);
|
||||
basename=filename+uprv_strlen(filename);
|
||||
|
@ -110,6 +110,9 @@ static Norm *norms;
|
||||
*/
|
||||
static uint32_t haveSeenFlags[256];
|
||||
|
||||
/* set of characters with NFD_QC=No (i.e., those with canonical decompositions) */
|
||||
static USet *nfdQCNoSet;
|
||||
|
||||
/* see addCombiningCP() for details */
|
||||
static uint32_t combiningCPs[2000];
|
||||
|
||||
@ -177,6 +180,9 @@ init() {
|
||||
/* reset all "have seen" flags */
|
||||
uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags));
|
||||
|
||||
/* open an empty set: uset_open(start, end) with start>end contains no code points */
|
||||
nfdQCNoSet=uset_open(1, 0);
|
||||
|
||||
/* allocate extra data memory for UTF-16 decomposition strings and other values */
|
||||
extraMem=utm_open("gennorm extra 16-bit memory", _NORM_EXTRA_INDEX_TOP, _NORM_EXTRA_INDEX_TOP, 2);
|
||||
/* initialize the extraMem counter for the top of FNC strings */
|
||||
@ -813,6 +819,10 @@ setQCFlags(uint32_t code, uint8_t qcFlags) {
|
||||
indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=(uint16_t)code;
|
||||
}
|
||||
}
|
||||
|
||||
if(qcFlags&_NORM_QC_NFD) {
|
||||
uset_add(nfdQCNoSet, (UChar32)code);
|
||||
}
|
||||
}
|
||||
|
||||
extern void
|
||||
@ -1726,7 +1736,7 @@ generateData(const char *dataDir) {
|
||||
|
||||
#else
|
||||
|
||||
U_STRING_DECL(nxCJKCompatPattern, "[[:Ideographic:]&[:NFD_QC=No:]]", 31);
|
||||
U_STRING_DECL(nxCJKCompatPattern, "[:Ideographic:]", 15);
|
||||
U_STRING_DECL(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
|
||||
USet *set;
|
||||
int32_t normTrieSize, fcdTrieSize, auxTrieSize;
|
||||
@ -1765,7 +1775,14 @@ generateData(const char *dataDir) {
|
||||
canonStartSetsTop+=canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH];
|
||||
|
||||
/* create the normalization exclusion sets */
|
||||
U_STRING_INIT(nxCJKCompatPattern, "[[:Ideographic:]&[:NFD_QC=No:]]", 31);
|
||||
/*
|
||||
* nxCJKCompatPattern should be [[:Ideographic:]&[:NFD_QC=No:]]
|
||||
* but we cannot use NFD_QC from the pattern because that would require
|
||||
* unorm.icu which we are just going to generate.
|
||||
* Therefore we have manually collected nfdQCNoSet and intersect Ideographic
|
||||
* with that.
|
||||
*/
|
||||
U_STRING_INIT(nxCJKCompatPattern, "[:Ideographic:]", 15);
|
||||
U_STRING_INIT(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
|
||||
|
||||
canonStartSets[_NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET]=canonStartSetsTop;
|
||||
@ -1774,6 +1791,7 @@ generateData(const char *dataDir) {
|
||||
fprintf(stderr, "error: uset_openPattern([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
uset_retainAll(set, nfdQCNoSet);
|
||||
canonStartSetsTop+=uset_serialize(set, canonStartSets+canonStartSetsTop, LENGTHOF(canonStartSets)-canonStartSetsTop, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "error: uset_serialize([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
|
||||
@ -1902,6 +1920,8 @@ cleanUpData(void) {
|
||||
utrie_close(fcdTrie);
|
||||
utrie_close(auxTrie);
|
||||
|
||||
uset_close(nfdQCNoSet);
|
||||
|
||||
uprv_free(normTrie);
|
||||
uprv_free(norm32Trie);
|
||||
uprv_free(fcdTrie);
|
||||
|
Loading…
Reference in New Issue
Block a user