ICU-20460 Adding mechanism to build unicore data into dat file.
This commit is contained in:
parent
d2d59c6d65
commit
eac8f4b31a
1
icu4c/source/configure
vendored
1
icu4c/source/configure
vendored
@ -9133,6 +9133,7 @@ else
|
||||
--seqmode parallel \
|
||||
--src_dir "$srcdir/data" \
|
||||
--filter_file "$ICU_DATA_FILTER_FILE" \
|
||||
$BUILDTOOL_OPTS \
|
||||
> data/rules.mk
|
||||
if test "$?" != "0"; then
|
||||
as_fn_error $? "Python failed to run; see above error." "$LINENO" 5
|
||||
|
@ -1397,6 +1397,7 @@ else
|
||||
--seqmode parallel \
|
||||
--src_dir "$srcdir/data" \
|
||||
--filter_file "$ICU_DATA_FILTER_FILE" \
|
||||
$ICU_DATA_BUILDTOOL_OPTS \
|
||||
> data/rules.mk
|
||||
if test "$?" != "0"; then
|
||||
AC_MSG_ERROR(Python failed to run; see above error.)
|
||||
|
@ -29,6 +29,7 @@ def generate(config, glob, common_vars):
|
||||
requests += generate_brkitr_dictionaries(config, glob, common_vars)
|
||||
requests += generate_normalization(config, glob, common_vars)
|
||||
requests += generate_coll_ucadata(config, glob, common_vars)
|
||||
requests += generate_full_unicore_data(config, glob, common_vars)
|
||||
requests += generate_unames(config, glob, common_vars)
|
||||
requests += generate_ulayout(config, glob, common_vars)
|
||||
requests += generate_misc(config, glob, common_vars)
|
||||
@ -273,7 +274,8 @@ def generate_brkitr_dictionaries(config, glob, common_vars):
|
||||
def generate_normalization(config, glob, common_vars):
|
||||
# NRM Files
|
||||
input_files = [InFile(filename) for filename in glob("in/*.nrm")]
|
||||
input_files.remove(InFile("in/nfc.nrm")) # nfc.nrm is pre-compiled into C++
|
||||
# nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
|
||||
input_files.remove(InFile("in/nfc.nrm"))
|
||||
output_files = [OutFile(v.filename[3:]) for v in input_files]
|
||||
return [
|
||||
RepeatedExecutionRequest(
|
||||
@ -308,6 +310,36 @@ def generate_coll_ucadata(config, glob, common_vars):
|
||||
]
|
||||
|
||||
|
||||
def generate_full_unicore_data(config, glob, common_vars):
    """Return the build requests for the core Unicode properties files.

    pnames.icu, uprops.icu, ucase.icu and ubidi.icu are hardcoded in the
    common DLL and therefore not included in the data package any more.
    They are not built by default but need to be built for ICU4J data,
    both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    See ICU-4497.

    An empty list is returned unless config.include_uni_core_data is set.
    """
    if not config.include_uni_core_data:
        return []

    # nfc.nrm is handled here alongside the four .icu files.
    unicore_names = ("pnames.icu", "uprops.icu", "ucase.icu", "ubidi.icu", "nfc.nrm")
    sources = [InFile("in/" + name) for name in unicore_names]
    targets = list(map(OutFile, unicore_names))

    # One icupkg invocation per file; each output keeps its input's base name.
    unicore_request = RepeatedExecutionRequest(
        name = "unicore",
        category = "unicore",
        input_files = sources,
        output_files = targets,
        tool = IcuTool("icupkg"),
        args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
    )
    return [unicore_request]
|
||||
|
||||
|
||||
def generate_unames(config, glob, common_vars):
|
||||
# Unicode Character Names
|
||||
input_file = InFile("in/unames.icu")
|
||||
|
@ -82,35 +82,8 @@ endif
|
||||
OUTTMPDIR=$(OUTDIR)/tmp
|
||||
MAINBUILDDIR=$(OUTDIR)/build
|
||||
BUILDDIR=$(MAINBUILDDIR)/$(ICUDATA_PLATFORM_NAME)
|
||||
UNICODEDATADIR=$(SRCDATADIR)/unidata
|
||||
LOCSRCDIR=$(SRCDATADIR)/locales
|
||||
CURRSRCDIR=$(SRCDATADIR)/curr
|
||||
CURRBLDDIR=$(BUILDDIR)/curr
|
||||
LANGSRCDIR=$(SRCDATADIR)/lang
|
||||
LANGBLDDIR=$(BUILDDIR)/lang
|
||||
REGIONSRCDIR=$(SRCDATADIR)/region
|
||||
REGIONBLDDIR=$(BUILDDIR)/region
|
||||
ZONESRCDIR=$(SRCDATADIR)/zone
|
||||
ZONEBLDDIR=$(BUILDDIR)/zone
|
||||
UNITSRCDIR=$(SRCDATADIR)/unit
|
||||
UNITBLDDIR=$(BUILDDIR)/unit
|
||||
COLSRCDIR=$(SRCDATADIR)/coll
|
||||
COLBLDDIR=$(BUILDDIR)/coll
|
||||
RBNFSRCDIR=$(SRCDATADIR)/rbnf
|
||||
RBNFBLDDIR=$(BUILDDIR)/rbnf
|
||||
TRANSLITSRCDIR=$(SRCDATADIR)/translit
|
||||
TRANSLITBLDDIR=$(BUILDDIR)/translit
|
||||
MISCSRCDIR=$(SRCDATADIR)/misc
|
||||
BRKSRCDIR=$(SRCDATADIR)/brkitr
|
||||
BRKBLDDIR=$(BUILDDIR)/brkitr
|
||||
DICTSRCDIR=$(BRKSRCDIR)/dictionaries
|
||||
BRKRULESRCDIR=$(BRKSRCDIR)/rules
|
||||
MISCSRCDIR=$(SRCDATADIR)/misc
|
||||
UCMSRCDIR=$(SRCDATADIR)/mappings
|
||||
SPREPSRCDIR=$(SRCDATADIR)/sprep
|
||||
COMINCDIR=$(top_srcdir)/common/unicode
|
||||
SRCLISTDEPS=Makefile $(srcdir)/Makefile.in
|
||||
BUILD_DIRS=$(OUTDIR) $(MAINBUILDDIR) $(BUILDDIR) $(CURRBLDDIR) $(LANGBLDDIR) $(REGIONBLDDIR) $(ZONEBLDDIR) $(UNITBLDDIR) $(BRKBLDDIR) $(COLBLDDIR) $(RBNFBLDDIR) $(TRANSLITBLDDIR) $(OUTTMPDIR) $(OUTTMPDIR_390STUB) $(OUTTMPDIR)/$(CURR_TREE) $(OUTTMPDIR)/$(LANG_TREE) $(OUTTMPDIR)/$(REGION_TREE) $(OUTTMPDIR)/$(ZONE_TREE) $(OUTTMPDIR)/$(UNIT_TREE) $(OUTTMPDIR)/$(COLLATION_TREE) $(OUTTMPDIR)/$(RBNF_TREE) $(OUTTMPDIR)/$(TRANSLIT_TREE) $(OUTTMPDIR)/$(BREAK_TREE)
|
||||
|
||||
# Variable names for rules.mk
|
||||
OUT_DIR=$(BUILDDIR)
|
||||
@ -145,7 +118,7 @@ check-exhaustive: check
|
||||
distclean-local: clean
|
||||
$(RMV) Makefile
|
||||
|
||||
all-local: build-dir icupkg.inc build-local packagedata $(POST_DATA_BUILD) $(OS390PKG)
|
||||
all-local: icupkg.inc build-local packagedata $(POST_DATA_BUILD) $(OS390PKG)
|
||||
|
||||
dist-local:
|
||||
|
||||
@ -153,7 +126,7 @@ clean-map:
|
||||
-test -z *.map || $(RMV) *.map
|
||||
|
||||
clean-local: cleanpackage cleanfiles clean-map
|
||||
$(RMV) build-dir* build-local packagedata uni-core-data
|
||||
$(RMV) $(OUTDIR) build-local packagedata uni-core-data
|
||||
|
||||
cleanfiles:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
@ -252,7 +225,7 @@ include $(top_builddir)/$(subdir)/rules.mk
|
||||
ifeq ($(ENABLE_SO_VERSION_DATA),1)
|
||||
ifeq ($(PKGDATA_MODE),dll)
|
||||
SO_VERSION_DATA = $(OUTTMPDIR)/icudata.res
|
||||
$(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc | build-dir
|
||||
$(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc
|
||||
ifeq ($(MSYS_RC_MODE),1)
|
||||
rc.exe -i$(srcdir)/../common -i$(top_builddir)/common -fo$@ $(CPPFLAGS) $<
|
||||
else
|
||||
@ -264,36 +237,6 @@ endif
|
||||
PKGDATA_LIST = $(TMP_DIR)/icudata.lst
|
||||
|
||||
|
||||
##### Define all the data files. the build rule that depends on them is below.
|
||||
# X_FILES_SHORT = just the base names (for lists)
|
||||
# X_FILES = full paths (for dependency)
|
||||
|
||||
## DAT files - Misc. data files.
|
||||
# 2005-may-05 Removed Unicode properties files (unorm.icu, uprops.icu, ucase.icu, ubidi.icu)
|
||||
# from data build. See Jitterbug 4497. (makedata.mak revision 1.117)
|
||||
# 2010-dec Removed pnames.icu.
|
||||
# These are now hardcoded in ICU4C and only loaded in ICU4J.
|
||||
#
|
||||
DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfkc.nrm nfkc_cf.nrm uts46.nrm
|
||||
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)
|
||||
|
||||
## All generated files
|
||||
ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(CNV_FILES_SPECIAL) $(BRK_FILES) $(DICT_FILES) $(RES_FILES) $(INDEX_RES_FILE) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(UNIT_FILES) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES) $(CFU_FILES)
|
||||
ALL_INDEX_SRC_FILES = $(PKGDATA_LIST) $(INDEX_FILE) $(CURR_INDEX_FILE) $(LANG_INDEX_FILE) $(REGION_INDEX_FILE) $(ZONE_INDEX_FILE) $(UNIT_INDEX_FILE) $(COLLATION_INDEX_FILE) $(BRK_RES_INDEX_FILE) $(RBNF_INDEX_FILE)
|
||||
# a list to use in the .lst files (package-relative)
|
||||
COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT)
|
||||
BRK_FILES_LIST=$(BRK_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(DICT_FILES_SHORT)
|
||||
LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT) $(UNIT_FILES_SHORT)
|
||||
MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
|
||||
UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu nfc.nrm
|
||||
UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
|
||||
|
||||
ifneq ($(INCLUDE_UNI_CORE_DATA),)
|
||||
MISC_FILES_LIST+=$(UNI_CORE_DATA)
|
||||
build-local: uni-core-data
|
||||
echo timestamp > $@
|
||||
endif
|
||||
|
||||
#####################################################
|
||||
# General data build rules
|
||||
|
||||
@ -301,10 +244,10 @@ endif
|
||||
CLEANFILES = *~ icupkg.inc *.x
|
||||
|
||||
ifeq ($(ICUDATA_SOURCE_ARCHIVE),)
|
||||
build-local: build-dir $(SO_VERSION_DATA) $(ICUDATA_ALL_OUTPUT_FILES) $(PKGDATA_LIST) $(OS390LIST)
|
||||
build-local: $(SO_VERSION_DATA) $(ICUDATA_ALL_OUTPUT_FILES) $(PKGDATA_LIST) $(OS390LIST)
|
||||
echo timestamp > $@
|
||||
else
|
||||
build-local: build-dir $(SO_VERSION_DATA) $(PKGDATA_LIST) $(OS390LIST)
|
||||
build-local: $(SO_VERSION_DATA) $(PKGDATA_LIST) $(OS390LIST)
|
||||
echo timestamp > $@
|
||||
$(PKGDATA_LIST): $(SRCLISTDEPS) $(ICUDATA_SOURCE_ARCHIVE)
|
||||
ifneq ($(ICUDATA_SOURCE_IS_NATIVE_TARGET),YES)
|
||||
@ -317,32 +260,12 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
$(BUILD_DIRS): build-dir
|
||||
|
||||
build-dir:
|
||||
@-$(RMV) $@
|
||||
echo timestamp > $@.tmp
|
||||
@list='$(BUILD_DIRS)'; \
|
||||
for dir in $$list; do \
|
||||
if ! test -d $$dir; then \
|
||||
echo $(MKINSTALLDIRS) $(BUILD_DIRS); \
|
||||
$(MKINSTALLDIRS) $(BUILD_DIRS); \
|
||||
fi; \
|
||||
done
|
||||
mv $@.tmp $@
|
||||
|
||||
# The | is an order-only prerequisite. This helps when the -j option is used,
|
||||
# and we don't want the files to be built before the directories are built.
|
||||
ifneq ($(filter order-only,$(.FEATURES)),)
|
||||
$(ALL_FILES) $(ALL_INDEX_SRC_FILES): | build-dir
|
||||
endif
|
||||
|
||||
# if the tzcode directory contains a new tzdata*.tar.gz file, use it for zoneinfo
|
||||
ifeq ($(TZDATA),)
|
||||
TZDATA = $(firstword $(wildcard $(top_builddir)/tools/tzcode/tzdata*.tar.gz) $(wildcard $(top_srcdir)/tools/tzcode/tzdata*.tar.gz))
|
||||
endif
|
||||
|
||||
# TODO: Make the TZDATA override part of Python buildtool
|
||||
# TODO(ICU-20466): Make the TZDATA override part of Python buildtool
|
||||
ifneq ($(TZDATA),)
|
||||
|
||||
TZCODE_DIR=$(top_builddir)/tools/tzcode
|
||||
@ -362,14 +285,6 @@ $(ZONEINFO): $(TZDATA)
|
||||
# end of zoneinfo-generation
|
||||
endif
|
||||
|
||||
# The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
|
||||
# are hardcoded in the common DLL and therefore not included in the data package any more.
|
||||
# They are not built by default but need to be built for ICU4J data and for getting the .c source files
|
||||
# when updating the Unicode data.
|
||||
uni-core-data: build-dir $(UNI_CORE_TARGET_DATA)
|
||||
@echo Unicode .icu files built to $(BUILDDIR)
|
||||
echo timestamp > $@
|
||||
|
||||
# Build the ICU4J icudata.jar.
|
||||
# Command line:
|
||||
# (Run this from the output data folder which may not be .../source/data in an out-of-source build.)
|
||||
@ -385,19 +300,11 @@ ICU4J_TZDATA_FILES=zoneinfo64 metaZones timezoneTypes windowsZones
|
||||
ICU4J_DATA_DIRNAME=com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
|
||||
ICU4J_TZDATA_PATHS=$(ICU4J_TZDATA_FILES:%="$(ICU4J_DATA_DIRNAME)/%.res")
|
||||
|
||||
# Targets for prebuilt Unicode data
|
||||
$(BUILDDIR)/%.icu: $(SRCDATADIR)/in/%.icu | $(DIRS)
|
||||
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
|
||||
|
||||
$(BUILDDIR)/nfc.nrm: $(SRCDATADIR)/in/nfc.nrm | $(DIRS)
|
||||
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
|
||||
|
||||
# generate icu4j-related data to $(OUTDIR)/icu4j/com/ibm/icu/impl/data/...
|
||||
generate-data: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data
|
||||
generate-data: packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat
|
||||
mkdir -p $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
|
||||
mkdir -p $(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)
|
||||
echo $(UNI_CORE_DATA) > $(OUTDIR)/icu4j/add.txt
|
||||
$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
|
||||
$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
|
||||
mv $(ICU4J_TZDATA_PATHS:%=$(OUTDIR)/icu4j/%) "$(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)"
|
||||
|
||||
$(OUTDIR)/icu4j/icutzdata.jar: generate-data
|
||||
@ -408,6 +315,7 @@ $(OUTDIR)/icu4j/icutzdata.jar: generate-data
|
||||
# - swap the ICU data
|
||||
# - extract all data items
|
||||
# - package them into the .jar file
|
||||
# TODO(ICU-20466): Move this to Python
|
||||
$(OUTDIR)/icu4j/icudata.jar: generate-data
|
||||
$(JAR) cf $(OUTDIR)/icu4j/icudata.jar -C $(OUTDIR)/icu4j $(ICU4J_DATA_DIRNAME)/
|
||||
|
||||
|
@ -84,6 +84,12 @@ flag_parser.add_argument(
|
||||
choices = ["unihan", "implicithan"],
|
||||
default = "unihan"
|
||||
)
|
||||
# Opt-in switch: off unless --include_uni_core_data is passed on the command line.
flag_parser.add_argument(
    "--include_uni_core_data",
    action = "store_true",
    default = False,
    help = "Include the full Unicode core data in the dat file."
)
|
||||
flag_parser.add_argument(
|
||||
"--seqmode",
|
||||
help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
|
||||
@ -119,9 +125,13 @@ class Config(object):
|
||||
def __init__(self, args):
|
||||
# Process arguments
|
||||
self.max_parallel = (args.seqmode == "parallel")
|
||||
|
||||
# Either "unihan" or "implicithan"
|
||||
self.coll_han_type = args.collation_ucadata
|
||||
|
||||
# Boolean: Whether to include core Unicode data files in the .dat file
|
||||
self.include_uni_core_data = args.include_uni_core_data
|
||||
|
||||
# Default fields before processing filter file
|
||||
self.filters_json_data = {}
|
||||
|
||||
|
@ -17,9 +17,15 @@ In the following,
|
||||
$icu4j_root is the ICU4J root directory
|
||||
$jdk_bin is the JDK bin directory (for the jar tool)
|
||||
|
||||
1. Download and build ICU4C. For more instructions on downloading and building
|
||||
ICU4C, see the ICU4C readme at:
|
||||
http://source.icu-project.org/repos/icu/trunk/icu4c/readme.html#HowToBuild
|
||||
1. Download, configure, and build ICU4C. When you configure ICU4C, you must
|
||||
set the environment variable ICU_DATA_BUILDTOOL_OPTS to
|
||||
"--include_uni_core_data" to build additional required ICU4J data:
|
||||
|
||||
ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data ./runConfigureICU Linux
|
||||
|
||||
For more instructions on downloading and building ICU4C,
|
||||
see the ICU4C readme at:
|
||||
https://htmlpreview.github.io/?https://github.com/unicode-org/icu/blob/master/icu4c/readme.html#HowToBuild
|
||||
(Windows: build as 'x86, Release' otherwise you will have to set 'CFG' differently below.)
|
||||
|
||||
*NOTE* You should do a full rebuild after any data changes.
|
||||
|
@ -240,6 +240,8 @@ $(COREDATA_TS):
|
||||
--tool_cfg "$(CFG)" \
|
||||
--out_dir "$(ICUBLD_PKG)" \
|
||||
--tmp_dir "$(ICUTMP)" \
--filter_file "$(ICU_DATA_FILTER_FILE)" \
$(ICU_DATA_BUILDTOOL_OPTS)
|
||||
@echo "timestamp" > $(COREDATA_TS)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user