ICU-10923 Python logic for resource filters.
- Adds schema validation for config file - Adds JSON comment stripping utility
This commit is contained in:
parent
d8520c9f66
commit
b0d572c7f1
@ -21,31 +21,6 @@ def generate(config, glob, common_vars):
|
||||
print("Error: Cannot find data directory; please specify --glob_dir", file=sys.stderr)
|
||||
exit(1)
|
||||
|
||||
# DIRECTORIES
|
||||
build_dirs = [
|
||||
"{OUT_DIR}",
|
||||
"{OUT_DIR}/curr",
|
||||
"{OUT_DIR}/lang",
|
||||
"{OUT_DIR}/region",
|
||||
"{OUT_DIR}/zone",
|
||||
"{OUT_DIR}/unit",
|
||||
"{OUT_DIR}/brkitr",
|
||||
"{OUT_DIR}/coll",
|
||||
"{OUT_DIR}/rbnf",
|
||||
"{OUT_DIR}/translit",
|
||||
"{TMP_DIR}",
|
||||
"{TMP_DIR}/curr",
|
||||
"{TMP_DIR}/lang",
|
||||
"{TMP_DIR}/locales",
|
||||
"{TMP_DIR}/region",
|
||||
"{TMP_DIR}/zone",
|
||||
"{TMP_DIR}/unit",
|
||||
"{TMP_DIR}/coll",
|
||||
"{TMP_DIR}/rbnf",
|
||||
"{TMP_DIR}/translit",
|
||||
"{TMP_DIR}/brkitr"
|
||||
]
|
||||
|
||||
requests += generate_cnvalias(config, glob, common_vars)
|
||||
requests += generate_confusables(config, glob, common_vars)
|
||||
requests += generate_conversion_mappings(config, glob, common_vars)
|
||||
@ -154,7 +129,7 @@ def generate(config, glob, common_vars):
|
||||
)
|
||||
]
|
||||
|
||||
return (build_dirs, requests)
|
||||
return requests
|
||||
|
||||
|
||||
def generate_cnvalias(config, glob, common_vars):
|
||||
|
@ -8,9 +8,11 @@ from __future__ import print_function
|
||||
import argparse
|
||||
import glob as pyglob
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from . import *
|
||||
from .comment_stripper import CommentStripper
|
||||
from .renderers import makefile, windirect
|
||||
from . import filtration, utils
|
||||
import BUILDRULES
|
||||
@ -118,15 +120,38 @@ class Config(object):
|
||||
try:
|
||||
with open(args.filter_file, "r") as f:
|
||||
print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
|
||||
try:
|
||||
import hjson
|
||||
self.filters_json_data = hjson.load(f)
|
||||
except ImportError:
|
||||
self.filters_json_data = json.load(f)
|
||||
self._parse_filter_file(f)
|
||||
except IOError:
|
||||
print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
|
||||
exit(1)
|
||||
|
||||
def _parse_filter_file(self, f):
    """Parse the filter file stream and optionally validate it.

    The parsed document is stored in ``self.filters_json_data``.  Schema
    problems are reported as warnings on stderr; they never abort parsing.

    Args:
        f: An open, readable text stream containing the filter file.
    """
    # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
    # Only the import itself is guarded, so an ImportError raised while
    # actually parsing is not mistaken for "hjson is not installed".
    try:
        import hjson
    except ImportError:
        hjson = None
    if hjson is not None:
        self.filters_json_data = hjson.load(f)
    else:
        # Plain JSON has no comment syntax, so strip "//" lines first.
        self.filters_json_data = json.load(CommentStripper(f))

    # Optionally pre-validate the JSON schema before further processing.
    # Some schema errors will be caught later, but this step ensures
    # maximal validity.
    try:
        import jsonschema
    except ImportError:
        # Validation is deliberately best-effort: skip it when the
        # optional jsonschema package is not available.
        return

    schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
    with open(schema_path) as schema_f:
        schema = json.load(CommentStripper(schema_f))
    validator = jsonschema.Draft4Validator(schema)
    # The validator already holds the schema; passing it again to
    # iter_errors() is redundant (and deprecated in newer jsonschema).
    for error in validator.iter_errors(self.filters_json_data):
        # Render the path to the offending value, e.g. ".resourceFilters[0].rules".
        location = "".join(
            "[%d]" % part if isinstance(part, int) else ".%s" % part
            for part in error.absolute_path
        )
        print("WARNING: ICU data filter JSON file:", error.message,
            "at", location,
            file=sys.stderr)
|
||||
|
||||
def has_feature(self, feature_name):
|
||||
assert feature_name in AVAILABLE_FEATURES
|
||||
return feature_name in self._feature_set
|
||||
@ -166,10 +191,12 @@ def main():
|
||||
# For the purposes of buildtool, force Unix-style directory separators.
|
||||
return [v.replace("\\", "/")[len(args.glob_dir)+1:] for v in sorted(result_paths)]
|
||||
|
||||
build_dirs, requests = BUILDRULES.generate(config, glob, common)
|
||||
requests = BUILDRULES.generate(config, glob, common)
|
||||
requests = filtration.apply_filters(requests, config)
|
||||
requests = utils.flatten_requests(requests, config, common)
|
||||
|
||||
build_dirs = utils.compute_directories(requests)
|
||||
|
||||
if args.format == "gnumake":
|
||||
print(makefile.get_gnumake_rules(
|
||||
build_dirs,
|
||||
|
51
icu4c/source/data/buildtool/comment_stripper.py
Normal file
51
icu4c/source/data/buildtool/comment_stripper.py
Normal file
@ -0,0 +1,51 @@
|
||||
# Copyright (C) 2018 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
import io
|
||||
|
||||
class CommentStripper(object):
    """Removes lines starting with "//" from a file stream.

    Wraps a readable text stream and exposes a file-like ``read()`` so the
    result can be handed to parsers such as ``json.load``.  A line is a
    comment only when its first two characters are "//"; a "//" later in a
    line is left untouched.  A comment line is removed together with its
    terminating newline.

    The scanner state is kept between ``read()`` calls, so a comment (or a
    single leading "/") that is split across two chunks is handled correctly.
    """

    def __init__(self, f):
        """Wrap the readable text stream ``f``."""
        self.f = f
        # 0: start of a line, 1: read a single '/' at line start,
        # 2: middle of a non-comment line, 3: inside a comment.
        self.state = 0

    def read(self, size=-1):
        """Read from the wrapped stream and return the text without comments.

        Args:
            size: Passed to the wrapped stream's ``read()``.  Negative or
                None reads everything.

        Returns:
            The stripped text.  For a positive ``size`` the result may be
            shorter than ``size`` (comments were removed) or one character
            longer (a "/" held back from the previous chunk), but it is
            empty only when the wrapped stream is exhausted.
        """
        if size is None or size < 0:
            # Everything is read at once, so the stream is at EOF afterwards.
            text = "".join(self._strip_comments(self.f.read(size)))
            return text + self._flush()
        if size == 0:
            return ""
        while True:
            chunk = self.f.read(size)
            if not chunk:
                return self._flush()
            text = "".join(self._strip_comments(chunk))
            # An empty string means EOF to callers, so keep reading when a
            # whole chunk consisted of comment text.
            if text:
                return text

    def _flush(self):
        """Return a "/" still held back when the wrapped stream hits EOF."""
        if self.state == 1:
            # The lone "/" was not the start of a comment after all.
            self.state = 2
            return "/"
        return ""

    def _strip_comments(self, chunk):
        """Yield the characters of ``chunk`` that are not part of a comment."""
        for char in chunk:
            if self.state == 0:
                # state 0: start of a line
                if char == "/":
                    # Might start a comment; hold it back until we know.
                    self.state = 1
                elif char == "\n":
                    self.state = 0
                    yield char
                else:
                    self.state = 2
                    yield char
            elif self.state == 1:
                # state 1: read a single '/'
                if char == "/":
                    self.state = 3
                elif char == "\n":
                    self.state = 0
                    yield "/"  # the one that was skipped
                    yield "\n"
                else:
                    self.state = 2
                    yield "/"  # the one that was skipped
                    yield char
            elif self.state == 2:
                # state 2: middle of a line, no comment
                if char == "\n":
                    self.state = 0
                yield char
            elif self.state == 3:
                # state 3: inside a comment; drop everything up to and
                # including the newline that ends it
                if char == "\n":
                    self.state = 0
|
@ -50,6 +50,11 @@ class Filter(object):
|
||||
pass
|
||||
|
||||
|
||||
class InclusionFilter(Filter):
    """A filter that accepts every file it is asked about."""

    def match(self, file):
        # Unconditional: the file argument is never inspected.
        return True
|
||||
|
||||
|
||||
class ExclusionFilter(Filter):
|
||||
def match(self, file):
|
||||
return False
|
||||
@ -166,6 +171,122 @@ def _preprocess_file_filters(requests, config):
|
||||
return filters
|
||||
|
||||
|
||||
def _apply_resource_filters(old_requests, config):
|
||||
class ResourceFilterInfo(object):
    """Gathers the resource filter rules for one request category.

    apply_to_requests() locates the genrb requests of the category and
    points them at a per-category filter directory; add_rules() attaches
    rules to the matching input files; make_requests() produces the requests
    that write one filter file per input file.
    """

    def __init__(self, category):
        self.category = category
        self.filter_tmp_dir = "filters/%s" % category
        # These three stay None until apply_to_requests() has been called.
        self.input_files = None
        self.filter_files = None
        self.rules_by_file = None

    def apply_to_requests(self, all_requests):
        """Hook this filter into every genrb request of the category."""
        # Call this method only once per list of requests.
        assert self.input_files is None
        arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
        for request in all_requests:
            if (request.category != self.category
                    or not isinstance(request, AbstractExecutionRequest)
                    or request.tool != IcuTool("genrb")):
                continue
            self._set_files(request.input_files)
            # Add dependencies directly to dep_files
            request.dep_files += self.filter_files
            request.args = "%s %s" % (arg_str, request.args)

        # Make sure we found the target request
        if self.input_files is not None:
            return
        print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
        self.input_files = []
        self.filter_files = []
        self.rules_by_file = []

    def _set_files(self, files):
        """Record the genrb input files and derive one filter file for each."""
        # Note: The input files to genrb for a certain category should always
        # be the same. For example, there are often two genrb calls: one for
        # --writePoolBundle, and the other for --usePoolBundle. They are both
        # expected to have the same list of input files.
        if self.input_files is not None:
            assert self.input_files == files
            return
        self.input_files = list(files)
        filter_files = []
        for input_file in files:
            # Keep only the part of the filename after the last "/".
            basename = input_file.filename.rsplit("/", 1)[-1]
            filter_files.append(TmpFile("%s/%s" % (self.filter_tmp_dir, basename)))
        self.filter_files = filter_files
        self.rules_by_file = [[] for _ in files]

    def add_rules(self, file_filter, rules):
        """Append rules to every input file accepted by file_filter."""
        for input_file, file_rules in zip(self.input_files, self.rules_by_file):
            if file_filter.match(input_file):
                file_rules.extend(rules)

    def make_requests(self):
        """Build the requests that create the filter files.

        Each distinct rule list is printed once; other filter files with the
        same rule list are produced by copying that first file.
        """
        # Map from rule list to filter files with that rule list
        files_by_rules = defaultdict(list)
        for filter_file, file_rules in zip(self.filter_files, self.rules_by_file):
            files_by_rules[tuple(file_rules)].append(filter_file)

        new_requests = []
        for rule_tuple, same_rule_files in files_by_rules.items():
            first_file = same_rule_files[0]
            # The numeric suffix is the running count of requests made so far.
            new_requests.append(PrintFileRequest(
                name = "%s_print_%d" % (self.category, len(new_requests)),
                output_file = first_file,
                content = self._generate_resource_filter_txt(rule_tuple)
            ))
            for other_file in same_rule_files[1:]:
                new_requests.append(CopyRequest(
                    name = "%s_copy_%d" % (self.category, len(new_requests)),
                    input_file = first_file,
                    output_file = other_file
                ))
        return new_requests

    @classmethod
    def _generate_resource_filter_txt(cls, rules):
        """Return the filter file text: a header, then one rule per line."""
        header = "# Caution: This file is automatically generated\n\n"
        return header + "\n".join(rules)
|
||||
|
||||
|
||||
def _apply_resource_filters(all_requests, config):
|
||||
"""Creates filters for looking within resource bundle files."""
|
||||
return old_requests
|
||||
json_data = config.filters_json_data
|
||||
if "resourceFilters" not in json_data:
|
||||
return all_requests
|
||||
|
||||
collected = {}
|
||||
for entry in json_data["resourceFilters"]:
|
||||
if "files" in entry:
|
||||
file_filter = Filter.create_from_json(entry["files"])
|
||||
else:
|
||||
file_filter = InclusionFilter()
|
||||
for category in entry["categories"]:
|
||||
# not defaultdict because we need to pass arguments to the constructor
|
||||
if category not in collected:
|
||||
filter_info = ResourceFilterInfo(category)
|
||||
filter_info.apply_to_requests(all_requests)
|
||||
collected[category] = filter_info
|
||||
else:
|
||||
filter_info = collected[category]
|
||||
filter_info.add_rules(file_filter, entry["rules"])
|
||||
|
||||
# Add the filter generation requests to the beginning so that by default
|
||||
# they are made before genrb gets run (order is required by windirect)
|
||||
new_requests = []
|
||||
for filter_info in collected.values():
|
||||
new_requests += filter_info.make_requests()
|
||||
new_requests += all_requests
|
||||
return new_requests
|
||||
|
85
icu4c/source/data/buildtool/filtration_schema.json
Normal file
85
icu4c/source/data/buildtool/filtration_schema.json
Normal file
@ -0,0 +1,85 @@
|
||||
// Copyright (C) 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
{
|
||||
"$id": "http://unicode.org/icu-filter-schema",
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"description": "JSON Schema for an ICU data filter file",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"localeFilter": { "$ref": "#/definitions/filter" },
|
||||
"featureFilters": {
|
||||
"type": "object",
|
||||
"additionalProperties": { "$ref": "#/definitions/filter" }
|
||||
},
|
||||
"resourceFilters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"categories": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"files": { "$ref": "#/definitions/filter" },
|
||||
"rules": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"pattern": "^[+-]/(\\w+(/\\w+)*)?$"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["categories", "rules"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"definitions": {
|
||||
"filter": {
|
||||
"type": "object",
|
||||
"oneOf": [
|
||||
{
|
||||
"properties": {
|
||||
"filterType": { "$ref": "#/definitions/filterType" },
|
||||
"whitelist": { "$ref": "#/definitions/stringList" }
|
||||
},
|
||||
"required": ["whitelist"],
|
||||
"additionalProperties": false
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"filterType": { "$ref": "#/definitions/filterType" },
|
||||
"blacklist": { "$ref": "#/definitions/stringList" }
|
||||
},
|
||||
"required": ["blacklist"],
|
||||
"additionalProperties": false
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"filterType": { "$ref": "#/definitions/filterType" }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"filterType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"file-stem",
|
||||
"language",
|
||||
"regex",
|
||||
"exclude"
|
||||
]
|
||||
},
|
||||
"stringList": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 1,
|
||||
"uniqueItems": true
|
||||
}
|
||||
}
|
||||
}
|
@ -9,7 +9,7 @@ from abc import abstractmethod
|
||||
import copy
|
||||
import sys
|
||||
|
||||
|
||||
from . import *
|
||||
from . import utils
|
||||
|
||||
|
||||
|
@ -86,6 +86,14 @@ def get_all_output_files(requests, include_tmp=False):
|
||||
return [f for _, f in set((type(f), f) for f in files)]
|
||||
|
||||
|
||||
def compute_directories(requests):
    """Returns the sorted, de-duplicated directories of all output files.

    Every output file of the requests (temporary files included) is turned
    into a "<dir_for(file)>/<filename>" path, and the part before the last
    "/" is kept as its directory.
    """
    output_files = get_all_output_files(requests, include_tmp=True)
    full_paths = (
        "%s/%s" % (dir_for(output_file), output_file.filename)
        for output_file in output_files
    )
    return sorted({full_path[:full_path.rfind("/")] for full_path in full_paths})
|
||||
|
||||
|
||||
class SpaceSeparatedList(list):
|
||||
"""A list that joins itself with spaces when converted to a string."""
|
||||
def __str__(self):
|
||||
|
4
icu4c/source/test/testdata/BUILDRULES.py
vendored
4
icu4c/source/test/testdata/BUILDRULES.py
vendored
@ -8,8 +8,6 @@ from buildtool.request_types import *
|
||||
|
||||
|
||||
def generate(config, glob, common_vars):
|
||||
build_dirs = ["{OUT_DIR}", "{TMP_DIR}"]
|
||||
|
||||
requests = []
|
||||
requests += generate_rb(config, glob, common_vars)
|
||||
requests += generate_sprep(config, glob, common_vars)
|
||||
@ -26,7 +24,7 @@ def generate(config, glob, common_vars):
|
||||
)
|
||||
]
|
||||
|
||||
return (build_dirs, requests)
|
||||
return requests
|
||||
|
||||
|
||||
def generate_rb(config, glob, common_vars):
|
||||
|
Loading…
Reference in New Issue
Block a user