Add support for --raw and --omit to js2c.

--raw writes the raw source data to a separate file
  --omit allows omitting the source data from the generated files.

The intention is (future) support for having the embedder optionally
store the source data 'blob' and handling it to V8::Initialize, with
the goal of reducing the binary size of V8.

The patch also contains numerous unrelated changes/refactorings in the hope of increasing maintainability. Let me know whether you agree. In particular:
- Remove some unused code.
- Do not overwrite Python built-ins (e.g. the type() function)
- Do not use a string as exception object (no longer supported in python 2.7)
- Add command line argument handling + help text.
- Split logic into:
  - PrepareSources - which reads + preprocesses the source files
  - BuildMetadata - which takes the prepared sources and generates the data the code template needs.

BUG=355539
LOG=N
R=jochen@chromium.org

Review URL: https://codereview.chromium.org/225723002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@20657 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
vogelheim@chromium.org 2014-04-10 14:53:32 +00:00
parent 5633dfa6f5
commit b88068ff2b

View File

@ -32,24 +32,23 @@
# library.
import os, re, sys, string
import optparse
import jsmin
import bz2
import textwrap
def ToCAsciiArray(lines):
class Error(Exception):
def __init__(self, msg):
Exception.__init__(self, msg)
def ToCArray(byte_sequence):
result = []
for chr in lines:
value = ord(chr)
assert value < 128
result.append(str(value))
return ", ".join(result)
def ToCArray(lines):
result = []
for chr in lines:
for chr in byte_sequence:
result.append(str(ord(chr)))
return ", ".join(result)
joined = ", ".join(result)
return textwrap.fill(joined, 80)
def RemoveCommentsAndTrailingWhitespace(lines):
@ -68,46 +67,19 @@ def ReadFile(filename):
return lines
def ReadLines(filename):
result = []
for line in open(filename, "rt"):
if '#' in line:
line = line[:line.index('#')]
line = line.strip()
if len(line) > 0:
result.append(line)
return result
def LoadConfigFrom(name):
import ConfigParser
config = ConfigParser.ConfigParser()
config.read(name)
return config
def ParseValue(string):
string = string.strip()
if string.startswith('[') and string.endswith(']'):
return string.lstrip('[').rstrip(']').split()
else:
return string
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
def Validate(lines, file):
lines = RemoveCommentsAndTrailingWhitespace(lines)
def Validate(lines):
# Because of simplified context setup, eval and with is not
# allowed in the natives files.
eval_match = EVAL_PATTERN.search(lines)
if eval_match:
raise ("Eval disallowed in natives: %s" % file)
with_match = WITH_PATTERN.search(lines)
if with_match:
raise ("With statements disallowed in natives: %s" % file)
if EVAL_PATTERN.search(lines):
raise Error("Eval disallowed in natives.")
if WITH_PATTERN.search(lines):
raise Error("With statements disallowed in natives.")
# Pass lines through unchanged.
return lines
def ExpandConstants(lines, constants):
@ -187,7 +159,7 @@ PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*
def ReadMacros(lines):
constants = []
macros = []
for line in lines:
for line in lines.split('\n'):
hash = line.find('#')
if hash != -1: line = line[:hash]
line = line.strip()
@ -213,13 +185,13 @@ def ReadMacros(lines):
fun = eval("lambda " + ",".join(args) + ': ' + body)
macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
else:
raise ("Illegal line: " + line)
raise Error("Illegal line: " + line)
return (constants, macros)
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')
def ExpandInlineMacros(lines, filename):
def ExpandInlineMacros(lines):
pos = 0
while True:
macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
@ -230,7 +202,7 @@ def ExpandInlineMacros(lines, filename):
args = [match.strip() for match in macro_match.group(2).split(',')]
end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end());
if end_macro_match is None:
raise ("Macro %s unclosed in %s" % (name, filename))
raise Error("Macro %s unclosed" % name)
body = lines[macro_match.end():end_macro_match.start()]
# remove macro definition
@ -245,6 +217,7 @@ def ExpandInlineMacros(lines, filename):
return s
lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.
@ -259,7 +232,7 @@ HEADER_TEMPLATE = """\
namespace v8 {
namespace internal {
static const byte sources[] = { %(sources_data)s };
%(sources_declaration)s\
%(raw_sources_declaration)s\
@ -311,6 +284,10 @@ namespace internal {
} // v8
"""
SOURCES_DECLARATION = """\
static const byte sources[] = { %s };
"""
RAW_SOURCES_COMPRESSION_DECLARATION = """\
static const char* raw_sources = NULL;
@ -336,97 +313,202 @@ GET_SCRIPT_NAME_CASE = """\
if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
def JS2C(source, target, env):
ids = []
debugger_ids = []
modules = []
# Locate the macros file name.
consts = []
macros = []
for s in source:
if 'macros.py' == (os.path.split(str(s))[1]):
(consts, macros) = ReadMacros(ReadLines(str(s)))
else:
modules.append(s)
minifier = jsmin.JavaScriptMinifier()
def BuildFilterChain(macro_filename):
"""Build the chain of filter functions to be applied to the sources.
module_offset = 0
all_sources = []
for module in modules:
filename = str(module)
debugger = filename.endswith('-debugger.js')
lines = ReadFile(filename)
lines = ExpandConstants(lines, consts)
lines = ExpandMacros(lines, macros)
lines = RemoveCommentsAndTrailingWhitespace(lines)
lines = ExpandInlineMacros(lines, filename)
Validate(lines, filename)
lines = minifier.JSMinify(lines)
id = (os.path.split(filename)[1])[:-3]
if debugger: id = id[:-9]
raw_length = len(lines)
if debugger:
debugger_ids.append((id, raw_length, module_offset))
else:
ids.append((id, raw_length, module_offset))
all_sources.append(lines)
module_offset += raw_length
total_length = raw_total_length = module_offset
Args:
macro_filename: Name of the macro file, if any.
if env['COMPRESSION'] == 'off':
raw_sources_declaration = RAW_SOURCES_DECLARATION
sources_data = ToCAsciiArray("".join(all_sources))
Returns:
A function (string -> string) that reads a source file and processes it.
"""
filter_chain = [ReadFile]
if macro_filename:
(consts, macros) = ReadMacros(ReadFile(macro_filename))
filter_chain.append(lambda l: ExpandConstants(l, consts))
filter_chain.append(lambda l: ExpandMacros(l, macros))
filter_chain.extend([
RemoveCommentsAndTrailingWhitespace,
ExpandInlineMacros,
Validate,
jsmin.JavaScriptMinifier().JSMinify
])
def chain(f1, f2):
return lambda x: f2(f1(x))
return reduce(chain, filter_chain)
class Sources:
def __init__(self):
self.names = []
self.modules = []
self.is_debugger_id = []
def IsDebuggerFile(filename):
return filename.endswith("-debugger.js")
def IsMacroFile(filename):
return filename.endswith("macros.py")
def PrepareSources(source_files):
"""Read, prepare and assemble the list of source files.
Args:
sources: List of Javascript-ish source files. A file named macros.py
will be treated as a list of macros.
Returns:
An instance of Sources.
"""
macro_file = None
macro_files = filter(IsMacroFile, source_files)
assert len(macro_files) in [0, 1]
if macro_files:
source_files.remove(macro_files[0])
macro_file = macro_files[0]
filters = BuildFilterChain(macro_file)
# Sort 'debugger' sources first.
source_files = sorted(source_files,
lambda l,r: IsDebuggerFile(r) - IsDebuggerFile(l))
result = Sources()
for source in source_files:
try:
lines = filters(source)
except Error as e:
raise Error("In file %s:\n%s" % (source, str(e)))
result.modules.append(lines);
is_debugger = IsDebuggerFile(source)
result.is_debugger_id.append(is_debugger);
name = os.path.basename(source)[:-3]
result.names.append(name if not is_debugger else name[:-9]);
return result
def BuildMetadata(sources, source_bytes, native_type, omit):
"""Build the meta data required to generate a libaries file.
Args:
sources: A Sources instance with the prepared sources.
source_bytes: A list of source bytes.
(The concatenation of all sources; might be compressed.)
native_type: The parameter for the NativesCollection template.
omit: bool, whether we should omit the sources in the output.
Returns:
A dictionary for use with HEADER_TEMPLATE.
"""
total_length = len(source_bytes)
raw_sources = "".join(sources.modules)
# The sources are expected to be ASCII-only.
assert not filter(lambda value: ord(value) >= 128, raw_sources)
# Loop over modules and build up indices into the source blob:
get_index_cases = []
get_script_name_cases = []
get_raw_script_source_cases = []
offset = 0
for i in xrange(len(sources.modules)):
native_name = "native %s.js" % sources.names[i]
d = {
"i": i,
"id": sources.names[i],
"name": native_name,
"length": len(native_name),
"offset": offset,
"raw_length": len(sources.modules[i]),
}
get_index_cases.append(GET_INDEX_CASE % d)
get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % d)
offset += len(sources.modules[i])
assert offset == len(raw_sources)
# If we have the raw sources we can declare them accordingly.
have_raw_sources = source_bytes == raw_sources and not omit
raw_sources_declaration = (RAW_SOURCES_DECLARATION
if have_raw_sources else RAW_SOURCES_COMPRESSION_DECLARATION)
metadata = {
"builtin_count": len(sources.modules),
"debugger_count": sum(sources.is_debugger_id),
"sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
"sources_data": ToCArray(source_bytes) if not omit else "",
"raw_sources_declaration": raw_sources_declaration,
"raw_total_length": sum(map(len, sources.modules)),
"total_length": total_length,
"get_index_cases": "".join(get_index_cases),
"get_raw_script_source_cases": "".join(get_raw_script_source_cases),
"get_script_name_cases": "".join(get_script_name_cases),
"type": native_type,
}
return metadata
def CompressMaybe(sources, compression_type):
"""Take the prepared sources and generate a sequence of bytes.
Args:
sources: A Sources instance with the prepared sourced.
compression_type: string, describing the desired compression.
Returns:
A sequence of bytes.
"""
sources_bytes = "".join(sources.modules)
if compression_type == "off":
return sources_bytes
elif compression_type == "bz2":
return bz2.compress(sources_bytes)
else:
raw_sources_declaration = RAW_SOURCES_COMPRESSION_DECLARATION
if env['COMPRESSION'] == 'bz2':
all_sources = bz2.compress("".join(all_sources))
total_length = len(all_sources)
sources_data = ToCArray(all_sources)
raise Error("Unknown compression type %s." % compression_type)
# Build debugger support functions
get_index_cases = [ ]
get_raw_script_source_cases = [ ]
get_script_name_cases = [ ]
i = 0
for (id, raw_length, module_offset) in debugger_ids + ids:
native_name = "native %s.js" % id
get_index_cases.append(GET_INDEX_CASE % { 'id': id, 'i': i })
get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % {
'offset': module_offset,
'raw_length': raw_length,
'i': i
})
get_script_name_cases.append(GET_SCRIPT_NAME_CASE % {
'name': native_name,
'length': len(native_name),
'i': i
})
i = i + 1
def JS2C(source, target, native_type, compression_type, raw_file, omit):
sources = PrepareSources(source)
sources_bytes = CompressMaybe(sources, compression_type)
metadata = BuildMetadata(sources, sources_bytes, native_type, omit)
# Emit result
output = open(str(target[0]), "w")
output.write(HEADER_TEMPLATE % {
'builtin_count': len(ids) + len(debugger_ids),
'debugger_count': len(debugger_ids),
'sources_data': sources_data,
'raw_sources_declaration': raw_sources_declaration,
'raw_total_length': raw_total_length,
'total_length': total_length,
'get_index_cases': "".join(get_index_cases),
'get_raw_script_source_cases': "".join(get_raw_script_source_cases),
'get_script_name_cases': "".join(get_script_name_cases),
'type': env['TYPE']
})
# Optionally emit raw file.
if raw_file:
output = open(raw_file, "w")
output.write(sources_bytes)
output.close()
# Emit resulting source file.
output = open(target, "w")
output.write(HEADER_TEMPLATE % metadata)
output.close()
def main():
natives = sys.argv[1]
type = sys.argv[2]
compression = sys.argv[3]
source_files = sys.argv[4:]
JS2C(source_files, [natives], { 'TYPE': type, 'COMPRESSION': compression })
parser = optparse.OptionParser()
parser.add_option("--raw", action="store",
help="file to write the processed sources array to.")
parser.add_option("--omit", dest="omit", action="store_true",
help="Omit the raw sources from the generated code.")
parser.set_usage("""js2c out.cc type compression sources.js ...
out.cc: C code to be generated.
type: type parameter for NativesCollection template.
compression: type of compression used. [off|bz2]
sources.js: JS internal sources or macros.py.""")
(options, args) = parser.parse_args()
JS2C(args[3:], args[0], args[1], args[2], options.raw, options.omit)
if __name__ == "__main__":
main()