Compress sources of JS libraries in addition to the snapshot.

This saves ~170K on current sources.

R=sgjesse@chromium.org
BUG=none
TEST=none

Review URL: http://codereview.chromium.org/7066048

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8189 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
mikhail.naganov@gmail.com 2011-06-06 20:47:30 +00:00
parent da40f3da1d
commit f1309b0417
12 changed files with 289 additions and 97 deletions

View File

@ -2704,6 +2704,31 @@ class StartupData {
int raw_size;
};
/**
* A helper class for driving V8 startup data decompression. It is based on
* "CompressedStartupData" API functions from the V8 class. It isn't mandatory
* for an embedder to use this class, instead, API functions can be used
* directly.
*
* For an example of the class usage, see the "shell.cc" sample application.
*/
class V8EXPORT StartupDataDecompressor {
public:
StartupDataDecompressor();
virtual ~StartupDataDecompressor();
int Decompress();
protected:
virtual int DecompressData(char* raw_data,
int* raw_data_size,
const char* compressed_data,
int compressed_data_size) = 0;
private:
char** raw_data;
};
/**
* Container class for static utility functions.
*/
@ -2753,6 +2778,10 @@ class V8EXPORT V8 {
* v8::V8::SetDecompressedStartupData(compressed_data);
* ... now V8 can be initialized
* ... make sure the decompressed data stays valid until V8 shutdown
*
* A helper class StartupDataDecompressor is provided. It implements
* the protocol of the interaction described above, and can be used in
* most cases instead of calling these API functions directly.
*/
static StartupData::CompressionAlgorithm GetCompressedStartupDataAlgorithm();
static int GetCompressedStartupDataCount();

View File

@ -213,6 +213,34 @@ class SourceGroup {
static SourceGroup* isolate_sources = NULL;
#ifdef COMPRESS_STARTUP_DATA_BZ2
// Startup data decompressor for the sample shell, implemented on top of
// libbzip2's one-shot buffer-to-buffer API.
class BZip2Decompressor : public v8::StartupDataDecompressor {
 public:
  virtual ~BZip2Decompressor() { }

 protected:
  // Inflates one bzip2-compressed item into raw_data. On success the actual
  // decompressed size is written back through raw_data_size. The libbzip2
  // status code is returned unchanged (BZ_OK on success).
  virtual int DecompressData(char* raw_data,
                             int* raw_data_size,
                             const char* compressed_data,
                             int compressed_data_size) {
    ASSERT_EQ(v8::StartupData::kBZip2,
              v8::V8::GetCompressedStartupDataAlgorithm());
    // libbzip2 wants a mutable source pointer and an unsigned in/out length.
    char* source = const_cast<char*>(compressed_data);
    unsigned int dest_length = *raw_data_size;
    int status = BZ2_bzBuffToBuffDecompress(raw_data,
                                            &dest_length,
                                            source,
                                            compressed_data_size,
                                            0,   // small
                                            1);  // verbosity
    if (status != BZ_OK) return status;
    *raw_data_size = dest_length;
    return status;
  }
};
#endif
int RunMain(int argc, char* argv[]) {
v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
v8::HandleScope handle_scope;
@ -303,29 +331,13 @@ int main(int argc, char* argv[]) {
}
#ifdef COMPRESS_STARTUP_DATA_BZ2
ASSERT_EQ(v8::StartupData::kBZip2,
v8::V8::GetCompressedStartupDataAlgorithm());
int compressed_data_count = v8::V8::GetCompressedStartupDataCount();
v8::StartupData* compressed_data = new v8::StartupData[compressed_data_count];
v8::V8::GetCompressedStartupData(compressed_data);
for (int i = 0; i < compressed_data_count; ++i) {
char* decompressed = new char[compressed_data[i].raw_size];
unsigned int decompressed_size = compressed_data[i].raw_size;
int result =
BZ2_bzBuffToBuffDecompress(decompressed,
&decompressed_size,
const_cast<char*>(compressed_data[i].data),
compressed_data[i].compressed_size,
0, 1);
if (result != BZ_OK) {
fprintf(stderr, "bzip error code: %d\n", result);
exit(1);
}
compressed_data[i].data = decompressed;
compressed_data[i].raw_size = decompressed_size;
BZip2Decompressor startup_data_decompressor;
int bz2_result = startup_data_decompressor.Decompress();
if (bz2_result != BZ_OK) {
fprintf(stderr, "bzip error code: %d\n", bz2_result);
exit(1);
}
v8::V8::SetDecompressedStartupData(compressed_data);
#endif // COMPRESS_STARTUP_DATA_BZ2
#endif
v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
int result = 0;
@ -348,13 +360,6 @@ int main(int argc, char* argv[]) {
}
v8::V8::Dispose();
#ifdef COMPRESS_STARTUP_DATA_BZ2
for (int i = 0; i < compressed_data_count; ++i) {
delete[] compressed_data[i].data;
}
delete[] compressed_data;
#endif // COMPRESS_STARTUP_DATA_BZ2
return result;
}

View File

@ -316,11 +316,17 @@ def ConfigureObjectFiles():
else:
env['BUILDERS']['Snapshot'] = Builder(action='$SOURCE $TARGET')
def BuildJS2CEnv(type):
  # Keyword arguments for a JS2C builder call: the natives collection type
  # plus the compression mode, which is 'bz2' when the build defines
  # COMPRESS_STARTUP_DATA_BZ2 and 'off' otherwise.
  compression = 'off'
  if 'COMPRESS_STARTUP_DATA_BZ2' in env['CPPDEFINES']:
    compression = 'bz2'
  return { 'TYPE': type, 'COMPRESSION': compression }
# Build the standard platform-independent source files.
source_files = context.GetRelevantSources(SOURCES)
d8_files = context.GetRelevantSources(D8_FILES)
d8_js = env.JS2C('d8-js.cc', 'd8.js', TYPE='D8')
d8_js = env.JS2C('d8-js.cc', 'd8.js', **{'TYPE': 'D8', 'COMPRESSION': 'off'})
d8_js_obj = context.ConfigureObject(env, d8_js, CPPPATH=['.'])
d8_objs = [context.ConfigureObject(env, [d8_files]), d8_js_obj]
@ -328,14 +334,17 @@ def ConfigureObjectFiles():
# compile it.
library_files = [s for s in LIBRARY_FILES]
library_files.append('macros.py')
libraries_src = env.JS2C(['libraries.cc'], library_files, TYPE='CORE')
libraries_src = env.JS2C(
['libraries.cc'], library_files, **BuildJS2CEnv('CORE'))
libraries_obj = context.ConfigureObject(env, libraries_src, CPPPATH=['.'])
# Combine the experimental JavaScript library files into a C++ file
# and compile it.
experimental_library_files = [ s for s in EXPERIMENTAL_LIBRARY_FILES ]
experimental_library_files.append('macros.py')
experimental_libraries_src = env.JS2C(['experimental-libraries.cc'], experimental_library_files, TYPE='EXPERIMENTAL')
experimental_libraries_src = env.JS2C(['experimental-libraries.cc'],
experimental_library_files,
**BuildJS2CEnv('EXPERIMENTAL'))
experimental_libraries_obj = context.ConfigureObject(env, experimental_libraries_src, CPPPATH=['.'])
source_objs = context.ConfigureObject(env, source_files)

View File

@ -38,6 +38,7 @@
#include "global-handles.h"
#include "heap-profiler.h"
#include "messages.h"
#include "natives.h"
#include "parser.h"
#include "platform.h"
#include "profile-generator-inl.h"
@ -311,6 +312,46 @@ static inline i::Isolate* EnterIsolateIfNeeded() {
}
// Allocates one buffer slot per compressed startup data item and clears all
// slots, so that the destructor can run safely even if Decompress() is never
// called or stops early.
StartupDataDecompressor::StartupDataDecompressor()
    : raw_data(i::NewArray<char*>(V8::GetCompressedStartupDataCount())) {
  const int item_count = V8::GetCompressedStartupDataCount();
  char** const end = raw_data + item_count;
  for (char** slot = raw_data; slot != end; ++slot) {
    *slot = NULL;
  }
}
// Releases every per-item buffer and then the slot array itself.
StartupDataDecompressor::~StartupDataDecompressor() {
  const int item_count = V8::GetCompressedStartupDataCount();
  int index = 0;
  while (index < item_count) {
    i::DeleteArray(raw_data[index]);
    ++index;
  }
  i::DeleteArray(raw_data);
}
// Decompresses every startup data item into a buffer owned by this object
// (stored in raw_data and released by the destructor) and hands the result
// to V8 via V8::SetDecompressedStartupData().
//
// Returns 0 on success. If DecompressData() reports a non-zero result for an
// item, decompression stops, nothing is passed to V8 and that result is
// returned.
int StartupDataDecompressor::Decompress() {
  int compressed_data_count = V8::GetCompressedStartupDataCount();
  // Temporary descriptor array: V8 fills it with the compressed items, and
  // each entry is then redirected to the corresponding decompressed buffer.
  StartupData* compressed_data =
      i::NewArray<StartupData>(compressed_data_count);
  V8::GetCompressedStartupData(compressed_data);
  int result = 0;
  for (int i = 0; i < compressed_data_count; ++i) {
    char* decompressed = raw_data[i] =
        i::NewArray<char>(compressed_data[i].raw_size);
    if (compressed_data[i].compressed_size != 0) {
      result = DecompressData(decompressed,
                              &compressed_data[i].raw_size,
                              compressed_data[i].data,
                              compressed_data[i].compressed_size);
      if (result != 0) break;
    } else {
      // An item without compressed bytes must not expect any raw bytes.
      ASSERT_EQ(0, compressed_data[i].raw_size);
    }
    compressed_data[i].data = decompressed;
  }
  if (result == 0) {
    V8::SetDecompressedStartupData(compressed_data);
  }
  // Only the decompressed buffers (owned through raw_data) have to outlive
  // this call. The descriptor array itself is scratch space and was
  // previously leaked on both the success and the error path.
  i::DeleteArray(compressed_data);
  return result;
}
StartupData::CompressionAlgorithm V8::GetCompressedStartupDataAlgorithm() {
#ifdef COMPRESS_STARTUP_DATA_BZ2
return StartupData::kBZip2;
@ -323,6 +364,8 @@ StartupData::CompressionAlgorithm V8::GetCompressedStartupDataAlgorithm() {
// Indices of the individual items inside the StartupData arrays exchanged
// through V8::GetCompressedStartupData() and V8::SetDecompressedStartupData().
enum CompressedStartupDataItems {
// NOTE(review): presumably the main snapshot; its use is outside this view.
kSnapshot = 0,
// Context part of the snapshot (i::Snapshot::context_*).
kSnapshotContext,
// Sources of the core JS libraries (i::Natives).
kLibraries,
// Sources of the experimental JS libraries (i::ExperimentalNatives).
kExperimentalLibraries,
// Number of items above; must stay the last enumerator.
kCompressedStartupDataCount
};
@ -347,6 +390,21 @@ void V8::GetCompressedStartupData(StartupData* compressed_data) {
compressed_data[kSnapshotContext].compressed_size =
i::Snapshot::context_size();
compressed_data[kSnapshotContext].raw_size = i::Snapshot::context_raw_size();
i::Vector<const i::byte> libraries_source = i::Natives::GetScriptsSource();
compressed_data[kLibraries].data =
reinterpret_cast<const char*>(libraries_source.start());
compressed_data[kLibraries].compressed_size = libraries_source.length();
compressed_data[kLibraries].raw_size = i::Natives::GetRawScriptsSize();
i::Vector<const i::byte> exp_libraries_source =
i::ExperimentalNatives::GetScriptsSource();
compressed_data[kExperimentalLibraries].data =
reinterpret_cast<const char*>(exp_libraries_source.start());
compressed_data[kExperimentalLibraries].compressed_size =
exp_libraries_source.length();
compressed_data[kExperimentalLibraries].raw_size =
i::ExperimentalNatives::GetRawScriptsSize();
#endif
}
@ -362,6 +420,20 @@ void V8::SetDecompressedStartupData(StartupData* decompressed_data) {
i::Snapshot::set_context_raw_data(
reinterpret_cast<const i::byte*>(
decompressed_data[kSnapshotContext].data));
ASSERT_EQ(i::Natives::GetRawScriptsSize(),
decompressed_data[kLibraries].raw_size);
i::Vector<const char> libraries_source(
decompressed_data[kLibraries].data,
decompressed_data[kLibraries].raw_size);
i::Natives::SetRawScriptsSource(libraries_source);
ASSERT_EQ(i::ExperimentalNatives::GetRawScriptsSize(),
decompressed_data[kExperimentalLibraries].raw_size);
i::Vector<const char> exp_libraries_source(
decompressed_data[kExperimentalLibraries].data,
decompressed_data[kExperimentalLibraries].raw_size);
i::ExperimentalNatives::SetRawScriptsSource(exp_libraries_source);
#endif
}

View File

@ -47,8 +47,9 @@ namespace internal {
NativesExternalStringResource::NativesExternalStringResource(
Bootstrapper* bootstrapper,
const char* source)
: data_(source), length_(StrLength(source)) {
const char* source,
size_t length)
: data_(source), length_(length) {
if (bootstrapper->delete_these_non_arrays_on_tear_down_ == NULL) {
bootstrapper->delete_these_non_arrays_on_tear_down_ = new List<char*>(2);
}
@ -75,16 +76,18 @@ Handle<String> Bootstrapper::NativesSourceLookup(int index) {
if (heap->natives_source_cache()->get(index)->IsUndefined()) {
if (!Snapshot::IsEnabled() || FLAG_new_snapshot) {
// We can use external strings for the natives.
Vector<const char> source = Natives::GetRawScriptSource(index);
NativesExternalStringResource* resource =
new NativesExternalStringResource(this,
Natives::GetScriptSource(index).start());
source.start(),
source.length());
Handle<String> source_code =
factory->NewExternalStringFromAscii(resource);
heap->natives_source_cache()->set(index, *source_code);
} else {
// Old snapshot code can't cope with external strings at all.
Handle<String> source_code =
factory->NewStringFromAscii(Natives::GetScriptSource(index));
factory->NewStringFromAscii(Natives::GetRawScriptSource(index));
heap->natives_source_cache()->set(index, *source_code);
}
}
@ -1182,7 +1185,8 @@ bool Genesis::CompileExperimentalBuiltin(Isolate* isolate, int index) {
Vector<const char> name = ExperimentalNatives::GetScriptName(index);
Factory* factory = isolate->factory();
Handle<String> source_code =
factory->NewStringFromAscii(ExperimentalNatives::GetScriptSource(index));
factory->NewStringFromAscii(
ExperimentalNatives::GetRawScriptSource(index));
return CompileNative(name, source_code);
}

View File

@ -168,8 +168,9 @@ class BootstrapperActive BASE_EMBEDDED {
class NativesExternalStringResource
: public v8::String::ExternalAsciiStringResource {
public:
explicit NativesExternalStringResource(Bootstrapper* bootstrapper,
const char* source);
NativesExternalStringResource(Bootstrapper* bootstrapper,
const char* source,
size_t length);
const char* data() const {
return data_;

View File

@ -566,7 +566,7 @@ void Shell::Initialize() {
// Run the d8 shell utility script in the utility context
int source_index = i::NativesCollection<i::D8>::GetIndex("d8");
i::Vector<const char> shell_source
= i::NativesCollection<i::D8>::GetScriptSource(source_index);
= i::NativesCollection<i::D8>::GetRawScriptSource(source_index);
i::Vector<const char> shell_source_name
= i::NativesCollection<i::D8>::GetScriptName(source_index);
Handle<String> source = String::New(shell_source.start(),

View File

@ -136,6 +136,7 @@ class PartialSnapshotSink : public i::SnapshotByteSink {
return true;
}
int raw_size() { return raw_size_; }
private:
i::List<char> data_;
int raw_size_;
@ -265,6 +266,32 @@ class BZip2Compressor : public Compressor {
private:
i::ScopedVector<char>* output_;
};
// Startup data decompressor used by mksnapshot, implemented with libbzip2's
// one-shot buffer-to-buffer decompression call.
class BZip2Decompressor : public StartupDataDecompressor {
 public:
  virtual ~BZip2Decompressor() { }

 protected:
  // Decompresses a single bzip2-compressed item into raw_data and, when that
  // succeeds, reports the real decompressed size through raw_data_size.
  // Returns the libbzip2 status code as is (BZ_OK means success).
  virtual int DecompressData(char* raw_data,
                             int* raw_data_size,
                             const char* compressed_data,
                             int compressed_data_size) {
    ASSERT_EQ(StartupData::kBZip2,
              V8::GetCompressedStartupDataAlgorithm());
    // In: capacity of the destination buffer. Out: bytes actually written.
    unsigned int output_length = *raw_data_size;
    // The libbzip2 prototype takes a non-const source pointer.
    char* input = const_cast<char*>(compressed_data);
    const int bz_status =
        BZ2_bzBuffToBuffDecompress(raw_data, &output_length,
                                   input, compressed_data_size,
                                   0, 1);
    if (bz_status != BZ_OK) return bz_status;
    *raw_data_size = output_length;
    return bz_status;
  }
};
#endif
@ -281,6 +308,14 @@ int main(int argc, char** argv) {
i::FlagList::PrintHelp();
return !i::FLAG_help;
}
#ifdef COMPRESS_STARTUP_DATA_BZ2
BZip2Decompressor natives_decompressor;
int bz2_result = natives_decompressor.Decompress();
if (bz2_result != BZ_OK) {
fprintf(stderr, "bzip error code: %d\n", bz2_result);
exit(1);
}
#endif
i::Serializer::Enable();
Persistent<Context> context = v8::Context::New();
ASSERT(!context.IsEmpty());

View File

@ -52,8 +52,11 @@ class NativesCollection {
// non-debugger scripts have an index in the interval [GetDebuggerCount(),
// GetNativesCount()).
static int GetIndex(const char* name);
static Vector<const char> GetScriptSource(int index);
static int GetRawScriptsSize();
static Vector<const char> GetRawScriptSource(int index);
static Vector<const char> GetScriptName(int index);
static Vector<const byte> GetScriptsSource();
static void SetRawScriptsSource(Vector<const char> raw_source);
};
typedef NativesCollection<CORE> Natives;

View File

@ -1017,10 +1017,11 @@ void Deserializer::ReadChunk(Object** current,
case kNativesStringResource: {
int index = source_->Get();
Vector<const char> source_vector = Natives::GetScriptSource(index);
Vector<const char> source_vector = Natives::GetRawScriptSource(index);
NativesExternalStringResource* resource =
new NativesExternalStringResource(
isolate->bootstrapper(), source_vector.start());
new NativesExternalStringResource(isolate->bootstrapper(),
source_vector.start(),
source_vector.length());
*current++ = reinterpret_cast<Object*>(resource);
break;
}

View File

@ -30,7 +30,7 @@
'use_system_v8%': 0,
'msvs_use_common_release': 0,
'gcc_version%': 'unknown',
'v8_compress_startup_data%': 'false',
'v8_compress_startup_data%': 'off',
'v8_target_arch%': '<(target_arch)',
# Setting 'v8_can_use_unaligned_accesses' to 'true' will allow the code
@ -902,6 +902,7 @@
'../../tools/js2c.py',
'<@(_outputs)',
'CORE',
'<(v8_compress_startup_data)',
'<@(library_files)'
],
},
@ -919,6 +920,7 @@
'../../tools/js2c.py',
'<@(_outputs)',
'EXPERIMENTAL',
'<(v8_compress_startup_data)',
'<@(experimental_library_files)'
],
},

View File

@ -33,15 +33,22 @@
import os, re, sys, string
import jsmin
import bz2
def ToCArray(lines):
# Renders the characters of `lines` as a comma-separated list of decimal
# character codes for use in a C array initializer. Every character must be
# 7-bit ASCII (code below 128); anything else trips the assertion.
# NOTE(review): in this flattened view the `result.append("0")` line has lost
# both its indentation and its diff marker -- confirm whether it still belongs
# to this function (trailing "0" entry) or is a removed line of the old
# ToCArray.
def ToCAsciiArray(lines):
result = []
for chr in lines:
value = ord(chr)
assert value < 128
result.append(str(value))
result.append("0")
return ", ".join(result)
def ToCArray(lines):
  # Renders every character of `lines` as its decimal character code and
  # joins the codes with ", " for use in a C array initializer. Unlike
  # ToCAsciiArray, no ASCII range check is performed.
  return ", ".join(str(ord(char)) for char in lines)
@ -87,8 +94,8 @@ def ParseValue(string):
return string
EVAL_PATTERN = re.compile(r'\beval\s*\(');
WITH_PATTERN = re.compile(r'\bwith\s*\(');
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
def Validate(lines, file):
@ -212,11 +219,14 @@ HEADER_TEMPLATE = """\
#include "v8.h"
#include "natives.h"
#include "utils.h"
namespace v8 {
namespace internal {
%(source_lines)s\
static const byte sources[] = { %(sources_data)s };
%(raw_sources_declaration)s\
template <>
int NativesCollection<%(type)s>::GetBuiltinsCount() {
@ -235,8 +245,13 @@ namespace internal {
}
template <>
Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
int NativesCollection<%(type)s>::GetRawScriptsSize() {
return %(raw_total_length)i;
}
template <>
Vector<const char> NativesCollection<%(type)s>::GetRawScriptSource(int index) {
%(get_raw_script_source_cases)s\
return Vector<const char>("", 0);
}
@ -246,27 +261,43 @@ namespace internal {
return Vector<const char>("", 0);
}
template <>
Vector<const byte> NativesCollection<%(type)s>::GetScriptsSource() {
return Vector<const byte>(sources, %(total_length)i);
}
template <>
void NativesCollection<%(type)s>::SetRawScriptsSource(Vector<const char> raw_source) {
ASSERT(%(raw_total_length)i == raw_source.length());
raw_sources = raw_source.start();
}
} // internal
} // v8
"""
SOURCE_DECLARATION = """\
static const char %(id)s[] = { %(data)s };
RAW_SOURCES_COMPRESSION_DECLARATION = """\
static const char* raw_sources = NULL;
"""
GET_DEBUGGER_INDEX_CASE = """\
RAW_SOURCES_DECLARATION = """\
static const char* raw_sources = reinterpret_cast<const char*>(sources);
"""
GET_INDEX_CASE = """\
if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""
GET_DEBUGGER_SCRIPT_SOURCE_CASE = """\
if (index == %(i)i) return Vector<const char>(%(id)s, %(length)i);
GET_RAW_SCRIPT_SOURCE_CASE = """\
if (index == %(i)i) return Vector<const char>(raw_sources + %(offset)i, %(raw_length)i);
"""
GET_DEBUGGER_SCRIPT_NAME_CASE = """\
GET_SCRIPT_NAME_CASE = """\
if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
@ -283,11 +314,10 @@ def JS2C(source, target, env):
else:
modules.append(s)
# Build source code lines
source_lines = [ ]
minifier = jsmin.JavaScriptMinifier()
module_offset = 0
all_sources = []
for module in modules:
filename = str(module)
debugger = filename.endswith('-debugger.js')
@ -296,49 +326,46 @@ def JS2C(source, target, env):
lines = ExpandMacros(lines, macros)
Validate(lines, filename)
lines = minifier.JSMinify(lines)
data = ToCArray(lines)
id = (os.path.split(filename)[1])[:-3]
if debugger: id = id[:-9]
raw_length = len(lines)
if debugger:
debugger_ids.append((id, len(lines)))
debugger_ids.append((id, raw_length, module_offset))
else:
ids.append((id, len(lines)))
source_lines.append(SOURCE_DECLARATION % { 'id': id, 'data': data })
ids.append((id, raw_length, module_offset))
all_sources.append(lines)
module_offset += raw_length
total_length = raw_total_length = module_offset
if env['COMPRESSION'] == 'off':
raw_sources_declaration = RAW_SOURCES_DECLARATION
sources_data = ToCAsciiArray("".join(all_sources))
else:
raw_sources_declaration = RAW_SOURCES_COMPRESSION_DECLARATION
if env['COMPRESSION'] == 'bz2':
all_sources = bz2.compress("".join(all_sources))
total_length = len(all_sources)
sources_data = ToCArray(all_sources)
# Build debugger support functions
get_index_cases = [ ]
get_script_source_cases = [ ]
get_raw_script_source_cases = [ ]
get_script_name_cases = [ ]
i = 0
for (id, length) in debugger_ids:
for (id, raw_length, module_offset) in debugger_ids + ids:
native_name = "native %s.js" % id
get_index_cases.append(GET_DEBUGGER_INDEX_CASE % { 'id': id, 'i': i })
get_script_source_cases.append(GET_DEBUGGER_SCRIPT_SOURCE_CASE % {
'id': id,
'length': length,
'i': i
})
get_script_name_cases.append(GET_DEBUGGER_SCRIPT_NAME_CASE % {
'name': native_name,
'length': len(native_name),
'i': i
});
i = i + 1
for (id, length) in ids:
native_name = "native %s.js" % id
get_index_cases.append(GET_DEBUGGER_INDEX_CASE % { 'id': id, 'i': i })
get_script_source_cases.append(GET_DEBUGGER_SCRIPT_SOURCE_CASE % {
'id': id,
'length': length,
'i': i
})
get_script_name_cases.append(GET_DEBUGGER_SCRIPT_NAME_CASE % {
'name': native_name,
'length': len(native_name),
'i': i
});
get_index_cases.append(GET_INDEX_CASE % { 'id': id, 'i': i })
get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % {
'offset': module_offset,
'raw_length': raw_length,
'i': i
})
get_script_name_cases.append(GET_SCRIPT_NAME_CASE % {
'name': native_name,
'length': len(native_name),
'i': i
})
i = i + 1
# Emit result
@ -346,9 +373,12 @@ def JS2C(source, target, env):
output.write(HEADER_TEMPLATE % {
'builtin_count': len(ids) + len(debugger_ids),
'debugger_count': len(debugger_ids),
'source_lines': "\n".join(source_lines),
'sources_data': sources_data,
'raw_sources_declaration': raw_sources_declaration,
'raw_total_length': raw_total_length,
'total_length': total_length,
'get_index_cases': "".join(get_index_cases),
'get_script_source_cases': "".join(get_script_source_cases),
'get_raw_script_source_cases': "".join(get_raw_script_source_cases),
'get_script_name_cases': "".join(get_script_name_cases),
'type': env['TYPE']
})
@ -357,8 +387,9 @@ def JS2C(source, target, env):
def main():
natives = sys.argv[1]
type = sys.argv[2]
source_files = sys.argv[3:]
JS2C(source_files, [natives], { 'TYPE': type })
compression = sys.argv[3]
source_files = sys.argv[4:]
JS2C(source_files, [natives], { 'TYPE': type, 'COMPRESSION': compression })
if __name__ == "__main__":
main()