From 1aede5f400d7d6450f1baa7a492cefa5a0cdd34c Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Tue, 8 Jul 2014 09:04:08 +0000 Subject: [PATCH] Introduce code serializer/deserializer. R=vogelheim@chromium.org Review URL: https://codereview.chromium.org/373713006 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22276 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/assembler.h | 1 + src/compiler.cc | 15 ++++- src/flag-definitions.h | 2 + src/full-codegen.cc | 5 ++ src/mksnapshot.cc | 17 +----- src/parser.cc | 5 +- src/parser.h | 2 +- src/serialize.cc | 105 ++++++++++++++++++++++++++++++++++- src/serialize.h | 25 +++++++++ src/snapshot-source-sink.cc | 6 ++ src/snapshot-source-sink.h | 36 ++++++++++++ src/version.h | 1 + test/cctest/test-compiler.cc | 41 ++++++++++++++ 13 files changed, 239 insertions(+), 22 deletions(-) diff --git a/src/assembler.h b/src/assembler.h index a086496358..0ef7f5e4f0 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -66,6 +66,7 @@ class AssemblerBase: public Malloced { void set_emit_debug_code(bool value) { emit_debug_code_ = value; } bool serializer_enabled() const { return serializer_enabled_; } + void enable_serializer() { serializer_enabled_ = true; } bool predictable_code_size() const { return predictable_code_size_; } void set_predictable_code_size(bool value) { predictable_code_size_ = value; } diff --git a/src/compiler.cc b/src/compiler.cc index 92f8c11a19..2b451501f0 100644 --- a/src/compiler.cc +++ b/src/compiler.cc @@ -933,9 +933,11 @@ Handle Compiler::CompileScript( cached_data = NULL; } else if (cached_data_mode == PRODUCE_CACHED_DATA) { ASSERT(cached_data && !*cached_data); + ASSERT(extension == NULL); } else { ASSERT(cached_data_mode == CONSUME_CACHED_DATA); ASSERT(cached_data && *cached_data); + ASSERT(extension == NULL); } Isolate* isolate = source->GetIsolate(); int source_length = source->length(); @@ -951,6 +953,11 @@ Handle Compiler::CompileScript( maybe_result = compilation_cache->LookupScript( source, script_name, line_offset, column_offset, is_shared_cross_origin, context); + if (maybe_result.is_null() && FLAG_serialize_toplevel && + cached_data_mode == CONSUME_CACHED_DATA) { + Object* des = CodeSerializer::Deserialize(isolate, *cached_data); + return handle(SharedFunctionInfo::cast(des), isolate); + } } if (!maybe_result.ToHandle(&result)) { @@ -971,17 +978,21 @@ Handle Compiler::CompileScript( // Compile the function and add it to the cache. CompilationInfoWithZone info(script); info.MarkAsGlobal(); - info.SetExtension(extension); info.SetCachedData(cached_data, cached_data_mode); + info.SetExtension(extension); info.SetContext(context); if (FLAG_use_strict) info.SetStrictMode(STRICT); + result = CompileToplevel(&info); if (extension == NULL && !result.is_null() && !result->dont_cache()) { compilation_cache->PutScript(source, context, result); + if (FLAG_serialize_toplevel && cached_data_mode == PRODUCE_CACHED_DATA) { + *cached_data = CodeSerializer::Serialize(result); + } } if (result.is_null()) isolate->ReportPendingMessages(); } else if (result->ic_age() != isolate->heap()->global_ic_age()) { - result->ResetForNewContext(isolate->heap()->global_ic_age()); + result->ResetForNewContext(isolate->heap()->global_ic_age()); } return result; } diff --git a/src/flag-definitions.h b/src/flag-definitions.h index 79fba014c9..424845f39c 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -423,6 +423,8 @@ DEFINE_BOOL(trace_deopt, false, "trace optimize function deoptimization") DEFINE_BOOL(trace_stub_failures, false, "trace deoptimization of generated code stubs") +DEFINE_BOOL(serialize_toplevel, false, "enable caching of toplevel scripts") + // compiler.cc DEFINE_INT(min_preparse_length, 1024, "minimum length for automatic enable preparsing") diff --git a/src/full-codegen.cc b/src/full-codegen.cc index e20b20dab8..cb5e94a0c3 100644 --- a/src/full-codegen.cc +++ b/src/full-codegen.cc @@ -301,6 +301,11 @@ bool FullCodeGenerator::MakeCode(CompilationInfo* info) { CodeGenerator::MakeCodePrologue(info, "full"); const int kInitialBufferSize = 4 * KB; MacroAssembler masm(info->isolate(), NULL, kInitialBufferSize); + if (FLAG_serialize_toplevel && + info->cached_data_mode() == PRODUCE_CACHED_DATA && info->is_global()) { + masm.enable_serializer(); + } + #ifdef ENABLE_GDB_JIT_INTERFACE masm.positions_recorder()->StartGDBJITLineInfoRecording(); #endif diff --git a/src/mksnapshot.cc b/src/mksnapshot.cc index 5ed3855e9e..36c03763c7 100644 --- a/src/mksnapshot.cc +++ b/src/mksnapshot.cc @@ -32,17 +32,6 @@ class Compressor { }; -class ListSnapshotSink : public i::SnapshotByteSink { - public: - explicit ListSnapshotSink(i::List* data) : data_(data) { } - virtual ~ListSnapshotSink() {} - virtual void Put(int byte, const char* description) { data_->Add(byte); } - virtual int Position() { return data_->length(); } - private: - i::List* data_; -}; - - class SnapshotWriter { public: explicit SnapshotWriter(const char* snapshot_file) @@ -93,7 +82,7 @@ class SnapshotWriter { return; i::List startup_blob; - ListSnapshotSink sink(&startup_blob); + i::ListSnapshotSink sink(&startup_blob); int spaces[] = { i::NEW_SPACE, i::OLD_POINTER_SPACE, i::OLD_DATA_SPACE, i::CODE_SPACE, @@ -417,12 +406,12 @@ int main(int argc, char** argv) { // This results in a somewhat smaller snapshot, probably because it gets // rid of some things that are cached between garbage collections. i::List snapshot_data; - ListSnapshotSink snapshot_sink(&snapshot_data); + i::ListSnapshotSink snapshot_sink(&snapshot_data); i::StartupSerializer ser(internal_isolate, &snapshot_sink); ser.SerializeStrongReferences(); i::List context_data; - ListSnapshotSink contex_sink(&context_data); + i::ListSnapshotSink contex_sink(&context_data); i::PartialSerializer context_ser(internal_isolate, &ser, &contex_sink); context_ser.Serialize(&raw_context); ser.SerializeWeakReferences(); diff --git a/src/parser.cc b/src/parser.cc index 261de4eed7..4ab9659dd2 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -182,7 +182,7 @@ void RegExpBuilder::AddQuantifierToAtom( } -ScriptData* ScriptData::New(const char* data, int length) { +ScriptData* ScriptData::New(const char* data, int length, bool owns_store) { // The length is obviously invalid. if (length % sizeof(unsigned) != 0) { return NULL; @@ -190,7 +190,8 @@ ScriptData* ScriptData::New(const char* data, int length) { int deserialized_data_length = length / sizeof(unsigned); unsigned* deserialized_data; - bool owns_store = reinterpret_cast(data) % sizeof(unsigned) != 0; + owns_store = + owns_store || reinterpret_cast(data) % sizeof(unsigned) != 0; if (owns_store) { // Copy the data to align it. deserialized_data = i::NewArray(deserialized_data_length); diff --git a/src/parser.h b/src/parser.h index 9722a898f3..ac276645a1 100644 --- a/src/parser.h +++ b/src/parser.h @@ -72,7 +72,7 @@ class ScriptData { // The created ScriptData won't take ownership of the data. If the alignment // is not correct, this will copy the data (and the created ScriptData will // take ownership of the copy). - static ScriptData* New(const char* data, int length); + static ScriptData* New(const char* data, int length, bool owns_store = false); virtual ~ScriptData(); virtual int Length(); diff --git a/src/serialize.cc b/src/serialize.cc index 769e64502b..da298a4b5d 100644 --- a/src/serialize.cc +++ b/src/serialize.cc @@ -19,6 +19,7 @@ #include "src/snapshot-source-sink.h" #include "src/stub-cache.h" #include "src/v8threads.h" +#include "src/version.h" namespace v8 { namespace internal { @@ -1796,12 +1797,13 @@ int Serializer::SpaceAreaSize(int space) { } +void Serializer::PadByte() { sink_->Put(kNop, "Padding"); } + + void Serializer::Pad() { // The non-branching GetInt will read up to 3 bytes too far, so we need // to pad the snapshot to make sure we don't read over the end. - for (unsigned i = 0; i < sizeof(int32_t) - 1; i++) { - sink_->Put(kNop, "Padding"); - } + for (unsigned i = 0; i < sizeof(int32_t) - 1; i++) PadByte(); } @@ -1811,4 +1813,101 @@ void Serializer::InitializeCodeAddressMap() { } +ScriptData* CodeSerializer::Serialize(Handle info) { + // Serialize code object. + List payload; + ListSnapshotSink listsink(&payload); + CodeSerializer ser(info->GetIsolate(), &listsink); + DisallowHeapAllocation no_gc; + Object** location = Handle::cast(info).location(); + ser.VisitPointer(location); + ser.Pad(); + + // Allocate storage. The payload length may not be aligned. Round up. + // TODO(yangguo) replace ScriptData with a more generic super class. + int payload_length = payload.length(); + int raw_length = payload_length / sizeof(unsigned) + kHeaderSize; + if (!IsAligned(payload_length, sizeof(unsigned))) raw_length++; + unsigned* raw_data = i::NewArray(raw_length); + char* payload_data = reinterpret_cast(raw_data + kHeaderSize); + + // Write header. + raw_data[kVersionHashOffset] = Version::Hash(); + raw_data[kPayloadLengthOffset] = payload_length; + STATIC_ASSERT(NEW_SPACE == 0); + for (int i = NEW_SPACE; i <= PROPERTY_CELL_SPACE; i++) { + raw_data[kReservationsOffset + i] = ser.CurrentAllocationAddress(i); + } + + CopyBytes(payload_data, payload.begin(), static_cast(payload_length)); + + return new ScriptData(Vector(raw_data, raw_length), true); +} + + +void CodeSerializer::SerializeObject(Object* o, HowToCode how_to_code, + WhereToPoint where_to_point, int skip) { + CHECK(o->IsHeapObject()); + HeapObject* heap_object = HeapObject::cast(o); + + // The code-caches link to context-specific code objects, which + // the startup and context serializes cannot currently handle. + ASSERT(!heap_object->IsMap() || + Map::cast(heap_object)->code_cache() == + heap_object->GetHeap()->empty_fixed_array()); + + int root_index; + if ((root_index = RootIndex(heap_object, how_to_code)) != kInvalidRootIndex) { + PutRoot(root_index, heap_object, how_to_code, where_to_point, skip); + return; + } + + // TODO(yangguo) wire up builtins. + // TODO(yangguo) wire up stubs from stub cache. + // TODO(yangguo) wire up script source. + // TODO(yangguo) wire up internalized strings + ASSERT(!heap_object->IsInternalizedString()); + // TODO(yangguo) We cannot deal with different hash seeds yet. + ASSERT(!heap_object->IsHashTable()); + + if (address_mapper_.IsMapped(heap_object)) { + int space = SpaceOfObject(heap_object); + int address = address_mapper_.MappedTo(heap_object); + SerializeReferenceToPreviousObject(space, address, how_to_code, + where_to_point, skip); + return; + } + + if (skip != 0) { + sink_->Put(kSkip, "SkipFromSerializeObject"); + sink_->PutInt(skip, "SkipDistanceFromSerializeObject"); + } + // Object has not yet been serialized. Serialize it here. + ObjectSerializer serializer(this, heap_object, sink_, how_to_code, + where_to_point); + serializer.Serialize(); +} + + +Object* CodeSerializer::Deserialize(Isolate* isolate, ScriptData* data) { + const unsigned* raw_data = reinterpret_cast(data->Data()); + CHECK_EQ(Version::Hash(), raw_data[kVersionHashOffset]); + int payload_length = raw_data[kPayloadLengthOffset]; + const byte* payload_data = + reinterpret_cast(raw_data + kHeaderSize); + ASSERT_LE(payload_length, data->Length() - kHeaderSize); + + SnapshotByteSource payload(payload_data, payload_length); + Deserializer deserializer(&payload); + STATIC_ASSERT(NEW_SPACE == 0); + // TODO(yangguo) what happens if remaining new space is too small? + for (int i = NEW_SPACE; i <= PROPERTY_CELL_SPACE; i++) { + deserializer.set_reservation( + i, raw_data[CodeSerializer::kReservationsOffset + i]); + } + Object* root; + deserializer.DeserializePartial(isolate, &root); + ASSERT(root->IsSharedFunctionInfo()); + return root; +} } } // namespace v8::internal diff --git a/src/serialize.h b/src/serialize.h index 70482d8dcf..df9cb68f4d 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -8,6 +8,7 @@ #include "src/hashmap.h" #include "src/heap-profiler.h" #include "src/isolate.h" +#include "src/parser.h" #include "src/snapshot-source-sink.h" namespace v8 { @@ -469,6 +470,7 @@ class Serializer : public SerializerDeserializer { SerializationAddressMapper address_mapper_; intptr_t root_index_wave_front_; void Pad(); + void PadByte(); friend class ObjectSerializer; friend class Deserializer; @@ -551,6 +553,29 @@ class StartupSerializer : public Serializer { }; +class CodeSerializer : public Serializer { + public: + CodeSerializer(Isolate* isolate, SnapshotByteSink* sink) + : Serializer(isolate, sink) { + set_root_index_wave_front(Heap::kStrongRootListLength); + InitializeCodeAddressMap(); + } + + static ScriptData* Serialize(Handle info); + virtual void SerializeObject(Object* o, HowToCode how_to_code, + WhereToPoint where_to_point, int skip); + + static Object* Deserialize(Isolate* isolate, ScriptData* data); + + // The data header consists of int-sized entries: + // [0] version hash + // [1] length in bytes + // [2..8] reservation sizes for spaces from NEW_SPACE to PROPERTY_CELL_SPACE. + static const int kHeaderSize = 9; + static const int kVersionHashOffset = 0; + static const int kPayloadLengthOffset = 1; + static const int kReservationsOffset = 2; +}; } } // namespace v8::internal #endif // V8_SERIALIZE_H_ diff --git a/src/snapshot-source-sink.cc b/src/snapshot-source-sink.cc index 146f949cc4..e9988eb7d3 100644 --- a/src/snapshot-source-sink.cc +++ b/src/snapshot-source-sink.cc @@ -91,5 +91,11 @@ bool SnapshotByteSource::GetBlob(const byte** data, int* number_of_bytes) { } } + +void DebugSnapshotSink::Put(int byte, const char* description) { + PrintF("%24s: %x\n", description, byte); + sink_->Put(byte, description); +} + } // namespace v8::internal } // namespace v8 diff --git a/src/snapshot-source-sink.h b/src/snapshot-source-sink.h index 6e2328cc27..eae8606fbe 100644 --- a/src/snapshot-source-sink.h +++ b/src/snapshot-source-sink.h @@ -82,6 +82,42 @@ class SnapshotByteSink { }; +class DummySnapshotSink : public SnapshotByteSink { + public: + DummySnapshotSink() : length_(0) {} + virtual ~DummySnapshotSink() {} + virtual void Put(int byte, const char* description) { length_++; } + virtual int Position() { return length_; } + + private: + int length_; +}; + + +// Wrap a SnapshotByteSink into a DebugSnapshotSink to get debugging output. +class DebugSnapshotSink : public SnapshotByteSink { + public: + explicit DebugSnapshotSink(SnapshotByteSink* chained) : sink_(chained) {} + virtual void Put(int byte, const char* description) V8_OVERRIDE; + virtual int Position() V8_OVERRIDE { return sink_->Position(); } + + private: + SnapshotByteSink* sink_; +}; + + +class ListSnapshotSink : public i::SnapshotByteSink { + public: + explicit ListSnapshotSink(i::List* data) : data_(data) {} + virtual void Put(int byte, const char* description) V8_OVERRIDE { + data_->Add(byte); + } + virtual int Position() V8_OVERRIDE { return data_->length(); } + + private: + i::List* data_; +}; + } // namespace v8::internal } // namespace v8 diff --git a/src/version.h b/src/version.h index b0a6071521..4f600054ec 100644 --- a/src/version.h +++ b/src/version.h @@ -16,6 +16,7 @@ class Version { static int GetBuild() { return build_; } static int GetPatch() { return patch_; } static bool IsCandidate() { return candidate_; } + static int Hash() { return (major_ << 20) ^ (minor_ << 10) ^ patch_; } // Calculate the V8 version string. static void GetString(Vector str); diff --git a/test/cctest/test-compiler.cc b/test/cctest/test-compiler.cc index 5384dcc131..c77b4b2dd3 100644 --- a/test/cctest/test-compiler.cc +++ b/test/cctest/test-compiler.cc @@ -32,6 +32,7 @@ #include "src/compiler.h" #include "src/disasm.h" +#include "src/parser.h" #include "test/cctest/cctest.h" using namespace v8::internal; @@ -398,6 +399,46 @@ TEST(OptimizedCodeSharing) { } +TEST(SerializeToplevel) { + FLAG_serialize_toplevel = true; + v8::HandleScope scope(CcTest::isolate()); + v8::Local context = CcTest::NewContext(PRINT_EXTENSION); + v8::Context::Scope context_scope(context); + + const char* source1 = "1 + 1"; + const char* source2 = "1 + 2"; // Use alternate string to verify caching. + + Isolate* isolate = CcTest::i_isolate(); + Handle source1_string = isolate->factory() + ->NewStringFromUtf8(CStrVector(source1)) + .ToHandleChecked(); + Handle source2_string = isolate->factory() + ->NewStringFromUtf8(CStrVector(source2)) + .ToHandleChecked(); + + ScriptData* cache = NULL; + + Handle orig = + Compiler::CompileScript(source1_string, Handle(), 0, 0, false, + Handle(isolate->native_context()), NULL, + &cache, PRODUCE_CACHED_DATA, NOT_NATIVES_CODE); + + Handle info = + Compiler::CompileScript(source2_string, Handle(), 0, 0, false, + Handle(isolate->native_context()), NULL, + &cache, CONSUME_CACHED_DATA, NOT_NATIVES_CODE); + + CHECK_NE(*orig, *info); + Handle fun = + isolate->factory()->NewFunctionFromSharedFunctionInfo( + info, isolate->native_context()); + Handle global(isolate->context()->global_object()); + Handle result = + Execution::Call(isolate, fun, global, 0, NULL).ToHandleChecked(); + CHECK_EQ(2, Handle::cast(result)->value()); +} + + #ifdef ENABLE_DISASSEMBLER static Handle GetJSFunction(v8::Handle obj, const char* property_name) {