From 858e3c7ef7bc80ea1767737853971e4e6a4633af Mon Sep 17 00:00:00 2001 From: Sam Maier Date: Fri, 7 Feb 2020 15:22:04 -0500 Subject: [PATCH] Compressing SnapshotData with zlib This CL introduces, and turns on by default, compression for snapshots. The compression is handled in SnapshotCompression, which is a new static helper class for producing/consuming compressed SnapshotData. To turn off snapshot compression (accomplished by removing calls of Compress/Decompress) set the GN arg: v8_enable_snapshot_compression = false Bug: chromium:833361 Change-Id: If8abc3662e8473fbd0c94e443946fbea804a305e Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1846066 Commit-Queue: Sam Maier Reviewed-by: Jakob Gruber Cr-Commit-Position: refs/heads/master@{#66230} --- BUILD.gn | 13 +++- src/snapshot/DEPS | 3 + src/snapshot/serializer-common.cc | 3 +- src/snapshot/snapshot-common.cc | 78 +++++++++++++++++------ src/snapshot/snapshot-compression.cc | 95 ++++++++++++++++++++++++++++ src/snapshot/snapshot-compression.h | 27 ++++++++ src/snapshot/snapshot.h | 16 ++++- test/cctest/test-serialize.cc | 25 +++++++- 8 files changed, 233 insertions(+), 27 deletions(-) create mode 100644 src/snapshot/snapshot-compression.cc create mode 100644 src/snapshot/snapshot-compression.h diff --git a/BUILD.gn b/BUILD.gn index d5e838c71f..9999cc008d 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -211,6 +211,9 @@ declare_args() { # Enable additional targets necessary for verification of torque # file generation v8_verify_torque_generation_invariance = false + + # Enable snapshot compression. + v8_enable_snapshot_compression = true } # Derived defaults. 
@@ -501,6 +504,9 @@ config("features") { if (v8_enable_regexp_interpreter_threaded_dispatch) { defines += [ "V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH" ] } + if (v8_enable_snapshot_compression) { + defines += [ "V8_SNAPSHOT_COMPRESSION" ] + } } config("toolchain") { @@ -2864,6 +2870,8 @@ v8_source_set("v8_base_without_compiler") { "src/snapshot/serializer.cc", "src/snapshot/serializer.h", "src/snapshot/snapshot-common.cc", + "src/snapshot/snapshot-compression.cc", + "src/snapshot/snapshot-compression.h", "src/snapshot/snapshot-source-sink.cc", "src/snapshot/snapshot-source-sink.h", "src/snapshot/snapshot.h", @@ -3363,7 +3371,10 @@ v8_source_set("v8_base_without_compiler") { ] } - deps += [ "//third_party/zlib" ] + deps += [ + "//third_party/zlib", + "//third_party/zlib/google:compression_utils_portable", + ] if (v8_postmortem_support) { sources += [ "$target_gen_dir/debug-support.cc" ] diff --git a/src/snapshot/DEPS b/src/snapshot/DEPS index 70ef55e340..93f17c9286 100644 --- a/src/snapshot/DEPS +++ b/src/snapshot/DEPS @@ -2,6 +2,9 @@ specific_include_rules = { "mksnapshot\.cc": [ "+include/libplatform/libplatform.h", ], + "snapshot-compression.cc": [ + "+third_party/zlib", + ], "serializer-common.cc": [ "+third_party/zlib", ], diff --git a/src/snapshot/serializer-common.cc b/src/snapshot/serializer-common.cc index 1703af7717..9218d4eaa9 100644 --- a/src/snapshot/serializer-common.cc +++ b/src/snapshot/serializer-common.cc @@ -100,7 +100,6 @@ void SerializedData::AllocateData(uint32_t size) { data_ = NewArray(size); size_ = size; owns_data_ = true; - DCHECK(IsAligned(reinterpret_cast(data_), kPointerAlignment)); } // static @@ -146,7 +145,7 @@ void SerializerDeserializer::RestoreExternalReferenceRedirectors( } } -V8_EXPORT_PRIVATE extern uint32_t Checksum(Vector payload) { +uint32_t Checksum(Vector payload) { #ifdef MEMORY_SANITIZER // Computing the checksum includes padding bytes for objects like strings. 
// Mark every object as initialized in the code serializer. diff --git a/src/snapshot/snapshot-common.cc b/src/snapshot/snapshot-common.cc index 8e0c28c6d6..cd64113a75 100644 --- a/src/snapshot/snapshot-common.cc +++ b/src/snapshot/snapshot-common.cc @@ -14,9 +14,21 @@ #include "src/utils/memcopy.h" #include "src/utils/version.h" +#ifdef V8_SNAPSHOT_COMPRESSION +#include "src/snapshot/snapshot-compression.h" +#endif + namespace v8 { namespace internal { +SnapshotData MaybeDecompress(const Vector& snapshot_data) { +#ifdef V8_SNAPSHOT_COMPRESSION + return SnapshotCompression::Decompress(snapshot_data); +#else + return SnapshotData(snapshot_data); +#endif +} + #ifdef DEBUG bool Snapshot::SnapshotIsValid(const v8::StartupData* snapshot_blob) { return Snapshot::ExtractNumContexts(snapshot_blob) > 0; @@ -43,9 +55,11 @@ bool Snapshot::Initialize(Isolate* isolate) { CheckVersion(blob); CHECK(VerifyChecksum(blob)); Vector startup_data = ExtractStartupData(blob); - SnapshotData startup_snapshot_data(startup_data); Vector read_only_data = ExtractReadOnlyData(blob); - SnapshotData read_only_snapshot_data(read_only_data); + + SnapshotData startup_snapshot_data(MaybeDecompress(startup_data)); + SnapshotData read_only_snapshot_data(MaybeDecompress(read_only_data)); + StartupDeserializer startup_deserializer(&startup_snapshot_data); ReadOnlyDeserializer read_only_deserializer(&read_only_snapshot_data); startup_deserializer.SetRehashability(ExtractRehashability(blob)); @@ -73,7 +87,7 @@ MaybeHandle Snapshot::NewContextFromSnapshot( bool can_rehash = ExtractRehashability(blob); Vector context_data = ExtractContextData(blob, static_cast(context_index)); - SnapshotData snapshot_data(context_data); + SnapshotData snapshot_data(MaybeDecompress(context_data)); MaybeHandle maybe_result = PartialDeserializer::DeserializeContext( isolate, &snapshot_data, can_rehash, global_proxy, @@ -116,24 +130,49 @@ void ProfileDeserialization( } v8::StartupData Snapshot::CreateSnapshotBlob( - const 
SnapshotData* startup_snapshot, - const SnapshotData* read_only_snapshot, - const std::vector& context_snapshots, bool can_be_rehashed) { - uint32_t num_contexts = static_cast(context_snapshots.size()); + const SnapshotData* startup_snapshot_in, + const SnapshotData* read_only_snapshot_in, + const std::vector& context_snapshots_in, + bool can_be_rehashed) { + // Have these separate from snapshot_in for compression, since we need to + // access the compressed data as well as the uncompressed reservations. + const SnapshotData* startup_snapshot; + const SnapshotData* read_only_snapshot; + const std::vector* context_snapshots; +#ifdef V8_SNAPSHOT_COMPRESSION + SnapshotData startup_compressed( + SnapshotCompression::Compress(startup_snapshot_in)); + SnapshotData read_only_compressed( + SnapshotCompression::Compress(read_only_snapshot_in)); + startup_snapshot = &startup_compressed; + read_only_snapshot = &read_only_compressed; + std::vector context_snapshots_compressed; + context_snapshots_compressed.reserve(context_snapshots_in.size()); + std::vector context_snapshots_compressed_ptrs; + for (unsigned int i = 0; i < context_snapshots_in.size(); ++i) { + context_snapshots_compressed.push_back( + SnapshotCompression::Compress(context_snapshots_in[i])); + context_snapshots_compressed_ptrs.push_back( + &context_snapshots_compressed[i]); + } + context_snapshots = &context_snapshots_compressed_ptrs; +#else + startup_snapshot = startup_snapshot_in; + read_only_snapshot = read_only_snapshot_in; + context_snapshots = &context_snapshots_in; +#endif + + uint32_t num_contexts = static_cast(context_snapshots->size()); uint32_t startup_snapshot_offset = StartupSnapshotOffset(num_contexts); uint32_t total_length = startup_snapshot_offset; - DCHECK(IsAligned(total_length, kPointerAlignment)); total_length += static_cast(startup_snapshot->RawData().length()); - DCHECK(IsAligned(total_length, kPointerAlignment)); total_length += static_cast(read_only_snapshot->RawData().length()); - 
DCHECK(IsAligned(total_length, kPointerAlignment)); - for (const auto context_snapshot : context_snapshots) { + for (const auto context_snapshot : *context_snapshots) { total_length += static_cast(context_snapshot->RawData().length()); - DCHECK(IsAligned(total_length, kPointerAlignment)); } - ProfileDeserialization(read_only_snapshot, startup_snapshot, - context_snapshots); + ProfileDeserialization(read_only_snapshot_in, startup_snapshot_in, + context_snapshots_in); char* data = new char[total_length]; // Zero out pre-payload data. Part of that is only used for padding. @@ -157,7 +196,7 @@ v8::StartupData Snapshot::CreateSnapshotBlob( if (FLAG_profile_deserialization) { PrintF("Snapshot blob consists of:\n%10d bytes in %d chunks for startup\n", payload_length, - static_cast(startup_snapshot->Reservations().size())); + static_cast(startup_snapshot_in->Reservations().size())); } payload_offset += payload_length; @@ -176,15 +215,17 @@ v8::StartupData Snapshot::CreateSnapshotBlob( // Partial snapshots (context-specific data). 
for (uint32_t i = 0; i < num_contexts; i++) { SetHeaderValue(data, ContextSnapshotOffsetOffset(i), payload_offset); - SnapshotData* context_snapshot = context_snapshots[i]; + SnapshotData* context_snapshot = (*context_snapshots)[i]; payload_length = context_snapshot->RawData().length(); CopyBytes( data + payload_offset, reinterpret_cast(context_snapshot->RawData().begin()), payload_length); if (FLAG_profile_deserialization) { - PrintF("%10d bytes in %d chunks for context #%d\n", payload_length, - static_cast(context_snapshot->Reservations().size()), i); + PrintF( + "%10d bytes in %d chunks for context #%d\n", payload_length, + static_cast(context_snapshots_in[i]->Reservations().size()), + i); } payload_offset += payload_length; } @@ -310,7 +351,6 @@ SnapshotData::SnapshotData(const Serializer* serializer) { uint32_t padded_payload_offset = POINTER_SIZE_ALIGN(payload_offset); uint32_t size = padded_payload_offset + static_cast(payload->size()); - DCHECK(IsAligned(size, kPointerAlignment)); // Allocate backing store and create result data. AllocateData(size); diff --git a/src/snapshot/snapshot-compression.cc b/src/snapshot/snapshot-compression.cc new file mode 100644 index 0000000000..dea16bfa34 --- /dev/null +++ b/src/snapshot/snapshot-compression.cc @@ -0,0 +1,95 @@ +// Copyright 2020 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/snapshot/snapshot-compression.h" + +#include "src/utils/memcopy.h" +#include "third_party/zlib/google/compression_utils_portable.h" + +namespace v8 { +namespace internal { + +uint32_t GetUncompressedSize(const Bytef* compressed_data) { + uint32_t size; + MemCopy(&size, compressed_data, sizeof(size)); + return size; +} + +SnapshotData SnapshotCompression::Compress( + const SnapshotData* uncompressed_data) { + SnapshotData snapshot_data; + base::ElapsedTimer timer; + if (FLAG_profile_deserialization) timer.Start(); + + static_assert(sizeof(Bytef) == 1, ""); + const uLongf input_size = + static_cast(uncompressed_data->RawData().size()); + uint32_t payload_length = + static_cast(uncompressed_data->RawData().size()); + + uLongf compressed_data_size = compressBound(input_size); + + // Allocating >= the final amount we will need. + snapshot_data.AllocateData( + static_cast(sizeof(payload_length) + compressed_data_size)); + + byte* compressed_data = const_cast(snapshot_data.RawData().begin()); + // Since we are doing raw compression (no zlib or gzip headers), we need to + // manually store the uncompressed size. + MemCopy(compressed_data, &payload_length, sizeof(payload_length)); + + CHECK_EQ(zlib_internal::CompressHelper( + zlib_internal::ZRAW, compressed_data + sizeof(payload_length), + &compressed_data_size, + bit_cast(uncompressed_data->RawData().begin()), + input_size, Z_DEFAULT_COMPRESSION, nullptr, nullptr), + Z_OK); + + // Reallocating to exactly the size we need. 
+ snapshot_data.Resize(static_cast(compressed_data_size) + + sizeof(payload_length)); + DCHECK_EQ(payload_length, + GetUncompressedSize(snapshot_data.RawData().begin())); + + if (FLAG_profile_deserialization) { + double ms = timer.Elapsed().InMillisecondsF(); + PrintF("[Compressing %d bytes took %0.3f ms]\n", payload_length, ms); + } + return snapshot_data; +} + +SnapshotData SnapshotCompression::Decompress( + Vector compressed_data) { + SnapshotData snapshot_data; + base::ElapsedTimer timer; + if (FLAG_profile_deserialization) timer.Start(); + + const Bytef* input_bytef = bit_cast(compressed_data.begin()); + + // Since we are doing raw compression (no zlib or gzip headers), we need to + // manually retrieve the uncompressed size. + uint32_t uncompressed_payload_length = GetUncompressedSize(input_bytef); + input_bytef += sizeof(uncompressed_payload_length); + + snapshot_data.AllocateData(uncompressed_payload_length); + + uLongf uncompressed_size = uncompressed_payload_length; + CHECK_EQ(zlib_internal::UncompressHelper( + zlib_internal::ZRAW, + bit_cast(snapshot_data.RawData().begin()), + &uncompressed_size, input_bytef, + static_cast(compressed_data.size() - + sizeof(uncompressed_payload_length))), + Z_OK); + + if (FLAG_profile_deserialization) { + double ms = timer.Elapsed().InMillisecondsF(); + PrintF("[Decompressing %d bytes took %0.3f ms]\n", + uncompressed_payload_length, ms); + } + return snapshot_data; +} + +} // namespace internal +} // namespace v8 diff --git a/src/snapshot/snapshot-compression.h b/src/snapshot/snapshot-compression.h new file mode 100644 index 0000000000..59c21feb74 --- /dev/null +++ b/src/snapshot/snapshot-compression.h @@ -0,0 +1,27 @@ +// Copyright 2020 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_ +#define V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_ + +#include "src/snapshot/serializer-common.h" +#include "src/snapshot/serializer.h" +#include "src/snapshot/snapshot.h" +#include "src/utils/vector.h" + +namespace v8 { +namespace internal { + +class SnapshotCompression : public AllStatic { + public: + V8_EXPORT_PRIVATE static SnapshotData Compress( + const SnapshotData* uncompressed_data); + V8_EXPORT_PRIVATE static SnapshotData Decompress( + Vector compressed_data); +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_ diff --git a/src/snapshot/snapshot.h b/src/snapshot/snapshot.h index 156d873019..d9f05c59a8 100644 --- a/src/snapshot/snapshot.h +++ b/src/snapshot/snapshot.h @@ -16,6 +16,7 @@ namespace internal { // Forward declarations. class Isolate; class PartialSerializer; +class SnapshotCompression; class StartupSerializer; // Wrapper around reservation sizes and the serialization payload. @@ -37,6 +38,15 @@ class V8_EXPORT_PRIVATE SnapshotData : public SerializedData { } protected: + // Empty constructor used by SnapshotCompression so it can manually allocate + // memory. + SnapshotData() : SerializedData() {} + friend class SnapshotCompression; + + // Resize used by SnapshotCompression so it can shrink the compressed + // SnapshotData. 
+ void Resize(uint32_t size) { size_ = size; } + // The data header consists of uint32_t-sized entries: // [0] magic number and (internal) external reference count // [1] number of reservation size entries @@ -77,9 +87,9 @@ class Snapshot : public AllStatic { // ---------------- Serialization ---------------- static v8::StartupData CreateSnapshotBlob( - const SnapshotData* startup_snapshot, - const SnapshotData* read_only_snapshot, - const std::vector& context_snapshots, + const SnapshotData* startup_snapshot_in, + const SnapshotData* read_only_snapshot_in, + const std::vector& context_snapshots_in, bool can_be_rehashed); #ifdef DEBUG diff --git a/test/cctest/test-serialize.cc b/test/cctest/test-serialize.cc index f37b623504..a766968a1a 100644 --- a/test/cctest/test-serialize.cc +++ b/test/cctest/test-serialize.cc @@ -53,6 +53,7 @@ #include "src/snapshot/partial-serializer.h" #include "src/snapshot/read-only-deserializer.h" #include "src/snapshot/read-only-serializer.h" +#include "src/snapshot/snapshot-compression.h" #include "src/snapshot/snapshot.h" #include "src/snapshot/startup-deserializer.h" #include "src/snapshot/startup-serializer.h" @@ -414,6 +415,24 @@ static void PartiallySerializeContext(Vector* startup_blob_out, ReadOnlyHeap::ClearSharedHeapForTest(); } +UNINITIALIZED_TEST(SnapshotCompression) { + DisableAlwaysOpt(); + Vector startup_blob; + Vector read_only_blob; + Vector partial_blob; + PartiallySerializeContext(&startup_blob, &read_only_blob, &partial_blob); + SnapshotData original_snapshot_data(partial_blob); + SnapshotData compressed = + i::SnapshotCompression::Compress(&original_snapshot_data); + SnapshotData decompressed = + i::SnapshotCompression::Decompress(compressed.RawData()); + CHECK_EQ(partial_blob, decompressed.RawData()); + + startup_blob.Dispose(); + read_only_blob.Dispose(); + partial_blob.Dispose(); +} + UNINITIALIZED_TEST(PartialSerializerContext) { DisableAlwaysOpt(); Vector startup_blob; @@ -2455,8 +2474,10 @@ 
TEST(CodeSerializerBitFlip) { const char* source = "function f() { return 'abc'; }; f() + 'def'"; v8::ScriptCompiler::CachedData* cache = CompileRunAndProduceCache(source); - // Random bit flip. - const_cast(cache->data)[337] ^= 0x40; + // Arbitrary bit flip. + int arbitrary_spot = 337; + CHECK_LT(arbitrary_spot, cache->length); + const_cast(cache->data)[arbitrary_spot] ^= 0x40; v8::Isolate::CreateParams create_params; create_params.array_buffer_allocator = CcTest::array_buffer_allocator();