Compressing SnapshotData with zlib

This CL introduces, and turns on by default, compression for snapshots.
The compression is handled in SnapshotCompression, which is a new static
helper class for producing/consuming compressed SnapshotData.

To turn off snapshot compression (accomplished by removing calls of
Compress/Decompress) set the GN arg:
v8_enable_snapshot_compression = false


Bug: chromium:833361
Change-Id: If8abc3662e8473fbd0c94e443946fbea804a305e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1846066
Commit-Queue: Sam Maier <smaier@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66230}
This commit is contained in:
Sam Maier 2020-02-07 15:22:04 -05:00 committed by Commit Bot
parent 6239ec101a
commit 858e3c7ef7
8 changed files with 233 additions and 27 deletions

View File

@ -211,6 +211,9 @@ declare_args() {
# Enable additional targets necessary for verification of torque
# file generation
v8_verify_torque_generation_invariance = false
# Enable snapshot compression. Set to false to disable all snapshot
# compression.
v8_enable_snapshot_compression = true
}
# Derived defaults.
@ -501,6 +504,9 @@ config("features") {
if (v8_enable_regexp_interpreter_threaded_dispatch) {
defines += [ "V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH" ]
}
if (v8_enable_snapshot_compression) {
defines += [ "V8_SNAPSHOT_COMPRESSION" ]
}
}
config("toolchain") {
@ -2864,6 +2870,8 @@ v8_source_set("v8_base_without_compiler") {
"src/snapshot/serializer.cc",
"src/snapshot/serializer.h",
"src/snapshot/snapshot-common.cc",
"src/snapshot/snapshot-compression.cc",
"src/snapshot/snapshot-compression.h",
"src/snapshot/snapshot-source-sink.cc",
"src/snapshot/snapshot-source-sink.h",
"src/snapshot/snapshot.h",
@ -3363,7 +3371,10 @@ v8_source_set("v8_base_without_compiler") {
]
}
deps += [ "//third_party/zlib" ]
deps += [
"//third_party/zlib",
"//third_party/zlib/google:compression_utils_portable",
]
if (v8_postmortem_support) {
sources += [ "$target_gen_dir/debug-support.cc" ]

View File

@ -2,6 +2,9 @@ specific_include_rules = {
"mksnapshot\.cc": [
"+include/libplatform/libplatform.h",
],
"snapshot-compression.cc": [
"+third_party/zlib",
],
"serializer-common.cc": [
"+third_party/zlib",
],

View File

@ -100,7 +100,6 @@ void SerializedData::AllocateData(uint32_t size) {
data_ = NewArray<byte>(size);
size_ = size;
owns_data_ = true;
DCHECK(IsAligned(reinterpret_cast<intptr_t>(data_), kPointerAlignment));
}
// static
@ -146,7 +145,7 @@ void SerializerDeserializer::RestoreExternalReferenceRedirectors(
}
}
V8_EXPORT_PRIVATE extern uint32_t Checksum(Vector<const byte> payload) {
uint32_t Checksum(Vector<const byte> payload) {
#ifdef MEMORY_SANITIZER
// Computing the checksum includes padding bytes for objects like strings.
// Mark every object as initialized in the code serializer.

View File

@ -14,9 +14,21 @@
#include "src/utils/memcopy.h"
#include "src/utils/version.h"
#ifdef V8_SNAPSHOT_COMPRESSION
#include "src/snapshot/snapshot-compression.h"
#endif
namespace v8 {
namespace internal {
// Wraps |snapshot_data| in a SnapshotData. When the build defines
// V8_SNAPSHOT_COMPRESSION the bytes are first run through
// SnapshotCompression::Decompress; otherwise they are used as-is.
SnapshotData MaybeDecompress(const Vector<const byte>& snapshot_data) {
#ifndef V8_SNAPSHOT_COMPRESSION
  // Compression is compiled out: the blob already holds the raw payload.
  return SnapshotData(snapshot_data);
#else
  return SnapshotCompression::Decompress(snapshot_data);
#endif
}
#ifdef DEBUG
bool Snapshot::SnapshotIsValid(const v8::StartupData* snapshot_blob) {
return Snapshot::ExtractNumContexts(snapshot_blob) > 0;
@ -43,9 +55,11 @@ bool Snapshot::Initialize(Isolate* isolate) {
CheckVersion(blob);
CHECK(VerifyChecksum(blob));
Vector<const byte> startup_data = ExtractStartupData(blob);
SnapshotData startup_snapshot_data(startup_data);
Vector<const byte> read_only_data = ExtractReadOnlyData(blob);
SnapshotData read_only_snapshot_data(read_only_data);
SnapshotData startup_snapshot_data(MaybeDecompress(startup_data));
SnapshotData read_only_snapshot_data(MaybeDecompress(read_only_data));
StartupDeserializer startup_deserializer(&startup_snapshot_data);
ReadOnlyDeserializer read_only_deserializer(&read_only_snapshot_data);
startup_deserializer.SetRehashability(ExtractRehashability(blob));
@ -73,7 +87,7 @@ MaybeHandle<Context> Snapshot::NewContextFromSnapshot(
bool can_rehash = ExtractRehashability(blob);
Vector<const byte> context_data =
ExtractContextData(blob, static_cast<uint32_t>(context_index));
SnapshotData snapshot_data(context_data);
SnapshotData snapshot_data(MaybeDecompress(context_data));
MaybeHandle<Context> maybe_result = PartialDeserializer::DeserializeContext(
isolate, &snapshot_data, can_rehash, global_proxy,
@ -116,24 +130,49 @@ void ProfileDeserialization(
}
v8::StartupData Snapshot::CreateSnapshotBlob(
const SnapshotData* startup_snapshot,
const SnapshotData* read_only_snapshot,
const std::vector<SnapshotData*>& context_snapshots, bool can_be_rehashed) {
uint32_t num_contexts = static_cast<uint32_t>(context_snapshots.size());
const SnapshotData* startup_snapshot_in,
const SnapshotData* read_only_snapshot_in,
const std::vector<SnapshotData*>& context_snapshots_in,
bool can_be_rehashed) {
// Have these separate from snapshot_in for compression, since we need to
// access the compressed data as well as the uncompressed reservations.
const SnapshotData* startup_snapshot;
const SnapshotData* read_only_snapshot;
const std::vector<SnapshotData*>* context_snapshots;
#ifdef V8_SNAPSHOT_COMPRESSION
SnapshotData startup_compressed(
SnapshotCompression::Compress(startup_snapshot_in));
SnapshotData read_only_compressed(
SnapshotCompression::Compress(read_only_snapshot_in));
startup_snapshot = &startup_compressed;
read_only_snapshot = &read_only_compressed;
std::vector<SnapshotData> context_snapshots_compressed;
context_snapshots_compressed.reserve(context_snapshots_in.size());
std::vector<SnapshotData*> context_snapshots_compressed_ptrs;
for (unsigned int i = 0; i < context_snapshots_in.size(); ++i) {
context_snapshots_compressed.push_back(
SnapshotCompression::Compress(context_snapshots_in[i]));
context_snapshots_compressed_ptrs.push_back(
&context_snapshots_compressed[i]);
}
context_snapshots = &context_snapshots_compressed_ptrs;
#else
startup_snapshot = startup_snapshot_in;
read_only_snapshot = read_only_snapshot_in;
context_snapshots = &context_snapshots_in;
#endif
uint32_t num_contexts = static_cast<uint32_t>(context_snapshots->size());
uint32_t startup_snapshot_offset = StartupSnapshotOffset(num_contexts);
uint32_t total_length = startup_snapshot_offset;
DCHECK(IsAligned(total_length, kPointerAlignment));
total_length += static_cast<uint32_t>(startup_snapshot->RawData().length());
DCHECK(IsAligned(total_length, kPointerAlignment));
total_length += static_cast<uint32_t>(read_only_snapshot->RawData().length());
DCHECK(IsAligned(total_length, kPointerAlignment));
for (const auto context_snapshot : context_snapshots) {
for (const auto context_snapshot : *context_snapshots) {
total_length += static_cast<uint32_t>(context_snapshot->RawData().length());
DCHECK(IsAligned(total_length, kPointerAlignment));
}
ProfileDeserialization(read_only_snapshot, startup_snapshot,
context_snapshots);
ProfileDeserialization(read_only_snapshot_in, startup_snapshot_in,
context_snapshots_in);
char* data = new char[total_length];
// Zero out pre-payload data. Part of that is only used for padding.
@ -157,7 +196,7 @@ v8::StartupData Snapshot::CreateSnapshotBlob(
if (FLAG_profile_deserialization) {
PrintF("Snapshot blob consists of:\n%10d bytes in %d chunks for startup\n",
payload_length,
static_cast<uint32_t>(startup_snapshot->Reservations().size()));
static_cast<uint32_t>(startup_snapshot_in->Reservations().size()));
}
payload_offset += payload_length;
@ -176,15 +215,17 @@ v8::StartupData Snapshot::CreateSnapshotBlob(
// Partial snapshots (context-specific data).
for (uint32_t i = 0; i < num_contexts; i++) {
SetHeaderValue(data, ContextSnapshotOffsetOffset(i), payload_offset);
SnapshotData* context_snapshot = context_snapshots[i];
SnapshotData* context_snapshot = (*context_snapshots)[i];
payload_length = context_snapshot->RawData().length();
CopyBytes(
data + payload_offset,
reinterpret_cast<const char*>(context_snapshot->RawData().begin()),
payload_length);
if (FLAG_profile_deserialization) {
PrintF("%10d bytes in %d chunks for context #%d\n", payload_length,
static_cast<uint32_t>(context_snapshot->Reservations().size()), i);
PrintF(
"%10d bytes in %d chunks for context #%d\n", payload_length,
static_cast<uint32_t>(context_snapshots_in[i]->Reservations().size()),
i);
}
payload_offset += payload_length;
}
@ -310,7 +351,6 @@ SnapshotData::SnapshotData(const Serializer* serializer) {
uint32_t padded_payload_offset = POINTER_SIZE_ALIGN(payload_offset);
uint32_t size =
padded_payload_offset + static_cast<uint32_t>(payload->size());
DCHECK(IsAligned(size, kPointerAlignment));
// Allocate backing store and create result data.
AllocateData(size);

View File

@ -0,0 +1,95 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/snapshot/snapshot-compression.h"
#include "src/utils/memcopy.h"
#include "third_party/zlib/google/compression_utils_portable.h"
namespace v8 {
namespace internal {
// Reads the uint32_t length prefix stored at the very start of
// |compressed_data|. The bytes are copied out rather than dereferenced
// through a casted pointer, so the input needs no particular alignment.
uint32_t GetUncompressedSize(const Bytef* compressed_data) {
  uint32_t uncompressed_size;
  MemCopy(&uncompressed_size, compressed_data, sizeof(uncompressed_size));
  return uncompressed_size;
}
// Compresses the raw bytes of |uncompressed_data| and returns them in a new
// SnapshotData laid out as:
//   [uint32_t uncompressed length][raw deflate stream]
// Aborts (CHECK) if zlib reports anything other than Z_OK.
SnapshotData SnapshotCompression::Compress(
    const SnapshotData* uncompressed_data) {
  static_assert(sizeof(Bytef) == 1, "");

  SnapshotData result;
  base::ElapsedTimer timer;
  if (FLAG_profile_deserialization) timer.Start();

  const uLongf source_length =
      static_cast<uLongf>(uncompressed_data->RawData().size());
  uint32_t original_length =
      static_cast<uint32_t>(uncompressed_data->RawData().size());

  // Worst-case output size for the given input; the buffer allocated below is
  // therefore at least as large as what will actually be produced.
  uLongf deflated_length = compressBound(source_length);
  result.AllocateData(
      static_cast<uint32_t>(sizeof(original_length) + deflated_length));

  byte* output = const_cast<byte*>(result.RawData().begin());

  // A raw stream carries no zlib/gzip header, so the uncompressed size is
  // written in front of it by hand.
  MemCopy(output, &original_length, sizeof(original_length));
  byte* const deflate_target = output + sizeof(original_length);

  CHECK_EQ(zlib_internal::CompressHelper(
               zlib_internal::ZRAW, deflate_target, &deflated_length,
               bit_cast<const Bytef*>(uncompressed_data->RawData().begin()),
               source_length, Z_DEFAULT_COMPRESSION, nullptr, nullptr),
           Z_OK);

  // Trim the reported size down to the bytes actually written (length prefix
  // plus compressed stream).
  result.Resize(static_cast<uint32_t>(deflated_length) +
                sizeof(original_length));
  DCHECK_EQ(original_length, GetUncompressedSize(result.RawData().begin()));

  if (FLAG_profile_deserialization) {
    double ms = timer.Elapsed().InMillisecondsF();
    PrintF("[Compressing %d bytes took %0.3f ms]\n", original_length, ms);
  }
  return result;
}
// Inflates |compressed_data|, which must have the layout produced by
// SnapshotCompression::Compress:
//   [uint32_t uncompressed length][raw deflate stream]
// Returns a SnapshotData holding exactly the uncompressed bytes.
// Aborts (CHECK) if the input is too short to contain the length prefix, if
// zlib reports anything other than Z_OK, or if the number of bytes produced
// does not match the stored length.
SnapshotData SnapshotCompression::Decompress(
    Vector<const byte> compressed_data) {
  SnapshotData snapshot_data;
  base::ElapsedTimer timer;
  if (FLAG_profile_deserialization) timer.Start();

  // Reading the prefix from a shorter buffer would run past its end, and the
  // payload-length subtraction further down would wrap around.
  CHECK_GE(static_cast<size_t>(compressed_data.size()), sizeof(uint32_t));

  const Bytef* input_bytef = bit_cast<const Bytef*>(compressed_data.begin());

  // Since we are doing raw compression (no zlib or gzip headers), we need to
  // manually retrieve the uncompressed size.
  uint32_t uncompressed_payload_length = GetUncompressedSize(input_bytef);
  input_bytef += sizeof(uncompressed_payload_length);

  snapshot_data.AllocateData(uncompressed_payload_length);

  // In: capacity of the destination buffer. Out: updated by UncompressHelper.
  uLongf uncompressed_size = uncompressed_payload_length;
  CHECK_EQ(zlib_internal::UncompressHelper(
               zlib_internal::ZRAW,
               bit_cast<Bytef*>(snapshot_data.RawData().begin()),
               &uncompressed_size, input_bytef,
               static_cast<uLong>(compressed_data.size() -
                                  sizeof(uncompressed_payload_length))),
           Z_OK);
  // A stream that inflates to fewer bytes than advertised would leave the
  // tail of the freshly allocated buffer unwritten.
  CHECK_EQ(uncompressed_size,
           static_cast<uLongf>(uncompressed_payload_length));

  if (FLAG_profile_deserialization) {
    double ms = timer.Elapsed().InMillisecondsF();
    PrintF("[Decompressing %d bytes took %0.3f ms]\n",
           uncompressed_payload_length, ms);
  }
  return snapshot_data;
}
} // namespace internal
} // namespace v8

View File

@ -0,0 +1,27 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_
#define V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_
#include "src/snapshot/serializer-common.h"
#include "src/snapshot/serializer.h"
#include "src/snapshot/snapshot.h"
#include "src/utils/vector.h"
namespace v8 {
namespace internal {
// Static helpers for producing and consuming compressed SnapshotData.
// The compressed form is a uint32_t holding the uncompressed size, followed
// by the compressed stream.
class SnapshotCompression : public AllStatic {
public:
// Returns a new SnapshotData containing the compressed form of the raw bytes
// of |uncompressed_data|.
V8_EXPORT_PRIVATE static SnapshotData Compress(
const SnapshotData* uncompressed_data);
// Returns a new SnapshotData containing the uncompressed bytes of
// |compressed_data|, which is expected to be in the format written by
// Compress().
V8_EXPORT_PRIVATE static SnapshotData Decompress(
Vector<const byte> compressed_data);
};
} // namespace internal
} // namespace v8
#endif // V8_SNAPSHOT_SNAPSHOT_COMPRESSION_H_

View File

@ -16,6 +16,7 @@ namespace internal {
// Forward declarations.
class Isolate;
class PartialSerializer;
class SnapshotCompression;
class StartupSerializer;
// Wrapper around reservation sizes and the serialization payload.
@ -37,6 +38,15 @@ class V8_EXPORT_PRIVATE SnapshotData : public SerializedData {
}
protected:
// Empty constructor used by SnapshotCompression so it can manually allocate
// memory.
SnapshotData() : SerializedData() {}
friend class SnapshotCompression;
// Resize used by SnapshotCompression so it can shrink the compressed
// SnapshotData. Only the recorded size is updated; the backing store is
// neither reallocated nor copied.
void Resize(uint32_t size) { size_ = size; }
// The data header consists of uint32_t-sized entries:
// [0] magic number and (internal) external reference count
// [1] number of reservation size entries
@ -77,9 +87,9 @@ class Snapshot : public AllStatic {
// ---------------- Serialization ----------------
static v8::StartupData CreateSnapshotBlob(
const SnapshotData* startup_snapshot,
const SnapshotData* read_only_snapshot,
const std::vector<SnapshotData*>& context_snapshots,
const SnapshotData* startup_snapshot_in,
const SnapshotData* read_only_snapshot_in,
const std::vector<SnapshotData*>& context_snapshots_in,
bool can_be_rehashed);
#ifdef DEBUG

View File

@ -53,6 +53,7 @@
#include "src/snapshot/partial-serializer.h"
#include "src/snapshot/read-only-deserializer.h"
#include "src/snapshot/read-only-serializer.h"
#include "src/snapshot/snapshot-compression.h"
#include "src/snapshot/snapshot.h"
#include "src/snapshot/startup-deserializer.h"
#include "src/snapshot/startup-serializer.h"
@ -414,6 +415,24 @@ static void PartiallySerializeContext(Vector<const byte>* startup_blob_out,
ReadOnlyHeap::ClearSharedHeapForTest();
}
// Round-trip test: a serialized partial (context) snapshot must come back
// byte-for-byte identical after Compress() followed by Decompress().
UNINITIALIZED_TEST(SnapshotCompression) {
  DisableAlwaysOpt();

  Vector<const byte> startup_bytes;
  Vector<const byte> read_only_bytes;
  Vector<const byte> context_bytes;
  PartiallySerializeContext(&startup_bytes, &read_only_bytes, &context_bytes);

  SnapshotData uncompressed_input(context_bytes);
  SnapshotData deflated =
      i::SnapshotCompression::Compress(&uncompressed_input);
  SnapshotData inflated =
      i::SnapshotCompression::Decompress(deflated.RawData());
  CHECK_EQ(context_bytes, inflated.RawData());

  // Release the blobs handed out by PartiallySerializeContext.
  startup_bytes.Dispose();
  read_only_bytes.Dispose();
  context_bytes.Dispose();
}
UNINITIALIZED_TEST(PartialSerializerContext) {
DisableAlwaysOpt();
Vector<const byte> startup_blob;
@ -2455,8 +2474,10 @@ TEST(CodeSerializerBitFlip) {
const char* source = "function f() { return 'abc'; }; f() + 'def'";
v8::ScriptCompiler::CachedData* cache = CompileRunAndProduceCache(source);
// Random bit flip.
const_cast<uint8_t*>(cache->data)[337] ^= 0x40;
// Arbitrary bit flip.
int arbitrary_spot = 337;
CHECK_LT(arbitrary_spot, cache->length);
const_cast<uint8_t*>(cache->data)[arbitrary_spot] ^= 0x40;
v8::Isolate::CreateParams create_params;
create_params.array_buffer_allocator = CcTest::array_buffer_allocator();