[deoptimizer] Add translation array compression behind a flag

TranslationArrays (TAs) are large and rarely used, and thus could benefit
from compression. This CL adds a --turbo-compress-translation-arrays
flag (off by default) to experiment with that.

Each optimized Code object has an associated translation array
(Code->DeoptimizationData->TranslationArray). These translation arrays
have roughly the same size as the Code object itself. They are
used only rarely: when deoptimizing, and when traversing the stack and
looking into optimized frames. Neither of these code paths is
especially performance critical. TAs contain only immutable, untagged
data. They are thus good candidates for compression.

The trade-off is between TA memory consumption and time spent
in decompression/compression. This CL keeps everything on the main
thread, but it would also be possible to move compression (the more
expensive operation by a factor of 5 to 10) to a worker thread.

Numbers from a local Octane2 run:

Sum of Code instructions sizes: 4.6MB
Sum of uncompressed TA sizes:   4.1MB
Sum of compressed TA sizes:     0.6MB

Compression times depend on the selected compression quality, but
roughly:

Compression:   50ms (40us avg per compilation)
Decompression:        7us avg per compilation

Drive-by: Translation arrays currently use a variable-length integer
encoding; I disabled this when --turbo-compress-translation-arrays is
enabled (no need to compress twice).

Bug: v8:11354
Change-Id: I7828d7d91eb074816b383b02f883c5d7b7e318b7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2652497
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Michael Stanton <mvstanton@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72533}
This commit is contained in:
Jakob Gruber 2021-02-04 15:21:37 +01:00 committed by Commit Bot
parent 5e80ce590c
commit b62bbd1637
4 changed files with 119 additions and 35 deletions

5
src/deoptimizer/DEPS Normal file
View File

@ -0,0 +1,5 @@
# Per-file include permissions that extend the default rules.
specific_include_rules = {
# translation-array.cc includes zlib's compression helpers.
"translation-array.cc": [
"+third_party/zlib",
],
}

View File

@ -5,60 +5,125 @@
#include "src/deoptimizer/translation-array.h"
#include "src/objects/fixed-array-inl.h"
#include "third_party/zlib/google/compression_utils_portable.h"
namespace v8 {
namespace internal {
namespace {
// Constants describing compressed TranslationArray layout. Only relevant if
// --turbo-compress-translation-arrays is enabled.
//
// Layout: a 32-bit header at kUncompressedSizeOffset, immediately followed by
// the compressed payload at kCompressedDataOffset.

// Byte offset of the header field. The value stored there is the number of
// int32 elements in the uncompressed array (readers multiply it by
// kTranslationArrayElementSize to obtain a byte count).
constexpr int kUncompressedSizeOffset = 0;
// Width in bytes of the header field.
constexpr int kUncompressedSizeSize = kInt32Size;
// Byte offset at which the compressed payload starts, right after the header.
constexpr int kCompressedDataOffset =
kUncompressedSizeOffset + kUncompressedSizeSize;
// Width in bytes of a single element of the uncompressed array.
constexpr int kTranslationArrayElementSize = kInt32Size;
} // namespace
// Creates an iterator over |buffer| positioned at |index|.
//
// When --turbo-compress-translation-arrays is enabled, the entire array is
// decompressed up front into uncompressed_contents_, and |index| is validated
// against the decompressed element count instead of the buffer length.
TranslationArrayIterator::TranslationArrayIterator(TranslationArray buffer,
int index)
: buffer_(buffer), index_(index) {
DCHECK(index >= 0 && index < buffer.length());
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
// The header holds the element count; zero-fill the destination vector to
// that many elements so the decompressor can write directly into it.
const int size = buffer_.get_int(kUncompressedSizeOffset);
uncompressed_contents_.insert(uncompressed_contents_.begin(), size, 0);
// Destination capacity in bytes, passed to the helper by pointer.
uLongf uncompressed_size = size * kTranslationArrayElementSize;
// NOTE(review): the input pointer skips kCompressedDataOffset header bytes,
// but the input length is the full buffer_.DataSize(); presumably the length
// should exclude the header as well -- confirm.
CHECK_EQ(
zlib_internal::UncompressHelper(
zlib_internal::ZRAW,
bit_cast<Bytef*>(uncompressed_contents_.data()), &uncompressed_size,
buffer_.GetDataStartAddress() + kCompressedDataOffset,
buffer_.DataSize()),
Z_OK);
// |index| addresses elements of the decompressed vector in this mode.
DCHECK(index >= 0 && index < size);
} else {
// Without compression, |index| is checked against the buffer's own length.
DCHECK(index >= 0 && index < buffer.length());
}
}
int32_t TranslationArrayIterator::Next() {
// Run through the bytes until we reach one with a least significant
// bit of zero (marks the end).
uint32_t bits = 0;
for (int i = 0; true; i += 7) {
DCHECK(HasNext());
uint8_t next = buffer_.get(index_++);
bits |= (next >> 1) << i;
if ((next & 1) == 0) break;
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
return uncompressed_contents_[index_++];
} else {
// Run through the bytes until we reach one with a least significant
// bit of zero (marks the end).
uint32_t bits = 0;
for (int i = 0; true; i += 7) {
DCHECK(HasNext());
uint8_t next = buffer_.get(index_++);
bits |= (next >> 1) << i;
if ((next & 1) == 0) break;
}
// The bits encode the sign in the least significant bit.
bool is_negative = (bits & 1) == 1;
int32_t result = bits >> 1;
return is_negative ? -result : result;
}
// The bits encode the sign in the least significant bit.
bool is_negative = (bits & 1) == 1;
int32_t result = bits >> 1;
return is_negative ? -result : result;
}
bool TranslationArrayIterator::HasNext() const {
return index_ < buffer_.length();
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
return index_ < static_cast<int>(uncompressed_contents_.size());
} else {
return index_ < buffer_.length();
}
}
void TranslationArrayBuilder::Add(int32_t value) {
// This wouldn't handle kMinInt correctly if it ever encountered it.
DCHECK_NE(value, kMinInt);
// Encode the sign bit in the least significant bit.
bool is_negative = (value < 0);
uint32_t bits = (static_cast<uint32_t>(is_negative ? -value : value) << 1) |
static_cast<uint32_t>(is_negative);
// Encode the individual bytes using the least significant bit of
// each byte to indicate whether or not more bytes follow.
do {
uint32_t next = bits >> 7;
contents_.push_back(((bits << 1) & 0xFF) | (next != 0));
bits = next;
} while (bits != 0);
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
contents_for_compression_.push_back(value);
} else {
// This wouldn't handle kMinInt correctly if it ever encountered it.
DCHECK_NE(value, kMinInt);
// Encode the sign bit in the least significant bit.
bool is_negative = (value < 0);
uint32_t bits = (static_cast<uint32_t>(is_negative ? -value : value) << 1) |
static_cast<uint32_t>(is_negative);
// Encode the individual bytes using the least significant bit of
// each byte to indicate whether or not more bytes follow.
do {
uint32_t next = bits >> 7;
contents_.push_back(((bits << 1) & 0xFF) | (next != 0));
bits = next;
} while (bits != 0);
}
}
Handle<TranslationArray> TranslationArrayBuilder::ToTranslationArray(
Factory* factory) {
Handle<TranslationArray> result =
factory->NewByteArray(Size(), AllocationType::kOld);
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
const int input_size = SizeInBytes();
uLongf compressed_data_size = compressBound(input_size);
memcpy(result->GetDataStartAddress(), contents_.data(),
contents_.size() * sizeof(uint8_t));
return result;
ZoneVector<byte> compressed_data(compressed_data_size, zone());
CHECK_EQ(
zlib_internal::CompressHelper(
zlib_internal::ZRAW, compressed_data.data(), &compressed_data_size,
bit_cast<const Bytef*>(contents_for_compression_.data()),
input_size, Z_DEFAULT_COMPRESSION, nullptr, nullptr),
Z_OK);
const int translation_array_size =
static_cast<int>(compressed_data_size) + kUncompressedSizeSize;
Handle<TranslationArray> result =
factory->NewByteArray(translation_array_size, AllocationType::kOld);
result->set_int(kUncompressedSizeOffset, Size());
std::memcpy(result->GetDataStartAddress() + kCompressedDataOffset,
compressed_data.data(), compressed_data_size);
return result;
} else {
Handle<TranslationArray> result =
factory->NewByteArray(SizeInBytes(), AllocationType::kOld);
memcpy(result->GetDataStartAddress(), contents_.data(),
contents_.size() * sizeof(uint8_t));
return result;
}
}
void TranslationArrayBuilder::BeginBuiltinContinuationFrame(

View File

@ -36,15 +36,15 @@ class TranslationArrayIterator {
}
private:
std::vector<int32_t> uncompressed_contents_;
TranslationArray buffer_;
int index_;
};
class TranslationArrayBuilder {
public:
explicit TranslationArrayBuilder(Zone* zone) : contents_(zone), zone_(zone) {}
int Size() const { return static_cast<int>(contents_.size()); }
explicit TranslationArrayBuilder(Zone* zone)
: contents_(zone), contents_for_compression_(zone), zone_(zone) {}
Handle<TranslationArray> ToTranslationArray(Factory* factory);
@ -101,9 +101,21 @@ class TranslationArrayBuilder {
void Add(int32_t value);
void Add(TranslationOpcode opcode) { Add(static_cast<int32_t>(opcode)); }
// Number of entries added so far. Which backing vector is consulted depends
// on --turbo-compress-translation-arrays: raw int32 values when compressing,
// encoded bytes otherwise.
int Size() const {
  if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
    return static_cast<int>(contents_for_compression_.size());
  }
  return static_cast<int>(contents_.size());
}
// Size in bytes of the active backing vector: Size() entries of kInt32Size
// bytes each when compressing, otherwise Size() itself.
int SizeInBytes() const {
  const int entry_count = Size();
  if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
    return entry_count * kInt32Size;
  }
  return entry_count;
}
Zone* zone() const { return zone_; }
ZoneVector<uint8_t> contents_;
ZoneVector<int32_t> contents_for_compression_;
Zone* const zone_;
};

View File

@ -744,6 +744,8 @@ DEFINE_INT(reuse_opt_code_count, 0,
DEFINE_BOOL(turbo_dynamic_map_checks, true,
"use dynamic map checks when generating code for property accesses "
"if all handlers in an IC are the same for turboprop and NCI")
DEFINE_BOOL(turbo_compress_translation_arrays, false,
"compress translation arrays (experimental)")
// Native context independent (NCI) code.
DEFINE_BOOL(turbo_nci, false,