[deoptimizer] Add translation array compression behind a flag
TranslationArrays (TAs) are large and rarely used, and thus could benefit from compression. This CL adds a --turbo-compress-translation-arrays flag (off by default) to experiment with that. Each optimized Code object has an associated translation array (Code->DeoptimizationData->TranslationArray). These translation arrays have roughly the same size as the Code object itself. They are used only rarely: when deoptimizing, and when traversing the stack and looking into optimized frames. Neither of these code paths is especially performance critical. TAs contain only immutable, untagged data. They are thus good candidates for compression. The trade-off is between TA memory consumption and time spent in decompression/compression. This CL keeps everything on the main thread, but it would also be possible to move compression (the more expensive operation by a factor of 5 to 10) to a worker thread. Numbers from a local Octane2 run: sum of Code instruction sizes: 4.6MB; sum of uncompressed TA sizes: 4.1MB; sum of compressed TA sizes: 0.6MB. Compression times depend on the selected compression quality, but roughly: compression: 50ms (40us avg per compilation); decompression: 7us avg per compilation. Drive-by: Translation arrays currently use run-length encoding; I disabled this when --turbo-compress-translation-arrays is enabled (no need to compress twice). Bug: v8:11354 Change-Id: I7828d7d91eb074816b383b02f883c5d7b7e318b7 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2652497 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Michael Stanton <mvstanton@chromium.org> Cr-Commit-Position: refs/heads/master@{#72533}
This commit is contained in:
parent
5e80ce590c
commit
b62bbd1637
5
src/deoptimizer/DEPS
Normal file
5
src/deoptimizer/DEPS
Normal file
@ -0,0 +1,5 @@
|
||||
specific_include_rules = {
|
||||
"translation-array.cc": [
|
||||
"+third_party/zlib",
|
||||
],
|
||||
}
|
@ -5,60 +5,125 @@
|
||||
#include "src/deoptimizer/translation-array.h"
|
||||
|
||||
#include "src/objects/fixed-array-inl.h"
|
||||
#include "third_party/zlib/google/compression_utils_portable.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
|
||||
// Constants describing compressed TranslationArray layout. Only relevant if
|
||||
// --turbo-compress-translation-arrays is enabled.
|
||||
// Position (as passed to get_int/set_int) of the leading value that records
// how many uncompressed elements the array holds.
constexpr int kUncompressedSizeOffset = 0;
|
||||
// Size of that leading value.
constexpr int kUncompressedSizeSize = kInt32Size;
|
||||
// The compressed payload starts immediately after the leading size value.
constexpr int kCompressedDataOffset =
|
||||
kUncompressedSizeOffset + kUncompressedSizeSize;
|
||||
// Size of one uncompressed element; multiplied with the element count to get
// the byte size of the decompression output buffer.
constexpr int kTranslationArrayElementSize = kInt32Size;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates an iterator over |buffer| that starts reading at |index|.
// When --turbo-compress-translation-arrays is enabled, the whole array is
// decompressed up front into uncompressed_contents_: the value stored at
// kUncompressedSizeOffset gives the element count, and the payload starting
// at kCompressedDataOffset is decompressed with zlib's UncompressHelper
// (ZRAW). The CHECK_EQ asserts that decompression returned Z_OK.
// NOTE(review): the input length handed to UncompressHelper is
// buffer_.DataSize(), although the payload begins kCompressedDataOffset bytes
// into the data area -- confirm the leading size value need not be subtracted.
TranslationArrayIterator::TranslationArrayIterator(TranslationArray buffer,
|
||||
int index)
|
||||
: buffer_(buffer), index_(index) {
|
||||
DCHECK(index >= 0 && index < buffer.length());
|
||||
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
|
||||
const int size = buffer_.get_int(kUncompressedSizeOffset);
|
||||
// Pre-size the output vector with |size| zeroed elements so zlib can write
// straight into its storage.
uncompressed_contents_.insert(uncompressed_contents_.begin(), size, 0);
|
||||
|
||||
uLongf uncompressed_size = size * kTranslationArrayElementSize;
|
||||
|
||||
CHECK_EQ(
|
||||
zlib_internal::UncompressHelper(
|
||||
zlib_internal::ZRAW,
|
||||
bit_cast<Bytef*>(uncompressed_contents_.data()), &uncompressed_size,
|
||||
buffer_.GetDataStartAddress() + kCompressedDataOffset,
|
||||
buffer_.DataSize()),
|
||||
Z_OK);
|
||||
// In compressed mode |index| addresses decompressed elements, so it is
// bounds-checked against the element count rather than buffer.length().
DCHECK(index >= 0 && index < size);
|
||||
} else {
|
||||
DCHECK(index >= 0 && index < buffer.length());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the next value of the translation array and advances index_.
// With --turbo-compress-translation-arrays the value is read directly from
// the decompressed int32 vector. Otherwise it is decoded from a
// variable-length byte sequence: every byte contributes its upper 7 bits, the
// low bit of a byte tells whether another byte follows, and the low bit of
// the assembled value carries the sign.
// NOTE(review): the compressed path indexes uncompressed_contents_ without a
// DCHECK(HasNext()) -- confirm callers guarantee the bound.
int32_t TranslationArrayIterator::Next() {
|
||||
// Run through the bytes until we reach one with a least significant
|
||||
// bit of zero (marks the end).
|
||||
uint32_t bits = 0;
|
||||
for (int i = 0; true; i += 7) {
|
||||
DCHECK(HasNext());
|
||||
uint8_t next = buffer_.get(index_++);
|
||||
bits |= (next >> 1) << i;
|
||||
if ((next & 1) == 0) break;
|
||||
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
|
||||
return uncompressed_contents_[index_++];
|
||||
} else {
|
||||
// Run through the bytes until we reach one with a least significant
|
||||
// bit of zero (marks the end).
|
||||
uint32_t bits = 0;
|
||||
for (int i = 0; true; i += 7) {
|
||||
DCHECK(HasNext());
|
||||
uint8_t next = buffer_.get(index_++);
|
||||
bits |= (next >> 1) << i;
|
||||
if ((next & 1) == 0) break;
|
||||
}
|
||||
// The bits encode the sign in the least significant bit.
|
||||
bool is_negative = (bits & 1) == 1;
|
||||
int32_t result = bits >> 1;
|
||||
return is_negative ? -result : result;
|
||||
}
|
||||
// The bits encode the sign in the least significant bit.
|
||||
bool is_negative = (bits & 1) == 1;
|
||||
int32_t result = bits >> 1;
|
||||
return is_negative ? -result : result;
|
||||
}
|
||||
|
||||
// Reports whether index_ is still inside the data being iterated: the
// decompressed vector when --turbo-compress-translation-arrays is enabled,
// the underlying buffer otherwise.
bool TranslationArrayIterator::HasNext() const {
|
||||
return index_ < buffer_.length();
|
||||
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
|
||||
return index_ < static_cast<int>(uncompressed_contents_.size());
|
||||
} else {
|
||||
return index_ < buffer_.length();
|
||||
}
|
||||
}
|
||||
|
||||
// Appends |value| to the builder.
// With --turbo-compress-translation-arrays the value is stored unencoded in
// contents_for_compression_. Otherwise it is appended to contents_ in a
// variable-length form: the sign is folded into the least significant bit of
// the magnitude, which is then emitted 7 bits per byte, with the low bit of
// each byte flagging whether more bytes follow. kMinInt is rejected by a
// DCHECK on the encoding path because its magnitude cannot be negated.
void TranslationArrayBuilder::Add(int32_t value) {
|
||||
// This wouldn't handle kMinInt correctly if it ever encountered it.
|
||||
DCHECK_NE(value, kMinInt);
|
||||
// Encode the sign bit in the least significant bit.
|
||||
bool is_negative = (value < 0);
|
||||
uint32_t bits = (static_cast<uint32_t>(is_negative ? -value : value) << 1) |
|
||||
static_cast<uint32_t>(is_negative);
|
||||
// Encode the individual bytes using the least significant bit of
|
||||
// each byte to indicate whether or not more bytes follow.
|
||||
do {
|
||||
uint32_t next = bits >> 7;
|
||||
contents_.push_back(((bits << 1) & 0xFF) | (next != 0));
|
||||
bits = next;
|
||||
} while (bits != 0);
|
||||
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
|
||||
contents_for_compression_.push_back(value);
|
||||
} else {
|
||||
// This wouldn't handle kMinInt correctly if it ever encountered it.
|
||||
DCHECK_NE(value, kMinInt);
|
||||
// Encode the sign bit in the least significant bit.
|
||||
bool is_negative = (value < 0);
|
||||
uint32_t bits = (static_cast<uint32_t>(is_negative ? -value : value) << 1) |
|
||||
static_cast<uint32_t>(is_negative);
|
||||
// Encode the individual bytes using the least significant bit of
|
||||
// each byte to indicate whether or not more bytes follow.
|
||||
do {
|
||||
uint32_t next = bits >> 7;
|
||||
contents_.push_back(((bits << 1) & 0xFF) | (next != 0));
|
||||
bits = next;
|
||||
} while (bits != 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates (AllocationType::kOld) and fills a TranslationArray holding the
// values added so far.
// With --turbo-compress-translation-arrays the int32 values in
// contents_for_compression_ are compressed via zlib's CompressHelper (ZRAW,
// Z_DEFAULT_COMPRESSION) into a temporary ZoneVector sized by compressBound();
// the resulting array stores Size() at kUncompressedSizeOffset followed by
// the compressed bytes at kCompressedDataOffset. The CHECK_EQ asserts that
// compression returned Z_OK.
// Otherwise the already-encoded bytes in contents_ are copied verbatim.
Handle<TranslationArray> TranslationArrayBuilder::ToTranslationArray(
|
||||
Factory* factory) {
|
||||
Handle<TranslationArray> result =
|
||||
factory->NewByteArray(Size(), AllocationType::kOld);
|
||||
if (V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)) {
|
||||
const int input_size = SizeInBytes();
|
||||
// Upper bound for the compressed size; CompressHelper updates
// compressed_data_size through the pointer it is given.
uLongf compressed_data_size = compressBound(input_size);
|
||||
|
||||
memcpy(result->GetDataStartAddress(), contents_.data(),
|
||||
contents_.size() * sizeof(uint8_t));
|
||||
return result;
|
||||
ZoneVector<byte> compressed_data(compressed_data_size, zone());
|
||||
|
||||
CHECK_EQ(
|
||||
zlib_internal::CompressHelper(
|
||||
zlib_internal::ZRAW, compressed_data.data(), &compressed_data_size,
|
||||
bit_cast<const Bytef*>(contents_for_compression_.data()),
|
||||
input_size, Z_DEFAULT_COMPRESSION, nullptr, nullptr),
|
||||
Z_OK);
|
||||
|
||||
// Final array = leading size value + compressed payload.
const int translation_array_size =
|
||||
static_cast<int>(compressed_data_size) + kUncompressedSizeSize;
|
||||
Handle<TranslationArray> result =
|
||||
factory->NewByteArray(translation_array_size, AllocationType::kOld);
|
||||
|
||||
result->set_int(kUncompressedSizeOffset, Size());
|
||||
std::memcpy(result->GetDataStartAddress() + kCompressedDataOffset,
|
||||
compressed_data.data(), compressed_data_size);
|
||||
|
||||
return result;
|
||||
} else {
|
||||
Handle<TranslationArray> result =
|
||||
factory->NewByteArray(SizeInBytes(), AllocationType::kOld);
|
||||
memcpy(result->GetDataStartAddress(), contents_.data(),
|
||||
contents_.size() * sizeof(uint8_t));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslationArrayBuilder::BeginBuiltinContinuationFrame(
|
||||
|
@ -36,15 +36,15 @@ class TranslationArrayIterator {
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<int32_t> uncompressed_contents_;
|
||||
TranslationArray buffer_;
|
||||
int index_;
|
||||
};
|
||||
|
||||
class TranslationArrayBuilder {
|
||||
public:
|
||||
explicit TranslationArrayBuilder(Zone* zone) : contents_(zone), zone_(zone) {}
|
||||
|
||||
int Size() const { return static_cast<int>(contents_.size()); }
|
||||
explicit TranslationArrayBuilder(Zone* zone)
|
||||
: contents_(zone), contents_for_compression_(zone), zone_(zone) {}
|
||||
|
||||
Handle<TranslationArray> ToTranslationArray(Factory* factory);
|
||||
|
||||
@ -101,9 +101,21 @@ class TranslationArrayBuilder {
|
||||
void Add(int32_t value);
|
||||
void Add(TranslationOpcode opcode) { Add(static_cast<int32_t>(opcode)); }
|
||||
|
||||
// Number of elements added so far: int32 entries of
// contents_for_compression_ when --turbo-compress-translation-arrays is
// enabled, encoded bytes of contents_ otherwise.
int Size() const {
|
||||
return V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)
|
||||
? static_cast<int>(contents_for_compression_.size())
|
||||
: static_cast<int>(contents_.size());
|
||||
}
|
||||
// Byte size of the builder's contents: Size() * kInt32Size when
// --turbo-compress-translation-arrays is enabled, otherwise Size() itself.
int SizeInBytes() const {
|
||||
return V8_UNLIKELY(FLAG_turbo_compress_translation_arrays)
|
||||
? Size() * kInt32Size
|
||||
: Size();
|
||||
}
|
||||
|
||||
Zone* zone() const { return zone_; }
|
||||
|
||||
ZoneVector<uint8_t> contents_;
|
||||
ZoneVector<int32_t> contents_for_compression_;
|
||||
Zone* const zone_;
|
||||
};
|
||||
|
||||
|
@ -744,6 +744,8 @@ DEFINE_INT(reuse_opt_code_count, 0,
|
||||
DEFINE_BOOL(turbo_dynamic_map_checks, true,
|
||||
"use dynamic map checks when generating code for property accesses "
|
||||
"if all handlers in an IC are the same for turboprop and NCI")
|
||||
DEFINE_BOOL(turbo_compress_translation_arrays, false,
|
||||
"compress translation arrays (experimental)")
|
||||
|
||||
// Native context independent (NCI) code.
|
||||
DEFINE_BOOL(turbo_nci, false,
|
||||
|
Loading…
Reference in New Issue
Block a user