From 83e2bce9293185f568746598ccefaab62bc84820 Mon Sep 17 00:00:00 2001 From: Seth Brenith Date: Wed, 25 Jan 2023 16:38:56 -0800 Subject: [PATCH] Smaller MATCH_PREVIOUS_TRANSLATION instructions in TranslationArrays In translation arrays, the most common opcode is MATCH_PREVIOUS_TRANSLATION. It is usually represented as two bytes: one byte for the opcode, and a second byte for how many instructions to match. In rare cases, it could extend to a third byte or further due to the variable-length encoding of the operand. In this change, I propose a more compact encoding for MATCH_PREVIOUS_TRANSLATION instructions. The encoding described above is still valid, but the decoder will also look for another option: if the opcode byte's value is greater than any real opcode, then the opcode is implicitly MATCH_PREVIOUS_TRANSLATION and its operand is equal to the opcode byte minus kNumTranslationOpcodes. This change saves about 10% of the total TranslationArray size in an Octane run (130 kB). I don't see any speed changes in encoding (based on V8.TFCodeGeneration) or decoding (based on js-perf-test/StackTrace). I recognize that we're reaching the point where continuing to fiddle with TranslationArray encoding yields diminishing returns, but the complexity introduced by this change is well encapsulated (within two functions in a single .cc file), so I think it's worth doing. I don't plan any further changes. Another option I considered for packing data into the opcode byte is at https://crrev.com/c/4190521 . Its benefit is greater than this change, but its complexity is too, especially in the decoder. Bug: v8:11354 Change-Id: I02fd4dc5f631e54f7a7acc483fbe82ceb5a9ccf9 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4190523 Commit-Queue: Seth Brenith Reviewed-by: Nico Hartmann Cr-Commit-Position: refs/heads/main@{#85500} --- src/deoptimizer/translation-array.cc | 40 +++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/deoptimizer/translation-array.cc b/src/deoptimizer/translation-array.cc index bc3dc04052..14def26df2 100644 --- a/src/deoptimizer/translation-array.cc +++ b/src/deoptimizer/translation-array.cc @@ -112,8 +112,24 @@ TranslationOpcode TranslationArrayIterator::NextOpcode() { if (remaining_ops_to_use_from_previous_translation_) { return NextOpcodeAtPreviousIndex(); } - TranslationOpcode opcode = - static_cast(buffer_.get(index_++)); + uint8_t opcode_byte = buffer_.get(index_++); + + // If the opcode byte is greater than any valid opcode, then the opcode is + // implicitly MATCH_PREVIOUS_TRANSLATION and the operand is the opcode byte + // minus kNumTranslationOpcodes. This special-case encoding of the most common + // opcode saves some memory. + if (opcode_byte >= kNumTranslationOpcodes) { + remaining_ops_to_use_from_previous_translation_ = + opcode_byte - kNumTranslationOpcodes; + opcode_byte = + static_cast(TranslationOpcode::MATCH_PREVIOUS_TRANSLATION); + } else if (opcode_byte == + static_cast( + TranslationOpcode::MATCH_PREVIOUS_TRANSLATION)) { + remaining_ops_to_use_from_previous_translation_ = NextOperandUnsigned(); + } + + TranslationOpcode opcode = static_cast(opcode_byte); DCHECK_LE(index_, buffer_.length()); DCHECK_LT(static_cast(opcode), kNumTranslationOpcodes); if (TranslationOpcodeIsBegin(opcode)) { @@ -133,7 +149,6 @@ TranslationOpcode TranslationArrayIterator::NextOpcode() { } ops_since_previous_index_was_updated_ = 1; } else if (opcode == TranslationOpcode::MATCH_PREVIOUS_TRANSLATION) { - remaining_ops_to_use_from_previous_translation_ = NextOperandUnsigned(); for (int i = 0; i < ops_since_previous_index_was_updated_; ++i) { SkipOpcodeAndItsOperandsAtPreviousIndex(); } @@ -283,9 +298,22 @@ void TranslationArrayBuilder::FinishPendingInstructionIfNeeded() { if (matching_instructions_count_) { total_matching_instructions_in_current_translation_ += matching_instructions_count_; - AddRawToContents( - TranslationOpcode::MATCH_PREVIOUS_TRANSLATION, - UnsignedOperand(static_cast(matching_instructions_count_))); + + // There is a short form for the MATCH_PREVIOUS_TRANSLATION instruction + // because it's the most common opcode: rather than spending a byte on the + // opcode and a second byte on the operand, we can use only a single byte + // which doesn't match any valid opcode. + const int kMaxShortenableOperand = + std::numeric_limits::max() - kNumTranslationOpcodes; + if (matching_instructions_count_ <= kMaxShortenableOperand) { + contents_.push_back(kNumTranslationOpcodes + + matching_instructions_count_); + } else { + // The operand didn't fit in the opcode byte, so encode it normally. + AddRawToContents( + TranslationOpcode::MATCH_PREVIOUS_TRANSLATION, + UnsignedOperand(static_cast(matching_instructions_count_))); + } matching_instructions_count_ = 0; } }