From 83e2bce9293185f568746598ccefaab62bc84820 Mon Sep 17 00:00:00 2001
From: Seth Brenith <seth.brenith@microsoft.com>
Date: Wed, 25 Jan 2023 16:38:56 -0800
Subject: [PATCH] Smaller MATCH_PREVIOUS_TRANSLATION instructions in
 TranslationArrays

In translation arrays, the most common opcode is
MATCH_PREVIOUS_TRANSLATION. It is usually represented as two bytes: one
byte for the opcode, and a second byte for how many instructions to
match. In rare cases, it could extend to a third byte or further due to
the variable-length encoding of the operand.

In this change, I propose a more compact encoding for
MATCH_PREVIOUS_TRANSLATION instructions. The encoding described above is
still valid, but the decoder will also look for another option: if the
opcode byte's value is greater than any real opcode, then the opcode is
implicitly MATCH_PREVIOUS_TRANSLATION and its operand is equal to the
opcode byte minus kNumTranslationOpcodes.

This change saves about 10% of the total TranslationArray size in an
Octane run (130 kB). I don't see any speed changes in encoding
(based on V8.TFCodeGeneration) or decoding (based on
js-perf-test/StackTrace).

I recognize that we're reaching the point where continuing to fiddle
with TranslationArray encoding yields diminishing returns, but the
complexity introduced by this change is well encapsulated (within two
functions in a single .cc file), so I think it's worth doing. I don't
plan any further changes.

Another option I considered for packing data into the opcode byte is
at https://crrev.com/c/4190521 . Its benefit is greater than this
change, but its complexity is too, especially in the decoder.

Bug: v8:11354
Change-Id: I02fd4dc5f631e54f7a7acc483fbe82ceb5a9ccf9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4190523
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85500}
---
 src/deoptimizer/translation-array.cc | 40 +++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/deoptimizer/translation-array.cc b/src/deoptimizer/translation-array.cc
index bc3dc04052..14def26df2 100644
--- a/src/deoptimizer/translation-array.cc
+++ b/src/deoptimizer/translation-array.cc
@@ -112,8 +112,24 @@ TranslationOpcode TranslationArrayIterator::NextOpcode() {
   if (remaining_ops_to_use_from_previous_translation_) {
     return NextOpcodeAtPreviousIndex();
   }
-  TranslationOpcode opcode =
-      static_cast<TranslationOpcode>(buffer_.get(index_++));
+  uint8_t opcode_byte = buffer_.get(index_++);
+
+  // If the opcode byte is greater than any valid opcode, then the opcode is
+  // implicitly MATCH_PREVIOUS_TRANSLATION and the operand is the opcode byte
+  // minus kNumTranslationOpcodes. This special-case encoding of the most common
+  // opcode saves some memory.
+  if (opcode_byte >= kNumTranslationOpcodes) {
+    remaining_ops_to_use_from_previous_translation_ =
+        opcode_byte - kNumTranslationOpcodes;
+    opcode_byte =
+        static_cast<uint8_t>(TranslationOpcode::MATCH_PREVIOUS_TRANSLATION);
+  } else if (opcode_byte ==
+             static_cast<uint8_t>(
+                 TranslationOpcode::MATCH_PREVIOUS_TRANSLATION)) {
+    remaining_ops_to_use_from_previous_translation_ = NextOperandUnsigned();
+  }
+
+  TranslationOpcode opcode = static_cast<TranslationOpcode>(opcode_byte);
   DCHECK_LE(index_, buffer_.length());
   DCHECK_LT(static_cast<uint32_t>(opcode), kNumTranslationOpcodes);
   if (TranslationOpcodeIsBegin(opcode)) {
@@ -133,7 +149,6 @@ TranslationOpcode TranslationArrayIterator::NextOpcode() {
     }
     ops_since_previous_index_was_updated_ = 1;
   } else if (opcode == TranslationOpcode::MATCH_PREVIOUS_TRANSLATION) {
-    remaining_ops_to_use_from_previous_translation_ = NextOperandUnsigned();
     for (int i = 0; i < ops_since_previous_index_was_updated_; ++i) {
       SkipOpcodeAndItsOperandsAtPreviousIndex();
     }
@@ -283,9 +298,22 @@ void TranslationArrayBuilder::FinishPendingInstructionIfNeeded() {
   if (matching_instructions_count_) {
     total_matching_instructions_in_current_translation_ +=
         matching_instructions_count_;
-    AddRawToContents(
-        TranslationOpcode::MATCH_PREVIOUS_TRANSLATION,
-        UnsignedOperand(static_cast<uint32_t>(matching_instructions_count_)));
+
+    // There is a short form for the MATCH_PREVIOUS_TRANSLATION instruction
+    // because it's the most common opcode: rather than spending a byte on the
+    // opcode and a second byte on the operand, we can use only a single byte
+    // which doesn't match any valid opcode.
+    const int kMaxShortenableOperand =
+        std::numeric_limits<uint8_t>::max() - kNumTranslationOpcodes;
+    if (matching_instructions_count_ <= kMaxShortenableOperand) {
+      contents_.push_back(kNumTranslationOpcodes +
+                          matching_instructions_count_);
+    } else {
+      // The operand didn't fit in the opcode byte, so encode it normally.
+      AddRawToContents(
+          TranslationOpcode::MATCH_PREVIOUS_TRANSLATION,
+          UnsignedOperand(static_cast<uint32_t>(matching_instructions_count_)));
+    }
     matching_instructions_count_ = 0;
   }
 }