Revert "[wasm-simd][arm] Prototype i64x2.bitmask"
This reverts commit 21e479441e.
Reason for revert: Broke build https://ci.chromium.org/p/v8/builders/ci/V8%20Arm%20-%20debug/16872?
Original change's description:
> [wasm-simd][arm] Prototype i64x2.bitmask
>
> Also cleans up the simulator to remove repetitive logic for reading
> instruction fields.
>
> Bug: v8:10997
> Change-Id: I01f0b99f85788b41e4cab505fc94362d637c396f
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2554256
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71391}
TBR=bbudge@chromium.org,jkummerow@chromium.org,v8-arm-ports@googlegroups.com,zhin@chromium.org
Change-Id: I15513cc5923db7d189d08a9faf5051e57af7190b
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:10997
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2558260
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71393}
Commit 063ee2349a (parent 62f64204d6)
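
For context, i64x2.bitmask packs the sign bit of each 64-bit lane of a
128-bit vector into the low bits of a scalar result. A minimal scalar
sketch of the semantics (illustrative only; not code from this CL):

  #include <cstdint>

  // i64x2.bitmask: bit i of the result is the sign bit of lane i.
  int32_t I64x2BitMask(int64_t lane0, int64_t lane1) {
    int32_t mask = static_cast<int32_t>(static_cast<uint64_t>(lane0) >> 63);
    mask |= static_cast<int32_t>(static_cast<uint64_t>(lane1) >> 63) << 1;
    return mask;
  }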
@@ -3776,7 +3776,6 @@ void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
   int vm, m;
   src.split_code(&vm, &m);
   int size = NeonSz(dst_dt);
   DCHECK_NE(3, size);
   int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
   emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
        0x2 * B8 | op * B6 | m * B5 | vm);
@@ -4438,7 +4437,7 @@ void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
   emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
 }

-enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI, VSRA };
+enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };

 static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                        NeonRegType reg_type, int dst_code,
@@ -4488,13 +4487,6 @@ static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
       op_encoding = B24 | 0x4 * B8;
       break;
     }
-    case VSRA: {
-      DCHECK(shift > 0 && size_in_bits >= shift);
-      imm6 = 2 * size_in_bits - shift;
-      op_encoding = B8;
-      if (is_unsigned) op_encoding |= B24;
-      break;
-    }
     default:
       UNREACHABLE();
   }
@@ -4529,19 +4521,10 @@ void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                          shift.code()));
 }

-void Assembler::vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
-                     int shift) {
-  DCHECK(IsEnabled(NEON));
-  // Dd = vshr(Dm, bits) SIMD shift right immediate.
-  // Instruction details available in ARM DDI 0406C.b, A8-1052.
-  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
-                         dst.code(), src.code(), shift));
-}
-
 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                      int shift) {
   DCHECK(IsEnabled(NEON));
-  // Qd = vshr(Qm, bits) SIMD shift right immediate.
+  // Qd = vshl(Qm, bits) SIMD shift right immediate.
   // Instruction details available in ARM DDI 0406C.b, A8-1052.
   emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                          dst.code(), src.code(), shift));
@@ -4565,15 +4548,6 @@ void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                          shift));
 }

-void Assembler::vsra(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
-                     int imm) {
-  DCHECK(IsEnabled(NEON));
-  // Dd = vsra(Dm, imm) SIMD shift right and accumulate.
-  // Instruction details available in ARM DDI 0487F.b, F6-5569.
-  emit(EncodeNeonShiftOp(VSRA, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
-                         dst.code(), src.code(), imm));
-}
-
 static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
                                   QwNeonRegister src) {
   int vd, d;
@@ -926,12 +926,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
   void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
             QwNeonRegister shift);
-  void vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src, int shift);
   void vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
   void vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src, int shift);
   void vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src, int shift);
-  void vsra(NeonDataType size, DwVfpRegister dst, DwVfpRegister src, int imm);
-
   // vrecpe and vrsqrte only support floating point lanes.
   void vrecpe(QwNeonRegister dst, QwNeonRegister src);
   void vrsqrte(QwNeonRegister dst, QwNeonRegister src);
@@ -2167,19 +2167,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
       break;
     }
-    case kArmI64x2BitMask: {
-      UseScratchRegisterScope temps(tasm());
-      Register dst = i.OutputRegister();
-      Simd128Register src = i.InputSimd128Register(0);
-      QwNeonRegister tmp1 = temps.AcquireQ();
-      Register tmp = temps.Acquire();
-
-      __ vshr(NeonU64, tmp1, src, 63);
-      __ vmov(NeonU32, dst, tmp1.low(), 0);
-      __ vmov(NeonU32, tmp, tmp1.high(), 0);
-      __ add(dst, dst, Operand(tmp, LSL, 1));
-      break;
-    }
     case kArmF32x4Splat: {
       int src_code = i.InputFloatRegister(0).code();
       __ vdup(Neon32, i.OutputSimd128Register(),
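The removed kArmI64x2BitMask case above lowers the operation without any
64-bit arithmetic on the vector side: vshr.u64 by 63 reduces each lane to
its sign bit, vmov moves the low word of each half into core registers,
and the final add combines them as dst + (tmp << 1). A scalar model of
what that sequence computes (hedged sketch; names are illustrative):

  #include <cstdint>

  // Mirrors: vshr.u64 #63, then vmov lane 0 of each half, then add LSL #1.
  int32_t BitMaskLoweringModel(uint64_t lane0, uint64_t lane1) {
    uint32_t dst = static_cast<uint32_t>(lane0 >> 63);  // sign bit of lane 0
    uint32_t tmp = static_cast<uint32_t>(lane1 >> 63);  // sign bit of lane 1
    return static_cast<int32_t>(dst + (tmp << 1));
  }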
@@ -182,7 +182,6 @@ namespace compiler {
   V(ArmI64x2Sub) \
   V(ArmI64x2Mul) \
   V(ArmI64x2ShrU) \
-  V(ArmI64x2BitMask) \
   V(ArmI32x4Splat) \
   V(ArmI32x4ExtractLane) \
   V(ArmI32x4ReplaceLane) \
@@ -162,7 +162,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmI64x2Sub:
     case kArmI64x2Mul:
     case kArmI64x2ShrU:
-    case kArmI64x2BitMask:
     case kArmI32x4Splat:
     case kArmI32x4ExtractLane:
     case kArmI32x4ReplaceLane:
@@ -2984,10 +2984,6 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
   VisitBitMask<kArmI32x4BitMask>(this, node);
 }

-void InstructionSelector::VisitI64x2BitMask(Node* node) {
-  VisitBitMask<kArmI64x2BitMask>(this, node);
-}
-
 namespace {
 void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
                           Node* node) {
@@ -2821,10 +2821,10 @@ void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64

-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
 // TODO(v8:10997) Prototype i64x2.bitmask.
 void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64

 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

@@ -2153,38 +2153,11 @@ void Decoder::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       int imm7 = (l << 6) | instr->Bits(21, 16);
       int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = 2 * size - imm7;
-      if (q) {
-        int Vd = instr->VFPDRegValue(kSimd128Precision);
-        int Vm = instr->VFPMRegValue(kSimd128Precision);
-        out_buffer_pos_ +=
-            SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d q%d, q%d, #%d",
-                     u ? "u" : "s", size, Vd, Vm, shift);
-      } else {
-        int Vd = instr->VFPDRegValue(kDoublePrecision);
-        int Vm = instr->VFPMRegValue(kDoublePrecision);
-        out_buffer_pos_ +=
-            SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d d%d, d%d, #%d",
-                     u ? "u" : "s", size, Vd, Vm, shift);
-      }
-    } else if (imm3H_L != 0 && opc == 1) {
-      // vsra.<type><size> Qd, Qm, shift
-      // vsra.<type><size> Dd, Dm, shift
-      int imm7 = (l << 6) | instr->Bits(21, 16);
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
-      int shift = 2 * size - imm7;
-      if (q) {
-        int Vd = instr->VFPDRegValue(kSimd128Precision);
-        int Vm = instr->VFPMRegValue(kSimd128Precision);
-        out_buffer_pos_ +=
-            SNPrintF(out_buffer_ + out_buffer_pos_, "vsra.%s%d q%d, q%d, #%d",
-                     u ? "u" : "s", size, Vd, Vm, shift);
-      } else {
-        int Vd = instr->VFPDRegValue(kDoublePrecision);
-        int Vm = instr->VFPMRegValue(kDoublePrecision);
-        out_buffer_pos_ +=
-            SNPrintF(out_buffer_ + out_buffer_pos_, "vsra.%s%d d%d, d%d, #%d",
-                     u ? "u" : "s", size, Vd, Vm, shift);
-      }
+      int Vd = instr->VFPDRegValue(kSimd128Precision);
+      int Vm = instr->VFPMRegValue(kSimd128Precision);
+      out_buffer_pos_ +=
+          SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d q%d, q%d, #%d",
+                   u ? "u" : "s", size, Vd, Vm, shift);
     } else if (imm3H_L != 0 && imm3L == 0 && opc == 0b1010 && !q) {
       // vmovl
       if ((instr->VdValue() & 1) != 0) Unknown(instr);
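The decoder above recovers both the lane size and the shift amount from
the single imm7 field: the size is the largest power of two not exceeding
imm7, and the shift is 2 * size - imm7 (so vshr.u64 #63 encodes imm7 = 65).
A standalone sketch of that unpacking (helper and struct names are assumed
for illustration):

  // Unpack a NEON shift-right immediate; mirrors RoundDownToPowerOfTwo32
  // followed by "shift = 2 * size - imm7" in the decoder.
  struct ShiftImm { int size_in_bits; int shift; };
  ShiftImm DecodeShiftRightImm(int imm7) {
    int size = 1;
    while (size * 2 <= imm7) size *= 2;  // round down to a power of two
    return {size, 2 * size - imm7};      // imm7 = 65 -> size 64, shift 63
  }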
@@ -3883,28 +3883,28 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {

 // Helper functions for implementing NEON ops. Unop applies a unary op to each
 // lane. Binop applies a binary operation to matching input lanes.
-template <typename T, int SIZE = kSimd128Size>
+template <typename T>
 void Unop(Simulator* simulator, int Vd, int Vm, std::function<T(T)> unop) {
-  static const int kLanes = SIZE / sizeof(T);
+  static const int kLanes = 16 / sizeof(T);
   T src[kLanes];
-  simulator->get_neon_register<T, SIZE>(Vm, src);
+  simulator->get_neon_register(Vm, src);
   for (int i = 0; i < kLanes; i++) {
     src[i] = unop(src[i]);
   }
-  simulator->set_neon_register<T, SIZE>(Vd, src);
+  simulator->set_neon_register(Vd, src);
 }

-template <typename T, int SIZE = kSimd128Size>
+template <typename T>
 void Binop(Simulator* simulator, int Vd, int Vm, int Vn,
            std::function<T(T, T)> binop) {
-  static const int kLanes = SIZE / sizeof(T);
+  static const int kLanes = 16 / sizeof(T);
   T src1[kLanes], src2[kLanes];
-  simulator->get_neon_register<T, SIZE>(Vn, src1);
-  simulator->get_neon_register<T, SIZE>(Vm, src2);
+  simulator->get_neon_register(Vn, src1);
+  simulator->get_neon_register(Vm, src2);
   for (int i = 0; i < kLanes; i++) {
     src1[i] = binop(src1[i], src2[i]);
   }
-  simulator->set_neon_register<T, SIZE>(Vd, src1);
+  simulator->set_neon_register(Vd, src1);
 }

 // Templated operations for NEON instructions.
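The SIZE parameter being reverted here is what let one helper serve both
NEON register widths: SIZE = 16 bytes models a Q register and SIZE = 8 a D
register, while the restored helpers hard-code 16. A self-contained sketch
of the pattern (simplified; no Simulator plumbing, names illustrative):

  #include <functional>

  // Apply a unary op to each T-sized lane of a SIZE-byte vector in place.
  template <typename T, int SIZE = 16>
  void UnopLanes(T* lanes, std::function<T(T)> unop) {
    constexpr int kLanes = SIZE / sizeof(T);
    for (int i = 0; i < kLanes; i++) lanes[i] = unop(lanes[i]);
  }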
@@ -4114,40 +4114,15 @@ void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
 }

 template <typename T, int SIZE>
-void LogicalShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
-  Unop<T, SIZE>(simulator, Vd, Vm, [shift](T x) { return x >> shift; });
+void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
+  Unop<T>(simulator, Vd, Vm, [shift](T x) { return x >> shift; });
 }

 template <typename T, int SIZE>
 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
   auto shift_fn =
       std::bind(ArithmeticShiftRight<T>, std::placeholders::_1, shift);
-  Unop<T, SIZE>(simulator, Vd, Vm, shift_fn);
-}
-
-template <typename T, int SIZE>
-void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift,
-                bool is_unsigned) {
-  if (is_unsigned) {
-    using unsigned_T = typename std::make_unsigned<T>::type;
-    LogicalShiftRight<unsigned_T, SIZE>(simulator, Vd, Vm, shift);
-  } else {
-    ArithmeticShiftRight<T, SIZE>(simulator, Vd, Vm, shift);
-  }
-}
-
-template <typename T, int SIZE>
-void ShiftRightAccumulate(Simulator* simulator, int Vd, int Vm, int shift) {
-  Binop<T, SIZE>(simulator, Vd, Vd, Vm,
-                 [shift](T a, T x) { return a + (x >> shift); });
-}
-
-template <typename T, int SIZE>
-void ArithmeticShiftRightAccumulate(Simulator* simulator, int Vd, int Vm,
-                                    int shift) {
-  Binop<T, SIZE>(simulator, Vd, Vd, Vm, [shift](T a, T x) {
-    return a + ArithmeticShiftRight<T>(x, shift);
-  });
+  Unop<T>(simulator, Vd, Vm, shift_fn);
 }

 template <typename T, int SIZE>
@@ -4677,8 +4652,8 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
           }
+          break;
         }
-        case Neon64:
-          UNREACHABLE();
         default:
           UNIMPLEMENTED();
+          break;
       }
     } else if (opc1 == 0b10 && instr->Bit(10) == 1) {
@@ -5409,73 +5384,45 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       int l = instr->Bit(7);
       int q = instr->Bit(6);
       int imm3H_L = imm3H << 1 | l;
-      int imm7 = instr->Bits(21, 16);
-      imm7 += (l << 6);
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
-      NeonSize ns =
-          static_cast<NeonSize>(base::bits::WhichPowerOfTwo(size >> 3));

       if (imm3H_L != 0 && opc == 0) {
-        // vshr.s/u<size> Qd, Qm, shift
+        // vshr.s<size> Qd, Qm, shift
+        int imm7 = instr->Bits(21, 16);
+        if (instr->Bit(7) != 0) imm7 += 64;
+        int size = base::bits::RoundDownToPowerOfTwo32(imm7);
         int shift = 2 * size - imm7;
-        int Vd = instr->VFPDRegValue(q ? kSimd128Precision : kDoublePrecision);
-        int Vm = instr->VFPMRegValue(q ? kSimd128Precision : kDoublePrecision);
-        switch (ns) {
-          case Neon8:
-            q ? ShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift, u)
-              : ShiftRight<int8_t, kDoubleSize>(this, Vd, Vm, shift, u);
-            break;
-          case Neon16:
-            q ? ShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift, u)
-              : ShiftRight<int16_t, kDoubleSize>(this, Vd, Vm, shift, u);
-            break;
-          case Neon32:
-            q ? ShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift, u)
-              : ShiftRight<int32_t, kDoubleSize>(this, Vd, Vm, shift, u);
-            break;
-          case Neon64:
-            q ? ShiftRight<int64_t, kSimd128Size>(this, Vd, Vm, shift, u)
-              : ShiftRight<int64_t, kDoubleSize>(this, Vd, Vm, shift, u);
-            break;
-        }
-      } else if (imm3H_L != 0 && opc == 1) {
-        // vsra Dd, Dm, #imm
-        DCHECK(!q);  // Unimplemented for now.
-        int shift = 2 * size - imm7;
-        int Vd = instr->VFPDRegValue(kDoublePrecision);
-        int Vm = instr->VFPMRegValue(kDoublePrecision);
+        int Vd = instr->VFPDRegValue(kSimd128Precision);
+        int Vm = instr->VFPMRegValue(kSimd128Precision);
+        NeonSize ns =
+            static_cast<NeonSize>(base::bits::WhichPowerOfTwo(size >> 3));
         if (u) {
           switch (ns) {
             case Neon8:
-              ShiftRightAccumulate<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
+              ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
             case Neon16:
-              ShiftRightAccumulate<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
+              ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
             case Neon32:
-              ShiftRightAccumulate<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
+              ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
             case Neon64:
-              ShiftRightAccumulate<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
+              ShiftRight<uint64_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
           }
         } else {
           switch (ns) {
             case Neon8:
-              ArithmeticShiftRightAccumulate<int8_t, kDoubleSize>(this, Vd, Vm,
-                                                                  shift);
+              ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
             case Neon16:
-              ArithmeticShiftRightAccumulate<int16_t, kDoubleSize>(this, Vd, Vm,
-                                                                   shift);
+              ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
             case Neon32:
-              ArithmeticShiftRightAccumulate<int32_t, kDoubleSize>(this, Vd, Vm,
-                                                                   shift);
+              ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
            case Neon64:
-              ArithmeticShiftRightAccumulate<int64_t, kDoubleSize>(this, Vd, Vm,
-                                                                   shift);
+              ArithmeticShiftRight<int64_t, kSimd128Size>(this, Vd, Vm, shift);
               break;
           }
         }
@@ -5485,7 +5432,8 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
-      switch (imm3H) {
+      int imm3 = instr->Bits(21, 19);
+      switch (imm3) {
         case 1:
           Widen<uint8_t, uint16_t>(this, Vd, Vm);
           break;
@@ -5504,7 +5452,8 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
-      switch (imm3H) {
+      int imm3 = instr->Bits(21, 19);
+      switch (imm3) {
         case 1:
           Widen<int8_t, int16_t>(this, Vd, Vm);
           break;
@@ -5521,6 +5470,9 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (!u && imm3H_L != 0 && opc == 0b0101) {
       // vshl.i<size> Qd, Qm, shift
+      int imm7 = instr->Bits(21, 16);
+      if (instr->Bit(7) != 0) imm7 += 64;
+      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = imm7 - size;
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kSimd128Precision);
@@ -5542,6 +5494,9 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (u && imm3H_L != 0 && opc == 0b0100) {
       // vsri.<size> Dd, Dm, shift
+      int imm7 = instr->Bits(21, 16);
+      if (instr->Bit(7) != 0) imm7 += 64;
+      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = 2 * size - imm7;
       int Vd = instr->VFPDRegValue(kDoublePrecision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
@@ -5564,6 +5519,9 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (u && imm3H_L != 0 && opc == 0b0101) {
       // vsli.<size> Dd, Dm, shift
+      int imm7 = instr->Bits(21, 16);
+      if (instr->Bit(7) != 0) imm7 += 64;
+      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = imm7 - size;
       int Vd = instr->VFPDRegValue(kDoublePrecision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
@@ -1289,11 +1289,9 @@ TEST(15) {
     uint32_t vsub8[4], vsub16[4], vsub32[4];
     uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
     uint32_t vmul8[4], vmul16[4], vmul32[4];
-    uint32_t vshl8[4], vshl16[4], vshl32[4];
-    uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[4];
-    uint32_t vshr_s8_d[2], vshr_u16_d[2], vshr_s32_d[2];
+    uint32_t vshl8[4], vshl16[4], vshl32[5];
+    uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
     uint32_t vsli_64[2], vsri_64[2], vsli_32[2], vsri_32[2];
-    uint32_t vsra_64[2], vsra_32[2];
     uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
     uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
     uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
@@ -1803,19 +1801,6 @@ TEST(15) {
     __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32))));
     __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));

-    // vshr.s, vshr.u with d registers.
-    __ mov(r4, Operand(0x80));
-    __ vdup(Neon8, q0, r4);
-    __ vshr(NeonS8, d1, d0, 1);
-    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s8_d))));
-    __ vst1(Neon8, NeonListOperand(d1), NeonMemOperand(r4));
-    __ vshr(NeonU16, d2, d0, 9);
-    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_u16_d))));
-    __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
-    __ vshr(NeonS32, d2, d0, 17);
-    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32_d))));
-    __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
-
     // vsli, vsri.
     __ mov(r4, Operand(0xFFFFFFFF));
     __ mov(r5, Operand(0x1));
@@ -1836,16 +1821,6 @@ TEST(15) {
     __ vsri(Neon32, d1, d0, 16);
     __ vstr(d1, r0, offsetof(T, vsri_32));

-    // vsra.
-    __ vmov(d0, r4, r5);
-    // Check same dst and src registers.
-    __ vsra(NeonU64, d0, d0, 1);
-    __ vstr(d0, r0, offsetof(T, vsra_64));
-    __ vmov(d0, r4, r5);
-    __ vmov(d1, r5, r4);
-    __ vsra(NeonS32, d1, d0, 16);
-    __ vstr(d1, r0, offsetof(T, vsra_32));
-
     // vceq.
     __ mov(r4, Operand(0x03));
     __ vdup(Neon8, q0, r4);
@@ -2222,15 +2197,10 @@ TEST(15) {
     CHECK_EQ_SPLAT(vshr_s8, 0xC0C0C0C0u);
     CHECK_EQ_SPLAT(vshr_u16, 0x00400040u);
     CHECK_EQ_SPLAT(vshr_s32, 0xFFFFC040u);
-    CHECK_EQ_32X2(vshr_s8_d, 0xC0C0C0C0u, 0xC0C0C0C0u);
-    CHECK_EQ_32X2(vshr_u16_d, 0x00400040u, 0x00400040u);
-    CHECK_EQ_32X2(vshr_s32_d, 0xFFFFC040u, 0xFFFFC040u);
     CHECK_EQ_32X2(vsli_64, 0x01u, 0xFFFFFFFFu);
     CHECK_EQ_32X2(vsri_64, 0xFFFFFFFFu, 0x01u);
     CHECK_EQ_32X2(vsli_32, 0xFFFF0001u, 0x00010001u);
     CHECK_EQ_32X2(vsri_32, 0x00000000u, 0x0000FFFFu);
-    CHECK_EQ_32X2(vsra_64, 0xFFFFFFFEu, 0x2);
-    CHECK_EQ_32X2(vsra_32, 0xFFFFFFFFu, 0x0);
     CHECK_EQ_SPLAT(vceq, 0x00FF00FFu);
     // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...]
     CHECK_EQ_SPLAT(vcge_s8, 0x00FF00FFu);
@@ -1630,7 +1630,7 @@ WASM_SIMD_TEST(I32x4BitMask) {
 }

 // TODO(v8:10997) Prototyping i64x2.bitmask.
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
   FLAG_SCOPE(wasm_simd_post_mvp);
   WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -1648,7 +1648,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
     CHECK_EQ(actual, expected);
   }
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

 WASM_SIMD_TEST(I8x16Splat) {
   WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);