b6643320b9
In order to reduce the codegen size of dynamic map checks, add the ability to have an eager with resume deopt point, which can call a given builtin to perform a more detailed check than can be done in codegen, and then either deoptimizes itself (as if the calling code had performed an eager deopt) or resumes execution in the calling code after the check. In addition, support for adding extra arguments to a deoptimization continuation is added to enable us to pass the necessary arguments to the DynamicMapChecks builtin. Finally, a trampoline is added to the DynamicMapChecks which saves the registers that might be clobbered by that builtin, to avoid having to save them in the generated code. This trampoline also performs the deoptimization based on the result of the DynamicMapChecks builtin. In order to ensure both the trampoline and DynamicMapChecks builtin have the same call interface, and to limit the number of registers that need saving in the trampoline, the DynamicMapChecks builtin is moved to be a CSA builtin with a custom CallInterfaceDescriptor, that calls an exported Torque macro that implements the actual functionality. All told, this changes the codegen for a monomorphic dynamic map check from: movl rbx,<expected_map> cmpl [<object>-0x1],rbx jnz <deferred_call> resume_point: ... deferred_call: <spill registers> movl rax,<slot> movq rbx,<object> movq rcx,<handler> movq r10,<DynamicMapChecks> call r10 cmpq rax,0x0 jz <restore_regs> cmpq rax,0x1 jz <deopt_point_1> cmpq rax,0x2 jz <deopt_point_2> int3l restore_regs: <restore_regs> jmp <resume_point> ... deopt_point_1: call Deoptimization_Eager deopt_point_2: call Deoptimization_Bailout To: movl rax,<slot> movl rcx,<expected_map> movq rdx,<handler> cmpl [<object>-0x1],rcx jnz <deopt_point> resume_point: ... 
deopt_point: call DynamicMapChecksTrampoline jmp <resume_point> BUG=v8:10582 Change-Id: Ica4927b9acc963b9b73dc62d9379a7815335650f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2560197 Commit-Queue: Ross McIlroy <rmcilroy@chromium.org> Reviewed-by: Georg Neis <neis@chromium.org> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#71545}
351 lines
12 KiB
C++
351 lines
12 KiB
C++
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "src/codegen/assembler-inl.h"
|
|
#include "src/codegen/macro-assembler.h"
|
|
#include "src/deoptimizer/deoptimizer.h"
|
|
#include "src/execution/simulator.h"
|
|
#include "src/init/v8.h"
|
|
#include "src/objects/objects-inl.h"
|
|
#include "src/utils/ostreams.h"
|
|
#include "test/cctest/cctest.h"
|
|
#include "test/common/assembler-tester.h"
|
|
|
|
namespace v8 {
|
|
namespace internal {
|
|
namespace test_macro_assembler_arm {
|
|
|
|
// Signature of generated code taking five integer arguments and returning a
// pointer. Not referenced by the tests visible in this file.
using F = void*(int x, int y, int p2, int p3, int p4);

// Shorthand for emitting instructions: `__ mov(...)` expands to
// `masm->mov(...)`, so every user needs a local pointer named `masm`.
#define __ masm->

// Signature of the generated code invoked by the lane tests: the first
// argument is the pointer to the result struct, the remaining four are passed
// as 0 by the callers in this file.
using F3 = void*(void* p0, int p1, int p2, int p3, int p4);
// Signature of generated code taking five pointers and returning an int. Not
// referenced by the tests visible in this file.
using F5 = int(void*, void*, void*, void*, void*);
|
|
|
|
// Exercises MacroAssembler::ExtractLane for every lane of the 32-, 16- and
// 8-bit integer layouts, for the overload that extracts into an S register,
// and for the overload that takes a D register as its source.
//
// The generated code uses r0 as the base address of the result struct T (the
// test calls it with &t as the first argument). For each lane index i it
// broadcasts a value with vdup, extracts lane i and stores it into T; the
// host side then verifies every stored value. The test is skipped when NEON
// is not supported, and the q15/d30/d31 ("high") half only runs when
// VFP32DREGS is supported.
TEST(ExtractLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);

  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes,
                           buffer->CreateView());
  MacroAssembler* masm = &assembler;  // Create a pointer for the __ macro.

  // Result buffer filled in by the generated code. Every extracted lane is
  // written as one 32-bit word (offset 4 * i via str / vstr), which is why all
  // arrays use int32_t regardless of the lane width being tested.
  struct T {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int32_t i16x8_low[8];
    int32_t i16x8_high[8];
    int32_t i8x16_low[16];
    int32_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
    int32_t i8x16_low_d[16];
    int32_t i8x16_high_d[16];
  };
  T t;

  // Save the scratch registers used below together with the return address.
  __ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit());

  // 32-bit lanes of q1: lane i is expected to hold i. The same broadcast is
  // reused to test the S-register overload; the result is compared as raw
  // bits because f32x4_low is declared as int32_t.
  for (int i = 0; i < 4; i++) {
    __ mov(r4, Operand(i));
    __ vdup(Neon32, q1, r4);
    __ ExtractLane(r5, q1, NeonS32, i);
    __ str(r5, MemOperand(r0, offsetof(T, i32x4_low) + 4 * i));
    SwVfpRegister si = SwVfpRegister::from_code(i);
    __ ExtractLane(si, q1, i);
    __ vstr(si, r0, offsetof(T, f32x4_low) + 4 * i);
  }

  // 16-bit lanes of q1.
  for (int i = 0; i < 8; i++) {
    __ mov(r4, Operand(i));
    __ vdup(Neon16, q1, r4);
    __ ExtractLane(r5, q1, NeonS16, i);
    __ str(r5, MemOperand(r0, offsetof(T, i16x8_low) + 4 * i));
  }

  // 8-bit lanes of q1.
  for (int i = 0; i < 16; i++) {
    __ mov(r4, Operand(i));
    __ vdup(Neon8, q1, r4);
    __ ExtractLane(r5, q1, NeonS8, i);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * i));
  }

  // 8-bit lanes read through the two D registers that make up q1; results
  // from d2 go to slots [0, 8) and results from d3 to slots [8, 16).
  for (int i = 0; i < 8; i++) {
    __ mov(r4, Operand(i));
    __ vdup(Neon8, q1, r4);  // q1 = d2,d3
    __ ExtractLane(r5, d2, NeonS8, i);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * i));
    __ ExtractLane(r5, d3, NeonS8, i);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * (i + 8)));
  }

  // Repeat everything with q15 (d30/d31) and negative values. The host-side
  // checks below expect the full 32-bit value -i for the 16- and 8-bit signed
  // lane types as well.
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int i = 0; i < 4; i++) {
      __ mov(r4, Operand(-i));
      __ vdup(Neon32, q15, r4);
      __ ExtractLane(r5, q15, NeonS32, i);
      __ str(r5, MemOperand(r0, offsetof(T, i32x4_high) + 4 * i));
      SwVfpRegister si = SwVfpRegister::from_code(i);
      __ ExtractLane(si, q15, i);
      __ vstr(si, r0, offsetof(T, f32x4_high) + 4 * i);
    }

    for (int i = 0; i < 8; i++) {
      __ mov(r4, Operand(-i));
      __ vdup(Neon16, q15, r4);
      __ ExtractLane(r5, q15, NeonS16, i);
      __ str(r5, MemOperand(r0, offsetof(T, i16x8_high) + 4 * i));
    }

    for (int i = 0; i < 16; i++) {
      __ mov(r4, Operand(-i));
      __ vdup(Neon8, q15, r4);
      __ ExtractLane(r5, q15, NeonS8, i);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * i));
    }

    for (int i = 0; i < 8; i++) {
      __ mov(r4, Operand(-i));
      __ vdup(Neon8, q15, r4);  // q15 = d30,d31
      __ ExtractLane(r5, d30, NeonS8, i);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * i));
      __ ExtractLane(r5, d31, NeonS8, i);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * (i + 8)));
    }
  }

  // Restore r4/r5 and load the saved return address into pc to return.
  __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());

  CodeDesc desc;
  masm->GetCode(isolate, &desc);
  Handle<Code> code =
      Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef DEBUG
  StdoutStream os;
  code->Print(os);
#endif
  // Run the generated code with &t as its first argument, then verify every
  // slot it wrote.
  auto f = GeneratedCode<F3>::FromCode(*code);
  f.Call(&t, 0, 0, 0, 0);
  for (int i = 0; i < 4; i++) {
    CHECK_EQ(i, t.i32x4_low[i]);
    CHECK_EQ(i, t.f32x4_low[i]);
  }
  for (int i = 0; i < 8; i++) {
    CHECK_EQ(i, t.i16x8_low[i]);
  }
  for (int i = 0; i < 16; i++) {
    CHECK_EQ(i, t.i8x16_low[i]);
  }
  for (int i = 0; i < 8; i++) {
    CHECK_EQ(i, t.i8x16_low_d[i]);
    CHECK_EQ(i, t.i8x16_low_d[i + 8]);
  }
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int i = 0; i < 4; i++) {
      CHECK_EQ(-i, t.i32x4_high[i]);
      CHECK_EQ(-i, t.f32x4_high[i]);
    }
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(-i, t.i16x8_high[i]);
    }
    for (int i = 0; i < 16; i++) {
      CHECK_EQ(-i, t.i8x16_high[i]);
    }
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(-i, t.i8x16_high_d[i]);
      CHECK_EQ(-i, t.i8x16_high_d[i + 8]);
    }
  }
}
|
|
|
|
// Exercises MacroAssembler::ReplaceLane for every lane of the 32-, 16- and
// 8-bit integer layouts and for the overload that takes an S register as the
// new lane value.
//
// The generated code uses r0 as the base address of the result struct T (the
// test calls it with &t as the first argument). For each layout it zeroes a Q
// register, writes a distinct value into every lane with ReplaceLane, and
// then stores the register into the matching field of T with vst1. The test
// is skipped when NEON is not supported, and the q14/q15 ("high") half only
// runs when VFP32DREGS is supported.
TEST(ReplaceLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);

  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes,
                           buffer->CreateView());
  MacroAssembler* masm = &assembler;  // Create a pointer for the __ macro.

  // Result buffer filled in by the generated code. Each field receives the
  // contents of one Q register via vst1, so the element types use the natural
  // lane width. The f32x4 fields are int32_t because the lane values are
  // compared as raw bits.
  struct T {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int16_t i16x8_low[8];
    int16_t i16x8_high[8];
    int8_t i8x16_low[16];
    int8_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
  };
  T t;

  // Save r4-r7 and the return address. r6/r7 are saved and restored although
  // the code emitted below does not reference them.
  __ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit());

  // 32-bit lanes: q0 is filled from the core register r4, q1 from the S
  // register s<i> holding the same bit pattern.
  // NOTE(review): s0-s3 presumably alias q0 on ARM; the vmov then rewrites
  // lane i of q0 with the value ReplaceLane just stored there, so the expected
  // contents of q0 are unaffected - confirm.
  __ veor(q0, q0, q0);  // Zero
  __ veor(q1, q1, q1);  // Zero
  for (int i = 0; i < 4; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS32, i);
    SwVfpRegister si = SwVfpRegister::from_code(i);
    __ vmov(si, r4);
    __ ReplaceLane(q1, q1, si, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_low))));
  __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));

  // 16-bit lanes of q0.
  __ veor(q0, q0, q0);  // Zero
  for (int i = 0; i < 8; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS16, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  // 8-bit lanes of q0.
  __ veor(q0, q0, q0);  // Zero
  for (int i = 0; i < 16; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS8, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  // Repeat with q14/q15 and negative lane values.
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    __ veor(q14, q14, q14);  // Zero
    __ veor(q15, q15, q15);  // Zero
    for (int i = 0; i < 4; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS32, i);
      SwVfpRegister si = SwVfpRegister::from_code(i);
      __ vmov(si, r4);
      __ ReplaceLane(q15, q15, si, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_high))));
    __ vst1(Neon8, NeonListOperand(q15), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int i = 0; i < 8; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS16, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int i = 0; i < 16; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS8, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
  }

  // Restore r4-r7 and load the saved return address into pc to return.
  __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());

  CodeDesc desc;
  masm->GetCode(isolate, &desc);
  Handle<Code> code =
      Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef DEBUG
  StdoutStream os;
  code->Print(os);
#endif
  // Run the generated code with &t as its first argument, then verify every
  // lane it wrote.
  auto f = GeneratedCode<F3>::FromCode(*code);
  f.Call(&t, 0, 0, 0, 0);
  for (int i = 0; i < 4; i++) {
    CHECK_EQ(i, t.i32x4_low[i]);
    CHECK_EQ(i, t.f32x4_low[i]);
  }
  for (int i = 0; i < 8; i++) {
    CHECK_EQ(i, t.i16x8_low[i]);
  }
  for (int i = 0; i < 16; i++) {
    CHECK_EQ(i, t.i8x16_low[i]);
  }
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int i = 0; i < 4; i++) {
      CHECK_EQ(-i, t.i32x4_high[i]);
      CHECK_EQ(-i, t.f32x4_high[i]);
    }
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(-i, t.i16x8_high[i]);
    }
    for (int i = 0; i < 16; i++) {
      CHECK_EQ(-i, t.i8x16_high[i]);
    }
  }
}
|
|
|
|
// Verifies that the code emitted by MacroAssembler::CallForDeoptimization has
// exactly the size the Deoptimizer constants declare, for every
// DeoptimizeKind.
//
// Each kind is emitted after a fresh label, and the number of bytes generated
// since that label is compared against:
//   - kEagerWithResumeDeoptExitSize for kEagerWithResume,
//   - kLazyDeoptExitSize for kLazy,
//   - kNonLazyDeoptExitSize for all remaining kinds.
TEST(DeoptExitSizeIsFixed) {
  CHECK(Deoptimizer::kSupportsFixedDeoptExitSizes);

  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);
  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler masm(isolate, v8::internal::CodeObjectRequired::kYes,
                      buffer->CreateView());

  // The loop enumerates kinds by casting 0..kDeoptimizeKindCount-1, which is
  // only valid if the first kind has value 0.
  STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
  for (int i = 0; i < kDeoptimizeKindCount; i++) {
    DeoptimizeKind kind = static_cast<DeoptimizeKind>(i);
    // Marks the start of this exit so its size can be measured in isolation.
    Label before_exit;
    masm.bind(&before_exit);
    if (kind == DeoptimizeKind::kEagerWithResume) {
      // Eager-with-resume exits call a dedicated builtin selected by deopt
      // reason instead of a generic deoptimization entry.
      Builtins::Name target = Deoptimizer::GetDeoptWithResumeBuiltin(
          DeoptimizeReason::kDynamicMapCheck);
      // 42 is an arbitrary value for the second argument (presumably the
      // deopt id - confirm against CallForDeoptimization's declaration).
      masm.CallForDeoptimization(target, 42, &before_exit, kind, &before_exit,
                                 nullptr);
      CHECK_EQ(masm.SizeOfCodeGeneratedSince(&before_exit),
               Deoptimizer::kEagerWithResumeDeoptExitSize);
    } else {
      Builtins::Name target = Deoptimizer::GetDeoptimizationEntry(kind);
      masm.CallForDeoptimization(target, 42, &before_exit, kind, &before_exit,
                                 nullptr);
      CHECK_EQ(masm.SizeOfCodeGeneratedSince(&before_exit),
               kind == DeoptimizeKind::kLazy
                   ? Deoptimizer::kLazyDeoptExitSize
                   : Deoptimizer::kNonLazyDeoptExitSize);
    }
  }
}
|
|
|
|
#undef __
|
|
|
|
} // namespace test_macro_assembler_arm
|
|
} // namespace internal
|
|
} // namespace v8
|