2013-09-23 08:06:58 +00:00
|
|
|
// Copyright 2013 the V8 project authors. All rights reserved.
|
|
|
|
// Redistribution and use in source and binary forms, with or without
|
|
|
|
// modification, are permitted provided that the following conditions are
|
|
|
|
// met:
|
|
|
|
//
|
|
|
|
// * Redistributions of source code must retain the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer.
|
|
|
|
// * Redistributions in binary form must reproduce the above
|
|
|
|
// copyright notice, this list of conditions and the following
|
|
|
|
// disclaimer in the documentation and/or other materials provided
|
|
|
|
// with the distribution.
|
|
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
|
|
// contributors may be used to endorse or promote products derived
|
|
|
|
// from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2019-05-21 09:30:15 +00:00
|
|
|
#include "src/codegen/assembler-inl.h"
|
|
|
|
#include "src/codegen/macro-assembler.h"
|
Reland "[deoptimizer] Change deopt entries into builtins"
This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33
It fixes the different exit size emitted on x64/Atom CPUs due to
performance tuning in TurboAssembler::Call. Additionally, add
cctests to verify the fixed size exits.
Original change's description:
> [deoptimizer] Change deopt entries into builtins
>
> While the overall goal of this commit is to change deoptimization
> entries into builtins, there are multiple related things happening:
>
> - Deoptimization entries, formerly stubs (i.e. Code objects generated
> at runtime, guaranteed to be immovable), have been converted into
> builtins. The major restriction is that we now need to preserve the
> kRootRegister, which was formerly used on most architectures to pass
> the deoptimization id. The solution differs based on platform.
> - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING.
> - Removed heap/ support for immovable Code generation.
> - Removed the DeserializerData class (no longer needed).
> - arm64: to preserve 4-byte deopt exits, introduced a new optimization
> in which the final jump to the deoptimization entry is generated
> once per Code object, and deopt exits can continue to emit a
> near-call.
> - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit
> sizes by 4/8, 5, and 5 bytes, respectively.
>
> On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes
> by using the same strategy as on arm64 (recalc deopt id from return
> address). Before:
>
> e300a002 movw r10, <id>
> e59fc024 ldr ip, [pc, <entry offset>]
> e12fff3c blx ip
>
> After:
>
> e59acb35 ldr ip, [r10, <entry offset>]
> e12fff3c blx ip
>
> On arm64 the deopt exit size remains 4 bytes (or 8 bytes in some cases
> with CFI). Additionally, up to 4 builtin jumps are emitted per Code
> object (max 32 bytes added overhead per Code object). Before:
>
> 9401cdae bl <entry offset>
>
> After:
>
> # eager deoptimization entry jump.
> f95b1f50 ldr x16, [x26, <eager entry offset>]
> d61f0200 br x16
> # lazy deoptimization entry jump.
> f95b2b50 ldr x16, [x26, <lazy entry offset>]
> d61f0200 br x16
> # the deopt exit.
> 97fffffc bl <eager deoptimization entry jump offset>
>
> On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before:
>
> bb00000000 mov ebx,<id>
> e825f5372b call <entry>
>
> After:
>
> e8ea2256ba call <entry>
>
> On x64 the deopt exit size is reduced from 12 to 7 bytes. Before:
>
> 49c7c511000000 REX.W movq r13,<id>
> e8ea2f0700 call <entry>
>
> After:
>
> 41ff9560360000 call [r13+<entry offset>]
>
> Bug: v8:8661,v8:8768
> Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#70597}
Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org
Bug: v8:8661,v8:8768,chromium:1140165
Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70655}
2020-10-20 11:41:06 +00:00
|
|
|
#include "src/deoptimizer/deoptimizer.h"
|
2019-05-22 07:55:37 +00:00
|
|
|
#include "src/execution/simulator.h"
|
2019-05-24 13:51:59 +00:00
|
|
|
#include "src/init/v8.h"
|
2019-05-23 08:51:46 +00:00
|
|
|
#include "src/objects/objects-inl.h"
|
2019-05-24 13:51:59 +00:00
|
|
|
#include "src/utils/ostreams.h"
|
2014-06-20 08:40:11 +00:00
|
|
|
#include "test/cctest/cctest.h"
|
Reland "[turboassembler] Introduce hard-abort mode"
This is a reland of a462a7854a081f4f34bb4c112ee33f3d69efa309
Original change's description:
> [turboassembler] Introduce hard-abort mode
>
> For checks and assertions (mostly for debug code, like stack alignment
> or zero extension), we had two modes: Emit a call to the {Abort}
> runtime function (the default), and emit a debug break (used for
> testing, enabled via --trap-on-abort).
> In wasm, where we cannot just call a runtime function because code must
> be isolate independent, we always used the trap-on-abort behaviour.
> This causes problems for our fuzzers, which do not catch SIGTRAP, and
> hence do not detect debug code failures.
>
> This CL introduces a third mode ("hard abort"), which calls a C
> function via {ExternalReference}. The C function still outputs the
> abort reason, but does not print the stack trace. It then aborts via
> "OS::Abort", just like the runtime function.
> This will allow fuzzers to detect the crash and even find a nice error
> message.
>
> Even though this looks like a lot of code churn, it is actually not.
> Most added lines are new tests, and other changes are minimal.
>
> R=mstarzinger@chromium.org
>
> Bug: chromium:863799
> Change-Id: I77c58ff72db552d49014614436259ccfb49ba87b
> Reviewed-on: https://chromium-review.googlesource.com/1142163
> Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
> Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#54592}
Bug: chromium:863799
Change-Id: I7729a47b4823a982a8e201df36520aa2b6ef5326
Reviewed-on: https://chromium-review.googlesource.com/1146100
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54656}
2018-07-24 14:12:47 +00:00
|
|
|
#include "test/common/assembler-tester.h"
|
2014-06-03 08:12:43 +00:00
|
|
|
|
2017-08-31 12:34:55 +00:00
|
|
|
namespace v8 {
|
|
|
|
namespace internal {
|
2017-10-02 21:19:21 +00:00
|
|
|
namespace test_macro_assembler_arm {
|
2013-09-23 08:06:58 +00:00
|
|
|
|
2018-01-09 09:50:34 +00:00
|
|
|
// Function type taking five ints and returning a pointer.
// NOTE(review): no test visible in this file references F — confirm whether
// it is still needed.
using F = void*(int x, int y, int p2, int p3, int p4);
|
2013-09-23 08:06:58 +00:00
|
|
|
|
|
|
|
// Shorthand for emitting instructions: `__ foo(...)` expands to
// `masm->foo(...)`, so any code using it needs a pointer named `masm` in
// scope. Undefined again at the end of the file.
#define __ masm->
|
|
|
|
|
2018-12-25 00:19:47 +00:00
|
|
|
// Signature used to invoke the generated code in the lane tests below: p0
// receives the address of the test's result struct, the int arguments are
// passed as 0.
using F3 = void*(void* p0, int p1, int p2, int p3, int p4);
|
2018-01-09 09:50:34 +00:00
|
|
|
// Function type taking five pointers and returning an int.
// NOTE(review): no test visible in this file references F5 — confirm whether
// it is still needed.
using F5 = int(void*, void*, void*, void*, void*);
|
2013-11-08 17:35:58 +00:00
|
|
|
|
2016-12-15 18:15:23 +00:00
|
|
|
// Exercises MacroAssembler::ExtractLane. The generated routine splats a known
// value into a NEON register, extracts one lane at a time and stores each
// extracted value into the result struct whose address arrives in r0. The
// host then compares every stored slot with the value that was splatted.
// "low" fields use q1 (d2/d3); "high" fields use q15 (d30/d31) and are only
// exercised when VFP32DREGS is available.
TEST(ExtractLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);

  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes,
                           buffer->CreateView());
  MacroAssembler* masm = &assembler;  // The __ macro expands to `masm->`.

  // Lane counts for the 32-, 16- and 8-bit views of a Q register, and for the
  // 8-bit view of a single D register.
  constexpr int kLanes32 = 4;
  constexpr int kLanes16 = 8;
  constexpr int kLanes8 = 16;
  constexpr int kLanes8PerD = 8;

  // Every extracted lane gets its own int32_t slot; the stores below advance
  // by 4 bytes per lane regardless of the lane width.
  struct T {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int32_t i16x8_low[8];
    int32_t i16x8_high[8];
    int32_t i8x16_low[16];
    int32_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
    int32_t i8x16_low_d[16];
    int32_t i8x16_high_d[16];
  };
  T t;

  // Same feature query is used for code emission and for result checking.
  const bool has_high_regs = CpuFeatures::IsSupported(VFP32DREGS);

  __ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit());

  // 32-bit lanes of q1, extracted both into a core register and into s<lane>.
  for (int lane = 0; lane < kLanes32; ++lane) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon32, q1, r4);
    __ ExtractLane(r5, q1, NeonS32, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i32x4_low) + 4 * lane));
    SwVfpRegister s_reg = SwVfpRegister::from_code(lane);
    __ ExtractLane(s_reg, q1, lane);
    __ vstr(s_reg, r0, offsetof(T, f32x4_low) + 4 * lane);
  }

  // 16-bit lanes of q1.
  for (int lane = 0; lane < kLanes16; ++lane) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon16, q1, r4);
    __ ExtractLane(r5, q1, NeonS16, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i16x8_low) + 4 * lane));
  }

  // 8-bit lanes of q1.
  for (int lane = 0; lane < kLanes8; ++lane) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon8, q1, r4);
    __ ExtractLane(r5, q1, NeonS8, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * lane));
  }

  // 8-bit lanes extracted through the D-register halves of q1.
  for (int lane = 0; lane < kLanes8PerD; ++lane) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon8, q1, r4);  // q1 = d2,d3
    __ ExtractLane(r5, d2, NeonS8, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * lane));
    __ ExtractLane(r5, d3, NeonS8, lane);
    __ str(r5,
           MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * (lane + kLanes8PerD)));
  }

  if (has_high_regs) {
    // Same sequence against q15, splatting the negated lane index.
    for (int lane = 0; lane < kLanes32; ++lane) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon32, q15, r4);
      __ ExtractLane(r5, q15, NeonS32, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i32x4_high) + 4 * lane));
      SwVfpRegister s_reg = SwVfpRegister::from_code(lane);
      __ ExtractLane(s_reg, q15, lane);
      __ vstr(s_reg, r0, offsetof(T, f32x4_high) + 4 * lane);
    }

    for (int lane = 0; lane < kLanes16; ++lane) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon16, q15, r4);
      __ ExtractLane(r5, q15, NeonS16, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i16x8_high) + 4 * lane));
    }

    for (int lane = 0; lane < kLanes8; ++lane) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon8, q15, r4);
      __ ExtractLane(r5, q15, NeonS8, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * lane));
    }

    for (int lane = 0; lane < kLanes8PerD; ++lane) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon8, q15, r4);  // q15 = d30,d31
      __ ExtractLane(r5, d30, NeonS8, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * lane));
      __ ExtractLane(r5, d31, NeonS8, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) +
                                    4 * (lane + kLanes8PerD)));
    }
  }

  // Restore the saved registers; loading pc returns to the caller.
  __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());

  CodeDesc desc;
  masm->GetCode(isolate, &desc);
  Handle<Code> code =
      Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef DEBUG
  StdoutStream os;
  code->Print(os);
#endif
  auto entry = GeneratedCode<F3>::FromCode(*code);
  entry.Call(&t, 0, 0, 0, 0);

  // Low registers: every slot must hold the lane index that was splatted.
  for (int lane = 0; lane < kLanes32; ++lane) {
    CHECK_EQ(lane, t.i32x4_low[lane]);
    CHECK_EQ(lane, t.f32x4_low[lane]);
  }
  for (int lane = 0; lane < kLanes16; ++lane) {
    CHECK_EQ(lane, t.i16x8_low[lane]);
  }
  for (int lane = 0; lane < kLanes8; ++lane) {
    CHECK_EQ(lane, t.i8x16_low[lane]);
  }
  for (int lane = 0; lane < kLanes8PerD; ++lane) {
    CHECK_EQ(lane, t.i8x16_low_d[lane]);
    CHECK_EQ(lane, t.i8x16_low_d[lane + kLanes8PerD]);
  }

  // High registers: every slot must hold the negated lane index.
  if (has_high_regs) {
    for (int lane = 0; lane < kLanes32; ++lane) {
      CHECK_EQ(-lane, t.i32x4_high[lane]);
      CHECK_EQ(-lane, t.f32x4_high[lane]);
    }
    for (int lane = 0; lane < kLanes16; ++lane) {
      CHECK_EQ(-lane, t.i16x8_high[lane]);
    }
    for (int lane = 0; lane < kLanes8; ++lane) {
      CHECK_EQ(-lane, t.i8x16_high[lane]);
    }
    for (int lane = 0; lane < kLanes8PerD; ++lane) {
      CHECK_EQ(-lane, t.i8x16_high_d[lane]);
      CHECK_EQ(-lane, t.i8x16_high_d[lane + kLanes8PerD]);
    }
  }
}
|
|
|
|
|
|
|
|
// Exercises MacroAssembler::ReplaceLane. The generated routine zeroes a NEON
// Q register, overwrites each lane with a known value via ReplaceLane, then
// stores the whole register into the result struct whose address arrives in
// r0. The host compares every element with the value written to that lane.
// "low" fields use q0/q1; "high" fields use q14/q15 and are only exercised
// when VFP32DREGS is available.
TEST(ReplaceLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);

  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes,
                           buffer->CreateView());
  MacroAssembler* masm = &assembler;  // Create a pointer for the __ macro.

  // Each field is exactly 16 bytes, i.e. one Q register written by vst1.
  struct T {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int16_t i16x8_low[8];
    int16_t i16x8_high[8];
    int8_t i8x16_low[16];
    int8_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
  };
  T t;

  // First S register used to stage the scalar for the S-register flavour of
  // ReplaceLane. s0..s3 are the four 32-bit lanes of q0, so staging lane i in
  // s<i> would itself write value i into lane i of q0 and let the i32x4_low
  // check pass even if the integer ReplaceLane on q0 were broken. s8..s11
  // (q2) alias neither q0 nor q1 and are not otherwise used by this routine.
  constexpr int kStagingSRegBase = 8;

  __ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit());

  // 32-bit lanes: q0 is filled from a core register, q1 from an S register.
  __ veor(q0, q0, q0);  // Zero
  __ veor(q1, q1, q1);  // Zero
  for (int i = 0; i < 4; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS32, i);
    SwVfpRegister si = SwVfpRegister::from_code(kStagingSRegBase + i);
    __ vmov(si, r4);
    __ ReplaceLane(q1, q1, si, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_low))));
  __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));

  // 16-bit lanes of q0.
  __ veor(q0, q0, q0);  // Zero
  for (int i = 0; i < 8; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS16, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  // 8-bit lanes of q0.
  __ veor(q0, q0, q0);  // Zero
  for (int i = 0; i < 16; i++) {
    __ mov(r4, Operand(i));
    __ ReplaceLane(q0, q0, r4, NeonS8, i);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Same sequence against q14/q15, writing the negated lane index.
    __ veor(q14, q14, q14);  // Zero
    __ veor(q15, q15, q15);  // Zero
    for (int i = 0; i < 4; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS32, i);
      SwVfpRegister si = SwVfpRegister::from_code(kStagingSRegBase + i);
      __ vmov(si, r4);
      __ ReplaceLane(q15, q15, si, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_high))));
    __ vst1(Neon8, NeonListOperand(q15), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int i = 0; i < 8; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS16, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int i = 0; i < 16; i++) {
      __ mov(r4, Operand(-i));
      __ ReplaceLane(q14, q14, r4, NeonS8, i);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
  }

  // Restore the saved registers; loading pc returns to the caller.
  __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());

  CodeDesc desc;
  masm->GetCode(isolate, &desc);
  Handle<Code> code =
      Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef DEBUG
  StdoutStream os;
  code->Print(os);
#endif
  auto f = GeneratedCode<F3>::FromCode(*code);
  f.Call(&t, 0, 0, 0, 0);

  // Low registers: element i must hold i.
  for (int i = 0; i < 4; i++) {
    CHECK_EQ(i, t.i32x4_low[i]);
    CHECK_EQ(i, t.f32x4_low[i]);
  }
  for (int i = 0; i < 8; i++) {
    CHECK_EQ(i, t.i16x8_low[i]);
  }
  for (int i = 0; i < 16; i++) {
    CHECK_EQ(i, t.i8x16_low[i]);
  }

  // High registers: element i must hold -i.
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int i = 0; i < 4; i++) {
      CHECK_EQ(-i, t.i32x4_high[i]);
      CHECK_EQ(-i, t.f32x4_high[i]);
    }
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(-i, t.i16x8_high[i]);
    }
    for (int i = 0; i < 16; i++) {
      CHECK_EQ(-i, t.i8x16_high[i]);
    }
  }
}
|
|
|
|
|
Reland "[deoptimizer] Change deopt entries into builtins"
This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33
It fixes the different exit size emitted on x64/Atom CPUs due to
performance tuning in TurboAssembler::Call. Additionally, add
cctests to verify the fixed size exits.
Original change's description:
> [deoptimizer] Change deopt entries into builtins
>
> While the overall goal of this commit is to change deoptimization
> entries into builtins, there are multiple related things happening:
>
> - Deoptimization entries, formerly stubs (i.e. Code objects generated
> at runtime, guaranteed to be immovable), have been converted into
> builtins. The major restriction is that we now need to preserve the
> kRootRegister, which was formerly used on most architectures to pass
> the deoptimization id. The solution differs based on platform.
> - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING.
> - Removed heap/ support for immovable Code generation.
> - Removed the DeserializerData class (no longer needed).
> - arm64: to preserve 4-byte deopt exits, introduced a new optimization
> in which the final jump to the deoptimization entry is generated
> once per Code object, and deopt exits can continue to emit a
> near-call.
> - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit
> sizes by 4/8, 5, and 5 bytes, respectively.
>
> On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes
> by using the same strategy as on arm64 (recalc deopt id from return
> address). Before:
>
> e300a002 movw r10, <id>
> e59fc024 ldr ip, [pc, <entry offset>]
> e12fff3c blx ip
>
> After:
>
> e59acb35 ldr ip, [r10, <entry offset>]
> e12fff3c blx ip
>
> On arm64 the deopt exit size remains 4 bytes (or 8 bytes in some cases
> with CFI). Additionally, up to 4 builtin jumps are emitted per Code
> object (max 32 bytes added overhead per Code object). Before:
>
> 9401cdae bl <entry offset>
>
> After:
>
> # eager deoptimization entry jump.
> f95b1f50 ldr x16, [x26, <eager entry offset>]
> d61f0200 br x16
> # lazy deoptimization entry jump.
> f95b2b50 ldr x16, [x26, <lazy entry offset>]
> d61f0200 br x16
> # the deopt exit.
> 97fffffc bl <eager deoptimization entry jump offset>
>
> On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before:
>
> bb00000000 mov ebx,<id>
> e825f5372b call <entry>
>
> After:
>
> e8ea2256ba call <entry>
>
> On x64 the deopt exit size is reduced from 12 to 7 bytes. Before:
>
> 49c7c511000000 REX.W movq r13,<id>
> e8ea2f0700 call <entry>
>
> After:
>
> 41ff9560360000 call [r13+<entry offset>]
>
> Bug: v8:8661,v8:8768
> Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#70597}
Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org
Bug: v8:8661,v8:8768,chromium:1140165
Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70655}
2020-10-20 11:41:06 +00:00
|
|
|
// For every DeoptimizeKind, emits a deoptimization exit with
// CallForDeoptimization and checks that the number of bytes generated equals
// the exit size the Deoptimizer declares for that kind.
TEST(DeoptExitSizeIsFixed) {
  CHECK(Deoptimizer::kSupportsFixedDeoptExitSizes);

  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);
  auto buffer = AllocateAssemblerBuffer();
  MacroAssembler masm(isolate, v8::internal::CodeObjectRequired::kYes,
                      buffer->CreateView());

  // The loop enumerates kinds by casting 0..kDeoptimizeKindCount-1, which
  // relies on the first kind having the value 0.
  STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
  for (int kind_index = 0; kind_index < kDeoptimizeKindCount; ++kind_index) {
    const DeoptimizeKind kind = static_cast<DeoptimizeKind>(kind_index);
    const Builtins::Name target =
        Deoptimizer::GetDeoptimizationEntry(isolate, kind);
    // Lazy exits have their own declared size; all other kinds share one.
    const auto expected_size = (kind == DeoptimizeKind::kLazy)
                                   ? Deoptimizer::kLazyDeoptExitSize
                                   : Deoptimizer::kNonLazyDeoptExitSize;

    // Mark the start of the exit so its size can be measured afterwards.
    Label exit_start;
    masm.bind(&exit_start);
    masm.CallForDeoptimization(target, 42, &exit_start, kind, &exit_start);
    CHECK_EQ(masm.SizeOfCodeGeneratedSince(&exit_start), expected_size);
  }
}
|
|
|
|
|
2013-09-23 08:06:58 +00:00
|
|
|
#undef __
|
2017-08-31 12:34:55 +00:00
|
|
|
|
2017-10-02 21:19:21 +00:00
|
|
|
} // namespace test_macro_assembler_arm
|
2017-08-31 12:34:55 +00:00
|
|
|
} // namespace internal
|
|
|
|
} // namespace v8
|