v8/test/cctest/test-macro-assembler-mips64.cc

1728 lines
58 KiB
C++
Raw Normal View History

// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <iostream>
#include "src/base/utils/random-number-generator.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/simulator.h"
#include "src/init/v8.h"
#include "src/objects/objects-inl.h"
#include "src/utils/ostreams.h"
#include "test/cctest/cctest.h"
#include "test/common/assembler-tester.h"
namespace v8 {
namespace internal {
// TODO(mips64): Refine these signatures per test case.
using FV = void*(int64_t x, int64_t y, int p2, int p3, int p4);
using F1 = void*(int x, int p1, int p2, int p3, int p4);
using F3 = void*(void* p, int p1, int p2, int p3, int p4);
using F4 = void*(void* p0, void* p1, int p2, int p3, int p4);
#define __ masm->
TEST(BYTESWAP) {
DCHECK(kArchVariant == kMips64r6 || kArchVariant == kMips64r2);
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
struct T {
uint64_t s8;
uint64_t s4;
uint64_t s2;
uint64_t u4;
uint64_t u2;
};
T t;
uint64_t test_values[] = {0x5612FFCD9D327ACC,
0x781A15C3,
0xFCDE,
0x9F,
0xC81A15C3,
0x8000000000000000,
0xFFFFFFFFFFFFFFFF,
0x0000000080000000,
0x0000000000008000};
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ Ld(a4, MemOperand(a0, offsetof(T, s8)));
__ nop();
__ ByteSwapSigned(a4, a4, 8);
__ Sd(a4, MemOperand(a0, offsetof(T, s8)));
__ Ld(a4, MemOperand(a0, offsetof(T, s4)));
__ nop();
__ ByteSwapSigned(a4, a4, 4);
__ Sd(a4, MemOperand(a0, offsetof(T, s4)));
__ Ld(a4, MemOperand(a0, offsetof(T, s2)));
__ nop();
__ ByteSwapSigned(a4, a4, 2);
__ Sd(a4, MemOperand(a0, offsetof(T, s2)));
__ Ld(a4, MemOperand(a0, offsetof(T, u4)));
__ nop();
__ ByteSwapUnsigned(a4, a4, 4);
__ Sd(a4, MemOperand(a0, offsetof(T, u4)));
__ Ld(a4, MemOperand(a0, offsetof(T, u2)));
__ nop();
__ ByteSwapUnsigned(a4, a4, 2);
__ Sd(a4, MemOperand(a0, offsetof(T, u2)));
__ jr(ra);
__ nop();
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F3>::FromCode(*code);
for (size_t i = 0; i < arraysize(test_values); i++) {
int32_t in_s4 = static_cast<int32_t>(test_values[i]);
int16_t in_s2 = static_cast<int16_t>(test_values[i]);
uint32_t in_u4 = static_cast<uint32_t>(test_values[i]);
uint16_t in_u2 = static_cast<uint16_t>(test_values[i]);
t.s8 = test_values[i];
t.s4 = static_cast<uint64_t>(in_s4);
t.s2 = static_cast<uint64_t>(in_s2);
t.u4 = static_cast<uint64_t>(in_u4);
t.u2 = static_cast<uint64_t>(in_u2);
f.Call(&t, 0, 0, 0, 0);
CHECK_EQ(ByteReverse<uint64_t>(test_values[i]), t.s8);
CHECK_EQ(ByteReverse<int32_t>(in_s4), static_cast<int32_t>(t.s4));
CHECK_EQ(ByteReverse<int16_t>(in_s2), static_cast<int16_t>(t.s2));
CHECK_EQ(ByteReverse<uint32_t>(in_u4), static_cast<uint32_t>(t.u4));
CHECK_EQ(ByteReverse<uint16_t>(in_u2), static_cast<uint16_t>(t.u2));
}
}
TEST(LoadConstants) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
int64_t refConstants[64];
int64_t result[64];
int64_t mask = 1;
for (int i = 0; i < 64; i++) {
refConstants[i] = ~(mask << i);
}
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ mov(a4, a0);
for (int i = 0; i < 64; i++) {
// Load constant.
__ li(a5, Operand(refConstants[i]));
__ Sd(a5, MemOperand(a4));
__ Daddu(a4, a4, Operand(kPointerSize));
}
__ jr(ra);
__ nop();
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<FV>::FromCode(*code);
(void)f.Call(reinterpret_cast<int64_t>(result), 0, 0, 0, 0);
// Check results.
for (int i = 0; i < 64; i++) {
CHECK(refConstants[i] == result[i]);
}
}
TEST(LoadAddress) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
Label to_jump, skip;
__ mov(a4, a0);
__ Branch(&skip);
__ bind(&to_jump);
__ nop();
__ nop();
__ jr(ra);
__ nop();
__ bind(&skip);
__ li(a4, Operand(masm->jump_address(&to_jump)), ADDRESS_LOAD);
int check_size = masm->InstructionsGeneratedSince(&skip);
CHECK_EQ(4, check_size);
__ jr(a4);
__ nop();
__ stop();
__ stop();
__ stop();
__ stop();
__ stop();
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<FV>::FromCode(*code);
(void)f.Call(0, 0, 0, 0, 0);
// Check results.
}
TEST(jump_tables4) {
// Similar to test-assembler-mips jump_tables1, with extra test for branch
// trampoline required before emission of the dd table (where trampolines are
// blocked), and proper transition to long-branch mode.
// Regression test for v8:4294.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
const int kNumCases = 512;
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
Label near_start, end, done;
__ Push(ra);
__ mov(v0, zero_reg);
__ Branch(&end);
__ bind(&near_start);
// Generate slightly less than 32K instructions, which will soon require
// trampoline for branch distance fixup.
for (int i = 0; i < 32768 - 256; ++i) {
__ addiu(v0, v0, 1);
}
__ GenerateSwitchTable(a0, kNumCases,
[&labels](size_t i) { return labels + i; });
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ li(v0, values[i]);
__ Branch(&done);
}
__ bind(&done);
__ Pop(ra);
__ jr(ra);
__ nop();
__ bind(&end);
__ Branch(&near_start);
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
auto f = GeneratedCode<F1>::FromCode(*code);
for (int i = 0; i < kNumCases; ++i) {
int64_t res = reinterpret_cast<int64_t>(f.Call(i, 0, 0, 0, 0));
::printf("f(%d) = %" PRId64 "\n", i, res);
CHECK_EQ(values[i], res);
}
}
TEST(jump_tables5) {
if (kArchVariant != kMips64r6) return;
// Similar to test-assembler-mips jump_tables1, with extra test for emitting a
// compact branch instruction before emission of the dd table.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
const int kNumCases = 512;
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
Label done;
__ Push(ra);
// Opposite of Align(8) as we have unaligned number of instructions in the
// following block before the first dd().
if ((masm->pc_offset() & 7) == 0) {
__ nop();
}
{
__ BlockTrampolinePoolFor(kNumCases * 2 + 6 + 1);
__ addiupc(at, 6 + 1);
__ Dlsa(at, at, a0, 3);
__ Ld(at, MemOperand(at));
__ jalr(at);
__ nop(); // Branch delay slot nop.
__ bc(&done);
// A nop instruction must be generated by the forbidden slot guard
// (Assembler::dd(Label*)) so the first label goes to an 8 bytes aligned
// location.
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ li(v0, values[i]);
__ jr(ra);
__ nop();
}
__ bind(&done);
__ Pop(ra);
__ jr(ra);
__ nop();
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
auto f = GeneratedCode<F1>::FromCode(*code);
for (int i = 0; i < kNumCases; ++i) {
int64_t res = reinterpret_cast<int64_t>(f.Call(i, 0, 0, 0, 0));
::printf("f(%d) = %" PRId64 "\n", i, res);
CHECK_EQ(values[i], res);
}
}
TEST(jump_tables6) {
// Similar to test-assembler-mips jump_tables1, with extra test for branch
// trampoline required after emission of the dd table (where trampolines are
// blocked). This test checks if number of really generated instructions is
// greater than number of counted instructions from code, as we are expecting
// generation of trampoline in this case (when number of kFillInstr
// instructions is close to 32K)
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
const int kSwitchTableCases = 40;
const int kMaxBranchOffset = Assembler::kMaxBranchOffset;
const int kTrampolineSlotsSize = Assembler::kTrampolineSlotsSize;
const int kSwitchTablePrologueSize = MacroAssembler::kSwitchTablePrologueSize;
const int kMaxOffsetForTrampolineStart =
kMaxBranchOffset - 16 * kTrampolineSlotsSize;
const int kFillInstr = (kMaxOffsetForTrampolineStart / kInstrSize) -
(kSwitchTablePrologueSize + 2 * kSwitchTableCases) -
20;
int values[kSwitchTableCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kSwitchTableCases];
Label near_start, end, done;
__ Push(ra);
__ mov(v0, zero_reg);
int offs1 = masm->pc_offset();
int gen_insn = 0;
__ Branch(&end);
gen_insn += Assembler::IsCompactBranchSupported() ? 1 : 2;
__ bind(&near_start);
// Generate slightly less than 32K instructions, which will soon require
// trampoline for branch distance fixup.
for (int i = 0; i < kFillInstr; ++i) {
__ addiu(v0, v0, 1);
}
gen_insn += kFillInstr;
__ GenerateSwitchTable(a0, kSwitchTableCases,
[&labels](size_t i) { return labels + i; });
gen_insn += (kSwitchTablePrologueSize + 2 * kSwitchTableCases);
for (int i = 0; i < kSwitchTableCases; ++i) {
__ bind(&labels[i]);
__ li(v0, values[i]);
__ Branch(&done);
}
gen_insn +=
((Assembler::IsCompactBranchSupported() ? 3 : 4) * kSwitchTableCases);
// If offset from here to first branch instr is greater than max allowed
// offset for trampoline ...
CHECK_LT(kMaxOffsetForTrampolineStart, masm->pc_offset() - offs1);
// ... number of generated instructions must be greater then "gen_insn",
// as we are expecting trampoline generation
CHECK_LT(gen_insn, (masm->pc_offset() - offs1) / kInstrSize);
__ bind(&done);
__ Pop(ra);
__ jr(ra);
__ nop();
__ bind(&end);
__ Branch(&near_start);
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
auto f = GeneratedCode<F1>::FromCode(*code);
for (int i = 0; i < kSwitchTableCases; ++i) {
int64_t res = reinterpret_cast<int64_t>(f.Call(i, 0, 0, 0, 0));
::printf("f(%d) = %" PRId64 "\n", i, res);
CHECK_EQ(values[i], res);
}
}
static uint64_t run_lsa(uint32_t rt, uint32_t rs, int8_t sa) {
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ Lsa(v0, a0, a1, sa);
__ jr(ra);
__ nop();
CodeDesc desc;
assembler.GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F1>::FromCode(*code);
uint64_t res = reinterpret_cast<uint64_t>(f.Call(rt, rs, 0, 0, 0));
return res;
}
TEST(Lsa) {
CcTest::InitializeVM();
struct TestCaseLsa {
int32_t rt;
int32_t rs;
uint8_t sa;
uint64_t expected_res;
};
struct TestCaseLsa tc[] = {// rt, rs, sa, expected_res
{0x4, 0x1, 1, 0x6},
{0x4, 0x1, 2, 0x8},
{0x4, 0x1, 3, 0xC},
{0x4, 0x1, 4, 0x14},
{0x4, 0x1, 5, 0x24},
{0x0, 0x1, 1, 0x2},
{0x0, 0x1, 2, 0x4},
{0x0, 0x1, 3, 0x8},
{0x0, 0x1, 4, 0x10},
{0x0, 0x1, 5, 0x20},
{0x4, 0x0, 1, 0x4},
{0x4, 0x0, 2, 0x4},
{0x4, 0x0, 3, 0x4},
{0x4, 0x0, 4, 0x4},
{0x4, 0x0, 5, 0x4},
// Shift overflow.
{0x4, INT32_MAX, 1, 0x2},
{0x4, INT32_MAX >> 1, 2, 0x0},
{0x4, INT32_MAX >> 2, 3, 0xFFFFFFFFFFFFFFFC},
{0x4, INT32_MAX >> 3, 4, 0xFFFFFFFFFFFFFFF4},
{0x4, INT32_MAX >> 4, 5, 0xFFFFFFFFFFFFFFE4},
// Signed addition overflow.
{INT32_MAX - 1, 0x1, 1, 0xFFFFFFFF80000000},
{INT32_MAX - 3, 0x1, 2, 0xFFFFFFFF80000000},
{INT32_MAX - 7, 0x1, 3, 0xFFFFFFFF80000000},
{INT32_MAX - 15, 0x1, 4, 0xFFFFFFFF80000000},
{INT32_MAX - 31, 0x1, 5, 0xFFFFFFFF80000000},
// Addition overflow.
{-2, 0x1, 1, 0x0},
{-4, 0x1, 2, 0x0},
{-8, 0x1, 3, 0x0},
{-16, 0x1, 4, 0x0},
{-32, 0x1, 5, 0x0}};
size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseLsa);
for (size_t i = 0; i < nr_test_cases; ++i) {
uint64_t res = run_lsa(tc[i].rt, tc[i].rs, tc[i].sa);
PrintF("0x%" PRIx64 " =? 0x%" PRIx64 " == Lsa(v0, %x, %x, %hhu)\n",
tc[i].expected_res, res, tc[i].rt, tc[i].rs, tc[i].sa);
CHECK_EQ(tc[i].expected_res, res);
}
}
static uint64_t run_dlsa(uint64_t rt, uint64_t rs, int8_t sa) {
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ Dlsa(v0, a0, a1, sa);
__ jr(ra);
__ nop();
CodeDesc desc;
assembler.GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<FV>::FromCode(*code);
uint64_t res = reinterpret_cast<uint64_t>(f.Call(rt, rs, 0, 0, 0));
return res;
}
TEST(Dlsa) {
CcTest::InitializeVM();
struct TestCaseLsa {
int64_t rt;
int64_t rs;
uint8_t sa;
uint64_t expected_res;
};
struct TestCaseLsa tc[] = {// rt, rs, sa, expected_res
{0x4, 0x1, 1, 0x6},
{0x4, 0x1, 2, 0x8},
{0x4, 0x1, 3, 0xC},
{0x4, 0x1, 4, 0x14},
{0x4, 0x1, 5, 0x24},
{0x0, 0x1, 1, 0x2},
{0x0, 0x1, 2, 0x4},
{0x0, 0x1, 3, 0x8},
{0x0, 0x1, 4, 0x10},
{0x0, 0x1, 5, 0x20},
{0x4, 0x0, 1, 0x4},
{0x4, 0x0, 2, 0x4},
{0x4, 0x0, 3, 0x4},
{0x4, 0x0, 4, 0x4},
{0x4, 0x0, 5, 0x4},
// Shift overflow.
{0x4, INT64_MAX, 1, 0x2},
{0x4, INT64_MAX >> 1, 2, 0x0},
{0x4, INT64_MAX >> 2, 3, 0xFFFFFFFFFFFFFFFC},
{0x4, INT64_MAX >> 3, 4, 0xFFFFFFFFFFFFFFF4},
{0x4, INT64_MAX >> 4, 5, 0xFFFFFFFFFFFFFFE4},
// Signed addition overflow.
{INT64_MAX - 1, 0x1, 1, 0x8000000000000000},
{INT64_MAX - 3, 0x1, 2, 0x8000000000000000},
{INT64_MAX - 7, 0x1, 3, 0x8000000000000000},
{INT64_MAX - 15, 0x1, 4, 0x8000000000000000},
{INT64_MAX - 31, 0x1, 5, 0x8000000000000000},
// Addition overflow.
{-2, 0x1, 1, 0x0},
{-4, 0x1, 2, 0x0},
{-8, 0x1, 3, 0x0},
{-16, 0x1, 4, 0x0},
{-32, 0x1, 5, 0x0}};
size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseLsa);
for (size_t i = 0; i < nr_test_cases; ++i) {
uint64_t res = run_dlsa(tc[i].rt, tc[i].rs, tc[i].sa);
PrintF("0x%" PRIx64 " =? 0x%" PRIx64 " == Dlsa(v0, %" PRIx64 ", %" PRIx64
", %hhu)\n",
tc[i].expected_res, res, tc[i].rt, tc[i].rs, tc[i].sa);
CHECK_EQ(tc[i].expected_res, res);
}
}
static const std::vector<uint32_t> cvt_trunc_uint32_test_values() {
static const uint32_t kValues[] = {0x00000000, 0x00000001, 0x00FFFF00,
0x7FFFFFFF, 0x80000000, 0x80000001,
0x80FFFF00, 0x8FFFFFFF, 0xFFFFFFFF};
return std::vector<uint32_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
static const std::vector<int32_t> cvt_trunc_int32_test_values() {
static const int32_t kValues[] = {
static_cast<int32_t>(0x00000000), static_cast<int32_t>(0x00000001),
static_cast<int32_t>(0x00FFFF00), static_cast<int32_t>(0x7FFFFFFF),
static_cast<int32_t>(0x80000000), static_cast<int32_t>(0x80000001),
static_cast<int32_t>(0x80FFFF00), static_cast<int32_t>(0x8FFFFFFF),
static_cast<int32_t>(0xFFFFFFFF)};
return std::vector<int32_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
static const std::vector<uint64_t> cvt_trunc_uint64_test_values() {
static const uint64_t kValues[] = {
0x0000000000000000, 0x0000000000000001, 0x0000FFFFFFFF0000,
0x7FFFFFFFFFFFFFFF, 0x8000000000000000, 0x8000000000000001,
0x8000FFFFFFFF0000, 0x8FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF};
return std::vector<uint64_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
static const std::vector<int64_t> cvt_trunc_int64_test_values() {
static const int64_t kValues[] = {static_cast<int64_t>(0x0000000000000000),
static_cast<int64_t>(0x0000000000000001),
static_cast<int64_t>(0x0000FFFFFFFF0000),
static_cast<int64_t>(0x7FFFFFFFFFFFFFFF),
static_cast<int64_t>(0x8000000000000000),
static_cast<int64_t>(0x8000000000000001),
static_cast<int64_t>(0x8000FFFFFFFF0000),
static_cast<int64_t>(0x8FFFFFFFFFFFFFFF),
static_cast<int64_t>(0xFFFFFFFFFFFFFFFF)};
return std::vector<int64_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
// Helper macros that can be used in FOR_INT32_INPUTS(i) { ... *i ... }
#define FOR_INPUTS(ctype, itype, var, test_vector) \
std::vector<ctype> var##_vec = test_vector(); \
for (std::vector<ctype>::iterator var = var##_vec.begin(); \
var != var##_vec.end(); ++var)
#define FOR_INPUTS2(ctype, itype, var, var2, test_vector) \
std::vector<ctype> var##_vec = test_vector(); \
std::vector<ctype>::iterator var; \
std::vector<ctype>::reverse_iterator var2; \
for (var = var##_vec.begin(), var2 = var##_vec.rbegin(); \
var != var##_vec.end(); ++var, ++var2)
#define FOR_ENUM_INPUTS(var, type, test_vector) \
FOR_INPUTS(enum type, type, var, test_vector)
#define FOR_STRUCT_INPUTS(var, type, test_vector) \
FOR_INPUTS(struct type, type, var, test_vector)
#define FOR_INT32_INPUTS(var, test_vector) \
FOR_INPUTS(int32_t, int32, var, test_vector)
#define FOR_INT32_INPUTS2(var, var2, test_vector) \
FOR_INPUTS2(int32_t, int32, var, var2, test_vector)
#define FOR_INT64_INPUTS(var, test_vector) \
FOR_INPUTS(int64_t, int64, var, test_vector)
#define FOR_UINT32_INPUTS(var, test_vector) \
FOR_INPUTS(uint32_t, uint32, var, test_vector)
#define FOR_UINT64_INPUTS(var, test_vector) \
FOR_INPUTS(uint64_t, uint64, var, test_vector)
template <typename RET_TYPE, typename IN_TYPE, typename Func>
RET_TYPE run_Cvt(IN_TYPE x, Func GenerateConvertInstructionFunc) {
using F_CVT = RET_TYPE(IN_TYPE x0, int x1, int x2, int x3, int x4);
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assm;
GenerateConvertInstructionFunc(masm);
__ dmfc1(v0, f2);
__ jr(ra);
__ nop();
CodeDesc desc;
assm.GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F_CVT>::FromCode(*code);
return reinterpret_cast<RET_TYPE>(f.Call(x, 0, 0, 0, 0));
}
TEST(Cvt_s_uw_Trunc_uw_s) {
CcTest::InitializeVM();
FOR_UINT32_INPUTS(i, cvt_trunc_uint32_test_values) {
uint32_t input = *i;
auto fn = [](MacroAssembler* masm) {
__ Cvt_s_uw(f0, a0);
__ mthc1(zero_reg, f2);
__ Trunc_uw_s(f2, f0, f1);
};
CHECK_EQ(static_cast<float>(input), run_Cvt<uint64_t>(input, fn));
}
}
TEST(Cvt_s_ul_Trunc_ul_s) {
CcTest::InitializeVM();
FOR_UINT64_INPUTS(i, cvt_trunc_uint64_test_values) {
uint64_t input = *i;
auto fn = [](MacroAssembler* masm) {
__ Cvt_s_ul(f0, a0);
__ Trunc_ul_s(f2, f0, f1, v0);
};
CHECK_EQ(static_cast<float>(input), run_Cvt<uint64_t>(input, fn));
}
}
TEST(Cvt_d_ul_Trunc_ul_d) {
CcTest::InitializeVM();
FOR_UINT64_INPUTS(i, cvt_trunc_uint64_test_values) {
uint64_t input = *i;
auto fn = [](MacroAssembler* masm) {
__ Cvt_d_ul(f0, a0);
__ Trunc_ul_d(f2, f0, f1, v0);
};
CHECK_EQ(static_cast<double>(input), run_Cvt<uint64_t>(input, fn));
}
}
TEST(cvt_d_l_Trunc_l_d) {
CcTest::InitializeVM();
FOR_INT64_INPUTS(i, cvt_trunc_int64_test_values) {
int64_t input = *i;
auto fn = [](MacroAssembler* masm) {
__ dmtc1(a0, f4);
__ cvt_d_l(f0, f4);
__ Trunc_l_d(f2, f0);
};
CHECK_EQ(static_cast<double>(input), run_Cvt<int64_t>(input, fn));
}
}
TEST(cvt_d_l_Trunc_l_ud) {
CcTest::InitializeVM();
FOR_INT64_INPUTS(i, cvt_trunc_int64_test_values) {
int64_t input = *i;
uint64_t abs_input = (input < 0) ? -input : input;
auto fn = [](MacroAssembler* masm) {
__ dmtc1(a0, f4);
__ cvt_d_l(f0, f4);
__ Trunc_l_ud(f2, f0, f6);
};
CHECK_EQ(static_cast<double>(abs_input), run_Cvt<uint64_t>(input, fn));
}
}
TEST(cvt_d_w_Trunc_w_d) {
CcTest::InitializeVM();
FOR_INT32_INPUTS(i, cvt_trunc_int32_test_values) {
int32_t input = *i;
auto fn = [](MacroAssembler* masm) {
__ mtc1(a0, f4);
__ cvt_d_w(f0, f4);
__ Trunc_w_d(f2, f0);
__ mfc1(v1, f2);
__ dmtc1(v1, f2);
};
CHECK_EQ(static_cast<double>(input), run_Cvt<int64_t>(input, fn));
}
}
static const std::vector<int64_t> overflow_int64_test_values() {
static const int64_t kValues[] = {static_cast<int64_t>(0xF000000000000000),
static_cast<int64_t>(0x0000000000000001),
static_cast<int64_t>(0xFF00000000000000),
static_cast<int64_t>(0x0000F00111111110),
static_cast<int64_t>(0x0F00001000000000),
static_cast<int64_t>(0x991234AB12A96731),
static_cast<int64_t>(0xB0FFFF0F0F0F0F01),
static_cast<int64_t>(0x00006FFFFFFFFFFF),
static_cast<int64_t>(0xFFFFFFFFFFFFFFFF)};
return std::vector<int64_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
TEST(OverflowInstructions) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
struct T {
int64_t lhs;
int64_t rhs;
int64_t output_add;
int64_t output_add2;
int64_t output_sub;
int64_t output_sub2;
int64_t output_mul;
int64_t output_mul2;
int64_t overflow_add;
int64_t overflow_add2;
int64_t overflow_sub;
int64_t overflow_sub2;
int64_t overflow_mul;
int64_t overflow_mul2;
};
T t;
FOR_INT64_INPUTS(i, overflow_int64_test_values) {
FOR_INT64_INPUTS(j, overflow_int64_test_values) {
int64_t ii = *i;
int64_t jj = *j;
int64_t expected_add, expected_sub;
int32_t ii32 = static_cast<int32_t>(ii);
int32_t jj32 = static_cast<int32_t>(jj);
int32_t expected_mul;
int64_t expected_add_ovf, expected_sub_ovf, expected_mul_ovf;
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ ld(t0, MemOperand(a0, offsetof(T, lhs)));
__ ld(t1, MemOperand(a0, offsetof(T, rhs)));
__ DaddOverflow(t2, t0, Operand(t1), t3);
__ sd(t2, MemOperand(a0, offsetof(T, output_add)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_add)));
__ mov(t3, zero_reg);
__ DaddOverflow(t0, t0, Operand(t1), t3);
__ sd(t0, MemOperand(a0, offsetof(T, output_add2)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_add2)));
__ ld(t0, MemOperand(a0, offsetof(T, lhs)));
__ ld(t1, MemOperand(a0, offsetof(T, rhs)));
__ DsubOverflow(t2, t0, Operand(t1), t3);
__ sd(t2, MemOperand(a0, offsetof(T, output_sub)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_sub)));
__ mov(t3, zero_reg);
__ DsubOverflow(t0, t0, Operand(t1), t3);
__ sd(t0, MemOperand(a0, offsetof(T, output_sub2)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_sub2)));
__ ld(t0, MemOperand(a0, offsetof(T, lhs)));
__ ld(t1, MemOperand(a0, offsetof(T, rhs)));
__ sll(t0, t0, 0);
__ sll(t1, t1, 0);
__ MulOverflow(t2, t0, Operand(t1), t3);
__ sd(t2, MemOperand(a0, offsetof(T, output_mul)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_mul)));
__ mov(t3, zero_reg);
__ MulOverflow(t0, t0, Operand(t1), t3);
__ sd(t0, MemOperand(a0, offsetof(T, output_mul2)));
__ sd(t3, MemOperand(a0, offsetof(T, overflow_mul2)));
__ jr(ra);
__ nop();
CodeDesc desc;
masm->GetCode(isolate, &desc);
Handle<Code> code =
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F3>::FromCode(*code);
t.lhs = ii;
t.rhs = jj;
f.Call(&t, 0, 0, 0, 0);
expected_add_ovf = base::bits::SignedAddOverflow64(ii, jj, &expected_add);
expected_sub_ovf = base::bits::SignedSubOverflow64(ii, jj, &expected_sub);
expected_mul_ovf =
base::bits::SignedMulOverflow32(ii32, jj32, &expected_mul);
CHECK_EQ(expected_add_ovf, t.overflow_add < 0);
CHECK_EQ(expected_sub_ovf, t.overflow_sub < 0);
CHECK_EQ(expected_mul_ovf, t.overflow_mul != 0);
CHECK_EQ(t.overflow_add, t.overflow_add2);
CHECK_EQ(t.overflow_sub, t.overflow_sub2);
CHECK_EQ(t.overflow_mul, t.overflow_mul2);
CHECK_EQ(expected_add, t.output_add);
CHECK_EQ(expected_add, t.output_add2);
CHECK_EQ(expected_sub, t.output_sub);
CHECK_EQ(expected_sub, t.output_sub2);
if (!expected_mul_ovf) {
CHECK_EQ(expected_mul, t.output_mul);
CHECK_EQ(expected_mul, t.output_mul2);
}
}
}
}
TEST(min_max_nan) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
struct TestFloat {
double a;
double b;
double c;
double d;
float e;
float f;
float g;
float h;
};
TestFloat test;
const double dnan = std::numeric_limits<double>::quiet_NaN();
const double dinf = std::numeric_limits<double>::infinity();
const double dminf = -std::numeric_limits<double>::infinity();
const float fnan = std::numeric_limits<float>::quiet_NaN();
const float finf = std::numeric_limits<float>::infinity();
const float fminf = std::numeric_limits<float>::infinity();
const int kTableLength = 13;
double inputsa[kTableLength] = {2.0, 3.0, -0.0, 0.0, 42.0, dinf, dminf,
dinf, dnan, 3.0, dinf, dnan, dnan};
double inputsb[kTableLength] = {3.0, 2.0, 0.0, -0.0, dinf, 42.0, dinf,
dminf, 3.0, dnan, dnan, dinf, dnan};
double outputsdmin[kTableLength] = {2.0, 2.0, -0.0, -0.0, 42.0,
42.0, dminf, dminf, dnan, dnan,
dnan, dnan, dnan};
double outputsdmax[kTableLength] = {3.0, 3.0, 0.0, 0.0, dinf, dinf, dinf,
dinf, dnan, dnan, dnan, dnan, dnan};
float inputse[kTableLength] = {2.0, 3.0, -0.0, 0.0, 42.0, finf, fminf,
finf, fnan, 3.0, finf, fnan, fnan};
float inputsf[kTableLength] = {3.0, 2.0, 0.0, -0.0, finf, 42.0, finf,
fminf, 3.0, fnan, fnan, finf, fnan};
float outputsfmin[kTableLength] = {2.0, 2.0, -0.0, -0.0, 42.0, 42.0, fminf,
fminf, fnan, fnan, fnan, fnan, fnan};
float outputsfmax[kTableLength] = {3.0, 3.0, 0.0, 0.0, finf, finf, finf,
finf, fnan, fnan, fnan, fnan, fnan};
auto handle_dnan = [masm](FPURegister dst, Label* nan, Label* back) {
__ bind(nan);
__ LoadRoot(t8, RootIndex::kNanValue);
__ Ldc1(dst, FieldMemOperand(t8, HeapNumber::kValueOffset));
__ Branch(back);
};
auto handle_snan = [masm, fnan](FPURegister dst, Label* nan, Label* back) {
__ bind(nan);
__ Move(dst, fnan);
__ Branch(back);
};
Label handle_mind_nan, handle_maxd_nan, handle_mins_nan, handle_maxs_nan;
Label back_mind_nan, back_maxd_nan, back_mins_nan, back_maxs_nan;
__ push(s6);
__ InitializeRootRegister();
__ Ldc1(f4, MemOperand(a0, offsetof(TestFloat, a)));
__ Ldc1(f8, MemOperand(a0, offsetof(TestFloat, b)));
__ Lwc1(f2, MemOperand(a0, offsetof(TestFloat, e)));
__ Lwc1(f6, MemOperand(a0, offsetof(TestFloat, f)));
__ Float64Min(f10, f4, f8, &handle_mind_nan);
__ bind(&back_mind_nan);
__ Float64Max(f12, f4, f8, &handle_maxd_nan);
__ bind(&back_maxd_nan);
__ Float32Min(f14, f2, f6, &handle_mins_nan);
__ bind(&back_mins_nan);
__ Float32Max(f16, f2, f6, &handle_maxs_nan);
__ bind(&back_maxs_nan);
__ Sdc1(f10, MemOperand(a0, offsetof(TestFloat, c)));
__ Sdc1(f12, MemOperand(a0, offsetof(TestFloat, d)));
__ Swc1(f14, MemOperand(a0, offsetof(TestFloat, g)));
__ Swc1(f16, MemOperand(a0, offsetof(TestFloat, h)));
__ pop(s6);
__ jr(ra);
__ nop();
handle_dnan(f10, &handle_mind_nan, &back_mind_nan);
handle_dnan(f12, &handle_maxd_nan, &back_maxd_nan);
handle_snan(f14, &handle_mins_nan, &back_mins_nan);
handle_snan(f16, &handle_maxs_nan, &back_maxs_nan);
CodeDesc desc;
masm->GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F3>::FromCode(*code);
for (int i = 0; i < kTableLength; i++) {
test.a = inputsa[i];
test.b = inputsb[i];
test.e = inputse[i];
test.f = inputsf[i];
f.Call(&test, 0, 0, 0, 0);
CHECK_EQ(0, memcmp(&test.c, &outputsdmin[i], sizeof(test.c)));
CHECK_EQ(0, memcmp(&test.d, &outputsdmax[i], sizeof(test.d)));
CHECK_EQ(0, memcmp(&test.g, &outputsfmin[i], sizeof(test.g)));
CHECK_EQ(0, memcmp(&test.h, &outputsfmax[i], sizeof(test.h)));
}
}
template <typename IN_TYPE, typename Func>
bool run_Unaligned(char* memory_buffer, int32_t in_offset, int32_t out_offset,
IN_TYPE value, Func GenerateUnalignedInstructionFunc) {
using F_CVT = int32_t(char* x0, int x1, int x2, int x3, int x4);
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assm;
IN_TYPE res;
GenerateUnalignedInstructionFunc(masm, in_offset, out_offset);
__ jr(ra);
__ nop();
CodeDesc desc;
assm.GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F_CVT>::FromCode(*code);
MemCopy(memory_buffer + in_offset, &value, sizeof(IN_TYPE));
f.Call(memory_buffer, 0, 0, 0, 0);
MemCopy(&res, memory_buffer + out_offset, sizeof(IN_TYPE));
return res == value;
}
static const std::vector<uint64_t> unsigned_test_values() {
static const uint64_t kValues[] = {
0x2180F18A06384414, 0x000A714532102277, 0xBC1ACCCF180649F0,
0x8000000080008000, 0x0000000000000001, 0xFFFFFFFFFFFFFFFF,
};
return std::vector<uint64_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
static const std::vector<int32_t> unsigned_test_offset() {
static const int32_t kValues[] = {// value, offset
-132 * KB, -21 * KB, 0, 19 * KB, 135 * KB};
return std::vector<int32_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
static const std::vector<int32_t> unsigned_test_offset_increment() {
static const int32_t kValues[] = {-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
return std::vector<int32_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
TEST(Ulh) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
uint16_t value = static_cast<uint64_t>(*i & 0xFFFF);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn_1 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Ulh(v0, MemOperand(a0, in_offset));
__ Ush(v0, MemOperand(a0, out_offset), v0);
};
CHECK_EQ(true, run_Unaligned<uint16_t>(buffer_middle, in_offset,
out_offset, value, fn_1));
auto fn_2 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ mov(t0, a0);
__ Ulh(a0, MemOperand(a0, in_offset));
__ Ush(a0, MemOperand(t0, out_offset), v0);
};
CHECK_EQ(true, run_Unaligned<uint16_t>(buffer_middle, in_offset,
out_offset, value, fn_2));
auto fn_3 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ mov(t0, a0);
__ Ulhu(a0, MemOperand(a0, in_offset));
__ Ush(a0, MemOperand(t0, out_offset), t1);
};
CHECK_EQ(true, run_Unaligned<uint16_t>(buffer_middle, in_offset,
out_offset, value, fn_3));
auto fn_4 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Ulhu(v0, MemOperand(a0, in_offset));
__ Ush(v0, MemOperand(a0, out_offset), t1);
};
CHECK_EQ(true, run_Unaligned<uint16_t>(buffer_middle, in_offset,
out_offset, value, fn_4));
}
}
}
}
TEST(Ulh_bitextension) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
uint16_t value = static_cast<uint64_t>(*i & 0xFFFF);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
Label success, fail, end, different;
__ Ulh(t0, MemOperand(a0, in_offset));
__ Ulhu(t1, MemOperand(a0, in_offset));
__ Branch(&different, ne, t0, Operand(t1));
// If signed and unsigned values are same, check
// the upper bits to see if they are zero
__ sra(t0, t0, 15);
__ Branch(&success, eq, t0, Operand(zero_reg));
__ Branch(&fail);
// If signed and unsigned values are different,
// check that the upper bits are complementary
__ bind(&different);
__ sra(t1, t1, 15);
__ Branch(&fail, ne, t1, Operand(1));
__ sra(t0, t0, 15);
__ addiu(t0, t0, 1);
__ Branch(&fail, ne, t0, Operand(zero_reg));
// Fall through to success
__ bind(&success);
__ Ulh(t0, MemOperand(a0, in_offset));
__ Ush(t0, MemOperand(a0, out_offset), v0);
__ Branch(&end);
__ bind(&fail);
__ Ush(zero_reg, MemOperand(a0, out_offset), v0);
__ bind(&end);
};
CHECK_EQ(true, run_Unaligned<uint16_t>(buffer_middle, in_offset,
out_offset, value, fn));
}
}
}
}
TEST(Ulw) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
uint32_t value = static_cast<uint32_t>(*i & 0xFFFFFFFF);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn_1 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Ulw(v0, MemOperand(a0, in_offset));
__ Usw(v0, MemOperand(a0, out_offset));
};
CHECK_EQ(true, run_Unaligned<uint32_t>(buffer_middle, in_offset,
out_offset, value, fn_1));
auto fn_2 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ mov(t0, a0);
__ Ulw(a0, MemOperand(a0, in_offset));
__ Usw(a0, MemOperand(t0, out_offset));
};
CHECK_EQ(true,
run_Unaligned<uint32_t>(buffer_middle, in_offset, out_offset,
(uint32_t)value, fn_2));
auto fn_3 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Ulwu(v0, MemOperand(a0, in_offset));
__ Usw(v0, MemOperand(a0, out_offset));
};
CHECK_EQ(true, run_Unaligned<uint32_t>(buffer_middle, in_offset,
out_offset, value, fn_3));
auto fn_4 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ mov(t0, a0);
__ Ulwu(a0, MemOperand(a0, in_offset));
__ Usw(a0, MemOperand(t0, out_offset));
};
CHECK_EQ(true,
run_Unaligned<uint32_t>(buffer_middle, in_offset, out_offset,
(uint32_t)value, fn_4));
}
}
}
}
TEST(Ulw_extension) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
uint32_t value = static_cast<uint32_t>(*i & 0xFFFFFFFF);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
Label success, fail, end, different;
__ Ulw(t0, MemOperand(a0, in_offset));
__ Ulwu(t1, MemOperand(a0, in_offset));
__ Branch(&different, ne, t0, Operand(t1));
// If signed and unsigned values are same, check
// the upper bits to see if they are zero
__ dsra(t0, t0, 31);
__ Branch(&success, eq, t0, Operand(zero_reg));
__ Branch(&fail);
// If signed and unsigned values are different,
// check that the upper bits are complementary
__ bind(&different);
__ dsra(t1, t1, 31);
__ Branch(&fail, ne, t1, Operand(1));
__ dsra(t0, t0, 31);
__ daddiu(t0, t0, 1);
__ Branch(&fail, ne, t0, Operand(zero_reg));
// Fall through to success
__ bind(&success);
__ Ulw(t0, MemOperand(a0, in_offset));
__ Usw(t0, MemOperand(a0, out_offset));
__ Branch(&end);
__ bind(&fail);
__ Usw(zero_reg, MemOperand(a0, out_offset));
__ bind(&end);
};
CHECK_EQ(true, run_Unaligned<uint32_t>(buffer_middle, in_offset,
out_offset, value, fn));
}
}
}
}
TEST(Uld) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
uint64_t value = *i;
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn_1 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Uld(v0, MemOperand(a0, in_offset));
__ Usd(v0, MemOperand(a0, out_offset));
};
CHECK_EQ(true, run_Unaligned<uint64_t>(buffer_middle, in_offset,
out_offset, value, fn_1));
auto fn_2 = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ mov(t0, a0);
__ Uld(a0, MemOperand(a0, in_offset));
__ Usd(a0, MemOperand(t0, out_offset));
};
CHECK_EQ(true,
run_Unaligned<uint64_t>(buffer_middle, in_offset, out_offset,
(uint32_t)value, fn_2));
}
}
}
}
TEST(Ulwc1) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
float value = static_cast<float>(*i & 0xFFFFFFFF);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Ulwc1(f0, MemOperand(a0, in_offset), t0);
__ Uswc1(f0, MemOperand(a0, out_offset), t0);
};
CHECK_EQ(true, run_Unaligned<float>(buffer_middle, in_offset,
out_offset, value, fn));
}
}
}
}
TEST(Uldc1) {
CcTest::InitializeVM();
static const int kBufferSize = 300 * KB;
char memory_buffer[kBufferSize];
char* buffer_middle = memory_buffer + (kBufferSize / 2);
FOR_UINT64_INPUTS(i, unsigned_test_values) {
FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) {
FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) {
double value = static_cast<double>(*i);
int32_t in_offset = *j1 + *k1;
int32_t out_offset = *j2 + *k2;
auto fn = [](MacroAssembler* masm, int32_t in_offset,
int32_t out_offset) {
__ Uldc1(f0, MemOperand(a0, in_offset), t0);
__ Usdc1(f0, MemOperand(a0, out_offset), t0);
};
CHECK_EQ(true, run_Unaligned<double>(buffer_middle, in_offset,
out_offset, value, fn));
}
}
}
}
static const std::vector<uint64_t> sltu_test_values() {
static const uint64_t kValues[] = {
0,
1,
0x7FFE,
0x7FFF,
0x8000,
0x8001,
0xFFFE,
0xFFFF,
0xFFFFFFFFFFFF7FFE,
0xFFFFFFFFFFFF7FFF,
0xFFFFFFFFFFFF8000,
0xFFFFFFFFFFFF8001,
0xFFFFFFFFFFFFFFFE,
0xFFFFFFFFFFFFFFFF,
};
return std::vector<uint64_t>(&kValues[0], &kValues[arraysize(kValues)]);
}
template <typename Func>
bool run_Sltu(uint64_t rs, uint64_t rd, Func GenerateSltuInstructionFunc) {
using F_CVT = int64_t(uint64_t x0, uint64_t x1, int x2, int x3, int x4);
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assm;
GenerateSltuInstructionFunc(masm, rd);
__ jr(ra);
__ nop();
CodeDesc desc;
assm.GetCode(isolate, &desc);
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Handle<Code> code =
Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
auto f = GeneratedCode<F_CVT>::FromCode(*code);
int64_t res = reinterpret_cast<int64_t>(f.Call(rs, rd, 0, 0, 0));
return res == 1;
}
TEST(Sltu) {
CcTest::InitializeVM();
FOR_UINT64_INPUTS(i, sltu_test_values) {
FOR_UINT64_INPUTS(j, sltu_test_values) {
uint64_t rs = *i;
uint64_t rd = *j;
auto fn_1 = [](MacroAssembler* masm, uint64_t imm) {
__ Sltu(v0, a0, Operand(imm));
};
CHECK_EQ(rs < rd, run_Sltu(rs, rd, fn_1));
auto fn_2 = [](MacroAssembler* masm, uint64_t imm) {
__ Sltu(v0, a0, a1);
};
CHECK_EQ(rs < rd, run_Sltu(rs, rd, fn_2));
}
}
}
template <typename T, typename Inputs, typename Results>
static GeneratedCode<F4> GenerateMacroFloat32MinMax(MacroAssembler* masm) {
T a = T::from_code(4); // f4
T b = T::from_code(6); // f6
T c = T::from_code(8); // f8
Label ool_min_abc, ool_min_aab, ool_min_aba;
Label ool_max_abc, ool_max_aab, ool_max_aba;
Label done_min_abc, done_min_aab, done_min_aba;
Label done_max_abc, done_max_aab, done_max_aba;
#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \
__ Lwc1(x, MemOperand(a0, offsetof(Inputs, src1_))); \
__ Lwc1(y, MemOperand(a0, offsetof(Inputs, src2_))); \
__ fminmax(res, x, y, &ool); \
__ bind(&done); \
__ Swc1(a, MemOperand(a1, offsetof(Results, res_field)))
// a = min(b, c);
FLOAT_MIN_MAX(Float32Min, a, b, c, done_min_abc, ool_min_abc, min_abc_);
// a = min(a, b);
FLOAT_MIN_MAX(Float32Min, a, a, b, done_min_aab, ool_min_aab, min_aab_);
// a = min(b, a);
FLOAT_MIN_MAX(Float32Min, a, b, a, done_min_aba, ool_min_aba, min_aba_);
// a = max(b, c);
FLOAT_MIN_MAX(Float32Max, a, b, c, done_max_abc, ool_max_abc, max_abc_);
// a = max(a, b);
FLOAT_MIN_MAX(Float32Max, a, a, b, done_max_aab, ool_max_aab, max_aab_);
// a = max(b, a);
FLOAT_MIN_MAX(Float32Max, a, b, a, done_max_aba, ool_max_aba, max_aba_);
#undef FLOAT_MIN_MAX
__ jr(ra);
__ nop();
// Generate out-of-line cases.
__ bind(&ool_min_abc);
__ Float32MinOutOfLine(a, b, c);
__ Branch(&done_min_abc);
__ bind(&ool_min_aab);
__ Float32MinOutOfLine(a, a, b);
__ Branch(&done_min_aab);
__ bind(&ool_min_aba);
__ Float32MinOutOfLine(a, b, a);
__ Branch(&done_min_aba);
__ bind(&ool_max_abc);
__ Float32MaxOutOfLine(a, b, c);
__ Branch(&done_max_abc);
__ bind(&ool_max_aab);
__ Float32MaxOutOfLine(a, a, b);
__ Branch(&done_max_aab);
__ bind(&ool_max_aba);
__ Float32MaxOutOfLine(a, b, a);
__ Branch(&done_max_aba);
CodeDesc desc;
masm->GetCode(masm->isolate(), &desc);
Handle<Code> code =
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Factory::CodeBuilder(masm->isolate(), desc, CodeKind::FOR_TESTING)
.Build();
#ifdef DEBUG
StdoutStream os;
code->Print(os);
#endif
return GeneratedCode<F4>::FromCode(*code);
}
TEST(macro_float_minmax_f32) {
// Test the Float32Min and Float32Max macros.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
struct Inputs {
float src1_;
float src2_;
};
struct Results {
// Check all register aliasing possibilities in order to exercise all
// code-paths in the macro assembler.
float min_abc_;
float min_aab_;
float min_aba_;
float max_abc_;
float max_aab_;
float max_aba_;
};
GeneratedCode<F4> f =
GenerateMacroFloat32MinMax<FPURegister, Inputs, Results>(masm);
#define CHECK_MINMAX(src1, src2, min, max) \
do { \
Inputs inputs = {src1, src2}; \
Results results; \
f.Call(&inputs, &results, 0, 0, 0); \
CHECK_EQ(bit_cast<uint32_t>(min), bit_cast<uint32_t>(results.min_abc_)); \
CHECK_EQ(bit_cast<uint32_t>(min), bit_cast<uint32_t>(results.min_aab_)); \
CHECK_EQ(bit_cast<uint32_t>(min), bit_cast<uint32_t>(results.min_aba_)); \
CHECK_EQ(bit_cast<uint32_t>(max), bit_cast<uint32_t>(results.max_abc_)); \
CHECK_EQ(bit_cast<uint32_t>(max), bit_cast<uint32_t>(results.max_aab_)); \
CHECK_EQ(bit_cast<uint32_t>(max), bit_cast<uint32_t>(results.max_aba_)); \
/* Use a bit_cast to correctly identify -0.0 and NaNs. */ \
} while (0)
float nan_a = std::numeric_limits<float>::quiet_NaN();
float nan_b = std::numeric_limits<float>::quiet_NaN();
CHECK_MINMAX(1.0f, -1.0f, -1.0f, 1.0f);
CHECK_MINMAX(-1.0f, 1.0f, -1.0f, 1.0f);
CHECK_MINMAX(0.0f, -1.0f, -1.0f, 0.0f);
CHECK_MINMAX(-1.0f, 0.0f, -1.0f, 0.0f);
CHECK_MINMAX(-0.0f, -1.0f, -1.0f, -0.0f);
CHECK_MINMAX(-1.0f, -0.0f, -1.0f, -0.0f);
CHECK_MINMAX(0.0f, 1.0f, 0.0f, 1.0f);
CHECK_MINMAX(1.0f, 0.0f, 0.0f, 1.0f);
CHECK_MINMAX(0.0f, 0.0f, 0.0f, 0.0f);
CHECK_MINMAX(-0.0f, -0.0f, -0.0f, -0.0f);
CHECK_MINMAX(-0.0f, 0.0f, -0.0f, 0.0f);
CHECK_MINMAX(0.0f, -0.0f, -0.0f, 0.0f);
CHECK_MINMAX(0.0f, nan_a, nan_a, nan_a);
CHECK_MINMAX(nan_a, 0.0f, nan_a, nan_a);
CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a);
CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b);
#undef CHECK_MINMAX
}
template <typename T, typename Inputs, typename Results>
static GeneratedCode<F4> GenerateMacroFloat64MinMax(MacroAssembler* masm) {
T a = T::from_code(4); // f4
T b = T::from_code(6); // f6
T c = T::from_code(8); // f8
Label ool_min_abc, ool_min_aab, ool_min_aba;
Label ool_max_abc, ool_max_aab, ool_max_aba;
Label done_min_abc, done_min_aab, done_min_aba;
Label done_max_abc, done_max_aab, done_max_aba;
#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \
__ Ldc1(x, MemOperand(a0, offsetof(Inputs, src1_))); \
__ Ldc1(y, MemOperand(a0, offsetof(Inputs, src2_))); \
__ fminmax(res, x, y, &ool); \
__ bind(&done); \
__ Sdc1(a, MemOperand(a1, offsetof(Results, res_field)))
// a = min(b, c);
FLOAT_MIN_MAX(Float64Min, a, b, c, done_min_abc, ool_min_abc, min_abc_);
// a = min(a, b);
FLOAT_MIN_MAX(Float64Min, a, a, b, done_min_aab, ool_min_aab, min_aab_);
// a = min(b, a);
FLOAT_MIN_MAX(Float64Min, a, b, a, done_min_aba, ool_min_aba, min_aba_);
// a = max(b, c);
FLOAT_MIN_MAX(Float64Max, a, b, c, done_max_abc, ool_max_abc, max_abc_);
// a = max(a, b);
FLOAT_MIN_MAX(Float64Max, a, a, b, done_max_aab, ool_max_aab, max_aab_);
// a = max(b, a);
FLOAT_MIN_MAX(Float64Max, a, b, a, done_max_aba, ool_max_aba, max_aba_);
#undef FLOAT_MIN_MAX
__ jr(ra);
__ nop();
// Generate out-of-line cases.
__ bind(&ool_min_abc);
__ Float64MinOutOfLine(a, b, c);
__ Branch(&done_min_abc);
__ bind(&ool_min_aab);
__ Float64MinOutOfLine(a, a, b);
__ Branch(&done_min_aab);
__ bind(&ool_min_aba);
__ Float64MinOutOfLine(a, b, a);
__ Branch(&done_min_aba);
__ bind(&ool_max_abc);
__ Float64MaxOutOfLine(a, b, c);
__ Branch(&done_max_abc);
__ bind(&ool_max_aab);
__ Float64MaxOutOfLine(a, a, b);
__ Branch(&done_max_aab);
__ bind(&ool_max_aba);
__ Float64MaxOutOfLine(a, b, a);
__ Branch(&done_max_aba);
CodeDesc desc;
masm->GetCode(masm->isolate(), &desc);
Handle<Code> code =
Reland "Reland "[deoptimizer] Change deopt entries into builtins"" This is a reland of fbfa9bf4ec72b1b73a96b70ccb68cd98c321511b The arm64 was missing proper codegen for CFI, thus sizes were off. Original change's description: > Reland "[deoptimizer] Change deopt entries into builtins" > > This is a reland of 7f58ced72eb65b6b5530ccabaf2eaebe45bf9d33 > > It fixes the different exit size emitted on x64/Atom CPUs due to > performance tuning in TurboAssembler::Call. Additionally, add > cctests to verify the fixed size exits. > > Original change's description: > > [deoptimizer] Change deopt entries into builtins > > > > While the overall goal of this commit is to change deoptimization > > entries into builtins, there are multiple related things happening: > > > > - Deoptimization entries, formerly stubs (i.e. Code objects generated > > at runtime, guaranteed to be immovable), have been converted into > > builtins. The major restriction is that we now need to preserve the > > kRootRegister, which was formerly used on most architectures to pass > > the deoptimization id. The solution differs based on platform. > > - Renamed DEOPT_ENTRIES_OR_FOR_TESTING code kind to FOR_TESTING. > > - Removed heap/ support for immovable Code generation. > > - Removed the DeserializerData class (no longer needed). > > - arm64: to preserve 4-byte deopt exits, introduced a new optimization > > in which the final jump to the deoptimization entry is generated > > once per Code object, and deopt exits can continue to emit a > > near-call. > > - arm,ia32,x64: change to fixed-size deopt exits. This reduces exit > > sizes by 4/8, 5, and 5 bytes, respectively. > > > > On arm the deopt exit size is reduced from 12 (or 16) bytes to 8 bytes > > by using the same strategy as on arm64 (recalc deopt id from return > > address). Before: > > > > e300a002 movw r10, <id> > > e59fc024 ldr ip, [pc, <entry offset>] > > e12fff3c blx ip > > > > After: > > > > e59acb35 ldr ip, [r10, <entry offset>] > > e12fff3c blx ip > > > > On arm64 the deopt exit size remains 4 bytes (or 8 bytes in same cases > > with CFI). Additionally, up to 4 builtin jumps are emitted per Code > > object (max 32 bytes added overhead per Code object). Before: > > > > 9401cdae bl <entry offset> > > > > After: > > > > # eager deoptimization entry jump. > > f95b1f50 ldr x16, [x26, <eager entry offset>] > > d61f0200 br x16 > > # lazy deoptimization entry jump. > > f95b2b50 ldr x16, [x26, <lazy entry offset>] > > d61f0200 br x16 > > # the deopt exit. > > 97fffffc bl <eager deoptimization entry jump offset> > > > > On ia32 the deopt exit size is reduced from 10 to 5 bytes. Before: > > > > bb00000000 mov ebx,<id> > > e825f5372b call <entry> > > > > After: > > > > e8ea2256ba call <entry> > > > > On x64 the deopt exit size is reduced from 12 to 7 bytes. Before: > > > > 49c7c511000000 REX.W movq r13,<id> > > e8ea2f0700 call <entry> > > > > After: > > > > 41ff9560360000 call [r13+<entry offset>] > > > > Bug: v8:8661,v8:8768 > > Change-Id: I13e30aedc360474dc818fecc528ce87c3bfeed42 > > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465834 > > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > > Reviewed-by: Ross McIlroy <rmcilroy@chromium.org> > > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > > Reviewed-by: Ulan Degenbaev <ulan@chromium.org> > > Cr-Commit-Position: refs/heads/master@{#70597} > > Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org > Bug: v8:8661,v8:8768,chromium:1140165 > Change-Id: Ibcd5c39c58a70bf2b2ac221aa375fc68d495e144 > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2485506 > Reviewed-by: Jakob Gruber <jgruber@chromium.org> > Reviewed-by: Tobias Tebbi <tebbi@chromium.org> > Commit-Queue: Jakob Gruber <jgruber@chromium.org> > Cr-Commit-Position: refs/heads/master@{#70655} Tbr: ulan@chromium.org, tebbi@chromium.org, rmcilroy@chromium.org Bug: v8:8661 Bug: v8:8768 Bug: chromium:1140165 Change-Id: I471cc94fc085e527dc9bfb5a84b96bd907c2333f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488682 Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#70672}
2020-10-21 05:12:25 +00:00
Factory::CodeBuilder(masm->isolate(), desc, CodeKind::FOR_TESTING)
.Build();
#ifdef DEBUG
StdoutStream os;
code->Print(os);
#endif
return GeneratedCode<F4>::FromCode(*code);
}
TEST(macro_float_minmax_f64) {
// Test the Float64Min and Float64Max macros.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
struct Inputs {
double src1_;
double src2_;
};
struct Results {
// Check all register aliasing possibilities in order to exercise all
// code-paths in the macro assembler.
double min_abc_;
double min_aab_;
double min_aba_;
double max_abc_;
double max_aab_;
double max_aba_;
};
GeneratedCode<F4> f =
GenerateMacroFloat64MinMax<DoubleRegister, Inputs, Results>(masm);
#define CHECK_MINMAX(src1, src2, min, max) \
do { \
Inputs inputs = {src1, src2}; \
Results results; \
f.Call(&inputs, &results, 0, 0, 0); \
CHECK_EQ(bit_cast<uint64_t>(min), bit_cast<uint64_t>(results.min_abc_)); \
CHECK_EQ(bit_cast<uint64_t>(min), bit_cast<uint64_t>(results.min_aab_)); \
CHECK_EQ(bit_cast<uint64_t>(min), bit_cast<uint64_t>(results.min_aba_)); \
CHECK_EQ(bit_cast<uint64_t>(max), bit_cast<uint64_t>(results.max_abc_)); \
CHECK_EQ(bit_cast<uint64_t>(max), bit_cast<uint64_t>(results.max_aab_)); \
CHECK_EQ(bit_cast<uint64_t>(max), bit_cast<uint64_t>(results.max_aba_)); \
/* Use a bit_cast to correctly identify -0.0 and NaNs. */ \
} while (0)
double nan_a = std::numeric_limits<double>::quiet_NaN();
double nan_b = std::numeric_limits<double>::quiet_NaN();
CHECK_MINMAX(1.0, -1.0, -1.0, 1.0);
CHECK_MINMAX(-1.0, 1.0, -1.0, 1.0);
CHECK_MINMAX(0.0, -1.0, -1.0, 0.0);
CHECK_MINMAX(-1.0, 0.0, -1.0, 0.0);
CHECK_MINMAX(-0.0, -1.0, -1.0, -0.0);
CHECK_MINMAX(-1.0, -0.0, -1.0, -0.0);
CHECK_MINMAX(0.0, 1.0, 0.0, 1.0);
CHECK_MINMAX(1.0, 0.0, 0.0, 1.0);
CHECK_MINMAX(0.0, 0.0, 0.0, 0.0);
CHECK_MINMAX(-0.0, -0.0, -0.0, -0.0);
CHECK_MINMAX(-0.0, 0.0, -0.0, 0.0);
CHECK_MINMAX(0.0, -0.0, -0.0, 0.0);
CHECK_MINMAX(0.0, nan_a, nan_a, nan_a);
CHECK_MINMAX(nan_a, 0.0, nan_a, nan_a);
CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a);
CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b);
#undef CHECK_MINMAX
}
TEST(DeoptExitSizeIsFixed) {
CHECK(Deoptimizer::kSupportsFixedDeoptExitSizes);
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
auto buffer = AllocateAssemblerBuffer();
MacroAssembler masm(isolate, v8::internal::CodeObjectRequired::kYes,
buffer->CreateView());
STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
for (int i = 0; i < kDeoptimizeKindCount; i++) {
DeoptimizeKind kind = static_cast<DeoptimizeKind>(i);
Label before_exit;
masm.bind(&before_exit);
if (kind == DeoptimizeKind::kEagerWithResume) {
Builtin target = Deoptimizer::GetDeoptWithResumeBuiltin(
DeoptimizeReason::kDynamicCheckMaps);
masm.CallForDeoptimization(target, 42, &before_exit, kind, &before_exit,
nullptr);
CHECK_EQ(masm.SizeOfCodeGeneratedSince(&before_exit),
Deoptimizer::kEagerWithResumeBeforeArgsSize);
} else {
Builtin target = Deoptimizer::GetDeoptimizationEntry(kind);
masm.CallForDeoptimization(target, 42, &before_exit, kind, &before_exit,
nullptr);
CHECK_EQ(masm.SizeOfCodeGeneratedSince(&before_exit),
kind == DeoptimizeKind::kLazy
? Deoptimizer::kLazyDeoptExitSize
: Deoptimizer::kNonLazyDeoptExitSize);
}
}
}
#undef __
} // namespace internal
} // namespace v8