Revert "[x64] Add support for "cold calls" in hot paths"
This reverts commit 31ccfed461.
Reason for revert: Fails compilation on: https://ci.chromium.org/ui/p/v8/builders/ci/V8%20Linux64%20-%20cfi%20-%20builder/6527/overview
Original change's description:
> [x64] Add support for "cold calls" in hot paths
>
> This makes (specially annotated) calls to "cold functions" in hot paths
> more efficient by hiding the fact that we are actually calling a
> function here. Clang would otherwise unconditionally spill and reload
> registers that might be clobbered by the call. This would slow down the
> fast path.
>
> This CL allows to reverse priorities here: The fast path can stay fast
> (no spills and loads), but the slow path gets even slower. The inline
> assembly that implements the cold call spills and reloads *all*
> registers, because we do not know which registers are in use in the
> scope where the cold call is being emitted.
>
> I.e. this behaves like a custom calling convention with no caller-saved
> registers.
>
> The `preserve_all` attribute (experimental in clang, and incomplete for
> C++) would also solve this, but it is not production-ready yet (leads to
> crashes of clang and crashes of the generated code).
>
> R=leszeks@chromium.org
> CC=dlehmann@chromium.org
>
> Bug: v8:13565, v8:13570
> Change-Id: I2b54a480da1c689113a67c601c29d73239b0ff2b
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4116584
> Commit-Queue: Clemens Backes <clemensb@chromium.org>
> Reviewed-by: Anton Bikineev <bikineev@chromium.org>
> Reviewed-by: Leszek Swirski <leszeks@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#85127}
Bug: v8:13565, v8:13570
Change-Id: I2f5b3343eb372fea13d2c4ab6354f2bc52e2c338
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4145819
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Auto-Submit: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Cr-Commit-Position: refs/heads/main@{#85128}
This commit is contained in:
parent
31ccfed461
commit
aa5f2e5c43
@ -595,8 +595,6 @@ filegroup(
|
||||
"src/base/bounded-page-allocator.h",
|
||||
"src/base/bounds.h",
|
||||
"src/base/build_config.h",
|
||||
"src/base/call_cold.cc",
|
||||
"src/base/call_cold.h",
|
||||
"src/base/compiler-specific.h",
|
||||
"src/base/container-utils.h",
|
||||
"src/base/cpu.cc",
|
||||
|
2
BUILD.gn
2
BUILD.gn
@ -5491,8 +5491,6 @@ v8_component("v8_libbase") {
|
||||
"src/base/bounded-page-allocator.h",
|
||||
"src/base/bounds.h",
|
||||
"src/base/build_config.h",
|
||||
"src/base/call_cold.cc",
|
||||
"src/base/call_cold.h",
|
||||
"src/base/compiler-specific.h",
|
||||
"src/base/container-utils.h",
|
||||
"src/base/cpu.cc",
|
||||
|
@ -1,58 +0,0 @@
|
||||
// Copyright 2023 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/base/call_cold.h"
|
||||
|
||||
namespace v8::base {
|
||||
|
||||
#if V8_HOST_ARCH_X64 && (defined(__clang__) || defined(__GNUC__))
|
||||
asm(".globl v8_base_call_cold\n"
|
||||
"v8_base_call_cold:\n"
|
||||
" push %rbp\n"
|
||||
" mov %rsp, %rbp\n"
|
||||
// Push all non-clobbered registers, except for callee-saved ones. The
|
||||
// compiler does not even know that it is executing a call, so we can not
|
||||
// clobber any register, not the registers holding the function address or
|
||||
// the arguments.
|
||||
" push %rax\n"
|
||||
" push %rcx\n"
|
||||
" push %rdx\n"
|
||||
#ifndef V8_OS_WIN
|
||||
// %rsi and %rdi are callee-saved on Windows.
|
||||
" push %rsi\n"
|
||||
" push %rdi\n"
|
||||
#endif // !V8_OS_WIN
|
||||
" push %r8\n"
|
||||
" push %r9\n"
|
||||
" push %r10\n"
|
||||
" push %r11\n"
|
||||
// Save %rsp to %r15 (after pushing it) and align %rsp to 16 bytes.
|
||||
// %r15 is callee-saved, so the value will still be there after the call.
|
||||
" push %r15\n"
|
||||
" mov %rsp, %r15\n"
|
||||
" and $-16, %rsp\n"
|
||||
// Now execute the actual call.
|
||||
" call *%rax\n"
|
||||
// Restore the potentially unaligned %rsp.
|
||||
" mov %r15, %rsp\n"
|
||||
// Pop the previously pushed registers. We have no return value, so we do
|
||||
// not need to preserve %rax.
|
||||
" pop %r15\n"
|
||||
" pop %r11\n"
|
||||
" pop %r10\n"
|
||||
" pop %r9\n"
|
||||
" pop %r8\n"
|
||||
#ifndef V8_OS_WIN
|
||||
" pop %rdi\n"
|
||||
" pop %rsi\n"
|
||||
#endif // !V8_OS_WIN
|
||||
" pop %rdx\n"
|
||||
" pop %rcx\n"
|
||||
" pop %rax\n"
|
||||
// Leave the frame and return.
|
||||
" pop %rbp\n"
|
||||
" ret");
|
||||
#endif
|
||||
|
||||
} // namespace v8::base
|
@ -1,103 +0,0 @@
|
||||
// Copyright 2023 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_BASE_CALL_COLD_H_
|
||||
#define V8_BASE_CALL_COLD_H_
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "include/v8config.h"
|
||||
|
||||
namespace v8::base {
|
||||
|
||||
// Use {call_cold} for calls in hot paths that are unlikely to be executed. The
|
||||
// compiler will not know that this executes a call, so it will not clobber any
|
||||
// registers (i.e. this behaves like a custom calling convention where all
|
||||
// registers are callee-save).
|
||||
// Executing the call will be significantly slower than without going through
|
||||
// {call_cold}, as all registers will have to be spilled and an indirect call is
|
||||
// being executed.
|
||||
|
||||
// As a start, we added support for GCC and clang on x64. Other platforms can
|
||||
// be added later, as needed.
|
||||
|
||||
template <typename Fn, typename... Ps>
|
||||
constexpr bool IsValidForCallCold =
|
||||
// The callable object must be convertible to a function pointer (e.g. a
|
||||
// capture-less lambda).
|
||||
std::is_convertible_v<Fn, void (*)(Ps...)> &&
|
||||
// All parameters must be integral (support for floating-point arguments is
|
||||
// not implemented).
|
||||
(... && (std::is_integral_v<Ps> || std::is_pointer_v<Ps>));
|
||||
|
||||
// Do not use V8_CC_GNU, as this is not defined for clang on Windows. Explicitly
|
||||
// check for GCC or clang.
|
||||
#if V8_HOST_ARCH_X64 && (defined(__clang__) || defined(__GNUC__))
|
||||
|
||||
// Define the parameter registers. Windows uses a different calling convention
|
||||
// than other OSes.
|
||||
#define REG_FN "rax"
|
||||
#ifdef V8_OS_WIN
|
||||
#define REG_P1 "rcx"
|
||||
#define REG_P2 "rdx"
|
||||
#define REG_P3 "r8"
|
||||
#else
|
||||
#define REG_P1 "rdi"
|
||||
#define REG_P2 "rsi"
|
||||
#define REG_P3 "rdx"
|
||||
#endif
|
||||
// We clobber all xmm registers so we do not have to spill and reload them.
|
||||
#define CLOBBER \
|
||||
"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", \
|
||||
"xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", \
|
||||
"xmm15", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm6", "st", \
|
||||
"st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
|
||||
#define V8_CALL_COLD_ASM \
|
||||
"sub $128, %%rsp\n" /* Bump %rsp by 128, beyond the red zone. */ \
|
||||
"call v8_base_call_cold\n" /* Call our trampoline. */ \
|
||||
"add $128, %%rsp" /* Restore previous %rsp. */
|
||||
|
||||
// 1 Parameter, no result.
|
||||
template <typename P1, typename Fn>
|
||||
V8_INLINE void call_cold(const Fn& fn, P1 p1) {
|
||||
static_assert(IsValidForCallCold<Fn, P1>);
|
||||
using FnPtr = void (*)(P1);
|
||||
register FnPtr fn_reg asm(REG_FN) = fn;
|
||||
register P1 p1_reg asm(REG_P1) = p1;
|
||||
asm(V8_CALL_COLD_ASM : : "r"(fn_reg), "r"(p1_reg) : CLOBBER);
|
||||
}
|
||||
|
||||
// 3 Parameters, no result.
|
||||
template <typename P1, typename P2, typename P3, typename Fn>
|
||||
V8_INLINE void call_cold(const Fn& fn, P1 p1, P2 p2, P3 p3) {
|
||||
static_assert(IsValidForCallCold<Fn, P1, P2, P3>);
|
||||
using FnPtr = void (*)(P1, P2, P3);
|
||||
register FnPtr fn_reg asm(REG_FN) = fn;
|
||||
register P1 p1_reg asm(REG_P1) = p1;
|
||||
register P2 p2_reg asm(REG_P2) = p2;
|
||||
register P3 p3_reg asm(REG_P3) = p3;
|
||||
asm(V8_CALL_COLD_ASM
|
||||
:
|
||||
: "r"(fn_reg), "r"(p1_reg), "r"(p2_reg), "r"(p3_reg)
|
||||
: CLOBBER);
|
||||
}
|
||||
|
||||
#else
|
||||
// Architectures without special support just execute the call directly.
|
||||
template <typename... Ps, typename Fn>
|
||||
V8_INLINE void call_cold(const Fn& fn, Ps... ps) {
|
||||
static_assert(IsValidForCallCold<Fn, Ps...>);
|
||||
fn(ps...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef REG_P1
|
||||
#undef REG_P2
|
||||
#undef REG_P3
|
||||
#undef CLOBBER
|
||||
#undef V8_CALL_COLD_ASM
|
||||
|
||||
} // namespace v8::base
|
||||
|
||||
#endif // V8_BASE_CALL_COLD_H_
|
@ -42,7 +42,6 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "src/base/call_cold.h"
|
||||
#include "src/base/export-template.h"
|
||||
#include "src/codegen/assembler.h"
|
||||
#include "src/codegen/cpu-features.h"
|
||||
@ -2641,10 +2640,7 @@ void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
|
||||
class EnsureSpace {
|
||||
public:
|
||||
explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
|
||||
if (V8_UNLIKELY(assembler_->buffer_overflow())) {
|
||||
base::call_cold([](Assembler* assembler) { assembler->GrowBuffer(); },
|
||||
assembler_);
|
||||
}
|
||||
if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
|
||||
#ifdef DEBUG
|
||||
space_before_ = assembler_->available_space();
|
||||
#endif
|
||||
|
@ -16,7 +16,6 @@
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "src/base/call_cold.h"
|
||||
#include "src/base/small-vector.h"
|
||||
#include "src/base/strings.h"
|
||||
#include "src/base/v8-fallthrough.h"
|
||||
@ -1257,9 +1256,7 @@ class FastZoneVector {
|
||||
|
||||
V8_INLINE void EnsureMoreCapacity(int slots_needed, Zone* zone) {
|
||||
if (V8_LIKELY(capacity_end_ - end_ >= slots_needed)) return;
|
||||
base::call_cold([](FastZoneVector* vec, int slots_needed,
|
||||
Zone* zone) { vec->Grow(slots_needed, zone); },
|
||||
this, slots_needed, zone);
|
||||
Grow(slots_needed, zone);
|
||||
}
|
||||
|
||||
private:
|
||||
|
Loading…
Reference in New Issue
Block a user