Revert "[x64] Add support for "cold calls" in hot paths"

This reverts commit 31ccfed461.

Reason for revert: Fails compilation on: https://ci.chromium.org/ui/p/v8/builders/ci/V8%20Linux64%20-%20cfi%20-%20builder/6527/overview

Original change's description:
> [x64] Add support for "cold calls" in hot paths
>
> This makes (specially annotated) calls to "cold functions" in hot paths
> more efficient by hiding the fact that we are actually calling a
> function here. Clang would otherwise unconditionally spill and reload
> registers that might be clobbered by the call. This would slow down the
> fast path.
>
> This CL allows us to reverse the priorities here: the fast path can stay fast
> (no spills and reloads), but the slow path gets even slower. The inline
> assembly that implements the cold call spills and reloads *all*
> registers, because we do not know which registers are in use in the
> scope where the cold call is being emitted.
>
> I.e. this behaves like a custom calling convention with no caller-saved
> registers.
>
> The `preserve_all` attribute (experimental in clang, and incomplete for
> C++) would also solve this, but it is not production-ready yet (leads to
> crashes of clang and crashes of the generated code).
>
> R=​leszeks@chromium.org
> CC=​​dlehmann@chromium.org
>
> Bug: v8:13565, v8:13570
> Change-Id: I2b54a480da1c689113a67c601c29d73239b0ff2b
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4116584
> Commit-Queue: Clemens Backes <clemensb@chromium.org>
> Reviewed-by: Anton Bikineev <bikineev@chromium.org>
> Reviewed-by: Leszek Swirski <leszeks@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#85127}

Bug: v8:13565, v8:13570
Change-Id: I2f5b3343eb372fea13d2c4ab6354f2bc52e2c338
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4145819
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Auto-Submit: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Cr-Commit-Position: refs/heads/main@{#85128}
This commit is contained in:
Clemens Backes 2023-01-08 21:01:14 +00:00 committed by V8 LUCI CQ
parent 31ccfed461
commit aa5f2e5c43
6 changed files with 2 additions and 174 deletions

View File

@ -595,8 +595,6 @@ filegroup(
"src/base/bounded-page-allocator.h",
"src/base/bounds.h",
"src/base/build_config.h",
"src/base/call_cold.cc",
"src/base/call_cold.h",
"src/base/compiler-specific.h",
"src/base/container-utils.h",
"src/base/cpu.cc",

View File

@ -5491,8 +5491,6 @@ v8_component("v8_libbase") {
"src/base/bounded-page-allocator.h",
"src/base/bounds.h",
"src/base/build_config.h",
"src/base/call_cold.cc",
"src/base/call_cold.h",
"src/base/compiler-specific.h",
"src/base/container-utils.h",
"src/base/cpu.cc",

View File

@ -1,58 +0,0 @@
// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/base/call_cold.h"
namespace v8::base {
#if V8_HOST_ARCH_X64 && (defined(__clang__) || defined(__GNUC__))
// Trampoline for {call_cold}. It expects the address of the function to call
// in %rax and leaves the argument registers untouched, so the callee sees them
// exactly as the caller set them up. All general-purpose registers that the
// callee is allowed to clobber are saved before the call and restored
// afterwards; the callee's return value (if any) is discarded.
asm(".globl v8_base_call_cold\n"
    "v8_base_call_cold:\n"
    "  push %rbp\n"
    "  mov %rsp, %rbp\n"
    // Push all non-clobbered registers, except for callee-saved ones. The
    // compiler does not even know that it is executing a call, so we can not
    // clobber any register, not the registers holding the function address or
    // the arguments.
    "  push %rax\n"
    "  push %rcx\n"
    "  push %rdx\n"
#ifndef V8_OS_WIN
    // %rsi and %rdi are callee-saved on Windows.
    "  push %rsi\n"
    "  push %rdi\n"
#endif  // !V8_OS_WIN
    "  push %r8\n"
    "  push %r9\n"
    "  push %r10\n"
    "  push %r11\n"
    // Push %r15, then stash the current %rsp in it and align %rsp to 16 bytes.
    // %r15 is callee-saved, so the value will still be there after the call.
    "  push %r15\n"
    "  mov %rsp, %r15\n"
    "  and $-16, %rsp\n"
#ifdef V8_OS_WIN
    // The Windows x64 calling convention requires the caller to reserve 32
    // bytes of shadow (home) space above the return address. Without it, the
    // callee could overwrite the registers we just saved. 32 is a multiple of
    // 16, so the alignment established above is kept; the space is released
    // implicitly when %rsp is restored from %r15 below.
    "  sub $32, %rsp\n"
#endif  // V8_OS_WIN
    // Now execute the actual call.
    "  call *%rax\n"
    // Restore the potentially unaligned %rsp.
    "  mov %r15, %rsp\n"
    // Pop the previously pushed registers in reverse order. We have no return
    // value, so %rax is simply restored to the value it had on entry.
    "  pop %r15\n"
    "  pop %r11\n"
    "  pop %r10\n"
    "  pop %r9\n"
    "  pop %r8\n"
#ifndef V8_OS_WIN
    "  pop %rdi\n"
    "  pop %rsi\n"
#endif  // !V8_OS_WIN
    "  pop %rdx\n"
    "  pop %rcx\n"
    "  pop %rax\n"
    // Leave the frame and return.
    "  pop %rbp\n"
    "  ret");
#endif
}  // namespace v8::base

View File

@ -1,103 +0,0 @@
// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_BASE_CALL_COLD_H_
#define V8_BASE_CALL_COLD_H_
#include <type_traits>
#include "include/v8config.h"
namespace v8::base {
// Use {call_cold} for calls in hot paths that are unlikely to be executed. The
// compiler will not know that this executes a call, so it will not spill and
// reload any general-purpose registers around it (i.e. this behaves like a
// custom calling convention where all general-purpose registers are
// callee-save). Vector, MMX and x87 registers are declared as clobbered
// instead of being preserved.
// Executing the call will be significantly slower than without going through
// {call_cold}, as all registers will have to be spilled and an indirect call
// is being executed.
// As a start, we added support for GCC and clang on x64. Other platforms can
// be added later, as needed.

// Compile-time check whether calling {Fn} with parameters {Ps...} is supported
// by {call_cold}.
template <typename Fn, typename... Ps>
constexpr bool IsValidForCallCold =
    // The callable object must be convertible to a function pointer (e.g. a
    // capture-less lambda).
    std::is_convertible_v<Fn, void (*)(Ps...)> &&
    // All parameters must be integral or pointers (support for floating-point
    // arguments is not implemented).
    (... && (std::is_integral_v<Ps> || std::is_pointer_v<Ps>));

// Do not use V8_CC_GNU, as this is not defined for clang on Windows. Explicitly
// check for GCC or clang.
#if V8_HOST_ARCH_X64 && (defined(__clang__) || defined(__GNUC__))
// Register holding the address of the function to call; the trampoline
// executes an indirect call through it.
#define REG_FN "rax"
// Define the parameter registers. Windows uses a different calling convention
// than other OSes.
#ifdef V8_OS_WIN
#define REG_P1 "rcx"
#define REG_P2 "rdx"
#define REG_P3 "r8"
#else
#define REG_P1 "rdi"
#define REG_P2 "rsi"
#define REG_P3 "rdx"
#endif
// We clobber all xmm, mmx and x87 registers so we do not have to spill and
// reload them.
#define CLOBBER                                                             \
  "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",   \
      "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",  \
      "xmm15", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "st", \
      "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
#define V8_CALL_COLD_ASM                                                  \
  "sub $128, %%rsp\n"        /* Bump %rsp by 128, beyond the red zone. */ \
  "call v8_base_call_cold\n" /* Call our trampoline. */                   \
  "add $128, %%rsp"          /* Restore previous %rsp. */

// 1 Parameter, no result.
// Calls {fn(p1)} via the {v8_base_call_cold} trampoline. {fn} must convert to
// {void (*)(P1)}; the function pointer and the argument are pinned to the
// registers the trampoline and the platform calling convention expect.
template <typename P1, typename Fn>
V8_INLINE void call_cold(const Fn& fn, P1 p1) {
  static_assert(IsValidForCallCold<Fn, P1>);
  using FnPtr = void (*)(P1);
  register FnPtr fn_reg asm(REG_FN) = fn;
  register P1 p1_reg asm(REG_P1) = p1;
  asm(V8_CALL_COLD_ASM : : "r"(fn_reg), "r"(p1_reg) : CLOBBER);
}

// 3 Parameters, no result.
// Calls {fn(p1, p2, p3)} via the {v8_base_call_cold} trampoline. {fn} must
// convert to {void (*)(P1, P2, P3)}.
template <typename P1, typename P2, typename P3, typename Fn>
V8_INLINE void call_cold(const Fn& fn, P1 p1, P2 p2, P3 p3) {
  static_assert(IsValidForCallCold<Fn, P1, P2, P3>);
  using FnPtr = void (*)(P1, P2, P3);
  register FnPtr fn_reg asm(REG_FN) = fn;
  register P1 p1_reg asm(REG_P1) = p1;
  register P2 p2_reg asm(REG_P2) = p2;
  register P3 p3_reg asm(REG_P3) = p3;
  asm(V8_CALL_COLD_ASM
      :
      : "r"(fn_reg), "r"(p1_reg), "r"(p2_reg), "r"(p3_reg)
      : CLOBBER);
}
#else
// Architectures without special support just execute the call directly.
template <typename... Ps, typename Fn>
V8_INLINE void call_cold(const Fn& fn, Ps... ps) {
  static_assert(IsValidForCallCold<Fn, Ps...>);
  fn(ps...);
}
#endif

// Do not leak the helper macros into files including this header.
#undef REG_FN
#undef REG_P1
#undef REG_P2
#undef REG_P3
#undef CLOBBER
#undef V8_CALL_COLD_ASM
}  // namespace v8::base
#endif // V8_BASE_CALL_COLD_H_

View File

@ -42,7 +42,6 @@
#include <memory>
#include <vector>
#include "src/base/call_cold.h"
#include "src/base/export-template.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
@ -2641,10 +2640,7 @@ void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
class EnsureSpace {
public:
explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
if (V8_UNLIKELY(assembler_->buffer_overflow())) {
base::call_cold([](Assembler* assembler) { assembler->GrowBuffer(); },
assembler_);
}
if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
#ifdef DEBUG
space_before_ = assembler_->available_space();
#endif

View File

@ -16,7 +16,6 @@
#include <optional>
#include "src/base/call_cold.h"
#include "src/base/small-vector.h"
#include "src/base/strings.h"
#include "src/base/v8-fallthrough.h"
@ -1257,9 +1256,7 @@ class FastZoneVector {
V8_INLINE void EnsureMoreCapacity(int slots_needed, Zone* zone) {
if (V8_LIKELY(capacity_end_ - end_ >= slots_needed)) return;
base::call_cold([](FastZoneVector* vec, int slots_needed,
Zone* zone) { vec->Grow(slots_needed, zone); },
this, slots_needed, zone);
Grow(slots_needed, zone);
}
private: