[builtins] Remap embedded builtins into the heap on ARM64 macOS

For short builtin calls, the builtins are copied on the heap when they
cannot be put close enough to be in range of relative calls. This costs
memory, as the embedded builtins are part of the binary, and mapped from
the binary, and as a consequence shared with all running processes.

Rather than copying the memory, we can remap it at a different address,
avoiding the memory cost. This CL does that, on ARM64 macOS only for
now.

This saves at least ~1.4MiB of memory per V8 process. See below the
output of vmmap <PID>:

[...]
Memory Tag 255             7408308000-740833c000   [  208K   144K   144K     0K] r-x/rwx SM=ZER
Memory Tag 255             740833c000-7408340000   [   16K     0K     0K     0K] ---/rwx SM=ZER
Memory Tag 255             7408344000-7408348000   [   16K     0K     0K     0K] ---/rwx SM=ZER
Memory Tag 255             7408348000-740837c000   [  208K   144K   144K     0K] r-x/rwx SM=ZER
Memory Tag 255             740837c000-740fe80000   [123.0M     0K     0K     0K] ---/rwx SM=ZER
mapped file                740fe80000-740ffe4000   [ 1424K  1328K     0K     0K] r-x/rwx SM=COW          ...pp/Contents/Frameworks/Chromium Framework.framework/Versions/102.0.4958.0/Chromium Framework
Memory Tag 255             740ffe4000-7410000000   [  112K     0K     0K     0K] ---/rwx SM=ZER

The "208K" regions are 256KiB code pages, minus the header and guard
pages, meaning that they are code chunks. The "mapped file" region is the
remapped builtins, showing that they aren't copied, but remapped from
the binary.

Bug: chromium:1298417
Change-Id: Ia30a43e671726d01450a7db0ecb7777b34763053
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3553006
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Benoit Lize <lizeb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79716}
This commit is contained in:
Benoit Lize 2022-03-30 16:51:21 +02:00 committed by V8 LUCI CQ
parent 5bc471f47f
commit da1a2d127d
8 changed files with 118 additions and 2 deletions

View File

@ -87,6 +87,33 @@ void* OS::AllocateShared(void* hint, size_t size, MemoryPermission access,
return reinterpret_cast<void*>(addr);
}
// static
bool OS::RemapPages(const void* address, size_t size, void* new_address,
MemoryPermission access) {
DCHECK(IsAligned(reinterpret_cast<uintptr_t>(address), AllocatePageSize()));
DCHECK(
IsAligned(reinterpret_cast<uintptr_t>(new_address), AllocatePageSize()));
DCHECK(IsAligned(size, AllocatePageSize()));
vm_prot_t cur_protection = GetVMProtFromMemoryPermission(access);
vm_prot_t max_protection;
// Asks the kernel to remap *on top* of an existing mapping, rather than
// copying the data.
int flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
mach_vm_address_t target = reinterpret_cast<mach_vm_address_t>(new_address);
kern_return_t ret =
mach_vm_remap(mach_task_self(), &target, size, 0, flags, mach_task_self(),
reinterpret_cast<mach_vm_address_t>(address), FALSE,
&cur_protection, &max_protection, VM_INHERIT_NONE);
if (ret != KERN_SUCCESS) return false;
// Did we get the address we wanted?
CHECK_EQ(new_address, reinterpret_cast<void*>(target));
return true;
}
bool AddressSpaceReservation::AllocateShared(void* address, size_t size,
OS::MemoryPermission access,
PlatformSharedMemoryHandle handle,

View File

@ -33,6 +33,7 @@
#include "src/base/optional.h"
#include "src/base/platform/mutex.h"
#include "src/base/platform/semaphore.h"
#include "testing/gtest/include/gtest/gtest_prod.h" // nogncheck
#if V8_OS_QNX
#include "src/base/qnx-math.h"
@ -313,6 +314,28 @@ class V8_BASE_EXPORT OS {
[[noreturn]] static void ExitProcess(int exit_code);
// Compile-time query: true when this platform can map an already-mapped
// range of memory at a second location in the address space. Only macOS
// builds (V8_OS_MACOS) report support.
V8_WARN_UNUSED_RESULT static constexpr bool IsRemapPageSupported() {
#if defined(V8_OS_MACOS)
  constexpr bool kSupported = true;
#else
  constexpr bool kSupported = false;
#endif
  return kSupported;
}
// Remaps already-mapped memory at |new_address| with |access| permissions.
//
// |address| is the start of the existing (source) mapping and |new_address|
// is where it should additionally appear.
//
// Both the source and target addresses must be page-aligned, and |size| must
// be a multiple of the system page size. If there is already memory mapped
// at the target address, it is replaced by the new mapping.
//
// Must not be called if |IsRemapPageSupported()| returns false.
// Returns true for success.
V8_WARN_UNUSED_RESULT static bool RemapPages(const void* address, size_t size,
void* new_address,
MemoryPermission access);
private:
// These classes use the private memory management API below.
friend class AddressSpaceReservation;
@ -321,6 +344,7 @@ class V8_BASE_EXPORT OS {
friend class v8::base::PageAllocator;
friend class v8::base::VirtualAddressSpace;
friend class v8::base::VirtualAddressSubspace;
FRIEND_TEST(OS, RemapPages);
static size_t AllocatePageSize();

View File

@ -191,6 +191,7 @@ uint8_t* CodeRange::RemapEmbeddedBuiltins(Isolate* isolate,
}
const size_t kAllocatePageSize = page_allocator()->AllocatePageSize();
const size_t kCommitPageSize = page_allocator()->CommitPageSize();
size_t allocate_code_size =
RoundUp(embedded_blob_code_size, kAllocatePageSize);
@ -207,8 +208,31 @@ uint8_t* CodeRange::RemapEmbeddedBuiltins(Isolate* isolate,
isolate, "Can't allocate space for re-embedded builtins");
}
size_t code_size =
RoundUp(embedded_blob_code_size, page_allocator()->CommitPageSize());
size_t code_size = RoundUp(embedded_blob_code_size, kCommitPageSize);
if constexpr (base::OS::IsRemapPageSupported()) {
// By default, the embedded builtins are not remapped, but copied. This
// costs memory, since builtins become private dirty anonymous memory,
// rather than shared, clean, file-backed memory for the embedded version.
// If the OS supports it, we can remap the builtins *on top* of the space
// allocated in the code range, making the "copy" shared, clean, file-backed
// memory, and thus saving sizeof(builtins).
//
// Builtins should start at a page boundary, see
// platform-embedded-file-writer-mac.cc. If it's not the case (e.g. if the
// embedded builtins are not coming from the binary), fall back to copying.
if (IsAligned(reinterpret_cast<uintptr_t>(embedded_blob_code),
kCommitPageSize)) {
bool ok = base::OS::RemapPages(embedded_blob_code, code_size,
embedded_blob_code_copy,
base::OS::MemoryPermission::kReadExecute);
if (ok) {
embedded_blob_code_copy_.store(embedded_blob_code_copy,
std::memory_order_release);
return embedded_blob_code_copy;
}
}
}
if (!page_allocator()->SetPermissions(embedded_blob_code_copy, code_size,
PageAllocator::kReadWrite)) {

View File

@ -163,6 +163,7 @@ void EmbeddedFileWriter::WriteCodeSection(PlatformEmbeddedFileWriterBase* w,
++builtin) {
WriteBuiltin(w, blob, builtin);
}
w->PaddingAfterCode();
w->Newline();
}

View File

@ -58,6 +58,7 @@ class PlatformEmbeddedFileWriterBase {
virtual void SectionRoData() = 0;
virtual void AlignToCodeAlignment() = 0;
// Hook invoked by EmbeddedFileWriter::WriteCodeSection() after the builtins
// have been written. The default implementation emits nothing; platform
// writers may override it to append padding after the code.
virtual void PaddingAfterCode() {}
virtual void AlignToDataAlignment() = 0;
virtual void DeclareUint32(const char* name, uint32_t value) = 0;

View File

@ -69,12 +69,25 @@ void PlatformEmbeddedFileWriterMac::AlignToCodeAlignment() {
// don't cross 64-byte boundaries.
STATIC_ASSERT(64 >= kCodeAlignment);
fprintf(fp_, ".balign 64\n");
#elif V8_TARGET_ARCH_ARM64
// ARM64 macOS has a 16kiB page size. Since we want to remap it on the heap,
// needs to be page-aligned.
fprintf(fp_, ".balign 16384\n");
#else
STATIC_ASSERT(32 >= kCodeAlignment);
fprintf(fp_, ".balign 32\n");
#endif
}
void PlatformEmbeddedFileWriterMac::PaddingAfterCode() {
#if V8_TARGET_ARCH_ARM64
  // Pages on ARM64 macOS are 16KiB. Builtins get remapped onto the heap, so
  // pad the code out to the next page boundary: that way the tail of the last
  // page cannot contain anything dangerous.
  fputs(".balign 16384\n", fp_);
#endif
}
void PlatformEmbeddedFileWriterMac::AlignToDataAlignment() {
STATIC_ASSERT(8 >= Code::kMetadataAlignment);
fprintf(fp_, ".balign 8\n");

View File

@ -26,6 +26,7 @@ class PlatformEmbeddedFileWriterMac : public PlatformEmbeddedFileWriterBase {
void SectionRoData() override;
void AlignToCodeAlignment() override;
void PaddingAfterCode() override;
void AlignToDataAlignment() override;
void DeclareUint32(const char* name, uint32_t value) override;

View File

@ -4,6 +4,8 @@
#include "src/base/platform/platform.h"
#include <cstring>
#include "testing/gtest/include/gtest/gtest.h"
#if V8_OS_WIN
@ -24,6 +26,29 @@ TEST(OS, GetCurrentProcessId) {
#endif
}
// Checks that OS::RemapPages() makes the contents of one page visible at a
// second address. Does nothing on platforms without remap support.
TEST(OS, RemapPages) {
  // Keep the whole body inside the `if constexpr` branch, exactly as before,
  // so that nothing below is exercised when remapping is unsupported.
  if constexpr (OS::IsRemapPageSupported()) {
    constexpr OS::MemoryPermission kReadWrite =
        OS::MemoryPermission::kReadWrite;
    constexpr OS::MemoryPermission kReadExecute =
        OS::MemoryPermission::kReadExecute;

    // A single allocation page is enough; it also serves as the alignment.
    const size_t size = OS::AllocatePageSize();

    // Source page, filled with a recognisable byte pattern.
    void* data = OS::Allocate(nullptr, size, size, kReadWrite);
    ASSERT_TRUE(data);
    memset(data, 0xab, size);

    // Destination page that the source gets remapped on top of.
    void* remapped_data = OS::Allocate(nullptr, size, size, kReadWrite);
    ASSERT_TRUE(remapped_data);

    EXPECT_TRUE(OS::RemapPages(data, size, remapped_data, kReadExecute));

    // After remapping, both addresses must expose the same bytes.
    EXPECT_EQ(0, memcmp(remapped_data, data, size));

    OS::Free(data, size);
    OS::Free(remapped_data, size);
  }
}
namespace {