From 33e1a6e944d1e570ad46ce25fa276d89bdca4116 Mon Sep 17 00:00:00 2001 From: Jakob Gruber Date: Mon, 12 Aug 2019 10:23:40 +0200 Subject: [PATCH] [compiler] Widen optimization for external reference loads Turbofan applies the following optimization to external reference loads on arm64 and x64: if the root-relative offset to an external reference's address is known to be constant (and the root register has been initialized), calculate the external reference as |kRootRegister + <offset>| instead of loading it from the external reference table. There are two main cases to consider: 1. External references to arbitrary addresses in the native address space, e.g. libc_memcpy. These kinds of external references have a fixed address within the same running process, but may (and likely will) change between processes (e.g.: mksnapshot and later chromium), and the root-relative offset is different for each Isolate within the same process. These kinds of external references can be optimized as above when *not* generating code which will later be serialized, and *not* generating isolate-independent code. 2. External references to addresses within the fixed-size region of the Isolate (essentially: within IsolateData). Since these move with the Isolate, their root-relative offset is guaranteed to be constant at all times. The optimization can always be applied to these cases as long as the root register has been initialized. Prior to this CL, we only recognized and optimized for case 1. This CL additionally adds support for 2. An example of improved code generated due to this CL: Before: // r13 is the kRootRegister on x64. // 0x3010 is the root-relative offset to Isolate::context_address. 
leaq rdx, [r13+0x3010] movq r8, [rdx] After: movq rdx, [r13+0x3010] Bug: v8:9534 Change-Id: Idfcca751e98a56c0e5ead2c701c12a677df75399 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1748727 Commit-Queue: Jakob Gruber Auto-Submit: Jakob Gruber Reviewed-by: Tobias Tebbi Cr-Commit-Position: refs/heads/master@{#63158} --- .../arm64/instruction-selector-arm64.cc | 32 +++++++++---------- src/compiler/backend/instruction-selector.cc | 24 ++++++++++++-- src/compiler/backend/instruction-selector.h | 3 +- .../backend/x64/instruction-selector-x64.cc | 5 +-- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc index 0b27d33cbe..c346f030ec 100644 --- a/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -564,23 +564,21 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, // is used when we merge a conversion into the load. outputs[0] = g.DefineAsRegister(output == nullptr ? node : output); - if (selector->CanAddressRelativeToRootsRegister()) { - ExternalReferenceMatcher m(base); - if (m.HasValue() && g.IsIntegerConstant(index)) { - ptrdiff_t const delta = - g.GetIntegerConstantValue(index) + - TurboAssemblerBase::RootRegisterOffsetForExternalReference( - selector->isolate(), m.Value()); - input_count = 1; - // Check that the delta is a 32-bit integer due to the limitations of - // immediate operands. 
- if (is_int32(delta)) { - inputs[0] = g.UseImmediate(static_cast<int32_t>(delta)); - opcode |= AddressingModeField::encode(kMode_Root); - selector->Emit(opcode, arraysize(outputs), outputs, input_count, - inputs); - return; - } + ExternalReferenceMatcher m(base); + if (m.HasValue() && g.IsIntegerConstant(index) && + selector->CanAddressRelativeToRootsRegister(m.Value())) { + ptrdiff_t const delta = + g.GetIntegerConstantValue(index) + + TurboAssemblerBase::RootRegisterOffsetForExternalReference( + selector->isolate(), m.Value()); + input_count = 1; + // Check that the delta is a 32-bit integer due to the limitations of + // immediate operands. + if (is_int32(delta)) { + inputs[0] = g.UseImmediate(static_cast<int32_t>(delta)); + opcode |= AddressingModeField::encode(kMode_Root); + selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs); + return; } } diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc index 3d3d004988..37544838b2 100644 --- a/src/compiler/backend/instruction-selector.cc +++ b/src/compiler/backend/instruction-selector.cc @@ -421,9 +421,27 @@ void InstructionSelector::SetEffectLevel(Node* node, int effect_level) { effect_level_[id] = effect_level; } -bool InstructionSelector::CanAddressRelativeToRootsRegister() const { - return enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing && - CanUseRootsRegister(); +bool InstructionSelector::CanAddressRelativeToRootsRegister( + const ExternalReference& reference) const { + // There are three things to consider here: + // 1. CanUseRootsRegister: Is kRootRegister initialized? + const bool root_register_is_available_and_initialized = CanUseRootsRegister(); + if (!root_register_is_available_and_initialized) return false; + + // 2. enable_roots_relative_addressing_: Can we address everything on the heap + // through the root register, i.e. 
are root-relative addresses to arbitrary + // addresses guaranteed not to change between code generation and + // execution? + const bool all_root_relative_offsets_are_constant = + (enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing); + if (all_root_relative_offsets_are_constant) return true; + + // 3. IsAddressableThroughRootRegister: Is the target address guaranteed to + // have a fixed root-relative offset? If so, we can ignore 2. + const bool this_root_relative_offset_is_constant = + TurboAssemblerBase::IsAddressableThroughRootRegister(isolate(), + reference); + return this_root_relative_offset_is_constant; } bool InstructionSelector::CanUseRootsRegister() const { diff --git a/src/compiler/backend/instruction-selector.h b/src/compiler/backend/instruction-selector.h index cd59df400e..870f666450 100644 --- a/src/compiler/backend/instruction-selector.h +++ b/src/compiler/backend/instruction-selector.h @@ -446,7 +446,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final { // Check if we can generate loads and stores of ExternalConstants relative // to the roots register. - bool CanAddressRelativeToRootsRegister() const; + bool CanAddressRelativeToRootsRegister( + const ExternalReference& reference) const; // Check if we can use the roots register to access GC roots. 
bool CanUseRootsRegister() const; diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc index 997e32fcac..6d3ba36f7d 100644 --- a/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/src/compiler/backend/x64/instruction-selector-x64.cc @@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator { AddressingMode GetEffectiveAddressMemoryOperand(Node* operand, InstructionOperand inputs[], size_t* input_count) { - if (selector()->CanAddressRelativeToRootsRegister()) { + { LoadMatcher m(operand); - if (m.index().HasValue() && m.object().HasValue()) { + if (m.index().HasValue() && m.object().HasValue() && + selector()->CanAddressRelativeToRootsRegister(m.object().Value())) { ptrdiff_t const delta = m.index().Value() + TurboAssemblerBase::RootRegisterOffsetForExternalReference(