[compiler] Widen optimization for external reference loads
Turbofan applies the following optimization to external reference loads on arm64 and x64: if the root-relative offset to an external reference's address is known to be constant (and the root register has been initialized), calculate the external reference as |kRootRegister + <offset>| instead of loading it from the external reference table.

There are two main cases to consider:

1. External references to arbitrary addresses in the native address space, e.g. libc_memcpy. These kinds of external references have a fixed address within the same running process, but may (and likely will) change between processes (e.g. mksnapshot and later Chromium), and the root-relative offset is different for each Isolate within the same process. These kinds of external references can be optimized as above when *not* generating code which will later be serialized, and *not* generating isolate-independent code.

2. External references to addresses within the fixed-size region of the Isolate (essentially: within IsolateData). Since these move with the Isolate, their root-relative offset is guaranteed to be constant at all times. The optimization can always be applied to these cases as long as the root register has been initialized.

Prior to this CL, we only recognized and optimized for case 1. This CL adds support for case 2.

An example of improved code generated due to this CL:

Before:
  // r13 is the kRootRegister on x64.
  // 0x3010 is the root-relative offset to Isolate::context_address.
  leaq rdx, [r13+0x3010]
  movq r8, [rdx]

After:
  movq rdx, [r13+0x3010]

Bug: v8:9534
Change-Id: Idfcca751e98a56c0e5ead2c701c12a677df75399
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1748727
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63158}
This commit is contained in:
parent
54eca65873
commit
33e1a6e944
@ -564,9 +564,9 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
|
||||
// is used when we merge a conversion into the load.
|
||||
outputs[0] = g.DefineAsRegister(output == nullptr ? node : output);
|
||||
|
||||
if (selector->CanAddressRelativeToRootsRegister()) {
|
||||
ExternalReferenceMatcher m(base);
|
||||
if (m.HasValue() && g.IsIntegerConstant(index)) {
|
||||
if (m.HasValue() && g.IsIntegerConstant(index) &&
|
||||
selector->CanAddressRelativeToRootsRegister(m.Value())) {
|
||||
ptrdiff_t const delta =
|
||||
g.GetIntegerConstantValue(index) +
|
||||
TurboAssemblerBase::RootRegisterOffsetForExternalReference(
|
||||
@ -577,12 +577,10 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
|
||||
if (is_int32(delta)) {
|
||||
inputs[0] = g.UseImmediate(static_cast<int32_t>(delta));
|
||||
opcode |= AddressingModeField::encode(kMode_Root);
|
||||
selector->Emit(opcode, arraysize(outputs), outputs, input_count,
|
||||
inputs);
|
||||
selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inputs[0] = g.UseRegister(base);
|
||||
|
||||
|
@ -421,9 +421,27 @@ void InstructionSelector::SetEffectLevel(Node* node, int effect_level) {
|
||||
effect_level_[id] = effect_level;
|
||||
}
|
||||
|
||||
bool InstructionSelector::CanAddressRelativeToRootsRegister() const {
|
||||
return enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing &&
|
||||
CanUseRootsRegister();
|
||||
bool InstructionSelector::CanAddressRelativeToRootsRegister(
|
||||
const ExternalReference& reference) const {
|
||||
// There are three things to consider here:
|
||||
// 1. CanUseRootsRegister: Is kRootRegister initialized?
|
||||
const bool root_register_is_available_and_initialized = CanUseRootsRegister();
|
||||
if (!root_register_is_available_and_initialized) return false;
|
||||
|
||||
// 2. enable_roots_relative_addressing_: Can we address everything on the heap
|
||||
// through the root register, i.e. are root-relative addresses to arbitrary
|
||||
// addresses guaranteed not to change between code generation and
|
||||
// execution?
|
||||
const bool all_root_relative_offsets_are_constant =
|
||||
(enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing);
|
||||
if (all_root_relative_offsets_are_constant) return true;
|
||||
|
||||
// 3. IsAddressableThroughRootRegister: Is the target address guaranteed to
|
||||
// have a fixed root-relative offset? If so, we can ignore 2.
|
||||
const bool this_root_relative_offset_is_constant =
|
||||
TurboAssemblerBase::IsAddressableThroughRootRegister(isolate(),
|
||||
reference);
|
||||
return this_root_relative_offset_is_constant;
|
||||
}
|
||||
|
||||
bool InstructionSelector::CanUseRootsRegister() const {
|
||||
|
@ -446,7 +446,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
|
||||
|
||||
// Check if we can generate loads and stores of ExternalConstants relative
|
||||
// to the roots register.
|
||||
bool CanAddressRelativeToRootsRegister() const;
|
||||
bool CanAddressRelativeToRootsRegister(
|
||||
const ExternalReference& reference) const;
|
||||
// Check if we can use the roots register to access GC roots.
|
||||
bool CanUseRootsRegister() const;
|
||||
|
||||
|
@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator {
|
||||
AddressingMode GetEffectiveAddressMemoryOperand(Node* operand,
|
||||
InstructionOperand inputs[],
|
||||
size_t* input_count) {
|
||||
if (selector()->CanAddressRelativeToRootsRegister()) {
|
||||
{
|
||||
LoadMatcher<ExternalReferenceMatcher> m(operand);
|
||||
if (m.index().HasValue() && m.object().HasValue()) {
|
||||
if (m.index().HasValue() && m.object().HasValue() &&
|
||||
selector()->CanAddressRelativeToRootsRegister(m.object().Value())) {
|
||||
ptrdiff_t const delta =
|
||||
m.index().Value() +
|
||||
TurboAssemblerBase::RootRegisterOffsetForExternalReference(
|
||||
|
Loading…
Reference in New Issue
Block a user