[compiler][ia32][arm] Fix pushing of arguments

- Fixes some incorrect assumptions about padding in the
  code generation. Slots may have apparent extra padding
  when allocation fragments go unused.
- Reworks 32 bit push code to simplify skipping slot gaps
  when 'push' instructions are used.
- Adds an ElementSizeInPointers function on machine
  representations.

Bug: chromium:1171759,v8:9198

Change-Id: I029e300fa9c306d7e35344576fd1c68857cf2bca
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2660379
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72502}
This commit is contained in:
Bill Budge 2021-02-02 15:46:15 -08:00 committed by Commit Bot
parent c781da6c8b
commit 8798b3ef4e
4 changed files with 159 additions and 64 deletions

View File

@ -324,6 +324,12 @@ V8_EXPORT_PRIVATE inline constexpr int ElementSizeInBytes(
return 1 << ElementSizeLog2Of(rep);
}
// Returns the number of pointer-sized stack slots needed to hold one
// element of |rep|, rounding sub-pointer-sized representations up to a
// full slot.
V8_EXPORT_PRIVATE inline constexpr int ElementSizeInPointers(
    MachineRepresentation rep) {
  const int byte_size = ElementSizeInBytes(rep);
  return byte_size / kSystemPointerSize +
         (byte_size % kSystemPointerSize != 0 ? 1 : 0);
}
// Converts representation to bit for representation masks.
V8_EXPORT_PRIVATE inline constexpr int RepresentationBit(
MachineRepresentation rep) {

View File

@ -1763,48 +1763,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmPush: {
int stack_decrement = i.InputInt32(0);
if (instr->InputAt(1)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
switch (op->representation()) {
case MachineRepresentation::kFloat32:
// 1 slot values are never padded.
DCHECK_EQ(stack_decrement, kSystemPointerSize);
__ vpush(i.InputFloatRegister(1));
frame_access_state()->IncreaseSPDelta(1);
break;
case MachineRepresentation::kFloat64:
// 2 slot values have up to 1 slot of padding.
DCHECK_GE(stack_decrement, kDoubleSize);
if (stack_decrement > kDoubleSize) {
DCHECK_EQ(stack_decrement, kDoubleSize + kSystemPointerSize);
__ AllocateStackSpace(kSystemPointerSize);
}
__ vpush(i.InputDoubleRegister(1));
frame_access_state()->IncreaseSPDelta(stack_decrement /
kSystemPointerSize);
break;
case MachineRepresentation::kSimd128: {
// 4 slot values have up to 3 slots of padding.
DCHECK_GE(stack_decrement, kSimd128Size);
if (stack_decrement > kSimd128Size) {
int padding = stack_decrement - kSimd128Size;
DCHECK_LT(padding, kSimd128Size);
__ AllocateStackSpace(padding);
}
__ vpush(i.InputSimd128Register(1));
frame_access_state()->IncreaseSPDelta(stack_decrement /
kSystemPointerSize);
break;
}
default:
UNREACHABLE();
break;
}
} else {
DCHECK_EQ(stack_decrement, kSystemPointerSize);
__ push(i.InputRegister(1));
frame_access_state()->IncreaseSPDelta(1);
int slots = stack_decrement / kSystemPointerSize;
LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
MachineRepresentation rep = op->representation();
int pushed_slots = ElementSizeInPointers(rep);
// Slot-sized arguments are never padded but there may be a gap if
// the slot allocator reclaimed other padding slots. Adjust the stack
// here to skip any gap.
if (slots > pushed_slots) {
__ AllocateStackSpace(slots - pushed_slots);
}
switch (rep) {
case MachineRepresentation::kFloat32:
__ vpush(i.InputFloatRegister(1));
break;
case MachineRepresentation::kFloat64:
__ vpush(i.InputDoubleRegister(1));
break;
case MachineRepresentation::kSimd128:
__ vpush(i.InputSimd128Register(1));
break;
default:
__ push(i.InputRegister(1));
break;
}
frame_access_state()->IncreaseSPDelta(slots);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}

View File

@ -1783,28 +1783,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32PushFloat32: {
// 1 slot values are never padded.
DCHECK_EQ(i.InputInt32(0), kFloatSize);
int stack_decrement = i.InputInt32(0);
if (instr->InputAt(1)->IsFPRegister()) {
__ AllocateStackSpace(kFloatSize);
__ AllocateStackSpace(stack_decrement);
__ Movss(Operand(esp, 0), i.InputDoubleRegister(1));
} else if (HasImmediateInput(instr, 1)) {
__ AllocateStackSpace(kFloatSize);
__ Move(kScratchDoubleReg, i.InputFloat32(1));
__ AllocateStackSpace(stack_decrement);
__ Movss(Operand(esp, 0), kScratchDoubleReg);
} else {
__ Movss(kScratchDoubleReg, i.InputOperand(1));
__ AllocateStackSpace(kFloatSize);
__ AllocateStackSpace(stack_decrement);
__ Movss(Operand(esp, 0), kScratchDoubleReg);
}
int slots = kFloatSize / kSystemPointerSize;
int slots = stack_decrement / kSystemPointerSize;
frame_access_state()->IncreaseSPDelta(slots);
break;
}
case kIA32PushFloat64: {
int stack_decrement = i.InputInt32(0);
// 2 slot values have up to 1 slot of padding.
DCHECK_GE(stack_decrement, kDoubleSize);
if (instr->InputAt(1)->IsFPRegister()) {
__ AllocateStackSpace(stack_decrement);
__ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
@ -1823,14 +1820,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kIA32PushSimd128: {
int stack_decrement = i.InputInt32(0);
// 4 slot values have up to 3 slots of padding.
DCHECK_GE(stack_decrement, kSimd128Size);
if (instr->InputAt(1)->IsFPRegister()) {
__ AllocateStackSpace(stack_decrement);
// TODO(bbudge) Use Movaps when slots are aligned.
__ Movups(Operand(esp, 0), i.InputSimd128Register(1));
} else {
__ Movups(kScratchDoubleReg, i.InputOperand(1));
__ AllocateStackSpace(stack_decrement);
// TODO(bbudge) Use Movaps when slots are aligned.
__ Movups(Operand(esp, 0), kScratchDoubleReg);
}
int slots = stack_decrement / kSystemPointerSize;
@ -1839,21 +1836,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kIA32Push: {
// TODO(bbudge) Merge the push opcodes into a single one, as on x64.
// 1 slot values are never padded.
DCHECK_EQ(i.InputInt32(0), kSystemPointerSize);
if (HasAddressingMode(instr)) {
size_t index = 1;
Operand operand = i.MemoryOperand(&index);
__ push(operand);
} else if (instr->InputAt(1)->IsFPRegister()) {
__ AllocateStackSpace(kSystemPointerSize);
int stack_decrement = i.InputInt32(0);
if (instr->InputAt(1)->IsFPRegister()) {
__ AllocateStackSpace(stack_decrement);
__ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
} else if (HasImmediateInput(instr, 1)) {
__ push(i.InputImmediate(1));
} else {
__ push(i.InputOperand(1));
// Slot-sized arguments are never padded but there may be a gap if
// the slot allocator reclaimed other padding slots. Adjust the stack
// here to skip any gap.
if (stack_decrement > kSystemPointerSize) {
__ AllocateStackSpace(stack_decrement - kSystemPointerSize);
}
if (HasAddressingMode(instr)) {
size_t index = 1;
Operand operand = i.MemoryOperand(&index);
__ push(operand);
} else if (HasImmediateInput(instr, 1)) {
__ push(i.InputImmediate(1));
} else {
__ push(i.InputOperand(1));
}
}
frame_access_state()->IncreaseSPDelta(1);
int slots = stack_decrement / kSystemPointerSize;
frame_access_state()->IncreaseSPDelta(slots);
break;
}
case kIA32Poke: {

View File

@ -10,13 +10,14 @@
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
#include "src/execution/isolate.h"
#include "src/objects/heap-number-inl.h"
#include "src/objects/objects-inl.h"
#include "src/objects/smi.h"
#include "test/cctest/cctest.h"
#include "test/cctest/compiler/code-assembler-tester.h"
#include "test/cctest/compiler/codegen-tester.h"
#include "test/cctest/compiler/function-tester.h"
namespace v8 {
@ -1430,6 +1431,106 @@ TEST(AssembleTailCallGap) {
}
}
namespace {
// Allocates a wasm NativeModule large enough to hold |code_size| bytes of
// code, with a single declared function.
std::shared_ptr<wasm::NativeModule> AllocateNativeModule(Isolate* isolate,
                                                         size_t code_size) {
  // Prefer std::make_shared over wrapping a naked 'new': exception-safe and
  // a single allocation.
  auto module = std::make_shared<wasm::WasmModule>();
  module->num_declared_functions = 1;
  // We have to add the code object to a NativeModule, because the
  // WasmCallDescriptor assumes that code is on the native heap and not
  // within a code object.
  auto native_module = isolate->wasm_engine()->NewNativeModule(
      isolate, wasm::WasmFeatures::All(), std::move(module), code_size);
  native_module->SetWireBytes({});
  return native_module;
}
}  // namespace
// Tests that pushing stack arguments correctly adjusts the stack pointer
// when slot gaps (reclaimed padding slots) appear between arguments.
// Regression test for chromium:1171759.
TEST(Regress_1171759) {
v8::internal::AccountingAllocator allocator;
Zone zone(&allocator, ZONE_NAME);
// Create a minimal callee with enough parameters to exhaust parameter
// registers and force some stack parameters.
constexpr int kDoubleParams = 16;
// These are followed by a single-slot parameter and another double, which
// should create a stack gap.
constexpr int kTotalParams = kDoubleParams + 2;
// Signature: one i32 return, kTotalParams parameters.
wasm::FunctionSig::Builder builder(&zone, 1, kTotalParams);
// Make the first parameter slots double width.
for (int i = 0; i < kDoubleParams; i++) {
builder.AddParam(wasm::ValueType::For(MachineType::Float64()));
}
// Allocate a single parameter.
builder.AddParam(wasm::ValueType::For(MachineType::Float32()));
// Allocate a double parameter which should create a stack gap.
builder.AddParam(wasm::ValueType::For(MachineType::Float64()));
builder.AddReturn(wasm::ValueType::For(MachineType::Int32()));
CallDescriptor* desc =
compiler::GetWasmCallDescriptor(&zone, builder.Build());
// Build the callee: it ignores its arguments and returns 0.
HandleAndZoneScope handles(kCompressGraphZone);
RawMachineAssembler m(handles.main_isolate(),
handles.main_zone()->New<Graph>(handles.main_zone()),
desc, MachineType::PointerRepresentation(),
InstructionSelector::SupportedMachineOperatorFlags());
m.Return(m.Int32Constant(0));
OptimizedCompilationInfo info(ArrayVector("testing"), handles.main_zone(),
CodeKind::WASM_FUNCTION);
Handle<Code> code =
Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()), m.ExportForTest())
.ToHandleChecked();
// Move the code onto the native heap, as the wasm call descriptor expects
// (see AllocateNativeModule above for why).
std::shared_ptr<wasm::NativeModule> module = AllocateNativeModule(
handles.main_isolate(), code->raw_instruction_size());
// NOTE(review): the WasmCodeRefScope presumably keeps the added code alive
// for the duration of the call below — confirm.
wasm::WasmCodeRefScope wasm_code_ref_scope;
byte* code_start = module->AddCodeForTesting(code)->instructions().begin();
// Generate a minimal calling function, to push stack arguments.
RawMachineAssemblerTester<int32_t> mt;
Node* function = mt.PointerConstant(code_start);
Node* dummy_context = mt.PointerConstant(nullptr);
Node* double_slot = mt.Float64Constant(0);
Node* single_slot_that_creates_gap = mt.Float32Constant(0);
// Call inputs: target, context, then the kTotalParams arguments
// (16 doubles, one single that creates the gap, one trailing double).
Node* call_inputs[] = {function,
dummy_context,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
double_slot,
single_slot_that_creates_gap,
double_slot};
Node* call =
mt.AddNode(mt.common()->Call(desc), 2 + kTotalParams, call_inputs);
mt.Return(call);
// The callee returns 0; a mis-adjusted stack pointer would crash or corrupt
// the frame rather than return cleanly.
CHECK_EQ(0, mt.Call());
}
} // namespace compiler
} // namespace internal
} // namespace v8