[wasm-simd] Support returning Simd128 on caller's stack
In Liftoff, we were missing kS128 cases when loading from and storing to the stack.

For the x64 and ARM64 instruction selectors, the calculation of reverse_slot was incorrect for 128-bit values:

- reverse_slot += 2 (the size of a 128-bit value, 2 pointers) happened before the Peek was emitted
- this copies from slot -2 into the register
- but the value starts at slot -1; it occupies slots -1 and -2
- we end up copying slot -2 (the most significant half) into the register, and also slot -3, which is where rsi (the Wasm instance address) was stored
- so the test ends up with a different result every time

The calculation of reverse_slot is changed to follow how ia32 and ARM do it: emit the Peek for the first slot the value occupies, then skip past the value. All four ports now share one scheme:

- start with reverse_slot = 1 in the instruction selector
- emit the Peek with the current reverse_slot, which the code-generator uses as a 1-based slot index
- after emitting the Peek operation, reverse_slot += the value's size in pointers (2 for a 128-bit value on 64-bit targets)

The fixes for x64 and ARM64 are in both the instruction-selector and the code-generator.

ia32 and ARM didn't support writing kSimd128 values yet; only a check was missing in the code-generator, so add that in. For ARM, the codegen is more involved: vld1 does not support addressing with an offset, so we have to do the addition into a scratch register first.

Also add a test for returning multiple v128 values. V128 is not exposed to JavaScript, so we use a Wasm function call, followed by an involved chain of extract-lanes, returning 10 i32 values which we verify. The test extracts the first and last lane of each i32x4 value in order to catch bugs where we write or read a wrong stack slot (off by 1).

The simd-scalar-lowering for kCall only handled a single s128 return; we adopt the way i64-lowering handles kCall, so that it can now handle any kind of call with s128 in the descriptor.

Bug: v8:10794
Bug: chromium:1115230
Change-Id: I2ccdd55f6292bc5794be78053b27e14da8cce70e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2355189
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69439}
parent a575608555
commit 360c9294a8
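Before the per-file diffs, a standalone sketch may make the off-by-one concrete. This is illustrative C++ only (the names and the two-result setup are mine, not V8's); it simulates the reverse_slot bookkeeping described above for two s128 results returned on the caller's stack:

#include <cstdio>

int main() {
  const int kSlotsPerS128 = 2;  // what LinkageLocation::GetSizeInPointers() reports
  const int kNumResults = 2;    // two s128 results on the caller's stack

  // Old x64/ARM64 scheme: increment BEFORE emitting the Peek. The first
  // s128 starts at reverse slot 1, but the Peek is emitted with 2, so the
  // 16-byte load is shifted by one slot and also reads the neighbouring
  // slot (which held the Wasm instance address).
  int reverse_slot = 0;
  for (int i = 0; i < kNumResults; ++i) {
    reverse_slot += kSlotsPerS128;
    printf("old: peek at reverse slot %d\n", reverse_slot);  // 2, then 4
  }

  // New scheme shared by all ports: emit first, then skip past the value.
  reverse_slot = 1;
  for (int i = 0; i < kNumResults; ++i) {
    printf("new: peek at reverse slot %d\n", reverse_slot);  // 1, then 3
    reverse_slot += kSlotsPerS128;
  }
  return 0;
}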
src/compiler/backend/arm/code-generator-arm.cc
@@ -1828,17 +1828,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmPeek: {
-      // The incoming value is 0-based, but we need a 1-based value.
-      int reverse_slot = i.InputInt32(0) + 1;
+      int reverse_slot = i.InputInt32(0);
       int offset =
           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
       if (instr->OutputAt(0)->IsFPRegister()) {
         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
         if (op->representation() == MachineRepresentation::kFloat64) {
           __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
           __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          UseScratchRegisterScope temps(tasm());
+          Register scratch = temps.Acquire();
+          __ add(scratch, fp, Operand(offset));
+          __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
+                  NeonMemOperand(scratch));
         }
       } else {
         __ ldr(i.OutputRegister(), MemOperand(fp, offset));
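A note on the hunk above: NEON vld1 only accepts a register-indirect address such as [Rn], unlike vldr, which takes [Rn, #imm], so the code generator must first materialize fp + offset in a scratch register. A rough C++ model of the emitted two-step sequence (my sketch, not V8 code):

#include <cstdint>
#include <cstring>

// Models:
//   add scratch, fp, #offset   ; materialize the address
//   vld1.8 {q_dst}, [scratch]  ; 16-byte load, register-indirect only
void PeekSimd128(const uint8_t* fp, int offset, uint8_t dst[16]) {
  const uint8_t* scratch = fp + offset;  // __ add(scratch, fp, Operand(offset))
  std::memcpy(dst, scratch, 16);         // __ vld1(Neon8, {dst}, [scratch])
}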
src/compiler/backend/arm/instruction-selector-arm.cc
@@ -1667,7 +1667,7 @@ void InstructionSelector::EmitPrepareResults(
     Node* node) {
   ArmOperandGenerator g(this);
 
-  int reverse_slot = 0;
+  int reverse_slot = 1;
   for (PushParameter output : *results) {
     if (!output.location.IsCallerFrameSlot()) continue;
     // Skip any alignment holes in nodes.
@@ -1677,6 +1677,8 @@ void InstructionSelector::EmitPrepareResults(
       MarkAsFloat32(output.node);
     } else if (output.location.GetType() == MachineType::Float64()) {
       MarkAsFloat64(output.node);
+    } else if (output.location.GetType() == MachineType::Simd128()) {
+      MarkAsSimd128(output.node);
     }
     Emit(kArmPeek, g.DefineAsRegister(output.node),
          g.UseImmediate(reverse_slot));
src/compiler/backend/arm64/code-generator-arm64.cc
@@ -1358,9 +1358,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
         if (op->representation() == MachineRepresentation::kFloat64) {
           __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
           __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ Ldr(i.OutputSimd128Register(), MemOperand(fp, offset));
         }
       } else {
         __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -1909,22 +1909,25 @@ void InstructionSelector::EmitPrepareResults(
     Node* node) {
   Arm64OperandGenerator g(this);
 
-  int reverse_slot = 0;
+  int reverse_slot = 1;
   for (PushParameter output : *results) {
     if (!output.location.IsCallerFrameSlot()) continue;
-    reverse_slot += output.location.GetSizeInPointers();
     // Skip any alignment holes in nodes.
-    if (output.node == nullptr) continue;
-    DCHECK(!call_descriptor->IsCFunctionCall());
-
-    if (output.location.GetType() == MachineType::Float32()) {
-      MarkAsFloat32(output.node);
-    } else if (output.location.GetType() == MachineType::Float64()) {
-      MarkAsFloat64(output.node);
-    }
-
-    Emit(kArm64Peek, g.DefineAsRegister(output.node),
-         g.UseImmediate(reverse_slot));
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+
+      Emit(kArm64Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
   }
 }
 
src/compiler/backend/ia32/code-generator-ia32.cc
@@ -1845,16 +1845,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32Peek: {
-      int reverse_slot = i.InputInt32(0) + 1;
+      int reverse_slot = i.InputInt32(0);
       int offset =
           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
       if (instr->OutputAt(0)->IsFPRegister()) {
         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
         if (op->representation() == MachineRepresentation::kFloat64) {
           __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
           __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
         }
       } else {
         __ mov(i.OutputRegister(), Operand(ebp, offset));
src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -1269,7 +1269,7 @@ void InstructionSelector::EmitPrepareResults(
     Node* node) {
   IA32OperandGenerator g(this);
 
-  int reverse_slot = 0;
+  int reverse_slot = 1;
   for (PushParameter output : *results) {
     if (!output.location.IsCallerFrameSlot()) continue;
     // Skip any alignment holes in nodes.
@@ -1279,6 +1279,8 @@ void InstructionSelector::EmitPrepareResults(
       MarkAsFloat32(output.node);
     } else if (output.location.GetType() == MachineType::Float64()) {
       MarkAsFloat64(output.node);
+    } else if (output.location.GetType() == MachineType::Simd128()) {
+      MarkAsSimd128(output.node);
     }
     Emit(kIA32Peek, g.DefineAsRegister(output.node),
          g.UseImmediate(reverse_slot));
src/compiler/backend/x64/code-generator-x64.cc
@@ -2357,9 +2357,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
         if (op->representation() == MachineRepresentation::kFloat64) {
           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
         }
       } else {
         __ movq(i.OutputRegister(), Operand(rbp, offset));
src/compiler/backend/x64/instruction-selector-x64.cc
@@ -1685,21 +1685,24 @@ void InstructionSelector::EmitPrepareResults(
     Node* node) {
   X64OperandGenerator g(this);
 
-  int reverse_slot = 0;
+  int reverse_slot = 1;
   for (PushParameter output : *results) {
     if (!output.location.IsCallerFrameSlot()) continue;
-    reverse_slot += output.location.GetSizeInPointers();
     // Skip any alignment holes in nodes.
-    if (output.node == nullptr) continue;
-    DCHECK(!call_descriptor->IsCFunctionCall());
-    if (output.location.GetType() == MachineType::Float32()) {
-      MarkAsFloat32(output.node);
-    } else if (output.location.GetType() == MachineType::Float64()) {
-      MarkAsFloat64(output.node);
-    }
-    InstructionOperand result = g.DefineAsRegister(output.node);
-    InstructionOperand slot = g.UseImmediate(reverse_slot);
-    Emit(kX64Peek, 1, &result, 1, &slot);
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      InstructionOperand result = g.DefineAsRegister(output.node);
+      InstructionOperand slot = g.UseImmediate(reverse_slot);
+      Emit(kX64Peek, 1, &result, 1, &slot);
+    }
+    reverse_slot += output.location.GetSizeInPointers();
   }
 }
 
src/compiler/simd-scalar-lowering.cc
@@ -405,6 +405,24 @@ static int GetReturnCountAfterLoweringSimd128(
   return result;
 }
 
+int GetReturnIndexAfterLowering(const CallDescriptor* call_descriptor,
+                                int old_index) {
+  int result = old_index;
+  for (int i = 0; i < old_index; ++i) {
+    if (call_descriptor->GetReturnType(i).representation() ==
+        MachineRepresentation::kSimd128) {
+      result += kNumLanes32 - 1;
+    }
+  }
+  return result;
+}
+
+static int GetReturnCountAfterLoweringSimd128(
+    const CallDescriptor* call_descriptor) {
+  return GetReturnIndexAfterLowering(
+      call_descriptor, static_cast<int>(call_descriptor->ReturnCount()));
+}
+
 int SimdScalarLowering::NumLanes(SimdType type) {
   int num_lanes = 0;
   if (type == SimdType::kFloat64x2 || type == SimdType::kInt64x2) {
@@ -1182,23 +1200,46 @@ void SimdScalarLowering::LowerNode(Node* node) {
       // TODO(turbofan): Make wasm code const-correct wrt. CallDescriptor.
       auto call_descriptor =
           const_cast<CallDescriptor*>(CallDescriptorOf(node->op()));
-      if (DefaultLowering(node) ||
-          (call_descriptor->ReturnCount() == 1 &&
-           call_descriptor->GetReturnType(0) == MachineType::Simd128())) {
+      bool returns_require_lowering =
+          GetReturnCountAfterLoweringSimd128(call_descriptor) !=
+          static_cast<int>(call_descriptor->ReturnCount());
+
+      if (DefaultLowering(node) || returns_require_lowering) {
         // We have to adjust the call descriptor.
         const Operator* op = common()->Call(
             GetI32WasmCallDescriptorForSimd(zone(), call_descriptor));
         NodeProperties::ChangeOp(node, op);
       }
-      if (call_descriptor->ReturnCount() == 1 &&
-          call_descriptor->GetReturnType(0) == MachineType::Simd128()) {
-        // We access the additional return values through projections.
-        Node* rep_node[kNumLanes32];
-        for (int i = 0; i < kNumLanes32; ++i) {
-          rep_node[i] =
-              graph()->NewNode(common()->Projection(i), node, graph()->start());
+
+      if (!returns_require_lowering) {
+        break;
+      }
+
+      size_t return_arity = call_descriptor->ReturnCount();
+      ZoneVector<Node*> projections(return_arity, zone());
+      NodeProperties::CollectValueProjections(node, projections.data(),
+                                              return_arity);
+
+      for (size_t old_index = 0, new_index = 0; old_index < return_arity;
+           ++old_index, ++new_index) {
+        Node* use_node = projections[old_index];
+        DCHECK_EQ(ProjectionIndexOf(use_node->op()), old_index);
+        DCHECK_EQ(GetReturnIndexAfterLowering(call_descriptor,
+                                              static_cast<int>(old_index)),
+                  static_cast<int>(new_index));
+        if (new_index != old_index) {
+          NodeProperties::ChangeOp(use_node, common()->Projection(new_index));
+        }
+        if (call_descriptor->GetReturnType(old_index).representation() ==
+            MachineRepresentation::kSimd128) {
+          Node* rep_node[kNumLanes32];
+          for (int i = 0; i < kNumLanes32; ++i) {
+            rep_node[i] = graph()->NewNode(common()->Projection(new_index + i),
+                                           node, graph()->start());
+          }
+          ReplaceNode(use_node, rep_node, kNumLanes32);
+          new_index += kNumLanes32 - 1;
         }
-        ReplaceNode(node, rep_node, kNumLanes32);
       }
       break;
     }
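The GetReturnIndexAfterLowering helper added above remaps return indices: each s128 return expands into kNumLanes32 (= 4) word-sized returns, shifting every later index. A self-contained sketch of the same computation (illustrative only; the bool array stands in for querying CallDescriptor::GetReturnType):

#include <cstdio>

constexpr int kNumLanes32 = 4;

int GetReturnIndexAfterLowering(const bool* return_is_simd128, int old_index) {
  int result = old_index;
  for (int i = 0; i < old_index; ++i) {
    if (return_is_simd128[i]) result += kNumLanes32 - 1;
  }
  return result;
}

int main() {
  // Hypothetical descriptor returning (i32, s128, i32).
  const bool returns[] = {false, true, false};
  for (int i = 0; i < 3; ++i) {
    // Prints: 0 -> 0, 1 -> 1 (covering lowered indices 1..4), 2 -> 5.
    printf("return %d -> lowered index %d\n", i,
           GetReturnIndexAfterLowering(returns, i));
  }
  return 0;
}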
src/wasm/baseline/ia32/liftoff-assembler-ia32.h
@@ -79,6 +79,9 @@ inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
     case ValueType::kF64:
       assm->movsd(dst, src.fp());
       break;
+    case ValueType::kS128:
+      assm->movdqu(dst, src.fp());
+      break;
     default:
       UNREACHABLE();
   }
src/wasm/baseline/x64/liftoff-assembler-x64.h
@@ -93,6 +93,9 @@ inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
     case ValueType::kF64:
      assm->Movsd(dst, src.fp());
      break;
+    case ValueType::kS128:
+      assm->Movdqu(dst, src.fp());
+      break;
     default:
       UNREACHABLE();
   }
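Both Liftoff hunks use the unaligned 16-byte moves (movdqu/Movdqu), presumably because spill slots are only guaranteed pointer alignment rather than 16-byte alignment. A minimal SSE2 illustration of why the unaligned form is the safe choice (my example, not V8 code):

#include <emmintrin.h>  // SSE2
#include <cstdint>
#include <cstdio>

int main() {
  alignas(16) uint8_t stack[32] = {0};
  __m128i v = _mm_set1_epi32(42);

  // A spill slot may start at an 8-byte boundary, e.g. stack + 8.
  // movdqu (_mm_storeu_si128) tolerates that; movdqa (_mm_store_si128)
  // requires 16-byte alignment and would fault here.
  _mm_storeu_si128(reinterpret_cast<__m128i*>(stack + 8), v);
  __m128i r = _mm_loadu_si128(reinterpret_cast<const __m128i*>(stack + 8));
  printf("%d\n", _mm_cvtsi128_si32(r));  // prints 42
  return 0;
}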
test/mjsunit/wasm/multi-value-simd.js (new file, 92 lines)
@@ -0,0 +1,92 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --experimental-wasm-simd --experimental-wasm-mv
+
+load("test/mjsunit/wasm/wasm-module-builder.js");
+
+(function MultiReturnS128Test() {
+  print("MultiReturnS128Test");
+  // Most backends only support 2 fp return registers, so the third v128
+  // onwards will be written to the caller's stack slots.
+  let builder = new WasmModuleBuilder();
+  let sig_v_sssss = builder.addType(
+      makeSig([], [kWasmS128, kWasmS128, kWasmS128, kWasmS128, kWasmS128]));
+  let sig_iiiiiiiiii_v = builder.addType(
+      makeSig([], [kWasmI32, kWasmI32, kWasmI32, kWasmI32, kWasmI32, kWasmI32,
+                   kWasmI32, kWasmI32, kWasmI32, kWasmI32]));
+
+  let callee = builder.addFunction("callee", sig_v_sssss)
+      .addBody([
+        kExprI32Const, 0,
+        kSimdPrefix, kExprI32x4Splat,
+        kExprI32Const, 1,
+        kSimdPrefix, kExprI32x4Splat,
+        kExprI32Const, 2,
+        kSimdPrefix, kExprI32x4Splat,
+        kExprI32Const, 3,
+        kSimdPrefix, kExprI32x4Splat,
+        kExprI32Const, 4,
+        kSimdPrefix, kExprI32x4Splat,
+        kExprReturn]);
+  // For each v128 on the stack, we return the first and last lane. This helps
+  // catch bugs with reading/writing the wrong stack slots.
+  builder.addFunction("main", sig_iiiiiiiiii_v)
+      .addLocals({"i32_count": 10, "s128_count": 1})
+      .addBody([
+        kExprCallFunction, callee.index,
+
+        kExprLocalTee, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 0,
+        kExprLocalSet, 0,
+        kExprLocalGet, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 3,
+        kExprLocalSet, 1,
+
+        kExprLocalTee, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 0,
+        kExprLocalSet, 2,
+        kExprLocalGet, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 3,
+        kExprLocalSet, 3,
+
+        kExprLocalTee, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 0,
+        kExprLocalSet, 4,
+        kExprLocalGet, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 3,
+        kExprLocalSet, 5,
+
+        kExprLocalTee, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 0,
+        kExprLocalSet, 6,
+        kExprLocalGet, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 3,
+        kExprLocalSet, 7,
+
+        kExprLocalTee, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 0,
+        kExprLocalSet, 8,
+        kExprLocalGet, 10,
+        kSimdPrefix, kExprI32x4ExtractLane, 3,
+        kExprLocalSet, 9,
+
+        // Return all the stored locals.
+        kExprLocalGet, 0,
+        kExprLocalGet, 1,
+        kExprLocalGet, 2,
+        kExprLocalGet, 3,
+        kExprLocalGet, 4,
+        kExprLocalGet, 5,
+        kExprLocalGet, 6,
+        kExprLocalGet, 7,
+        kExprLocalGet, 8,
+        kExprLocalGet, 9,
+      ])
+      .exportAs("main");
+
+  let module = new WebAssembly.Module(builder.toBuffer());
+  let instance = new WebAssembly.Instance(module);
+  assertEquals(instance.exports.main(), [4, 4, 3, 3, 2, 2, 1, 1, 0, 0]);
+})();
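Why the expected array is [4, 4, 3, 3, 2, 2, 1, 1, 0, 0]: the callee leaves splat(0) through splat(4) on the Wasm value stack, and main's local.tee chain consumes them from the top, so the last return value (4) is extracted first; both extracted lanes of a splat are equal. Illustrative C++, not part of the test:

#include <cstdio>

int main() {
  int expected[10];
  int n = 0;
  for (int v = 4; v >= 0; --v) {  // values are popped from the top of the stack
    expected[n++] = v;            // lane 0
    expected[n++] = v;            // lane 3
  }
  for (int i = 0; i < 10; ++i) printf("%d ", expected[i]);  // 4 4 3 3 2 2 1 1 0 0
  printf("\n");
  return 0;
}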