From 208578dc1c9549a56e5a450d8022faf625221b7b Mon Sep 17 00:00:00 2001 From: Ng Zhi An Date: Thu, 15 Oct 2020 15:02:24 -0700 Subject: [PATCH] [wasm-simd][x64] Prototype store lane Store lane extracts a single lane of a simd value and stores it to memory. This implements store lane for x64 and interpreter. Bug: v8:10975 Change-Id: Ida79a03e0fd2bc18f2c06687311936b3cb550ed5 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2473383 Reviewed-by: Bill Budge Reviewed-by: Georg Neis Commit-Queue: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#70586} --- src/compiler/backend/instruction-selector.cc | 5 ++ .../backend/x64/code-generator-x64.cc | 48 ++++++++++++ .../backend/x64/instruction-codes-x64.h | 4 + .../backend/x64/instruction-scheduler-x64.cc | 4 + .../backend/x64/instruction-selector-x64.cc | 35 +++++++++ src/compiler/machine-operator.cc | 61 +++++++++++++++ src/compiler/machine-operator.h | 16 ++++ src/compiler/opcodes.h | 3 +- src/compiler/wasm-compiler.cc | 32 ++++++++ src/compiler/wasm-compiler.h | 3 + src/wasm/baseline/liftoff-compiler.cc | 6 ++ src/wasm/function-body-decoder-impl.h | 34 ++++++++- src/wasm/graph-builder-interface.cc | 7 ++ src/wasm/wasm-opcodes-inl.h | 4 + src/wasm/wasm-opcodes.h | 6 +- test/cctest/wasm/test-run-wasm-simd.cc | 75 +++++++++++++++++++ test/common/wasm/wasm-interpreter.cc | 40 ++++++++++ 17 files changed, 380 insertions(+), 3 deletions(-) diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc index 50b4823665..1436bc0eec 100644 --- a/src/compiler/backend/instruction-selector.cc +++ b/src/compiler/backend/instruction-selector.cc @@ -1424,6 +1424,10 @@ void InstructionSelector::VisitNode(Node* node) { return VisitStore(node); case IrOpcode::kProtectedStore: return VisitProtectedStore(node); + case IrOpcode::kStoreLane: { + MarkAsRepresentation(MachineRepresentation::kSimd128, node); + return VisitStoreLane(node); + } case IrOpcode::kWord32And: return 
MarkAsWord32(node), VisitWord32And(node); case IrOpcode::kWord32Or: @@ -2699,6 +2703,7 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); } #if !V8_TARGET_ARCH_X64 // TODO(v8:10975): Prototyping load lane and store lane. void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); } // TODO(v8:10997) Prototype i64x2.bitmask. void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); } diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc index 30bd2a403e..35581151f2 100644 --- a/src/compiler/backend/x64/code-generator-x64.cc +++ b/src/compiler/backend/x64/code-generator-x64.cc @@ -3235,6 +3235,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1)); break; } + case kX64Pextrb: { + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); + DCHECK(HasAddressingMode(instr)); + DCHECK(!instr->HasOutput()); + + size_t index = 0; + Operand operand = i.MemoryOperand(&index); + __ Pextrb(operand, i.InputSimd128Register(index), + i.InputUint8(index + 1)); + break; + } + case kX64Pextrw: { + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); + DCHECK(HasAddressingMode(instr)); + DCHECK(!instr->HasOutput()); + + size_t index = 0; + Operand operand = i.MemoryOperand(&index); + __ Pextrw(operand, i.InputSimd128Register(index), + i.InputUint8(index + 1)); + break; + } case kX64I8x16ExtractLaneS: { Register dst = i.OutputRegister(); __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1)); @@ -3675,6 +3697,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movq(i.OutputSimd128Register(), i.MemoryOperand()); break; } + case kX64S128Store32Lane: { + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); + size_t index = 0; + Operand operand = 
i.MemoryOperand(&index); + uint8_t lane = i.InputUint8(index + 1); + if (lane == 0) { + __ Movss(operand, i.InputSimd128Register(index)); + } else { + DCHECK_GE(3, lane); + __ Extractps(operand, i.InputSimd128Register(index), lane); + } + break; + } + case kX64S128Store64Lane: { + EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); + size_t index = 0; + Operand operand = i.MemoryOperand(&index); + uint8_t lane = i.InputUint8(index + 1); + if (lane == 0) { + __ Movlps(operand, i.InputSimd128Register(index)); + } else { + DCHECK_EQ(1, lane); + __ Movhps(operand, i.InputSimd128Register(index)); + } + break; + } case kX64S32x4Swizzle: { DCHECK_EQ(2, instr->InputCount()); ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, diff --git a/src/compiler/backend/x64/instruction-codes-x64.h b/src/compiler/backend/x64/instruction-codes-x64.h index 1ea0918ff6..593b0af3d9 100644 --- a/src/compiler/backend/x64/instruction-codes-x64.h +++ b/src/compiler/backend/x64/instruction-codes-x64.h @@ -281,6 +281,8 @@ namespace compiler { V(X64Pinsrw) \ V(X64Pinsrd) \ V(X64Pinsrq) \ + V(X64Pextrb) \ + V(X64Pextrw) \ V(X64I8x16SConvertI16x8) \ V(X64I8x16Neg) \ V(X64I8x16Shl) \ @@ -330,6 +332,8 @@ namespace compiler { V(X64S128Load32x2U) \ V(X64S128LoadMem32Zero) \ V(X64S128LoadMem64Zero) \ + V(X64S128Store32Lane) \ + V(X64S128Store64Lane) \ V(X64S32x4Swizzle) \ V(X64S32x4Shuffle) \ V(X64S16x8Blend) \ diff --git a/src/compiler/backend/x64/instruction-scheduler-x64.cc b/src/compiler/backend/x64/instruction-scheduler-x64.cc index 4225f44e84..a41e1bc8f7 100644 --- a/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -348,8 +348,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64Movb: case kX64Movw: + case kX64S128Store32Lane: + case kX64S128Store64Lane: return kHasSideEffect; + case kX64Pextrb: + case kX64Pextrw: case kX64Movl: if (instr->HasOutput()) { DCHECK_LE(1, instr->InputCount()); 
diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc index 517d39bced..8b4db51529 100644 --- a/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/src/compiler/backend/x64/instruction-selector-x64.cc @@ -7,6 +7,7 @@ #include "src/base/iterator.h" #include "src/base/logging.h" #include "src/base/overflowing-math.h" +#include "src/codegen/machine-type.h" #include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/machine-operator.h" #include "src/compiler/node-matchers.h" @@ -538,6 +539,40 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); } // Architecture supports unaligned access, therefore VisitStore is used instead void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); } +void InstructionSelector::VisitStoreLane(Node* node) { + X64OperandGenerator g(this); + + StoreLaneParameters params = StoreLaneParametersOf(node->op()); + InstructionCode opcode = kArchNop; + if (params.rep == MachineRepresentation::kWord8) { + opcode = kX64Pextrb; + } else if (params.rep == MachineRepresentation::kWord16) { + opcode = kX64Pextrw; + } else if (params.rep == MachineRepresentation::kWord32) { + opcode = kX64S128Store32Lane; + } else if (params.rep == MachineRepresentation::kWord64) { + opcode = kX64S128Store64Lane; + } else { + UNREACHABLE(); + } + + InstructionOperand inputs[4]; + size_t input_count = 0; + AddressingMode addressing_mode = + g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count); + opcode |= AddressingModeField::encode(addressing_mode); + + if (params.kind == LoadKind::kProtected) { + opcode |= MiscField::encode(kMemoryAccessProtected); + } + + InstructionOperand value_operand = g.UseRegister(node->InputAt(2)); + inputs[input_count++] = value_operand; + inputs[input_count++] = g.UseImmediate(params.laneidx); + DCHECK_GE(4, input_count); + Emit(opcode, 0, nullptr, input_count, inputs); +} + // Shared routine for 
multiple binary operations. static void VisitBinop(InstructionSelector* selector, Node* node, InstructionCode opcode, FlagsContinuation* cont) { diff --git a/src/compiler/machine-operator.cc b/src/compiler/machine-operator.cc index 649654010c..488594939e 100644 --- a/src/compiler/machine-operator.cc +++ b/src/compiler/machine-operator.cc @@ -141,6 +141,25 @@ UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf( return OpParameter(op); } +size_t hash_value(StoreLaneParameters params) { + return base::hash_combine(params.kind, params.rep, params.laneidx); +} + +std::ostream& operator<<(std::ostream& os, StoreLaneParameters params) { + return os << "(" << params.kind << " " << params.rep << " " << params.laneidx + << ")"; +} + +StoreLaneParameters const& StoreLaneParametersOf(Operator const* op) { + DCHECK_EQ(IrOpcode::kStoreLane, op->opcode()); + return OpParameter(op); +} + +bool operator==(StoreLaneParameters lhs, StoreLaneParameters rhs) { + return lhs.kind == rhs.kind && lhs.rep == rhs.rep && + lhs.laneidx == rhs.laneidx; +} + bool operator==(StackSlotRepresentation lhs, StackSlotRepresentation rhs) { return lhs.size() == rhs.size() && lhs.alignment() == rhs.alignment(); } @@ -821,6 +840,15 @@ struct ProtectedStoreOperator : public Operator1 { StoreRepresentation(rep, kNoWriteBarrier)) {} }; +template +struct StoreLaneOperator : public Operator1 { + StoreLaneOperator() + : Operator1(IrOpcode::kStoreLane, + Operator::kNoDeopt | Operator::kNoRead | Operator::kNoThrow, + "StoreLane", 3, 1, 1, 0, 1, 0, + StoreLaneParameters{kind, rep, laneidx}) {} +}; + template struct Word32AtomicLoadOperator : public Operator1 { Word32AtomicLoadOperator() @@ -1200,6 +1228,39 @@ const Operator* MachineOperatorBuilder::LoadLane(LoadKind kind, UNREACHABLE(); } +const Operator* MachineOperatorBuilder::StoreLane(LoadKind kind, + MachineRepresentation rep, + uint8_t laneidx) { +#define STORE_LANE_KIND(REP, KIND, LANEIDX) \ + if (kind == LoadKind::k##KIND && rep == 
MachineRepresentation::REP && \ + laneidx == LANEIDX) { \ + return GetCachedOperator>(); \ + } + +#define STORE_LANE_T(T, LANE) \ + STORE_LANE_KIND(T, Normal, LANE) \ + STORE_LANE_KIND(T, Unaligned, LANE) \ + STORE_LANE_KIND(T, Protected, LANE) + +#define STORE_LANE_WORD8(LANE) STORE_LANE_T(kWord8, LANE) +#define STORE_LANE_WORD16(LANE) STORE_LANE_T(kWord16, LANE) +#define STORE_LANE_WORD32(LANE) STORE_LANE_T(kWord32, LANE) +#define STORE_LANE_WORD64(LANE) STORE_LANE_T(kWord64, LANE) + + // Semicolons unnecessary, but helps formatting. + SIMD_I8x16_LANES(STORE_LANE_WORD8); + SIMD_I16x8_LANES(STORE_LANE_WORD16); + SIMD_I32x4_LANES(STORE_LANE_WORD32); + SIMD_I64x2_LANES(STORE_LANE_WORD64); +#undef STORE_LANE_WORD8 +#undef STORE_LANE_WORD16 +#undef STORE_LANE_WORD32 +#undef STORE_LANE_WORD64 +#undef STORE_LANE_KIND + UNREACHABLE(); +} + const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) { DCHECK_LE(0, size); DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16); diff --git a/src/compiler/machine-operator.h b/src/compiler/machine-operator.h index f4b6b3190d..9eaab56a17 100644 --- a/src/compiler/machine-operator.h +++ b/src/compiler/machine-operator.h @@ -49,6 +49,7 @@ using LoadRepresentation = MachineType; V8_EXPORT_PRIVATE LoadRepresentation LoadRepresentationOf(Operator const*) V8_WARN_UNUSED_RESULT; +// TODO(zhin): This is used by StoreLane too, rename this. 
enum class LoadKind { kNormal, kUnaligned, @@ -135,6 +136,17 @@ using UnalignedStoreRepresentation = MachineRepresentation; UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf( Operator const*) V8_WARN_UNUSED_RESULT; +struct StoreLaneParameters { + LoadKind kind; + MachineRepresentation rep; + uint8_t laneidx; +}; + +V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, StoreLaneParameters); + +V8_EXPORT_PRIVATE StoreLaneParameters const& StoreLaneParametersOf( + Operator const*) V8_WARN_UNUSED_RESULT; + class StackSlotRepresentation final { public: StackSlotRepresentation(int size, int alignment) @@ -801,6 +813,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* Store(StoreRepresentation rep); const Operator* ProtectedStore(MachineRepresentation rep); + // SIMD store: store a specified lane of value into [base + index]. + const Operator* StoreLane(LoadKind kind, MachineRepresentation rep, + uint8_t laneidx); + // unaligned load [base + index] const Operator* UnalignedLoad(LoadRepresentation rep); diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h index 474a09a917..865775c81a 100644 --- a/src/compiler/opcodes.h +++ b/src/compiler/opcodes.h @@ -959,7 +959,8 @@ V(V8x16AnyTrue) \ V(V8x16AllTrue) \ V(LoadTransform) \ - V(LoadLane) + V(LoadLane) \ + V(StoreLane) #define VALUE_OP_LIST(V) \ COMMON_OP_LIST(V) \ diff --git a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc index 1bb91420e8..9f48e0803a 100644 --- a/src/compiler/wasm-compiler.cc +++ b/src/compiler/wasm-compiler.cc @@ -16,6 +16,7 @@ #include "src/codegen/code-factory.h" #include "src/codegen/compiler.h" #include "src/codegen/interface-descriptors.h" +#include "src/codegen/machine-type.h" #include "src/codegen/optimized-compilation-info.h" #include "src/compiler/backend/code-generator.h" #include "src/compiler/backend/instruction-selector.h" @@ -4110,6 +4111,37 @@ Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype, return 
load; } +Node* WasmGraphBuilder::StoreLane(MachineRepresentation mem_rep, Node* index, + uint32_t offset, uint32_t alignment, + Node* val, uint8_t laneidx, + wasm::WasmCodePosition position, + wasm::ValueType type) { + Node* store; + has_simd_ = true; + index = BoundsCheckMem(i::ElementSizeInBytes(mem_rep), index, offset, + position, kCanOmitBoundsCheck); + + MachineType memtype = MachineType(mem_rep, MachineSemantic::kNone); + LoadKind load_kind = GetLoadKind(mcgraph(), memtype, use_trap_handler()); + + // {offset} is validated to be within uintptr_t range in {BoundsCheckMem}. + uintptr_t capped_offset = static_cast(offset); + + store = SetEffect(graph()->NewNode( + mcgraph()->machine()->StoreLane(load_kind, mem_rep, laneidx), + MemBuffer(capped_offset), index, val, effect(), control())); + + if (load_kind == LoadKind::kProtected) { + SetSourcePosition(store, position); + } + + if (FLAG_trace_wasm_memory) { + TraceMemoryOperation(true, mem_rep, index, capped_offset, position); + } + + return store; +} + Node* WasmGraphBuilder::StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset, uint32_t alignment, Node* val, wasm::WasmCodePosition position, diff --git a/src/compiler/wasm-compiler.h b/src/compiler/wasm-compiler.h index d4c2245271..967d75edab 100644 --- a/src/compiler/wasm-compiler.h +++ b/src/compiler/wasm-compiler.h @@ -325,6 +325,9 @@ class WasmGraphBuilder { Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset, uint32_t alignment, Node* val, wasm::WasmCodePosition position, wasm::ValueType type); + Node* StoreLane(MachineRepresentation mem_rep, Node* index, uint32_t offset, + uint32_t alignment, Node* val, uint8_t laneidx, + wasm::WasmCodePosition position, wasm::ValueType type); static void PrintDebugName(Node* node); void set_instance_node(Node* instance_node) { diff --git a/src/wasm/baseline/liftoff-compiler.cc b/src/wasm/baseline/liftoff-compiler.cc index 3c647f840f..61e6d9fc5e 100644 --- 
a/src/wasm/baseline/liftoff-compiler.cc +++ b/src/wasm/baseline/liftoff-compiler.cc @@ -2374,6 +2374,12 @@ class LiftoffCompiler { } } + void StoreLane(FullDecoder* decoder, StoreType type, + const MemoryAccessImmediate& imm, const Value& index, + const Value& value, const uint8_t laneidx) { + unsupported(decoder, kSimd, "simd load lane"); + } + void CurrentMemoryPages(FullDecoder* decoder, Value* result) { Register mem_size = __ GetUnusedRegister(kGpReg, {}).gp(); LOAD_INSTANCE_FIELD(mem_size, MemorySize, kSystemPointerSize); diff --git a/src/wasm/function-body-decoder-impl.h b/src/wasm/function-body-decoder-impl.h index caef9aea91..bfe7ed69ab 100644 --- a/src/wasm/function-body-decoder-impl.h +++ b/src/wasm/function-body-decoder-impl.h @@ -983,6 +983,8 @@ struct ControlBase : public PcForErrors { Value* result) \ F(StoreMem, StoreType type, const MemoryAccessImmediate& imm, \ const Value& index, const Value& value) \ + F(StoreLane, StoreType type, const MemoryAccessImmediate& imm, \ + const Value& index, const Value& value, const uint8_t laneidx) \ F(CurrentMemoryPages, Value* result) \ F(MemoryGrow, const Value& value, Value* result) \ F(CallDirect, const CallFunctionImmediate& imm, \ @@ -1749,7 +1751,11 @@ class WasmDecoder : public Decoder { case kExprS128Load8Lane: case kExprS128Load16Lane: case kExprS128Load32Lane: - case kExprS128Load64Lane: { + case kExprS128Load64Lane: + case kExprS128Store8Lane: + case kExprS128Store16Lane: + case kExprS128Store32Lane: + case kExprS128Store64Lane: { MemoryAccessImmediate imm(decoder, pc + length, UINT32_MAX); // 1 more byte for lane index immediate. 
@@ -3334,6 +3340,20 @@ class WasmFullDecoder : public WasmDecoder { return opcode_length + mem_imm.length + lane_imm.length; } + int DecodeStoreLane(StoreType type, uint32_t opcode_length) { + if (!CheckHasMemory()) return 0; + MemoryAccessImmediate mem_imm(this, this->pc_ + opcode_length, + type.size_log_2()); + SimdLaneImmediate lane_imm( + this, this->pc_ + opcode_length + mem_imm.length); + Value v128 = Pop(1, kWasmS128); + Value index = Pop(0, kWasmI32); + + CALL_INTERFACE_IF_REACHABLE(StoreLane, type, mem_imm, index, v128, + lane_imm.lane); + return opcode_length + mem_imm.length + lane_imm.length; + } + int DecodeStoreMem(StoreType store, int prefix_len = 1) { if (!CheckHasMemory()) return 0; MemoryAccessImmediate imm(this, this->pc_ + prefix_len, @@ -3578,6 +3598,18 @@ class WasmFullDecoder : public WasmDecoder { case kExprS128Load64Lane: { return DecodeLoadLane(LoadType::kI64Load, opcode_length); } + case kExprS128Store8Lane: { + return DecodeStoreLane(StoreType::kI32Store8, opcode_length); + } + case kExprS128Store16Lane: { + return DecodeStoreLane(StoreType::kI32Store16, opcode_length); + } + case kExprS128Store32Lane: { + return DecodeStoreLane(StoreType::kI32Store, opcode_length); + } + case kExprS128Store64Lane: { + return DecodeStoreLane(StoreType::kI64Store, opcode_length); + } case kExprS128Const: return SimdConstOp(opcode_length); default: { diff --git a/src/wasm/graph-builder-interface.cc b/src/wasm/graph-builder-interface.cc index 984dd935e8..f6bb870980 100644 --- a/src/wasm/graph-builder-interface.cc +++ b/src/wasm/graph-builder-interface.cc @@ -450,6 +450,13 @@ class WasmGraphBuildingInterface { value.node, decoder->position(), type.value_type()); } + void StoreLane(FullDecoder* decoder, StoreType type, + const MemoryAccessImmediate& imm, const Value& index, + const Value& value, const uint8_t laneidx) { + BUILD(StoreLane, type.mem_rep(), index.node, imm.offset, imm.alignment, + value.node, laneidx, decoder->position(), type.value_type()); + } 
+ void CurrentMemoryPages(FullDecoder* decoder, Value* result) { result->node = BUILD(CurrentMemoryPages); } diff --git a/src/wasm/wasm-opcodes-inl.h b/src/wasm/wasm-opcodes-inl.h index 5b50f1cd47..76901115ec 100644 --- a/src/wasm/wasm-opcodes-inl.h +++ b/src/wasm/wasm-opcodes-inl.h @@ -313,6 +313,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { CASE_S128_OP(Load16Lane, "load16_lane") CASE_S128_OP(Load32Lane, "load32_lane") CASE_S128_OP(Load64Lane, "load64_lane") + CASE_S128_OP(Store8Lane, "store8_lane") + CASE_S128_OP(Store16Lane, "store16_lane") + CASE_S128_OP(Store32Lane, "store32_lane") + CASE_S128_OP(Store64Lane, "store64_lane") CASE_I8x16_OP(RoundingAverageU, "avgr_u") CASE_I16x8_OP(RoundingAverageU, "avgr_u") diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h index e150b67176..e8908cd6d7 100644 --- a/src/wasm/wasm-opcodes.h +++ b/src/wasm/wasm-opcodes.h @@ -465,7 +465,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, V(S128Load8Lane, 0xfd58, s_is) \ V(S128Load16Lane, 0xfd59, s_is) \ V(S128Load32Lane, 0xfd5a, s_is) \ - V(S128Load64Lane, 0xfd5b, s_is) + V(S128Load64Lane, 0xfd5b, s_is) \ + V(S128Store8Lane, 0xfd5c, v_is) \ + V(S128Store16Lane, 0xfd5d, v_is) \ + V(S128Store32Lane, 0xfd5e, v_is) \ + V(S128Store64Lane, 0xfd5f, v_is) #define FOREACH_SIMD_POST_MVP_OPCODE(V) \ V(I8x16Mul, 0xfd75, s_ss) \ diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index 48614264c9..5f620e25ed 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -3688,6 +3688,81 @@ WASM_SIMD_TEST_NO_LOWERING(S128Load64Lane) { RunLoadLaneTest(execution_tier, lower_simd, kExprS128Load64Lane, kExprI64x2Splat); } + +template +void RunStoreLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd, + WasmOpcode store_op, WasmOpcode splat_op) { + FLAG_SCOPE(wasm_simd_post_mvp); + if (execution_tier == TestExecutionTier::kLiftoff) { + // Not yet 
implemented. + return; + } + + constexpr int lanes = kSimd128Size / sizeof(T); + constexpr int mem_index = 16; // Store to mem index 16 (bytes). + constexpr int splat_value = 33; + WasmOpcode const_op = + splat_op == kExprI64x2Splat ? kExprI64Const : kExprI32Const; + + for (int lane_index = 0; lane_index < lanes; lane_index++) { + WasmRunner r(execution_tier, lower_simd); + T* memory = r.builder().AddMemoryElems(kWasmPageSize / sizeof(T)); + + // Splat splat_value, then store only a single lane of it to memory. + BUILD(r, WASM_I32V(mem_index), const_op, splat_value, + WASM_SIMD_OP(splat_op), WASM_SIMD_OP(store_op), ZERO_ALIGNMENT, + ZERO_OFFSET, lane_index, WASM_ONE); + + r.builder().BlankMemory(); + r.Call(); + + for (int i = 0; i < lanes; i++) { + CHECK_EQ(0, r.builder().ReadMemory(&memory[i])); + } + + CHECK_EQ(splat_value, r.builder().ReadMemory(&memory[lanes])); + + for (int i = lanes + 1; i < lanes * 2; i++) { + CHECK_EQ(0, r.builder().ReadMemory(&memory[i])); + } + } + + // OOB stores + { + WasmRunner r(execution_tier, lower_simd); + r.builder().AddMemoryElems(kWasmPageSize / sizeof(T)); + + BUILD(r, WASM_GET_LOCAL(0), const_op, splat_value, WASM_SIMD_OP(splat_op), + WASM_SIMD_OP(store_op), ZERO_ALIGNMENT, ZERO_OFFSET, 0, WASM_ONE); + + // StoreLane stores sizeof(T) bytes. 
+ for (uint32_t index = kWasmPageSize - (sizeof(T) - 1); + index < kWasmPageSize; ++index) { + CHECK_TRAP(r.Call(index)); + } + } +} + +WASM_SIMD_TEST_NO_LOWERING(S128Store8Lane) { + RunStoreLaneTest(execution_tier, lower_simd, kExprS128Store8Lane, + kExprI8x16Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(S128Store16Lane) { + RunStoreLaneTest(execution_tier, lower_simd, kExprS128Store16Lane, + kExprI16x8Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(S128Store32Lane) { + RunStoreLaneTest(execution_tier, lower_simd, kExprS128Store32Lane, + kExprI32x4Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(S128Store64Lane) { + RunStoreLaneTest(execution_tier, lower_simd, kExprS128Store64Lane, + kExprI64x2Splat); +} + #endif // V8_TARGET_ARCH_X64 #define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \ diff --git a/test/common/wasm/wasm-interpreter.cc b/test/common/wasm/wasm-interpreter.cc index 72991a775a..c3caa9fd71 100644 --- a/test/common/wasm/wasm-interpreter.cc +++ b/test/common/wasm/wasm-interpreter.cc @@ -2692,6 +2692,22 @@ class WasmInterpreterInternals { return DoSimdLoadLane( decoder, code, pc, len, MachineRepresentation::kWord64); } + case kExprS128Store8Lane: { + return DoSimdStoreLane( + decoder, code, pc, len, MachineRepresentation::kWord8); + } + case kExprS128Store16Lane: { + return DoSimdStoreLane( + decoder, code, pc, len, MachineRepresentation::kWord16); + } + case kExprS128Store32Lane: { + return DoSimdStoreLane( + decoder, code, pc, len, MachineRepresentation::kWord32); + } + case kExprS128Store64Lane: { + return DoSimdStoreLane( + decoder, code, pc, len, MachineRepresentation::kWord64); + } default: return false; } @@ -2770,6 +2786,30 @@ class WasmInterpreterInternals { return true; } + template + bool DoSimdStoreLane(Decoder* decoder, InterpreterCode* code, pc_t pc, + int* const len, MachineRepresentation rep) { + // Extract a single lane, push it onto the stack, then store the lane. 
+ s_type value = Pop().to_s128().to(); + + MemoryAccessImmediate imm( + decoder, code->at(pc + *len), sizeof(load_type)); + + SimdLaneImmediate lane_imm( + decoder, code->at(pc + *len + imm.length)); + + Push(WasmValue(value.val[LANE(lane_imm.lane, value)])); + + // ExecuteStore will update the len, so pass it unchanged here. + if (!ExecuteStore(decoder, code, pc, len, rep, + /*prefix_len=*/*len)) { + return false; + } + + *len += lane_imm.length; + return true; + } + // Check if our control stack (frames_) exceeds the limit. Trigger stack // overflow if it does, and unwinding the current frame. // Returns true if execution can continue, false if the stack was fully