[wasm-simd][x64] Prototype store lane
Store lane stores a single lane (given by an immediate lane index) of a simd value into memory. This implements store lane for x64 and the interpreter.

Bug: v8:10975
Change-Id: Ida79a03e0fd2bc18f2c06687311936b3cb550ed5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2473383
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70586}
This commit is contained in:
parent 07b3e98036
commit 208578dc1c
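As a semantics note before the diff (an illustrative sketch, not code from this change): a store-lane instruction writes only one lane's worth of bytes to memory, for example for a 32-bit lane:

#include <cstdint>
#include <cstring>

// Scalar model of e.g. v128.store32_lane: write lane `laneidx` (0..3) of a
// 4x32-bit vector to memory; no other memory bytes are touched.
void Store32LaneModel(uint8_t* mem, const int32_t lanes[4], int laneidx) {
  std::memcpy(mem, &lanes[laneidx], sizeof(int32_t));
}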
@@ -1424,6 +1424,10 @@ void InstructionSelector::VisitNode(Node* node) {
      return VisitStore(node);
    case IrOpcode::kProtectedStore:
      return VisitProtectedStore(node);
    case IrOpcode::kStoreLane: {
      MarkAsRepresentation(MachineRepresentation::kSimd128, node);
      return VisitStoreLane(node);
    }
    case IrOpcode::kWord32And:
      return MarkAsWord32(node), VisitWord32And(node);
    case IrOpcode::kWord32Or:
@@ -2699,6 +2703,7 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64
// TODO(v8:10975): Prototyping load lane and store lane.
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }

// TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
@@ -3235,6 +3235,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
      break;
    }
    case kX64Pextrb: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
      DCHECK(HasAddressingMode(instr));
      DCHECK(!instr->HasOutput());

      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      __ Pextrb(operand, i.InputSimd128Register(index),
                i.InputUint8(index + 1));
      break;
    }
    case kX64Pextrw: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
      DCHECK(HasAddressingMode(instr));
      DCHECK(!instr->HasOutput());

      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      __ Pextrw(operand, i.InputSimd128Register(index),
                i.InputUint8(index + 1));
      break;
    }
    case kX64I8x16ExtractLaneS: {
      Register dst = i.OutputRegister();
      __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
@@ -3675,6 +3697,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Movq(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kX64S128Store32Lane: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      uint8_t lane = i.InputUint8(index + 1);
      if (lane == 0) {
        __ Movss(operand, i.InputSimd128Register(index));
      } else {
        DCHECK_GE(3, lane);
        __ Extractps(operand, i.InputSimd128Register(index), lane);
      }
      break;
    }
    case kX64S128Store64Lane: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      uint8_t lane = i.InputUint8(index + 1);
      if (lane == 0) {
        __ Movlps(operand, i.InputSimd128Register(index));
      } else {
        DCHECK_EQ(1, lane);
        __ Movhps(operand, i.InputSimd128Register(index));
      }
      break;
    }
    case kX64S32x4Swizzle: {
      DCHECK_EQ(2, instr->InputCount());
      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
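A note on the lane == 0 special cases above: lane 0 is the low element, so a plain Movss/Movlps store suffices, while higher lanes need the extracting forms (Extractps, Movhps). A minimal stand-alone sketch of the same 32-bit lowering with SSE intrinsics; this is illustrative only, not code from this change, and the helper name store_f32_lane is made up:

#include <smmintrin.h>  // SSE4.1, for _mm_extract_ps
#include <cstdint>
#include <cstring>

// Store 32-bit lane `lane` of `v` to `mem`, mirroring the
// kX64S128Store32Lane lowering above (movss for lane 0, extractps otherwise).
void store_f32_lane(void* mem, __m128 v, int lane) {
  if (lane == 0) {
    _mm_store_ss(static_cast<float*>(mem), v);  // movss m32, xmm
  } else {
    // extractps can target memory directly; the intrinsic returns the bits.
    int32_t bits;
    switch (lane) {  // _mm_extract_ps needs a constant immediate
      case 1: bits = _mm_extract_ps(v, 1); break;
      case 2: bits = _mm_extract_ps(v, 2); break;
      default: bits = _mm_extract_ps(v, 3); break;
    }
    std::memcpy(mem, &bits, sizeof(bits));
  }
}

Pextrb/Pextrw with a memory destination play the same role for the 8- and 16-bit lanes, which is why the instruction selector below reuses those opcodes for store lane.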
@@ -281,6 +281,8 @@ namespace compiler {
  V(X64Pinsrw) \
  V(X64Pinsrd) \
  V(X64Pinsrq) \
  V(X64Pextrb) \
  V(X64Pextrw) \
  V(X64I8x16SConvertI16x8) \
  V(X64I8x16Neg) \
  V(X64I8x16Shl) \
@@ -330,6 +332,8 @@ namespace compiler {
  V(X64S128Load32x2U) \
  V(X64S128LoadMem32Zero) \
  V(X64S128LoadMem64Zero) \
  V(X64S128Store32Lane) \
  V(X64S128Store64Lane) \
  V(X64S32x4Swizzle) \
  V(X64S32x4Shuffle) \
  V(X64S16x8Blend) \
@@ -348,8 +348,12 @@ int InstructionScheduler::GetTargetInstructionFlags(

    case kX64Movb:
    case kX64Movw:
    case kX64S128Store32Lane:
    case kX64S128Store64Lane:
      return kHasSideEffect;

    case kX64Pextrb:
    case kX64Pextrw:
    case kX64Movl:
      if (instr->HasOutput()) {
        DCHECK_LE(1, instr->InputCount());
@@ -7,6 +7,7 @@
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
@@ -538,6 +539,40 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
// Architecture supports unaligned access, therefore VisitStore is used instead
void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }

void InstructionSelector::VisitStoreLane(Node* node) {
  X64OperandGenerator g(this);

  StoreLaneParameters params = StoreLaneParametersOf(node->op());
  InstructionCode opcode = kArchNop;
  if (params.rep == MachineRepresentation::kWord8) {
    opcode = kX64Pextrb;
  } else if (params.rep == MachineRepresentation::kWord16) {
    opcode = kX64Pextrw;
  } else if (params.rep == MachineRepresentation::kWord32) {
    opcode = kX64S128Store32Lane;
  } else if (params.rep == MachineRepresentation::kWord64) {
    opcode = kX64S128Store64Lane;
  } else {
    UNREACHABLE();
  }

  InstructionOperand inputs[4];
  size_t input_count = 0;
  AddressingMode addressing_mode =
      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
  opcode |= AddressingModeField::encode(addressing_mode);

  if (params.kind == LoadKind::kProtected) {
    opcode |= MiscField::encode(kMemoryAccessProtected);
  }

  InstructionOperand value_operand = g.UseRegister(node->InputAt(2));
  inputs[input_count++] = value_operand;
  inputs[input_count++] = g.UseImmediate(params.laneidx);
  DCHECK_GE(4, input_count);
  Emit(opcode, 0, nullptr, input_count, inputs);
}

// Shared routine for multiple binary operations.
static void VisitBinop(InstructionSelector* selector, Node* node,
                       InstructionCode opcode, FlagsContinuation* cont) {
@@ -141,6 +141,25 @@ UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf(
  return OpParameter<UnalignedStoreRepresentation>(op);
}

size_t hash_value(StoreLaneParameters params) {
  return base::hash_combine(params.kind, params.rep, params.laneidx);
}

std::ostream& operator<<(std::ostream& os, StoreLaneParameters params) {
  return os << "(" << params.kind << " " << params.rep << " " << params.laneidx
            << ")";
}

StoreLaneParameters const& StoreLaneParametersOf(Operator const* op) {
  DCHECK_EQ(IrOpcode::kStoreLane, op->opcode());
  return OpParameter<StoreLaneParameters>(op);
}

bool operator==(StoreLaneParameters lhs, StoreLaneParameters rhs) {
  return lhs.kind == rhs.kind && lhs.rep == rhs.rep &&
         lhs.laneidx == rhs.laneidx;
}

bool operator==(StackSlotRepresentation lhs, StackSlotRepresentation rhs) {
  return lhs.size() == rhs.size() && lhs.alignment() == rhs.alignment();
}
@@ -821,6 +840,15 @@ struct ProtectedStoreOperator : public Operator1<StoreRepresentation> {
            StoreRepresentation(rep, kNoWriteBarrier)) {}
};

template <LoadKind kind, MachineRepresentation rep, uint8_t laneidx>
struct StoreLaneOperator : public Operator1<StoreLaneParameters> {
  StoreLaneOperator()
      : Operator1(IrOpcode::kStoreLane,
                  Operator::kNoDeopt | Operator::kNoRead | Operator::kNoThrow,
                  "StoreLane", 3, 1, 1, 0, 1, 0,
                  StoreLaneParameters{kind, rep, laneidx}) {}
};

template <MachineRepresentation rep, MachineSemantic sem>
struct Word32AtomicLoadOperator : public Operator1<LoadRepresentation> {
  Word32AtomicLoadOperator()
@@ -1200,6 +1228,39 @@ const Operator* MachineOperatorBuilder::LoadLane(LoadKind kind,
  UNREACHABLE();
}

const Operator* MachineOperatorBuilder::StoreLane(LoadKind kind,
                                                  MachineRepresentation rep,
                                                  uint8_t laneidx) {
#define STORE_LANE_KIND(REP, KIND, LANEIDX) \
  if (kind == LoadKind::k##KIND && rep == MachineRepresentation::REP && \
      laneidx == LANEIDX) { \
    return GetCachedOperator<StoreLaneOperator< \
        LoadKind::k##KIND, MachineRepresentation::REP, LANEIDX>>(); \
  }

#define STORE_LANE_T(T, LANE) \
  STORE_LANE_KIND(T, Normal, LANE) \
  STORE_LANE_KIND(T, Unaligned, LANE) \
  STORE_LANE_KIND(T, Protected, LANE)

#define STORE_LANE_WORD8(LANE) STORE_LANE_T(kWord8, LANE)
#define STORE_LANE_WORD16(LANE) STORE_LANE_T(kWord16, LANE)
#define STORE_LANE_WORD32(LANE) STORE_LANE_T(kWord32, LANE)
#define STORE_LANE_WORD64(LANE) STORE_LANE_T(kWord64, LANE)

  // Semicolons unnecessary, but helps formatting.
  SIMD_I8x16_LANES(STORE_LANE_WORD8);
  SIMD_I16x8_LANES(STORE_LANE_WORD16);
  SIMD_I32x4_LANES(STORE_LANE_WORD32);
  SIMD_I64x2_LANES(STORE_LANE_WORD64);
#undef STORE_LANE_WORD8
#undef STORE_LANE_WORD16
#undef STORE_LANE_WORD32
#undef STORE_LANE_WORD64
#undef STORE_LANE_KIND
  UNREACHABLE();
}

const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) {
  DCHECK_LE(0, size);
  DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16);
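A reading of the StoreLaneOperator arity above, assuming Operator1 takes (mnemonic, value inputs, effect inputs, control inputs, value outputs, effect outputs, control outputs) in that order:

// "StoreLane", 3, 1, 1, 0, 1, 0 would then mean:
//   3 value inputs: the memory base, the index, and the s128 value whose
//     lane is stored (matching the NewNode call in wasm-compiler below)
//   1 effect input, 1 control input: the store writes memory, so it is
//     chained into the effect chain and anchored to control
//   0 value outputs: a store produces no value
//   1 effect output, 0 control outputs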
@@ -49,6 +49,7 @@ using LoadRepresentation = MachineType;
V8_EXPORT_PRIVATE LoadRepresentation LoadRepresentationOf(Operator const*)
    V8_WARN_UNUSED_RESULT;

// TODO(zhin): This is used by StoreLane too, rename this.
enum class LoadKind {
  kNormal,
  kUnaligned,
@@ -135,6 +136,17 @@ using UnalignedStoreRepresentation = MachineRepresentation;
UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf(
    Operator const*) V8_WARN_UNUSED_RESULT;

struct StoreLaneParameters {
  LoadKind kind;
  MachineRepresentation rep;
  uint8_t laneidx;
};

V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, StoreLaneParameters);

V8_EXPORT_PRIVATE StoreLaneParameters const& StoreLaneParametersOf(
    Operator const*) V8_WARN_UNUSED_RESULT;

class StackSlotRepresentation final {
 public:
  StackSlotRepresentation(int size, int alignment)
@@ -801,6 +813,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
  const Operator* Store(StoreRepresentation rep);
  const Operator* ProtectedStore(MachineRepresentation rep);

  // SIMD store: store a specified lane of value into [base + index].
  const Operator* StoreLane(LoadKind kind, MachineRepresentation rep,
                            uint8_t laneidx);

  // unaligned load [base + index]
  const Operator* UnalignedLoad(LoadRepresentation rep);
@@ -959,7 +959,8 @@
  V(V8x16AnyTrue) \
  V(V8x16AllTrue) \
  V(LoadTransform) \
  V(LoadLane)
  V(LoadLane) \
  V(StoreLane)

#define VALUE_OP_LIST(V) \
  COMMON_OP_LIST(V) \
@@ -16,6 +16,7 @@
#include "src/codegen/code-factory.h"
#include "src/codegen/compiler.h"
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/instruction-selector.h"
@@ -4110,6 +4111,37 @@ Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype,
  return load;
}

Node* WasmGraphBuilder::StoreLane(MachineRepresentation mem_rep, Node* index,
                                  uint32_t offset, uint32_t alignment,
                                  Node* val, uint8_t laneidx,
                                  wasm::WasmCodePosition position,
                                  wasm::ValueType type) {
  Node* store;
  has_simd_ = true;
  index = BoundsCheckMem(i::ElementSizeInBytes(mem_rep), index, offset,
                         position, kCanOmitBoundsCheck);

  MachineType memtype = MachineType(mem_rep, MachineSemantic::kNone);
  LoadKind load_kind = GetLoadKind(mcgraph(), memtype, use_trap_handler());

  // {offset} is validated to be within uintptr_t range in {BoundsCheckMem}.
  uintptr_t capped_offset = static_cast<uintptr_t>(offset);

  store = SetEffect(graph()->NewNode(
      mcgraph()->machine()->StoreLane(load_kind, mem_rep, laneidx),
      MemBuffer(capped_offset), index, val, effect(), control()));

  if (load_kind == LoadKind::kProtected) {
    SetSourcePosition(store, position);
  }

  if (FLAG_trace_wasm_memory) {
    TraceMemoryOperation(true, mem_rep, index, capped_offset, position);
  }

  return store;
}

Node* WasmGraphBuilder::StoreMem(MachineRepresentation mem_rep, Node* index,
                                 uint64_t offset, uint32_t alignment, Node* val,
                                 wasm::WasmCodePosition position,
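A note on the bounds check in StoreLane above: BoundsCheckMem validates an access of ElementSizeInBytes(mem_rep) bytes, i.e. only the stored lane's width, not the full 16 bytes of the vector. A stand-alone model of that check (hypothetical helper, not V8 code):

#include <cstdint>

// A store of `access_size` bytes at address index + offset must end within
// linear memory of `mem_size` bytes; all arithmetic is overflow-checked.
bool StoreLaneInBounds(uint64_t mem_size, uint64_t index, uint64_t offset,
                       uint64_t access_size) {
  uint64_t end_offset = offset + access_size;
  if (end_offset < access_size) return false;  // offset + size overflowed
  if (end_offset > mem_size) return false;
  return index <= mem_size - end_offset;
}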
@@ -325,6 +325,9 @@ class WasmGraphBuilder {
  Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset,
                 uint32_t alignment, Node* val, wasm::WasmCodePosition position,
                 wasm::ValueType type);
  Node* StoreLane(MachineRepresentation mem_rep, Node* index, uint32_t offset,
                  uint32_t alignment, Node* val, uint8_t laneidx,
                  wasm::WasmCodePosition position, wasm::ValueType type);
  static void PrintDebugName(Node* node);

  void set_instance_node(Node* instance_node) {
@@ -2374,6 +2374,12 @@ class LiftoffCompiler {
    }
  }

  void StoreLane(FullDecoder* decoder, StoreType type,
                 const MemoryAccessImmediate<validate>& imm, const Value& index,
                 const Value& value, const uint8_t laneidx) {
    unsupported(decoder, kSimd, "simd load lane");
  }

  void CurrentMemoryPages(FullDecoder* decoder, Value* result) {
    Register mem_size = __ GetUnusedRegister(kGpReg, {}).gp();
    LOAD_INSTANCE_FIELD(mem_size, MemorySize, kSystemPointerSize);
@@ -983,6 +983,8 @@ struct ControlBase : public PcForErrors<validate> {
    Value* result) \
  F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
    const Value& index, const Value& value) \
  F(StoreLane, StoreType type, const MemoryAccessImmediate<validate>& imm, \
    const Value& index, const Value& value, const uint8_t laneidx) \
  F(CurrentMemoryPages, Value* result) \
  F(MemoryGrow, const Value& value, Value* result) \
  F(CallDirect, const CallFunctionImmediate<validate>& imm, \
@@ -1749,7 +1751,11 @@ class WasmDecoder : public Decoder {
      case kExprS128Load8Lane:
      case kExprS128Load16Lane:
      case kExprS128Load32Lane:
      case kExprS128Load64Lane: {
      case kExprS128Load64Lane:
      case kExprS128Store8Lane:
      case kExprS128Store16Lane:
      case kExprS128Store32Lane:
      case kExprS128Store64Lane: {
        MemoryAccessImmediate<validate> imm(decoder, pc + length,
                                            UINT32_MAX);
        // 1 more byte for lane index immediate.
@@ -3334,6 +3340,20 @@ class WasmFullDecoder : public WasmDecoder<validate> {
    return opcode_length + mem_imm.length + lane_imm.length;
  }

  int DecodeStoreLane(StoreType type, uint32_t opcode_length) {
    if (!CheckHasMemory()) return 0;
    MemoryAccessImmediate<validate> mem_imm(this, this->pc_ + opcode_length,
                                            type.size_log_2());
    SimdLaneImmediate<validate> lane_imm(
        this, this->pc_ + opcode_length + mem_imm.length);
    Value v128 = Pop(1, kWasmS128);
    Value index = Pop(0, kWasmI32);

    CALL_INTERFACE_IF_REACHABLE(StoreLane, type, mem_imm, index, v128,
                                lane_imm.lane);
    return opcode_length + mem_imm.length + lane_imm.length;
  }

  int DecodeStoreMem(StoreType store, int prefix_len = 1) {
    if (!CheckHasMemory()) return 0;
    MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len,
@@ -3578,6 +3598,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
      case kExprS128Load64Lane: {
        return DecodeLoadLane(LoadType::kI64Load, opcode_length);
      }
      case kExprS128Store8Lane: {
        return DecodeStoreLane(StoreType::kI32Store8, opcode_length);
      }
      case kExprS128Store16Lane: {
        return DecodeStoreLane(StoreType::kI32Store16, opcode_length);
      }
      case kExprS128Store32Lane: {
        return DecodeStoreLane(StoreType::kI32Store, opcode_length);
      }
      case kExprS128Store64Lane: {
        return DecodeStoreLane(StoreType::kI64Store, opcode_length);
      }
      case kExprS128Const:
        return SimdConstOp(opcode_length);
      default: {
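For context on the two immediates the decoder reads here: in the wire format these instructions are the 0xfd SIMD prefix, a LEB128-encoded opcode, a memarg (alignment then offset), and one trailing lane-index byte, matching the "1 more byte for lane index immediate" comment above. A hand-encoded example under those assumptions, using the opcode values this change adds to wasm-opcodes.h:

#include <cstdint>

// v128.store32_lane with align=0, offset=0, lane=2, hand-encoded.
const uint8_t kStore32LaneBytes[] = {
    0xfd, 0x5e,  // SIMD prefix + S128Store32Lane (LEB-encoded, one byte here)
    0x00,        // memarg: alignment (log2)
    0x00,        // memarg: offset
    0x02,        // lane index immediate (one byte)
};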
@@ -450,6 +450,13 @@ class WasmGraphBuildingInterface {
          value.node, decoder->position(), type.value_type());
  }

  void StoreLane(FullDecoder* decoder, StoreType type,
                 const MemoryAccessImmediate<validate>& imm, const Value& index,
                 const Value& value, const uint8_t laneidx) {
    BUILD(StoreLane, type.mem_rep(), index.node, imm.offset, imm.alignment,
          value.node, laneidx, decoder->position(), type.value_type());
  }

  void CurrentMemoryPages(FullDecoder* decoder, Value* result) {
    result->node = BUILD(CurrentMemoryPages);
  }
@@ -313,6 +313,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
    CASE_S128_OP(Load16Lane, "load16_lane")
    CASE_S128_OP(Load32Lane, "load32_lane")
    CASE_S128_OP(Load64Lane, "load64_lane")
    CASE_S128_OP(Store8Lane, "store8_lane")
    CASE_S128_OP(Store16Lane, "store16_lane")
    CASE_S128_OP(Store32Lane, "store32_lane")
    CASE_S128_OP(Store64Lane, "store64_lane")

    CASE_I8x16_OP(RoundingAverageU, "avgr_u")
    CASE_I16x8_OP(RoundingAverageU, "avgr_u")
@@ -465,7 +465,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
  V(S128Load8Lane, 0xfd58, s_is) \
  V(S128Load16Lane, 0xfd59, s_is) \
  V(S128Load32Lane, 0xfd5a, s_is) \
  V(S128Load64Lane, 0xfd5b, s_is)
  V(S128Load64Lane, 0xfd5b, s_is) \
  V(S128Store8Lane, 0xfd5c, v_is) \
  V(S128Store16Lane, 0xfd5d, v_is) \
  V(S128Store32Lane, 0xfd5e, v_is) \
  V(S128Store64Lane, 0xfd5f, v_is)

#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
  V(I8x16Mul, 0xfd75, s_ss) \
@@ -3688,6 +3688,81 @@ WASM_SIMD_TEST_NO_LOWERING(S128Load64Lane) {
  RunLoadLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Load64Lane,
                           kExprI64x2Splat);
}

template <typename T>
void RunStoreLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode store_op, WasmOpcode splat_op) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  if (execution_tier == TestExecutionTier::kLiftoff) {
    // Not yet implemented.
    return;
  }

  constexpr int lanes = kSimd128Size / sizeof(T);
  constexpr int mem_index = 16;  // Store from mem index 16 (bytes).
  constexpr int splat_value = 33;
  WasmOpcode const_op =
      splat_op == kExprI64x2Splat ? kExprI64Const : kExprI32Const;

  for (int lane_index = 0; lane_index < lanes; lane_index++) {
    WasmRunner<int32_t> r(execution_tier, lower_simd);
    T* memory = r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));

    // Splat splat_value, then only Store and replace a single lane with the
    // splat_value.
    BUILD(r, WASM_I32V(mem_index), const_op, splat_value,
          WASM_SIMD_OP(splat_op), WASM_SIMD_OP(store_op), ZERO_ALIGNMENT,
          ZERO_OFFSET, lane_index, WASM_ONE);

    r.builder().BlankMemory();
    r.Call();

    for (int i = 0; i < lanes; i++) {
      CHECK_EQ(0, r.builder().ReadMemory(&memory[i]));
    }

    CHECK_EQ(splat_value, r.builder().ReadMemory(&memory[lanes]));

    for (int i = lanes + 1; i < lanes * 2; i++) {
      CHECK_EQ(0, r.builder().ReadMemory(&memory[i]));
    }
  }

  // OOB stores
  {
    WasmRunner<int32_t, uint32_t> r(execution_tier, lower_simd);
    r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));

    BUILD(r, WASM_GET_LOCAL(0), const_op, splat_value, WASM_SIMD_OP(splat_op),
          WASM_SIMD_OP(store_op), ZERO_ALIGNMENT, ZERO_OFFSET, 0, WASM_ONE);

    // StoreLane stores sizeof(T) bytes.
    for (uint32_t index = kWasmPageSize - (sizeof(T) - 1);
         index < kWasmPageSize; ++index) {
      CHECK_TRAP(r.Call(index));
    }
  }
}

WASM_SIMD_TEST_NO_LOWERING(S128Store8Lane) {
  RunStoreLaneTest<int8_t>(execution_tier, lower_simd, kExprS128Store8Lane,
                           kExprI8x16Splat);
}

WASM_SIMD_TEST_NO_LOWERING(S128Store16Lane) {
  RunStoreLaneTest<int16_t>(execution_tier, lower_simd, kExprS128Store16Lane,
                            kExprI16x8Splat);
}

WASM_SIMD_TEST_NO_LOWERING(S128Store32Lane) {
  RunStoreLaneTest<int32_t>(execution_tier, lower_simd, kExprS128Store32Lane,
                            kExprI32x4Splat);
}

WASM_SIMD_TEST_NO_LOWERING(S128Store64Lane) {
  RunStoreLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Store64Lane,
                            kExprI64x2Splat);
}

#endif  // V8_TARGET_ARCH_X64

#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
@@ -2692,6 +2692,22 @@ class WasmInterpreterInternals {
        return DoSimdLoadLane<int2, int64_t, int64_t>(
            decoder, code, pc, len, MachineRepresentation::kWord64);
      }
      case kExprS128Store8Lane: {
        return DoSimdStoreLane<int16, int32_t, int8_t>(
            decoder, code, pc, len, MachineRepresentation::kWord8);
      }
      case kExprS128Store16Lane: {
        return DoSimdStoreLane<int8, int32_t, int16_t>(
            decoder, code, pc, len, MachineRepresentation::kWord16);
      }
      case kExprS128Store32Lane: {
        return DoSimdStoreLane<int4, int32_t, int32_t>(
            decoder, code, pc, len, MachineRepresentation::kWord32);
      }
      case kExprS128Store64Lane: {
        return DoSimdStoreLane<int2, int64_t, int64_t>(
            decoder, code, pc, len, MachineRepresentation::kWord64);
      }
      default:
        return false;
    }
@@ -2770,6 +2786,30 @@ class WasmInterpreterInternals {
    return true;
  }

  template <typename s_type, typename result_type, typename load_type>
  bool DoSimdStoreLane(Decoder* decoder, InterpreterCode* code, pc_t pc,
                       int* const len, MachineRepresentation rep) {
    // Extract a single lane, push it onto the stack, then store the lane.
    s_type value = Pop().to_s128().to<s_type>();

    MemoryAccessImmediate<Decoder::kNoValidation> imm(
        decoder, code->at(pc + *len), sizeof(load_type));

    SimdLaneImmediate<Decoder::kNoValidation> lane_imm(
        decoder, code->at(pc + *len + imm.length));

    Push(WasmValue(value.val[LANE(lane_imm.lane, value)]));

    // ExecuteStore will update the len, so pass it unchanged here.
    if (!ExecuteStore<result_type, load_type>(decoder, code, pc, len, rep,
                                              /*prefix_len=*/*len)) {
      return false;
    }

    *len += lane_imm.length;
    return true;
  }

  // Check if our control stack (frames_) exceeds the limit. Trigger stack
  // overflow if it does, and unwinding the current frame.
  // Returns true if execution can continue, false if the stack was fully
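Net effect of the interpreter path above, as a stand-alone sketch: pop the vector, pick out one lane, and store just that scalar. Names here (Lane32x4, StoreLane32) are invented for illustration; the real code goes through Push, ExecuteStore, and the LANE macro for lane ordering:

#include <cstdint>
#include <cstring>

// Invented 4x32 container standing in for the interpreter's int4 shape.
struct Lane32x4 {
  int32_t val[4];
};

// What DoSimdStoreLane<int4, int32_t, int32_t> computes: store lane `laneidx`
// of `v` to memory, writing only that scalar's bytes.
void StoreLane32(uint8_t* mem, const Lane32x4& v, int laneidx) {
  int32_t scalar = v.val[laneidx];
  std::memcpy(mem, &scalar, sizeof(scalar));
}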