[wasm-simd][arm64] Prototype prefetch arm64

Prototype two prefetch instructions (temporal and non-temporal) on arm64
and in the interpreter.

Add prfm to the assembler, and use MiscField to encode the two versions.
Small tweak to the simulator to handle these new instructions (as no-ops).

The implementation in the interpreter just pops the memory index and
does nothing.

Simple test cases are added for these two new instructions, as well as for
a prefetch with an OOB index, which should not trap.
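
To see what "use MiscField to encode the two versions" amounts to, here is a minimal sketch of the encode/decode round trip between instruction selection and code generation. The BitField stand-in, its shift/width, and the kArm64Prfm value are all illustrative assumptions, not V8's actual layout:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for v8::base::BitField.
template <typename T, int kShift, int kSize>
struct BitField {
  static constexpr uint32_t kMask = ((uint32_t{1} << kSize) - 1) << kShift;
  static constexpr uint32_t encode(T value) {
    return static_cast<uint32_t>(value) << kShift;
  }
  static constexpr T decode(uint32_t code) {
    return static_cast<T>((code & kMask) >> kShift);
  }
};

using MiscField = BitField<int, 22, 10>;  // illustrative position and width

int main() {
  const int kArm64Prfm = 0x123;  // hypothetical arch-opcode value
  const int PLDL1STRM = 0x01;
  // Selection packs the prefetch operation next to the arch opcode ...
  uint32_t opcode = kArm64Prfm | MiscField::encode(PLDL1STRM);
  // ... and codegen unpacks it again before calling the assembler's prfm().
  assert(MiscField::decode(opcode) == PLDL1STRM);
  return 0;
}

The real selector ORs MiscField::encode(PLDL1KEEP) or MiscField::encode(PLDL1STRM) into kArm64Prfm, and the code generator recovers it with MiscField::decode(opcode), as the hunks below show.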

Bug: v8:11168
Change-Id: Ieced8081615d07f950d6d4c1128d1bc6a75839fd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2543167
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71353}
Zhi An Ng 2020-11-20 01:06:13 +00:00 committed by Commit Bot
parent 8f71a2675e
commit 7f770766d8
24 changed files with 331 additions and 9 deletions

View File

@@ -1414,6 +1414,37 @@ void Assembler::stlxrh(const Register& rs, const Register& rt,
Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::prfm(int prfop, const MemOperand& addr) {
// Restricted support for prfm, only register offset.
// This can probably be merged with Assembler::LoadStore as we expand support.
DCHECK(addr.IsRegisterOffset());
DCHECK(is_uint5(prfop));
Instr memop = PRFM | prfop | RnSP(addr.base());
Extend ext = addr.extend();
Shift shift = addr.shift();
unsigned shift_amount = addr.shift_amount();
// LSL is encoded in the option field as UXTX.
if (shift == LSL) {
ext = UXTX;
}
// Shifts are encoded in one bit, indicating a left shift by the memory
// access size.
DCHECK((shift_amount == 0) ||
(shift_amount == static_cast<unsigned>(CalcLSDataSize(PRFM))));
Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
}
void Assembler::prfm(PrefetchOperation prfop, const MemOperand& addr) {
// Restricted support for prfm, only register offset.
// This can probably be merged with Assembler::LoadStore as we expand support.
prfm(static_cast<int>(prfop), addr);
}
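
One detail worth calling out in the DCHECK above: the only scaled register offset accepted is a left shift by the access-size log2, which the tests below always exercise as LSL #3. That is consistent with CalcLSDataSize(PRFM) being 3, i.e. prefetch treated as an 8-byte, X-register-sized access (an assumption here, not spelled out in this hunk). A tiny sketch of the accepted shift amounts:

#include <cassert>

int main() {
  const unsigned kPrfmSizeLog2 = 3;  // assumed value of CalcLSDataSize(PRFM)
  const unsigned amounts[] = {0, 3};
  for (unsigned shift_amount : amounts) {
    // Mirrors the DCHECK: no shift, or a shift by exactly the access size.
    assert(shift_amount == 0 || shift_amount == kPrfmSizeLog2);
  }
  return 0;
}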
void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
const VRegister& vm, NEON3DifferentOp vop) {
DCHECK(AreSameFormat(vn, vm));

View File

@@ -880,6 +880,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// Store-release exclusive half-word.
void stlxrh(const Register& rs, const Register& rt, const Register& rn);
void prfm(int prfop, const MemOperand& addr);
void prfm(PrefetchOperation prfop, const MemOperand& addr);
// Move instructions. The default shift of -1 indicates that the move
// instruction will calculate an appropriate 16-bit immediate and left shift
// that is equal to the 64-bit immediate argument. If an explicit left shift

View File

@@ -159,6 +159,9 @@ using float16 = uint16_t;
/* store second source. */ \
V_(Rs, 20, 16, Bits) /* Store-exclusive status */ \
V_(PrefetchMode, 4, 0, Bits) \
V_(PrefetchHint, 4, 3, Bits) \
V_(PrefetchTarget, 2, 1, Bits) \
V_(PrefetchStream, 0, 0, Bits) \
\
/* Common bits */ \
V_(SixtyFourBits, 31, 31, Bits) \
@@ -216,6 +219,7 @@ using float16 = uint16_t;
V_(LSOpc, 23, 22, Bits) \
V_(LSVector, 26, 26, Bits) \
V_(LSSize, 31, 30, Bits) \
V_(ImmPrefetchOperation, 4, 0, Bits) \
\
/* NEON generic fields */ \
V_(NEONQ, 30, 30, Bits) \
@@ -443,6 +447,27 @@ enum SystemRegister {
ImmSystemRegister_offset
};
enum PrefetchOperation {
PLDL1KEEP = 0x00,
PLDL1STRM = 0x01,
PLDL2KEEP = 0x02,
PLDL2STRM = 0x03,
PLDL3KEEP = 0x04,
PLDL3STRM = 0x05,
PLIL1KEEP = 0x08,
PLIL1STRM = 0x09,
PLIL2KEEP = 0x0a,
PLIL2STRM = 0x0b,
PLIL3KEEP = 0x0c,
PLIL3STRM = 0x0d,
PSTL1KEEP = 0x10,
PSTL1STRM = 0x11,
PSTL2KEEP = 0x12,
PSTL2STRM = 0x13,
PSTL3KEEP = 0x14,
PSTL3STRM = 0x15,
};
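
These values line up with the PrefetchHint/PrefetchTarget/PrefetchStream fields declared earlier in this file: bits 4:3 pick the hint (ld/li/st), bits 2:1 the cache level (plus one), and bit 0 selects keep versus strm. A standalone sketch of that decomposition:

#include <cstdio>

int main() {
  const char* hints[] = {"ld", "li", "st"};
  const int ops[] = {0x00, 0x0b, 0x15};  // PLDL1KEEP, PLIL2STRM, PSTL3STRM
  for (int op : ops) {
    int hint = (op >> 3) & 0x3;         // PrefetchHint, bits 4:3
    int level = ((op >> 1) & 0x3) + 1;  // PrefetchTarget, bits 2:1
    const char* policy = (op & 1) ? "strm" : "keep";  // PrefetchStream, bit 0
    printf("0x%02x -> p%sl%d%s\n", op, hints[hint], level, policy);
  }
  // Prints pldl1keep, plil2strm, pstl3strm.
  return 0;
}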
// Instruction enumerations.
//
// These are the masks that define a class of instructions, and the list of

View File

@@ -1442,6 +1442,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kArm64Prfm: {
__ prfm(MiscField::decode(opcode), i.MemoryOperand(0));
break;
}
case kArm64Clz:
__ Clz(i.OutputRegister64(), i.InputRegister64(0));
break;

View File

@@ -93,6 +93,7 @@ namespace compiler {
V(Arm64Poke) \
V(Arm64PokePair) \
V(Arm64Peek) \
V(Arm64Prfm) \
V(Arm64Float32Cmp) \
V(Arm64Float32Add) \
V(Arm64Float32Sub) \

View File

@@ -383,6 +383,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64StrCompressTagged:
case kArm64DmbIsh:
case kArm64DsbIsb:
case kArm64Prfm:
return kHasSideEffect;
case kArm64Word64AtomicLoadUint8:

View File

@@ -617,6 +617,26 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
}
void InstructionSelector::VisitPrefetchTemporal(Node* node) {
Arm64OperandGenerator g(this);
InstructionOperand inputs[2] = {g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1))};
InstructionCode opcode = kArm64Prfm;
opcode |= AddressingModeField::encode(kMode_MRR);
opcode |= MiscField::encode(PLDL1KEEP);
Emit(opcode, 0, nullptr, 2, inputs);
}
void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
Arm64OperandGenerator g(this);
InstructionOperand inputs[2] = {g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1))};
InstructionCode opcode = kArm64Prfm;
opcode |= AddressingModeField::encode(kMode_MRR);
opcode |= MiscField::encode(PLDL1STRM);
Emit(opcode, 0, nullptr, 2, inputs);
}
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode = kArchNop;

View File

@@ -1416,6 +1416,12 @@ void InstructionSelector::VisitNode(Node* node) {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadTransform(node);
}
case IrOpcode::kPrefetchTemporal: {
return VisitPrefetchTemporal(node);
}
case IrOpcode::kPrefetchNonTemporal: {
return VisitPrefetchNonTemporal(node);
}
case IrOpcode::kLoadLane: {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadLane(node);
@@ -2795,6 +2801,12 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
UNIMPLEMENTED();
}
// TODO(v8:11168): Prototyping prefetch.
void InstructionSelector::VisitPrefetchTemporal(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
UNIMPLEMENTED();
}
#endif  // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64

View File

@@ -1160,6 +1160,18 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
MACHINE_PURE_OP_LIST(PURE)
#undef PURE
const Operator* MachineOperatorBuilder::PrefetchTemporal() {
return GetCachedOperator<
CachedOperator<IrOpcode::kPrefetchTemporal, 2, 1, 1, 0, 1, 0>>(
Operator::kNoDeopt | Operator::kNoThrow, "PrefetchTemporal");
}
const Operator* MachineOperatorBuilder::PrefetchNonTemporal() {
return GetCachedOperator<
CachedOperator<IrOpcode::kPrefetchNonTemporal, 2, 1, 1, 0, 1, 0>>(
Operator::kNoDeopt | Operator::kNoThrow, "PrefetchNonTemporal");
}
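
Assuming the usual V8 operator parameter ordering (value, effect, control inputs, then value, effect, control outputs), the <..., 2, 1, 1, 0, 1, 0> template arguments say a prefetch takes two value inputs (buffer base and index), threads one effect and one control edge, and produces no value output, only a new effect. Restated as a sketch:

// Illustrative restatement of <opcode, 2, 1, 1, 0, 1, 0> (assumed ordering).
struct OperatorShape {
  int value_in, effect_in, control_in;
  int value_out, effect_out, control_out;
};
constexpr OperatorShape kPrefetchShape{2, 1, 1, 0, 1, 0};
static_assert(kPrefetchShape.value_out == 0,
              "a prefetch computes nothing; it only has a memory effect");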
const Operator* MachineOperatorBuilder::Load(LoadRepresentation rep) {
#define LOAD(Type) \
if (rep == MachineType::Type()) { \

View File

@@ -828,6 +828,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* LoadTransform(MemoryAccessKind kind,
LoadTransformation transform);
const Operator* PrefetchTemporal();
const Operator* PrefetchNonTemporal();
// SIMD load: replace a specified lane with [base + index].
const Operator* LoadLane(MemoryAccessKind kind, LoadRepresentation rep,
uint8_t laneidx);

View File

@@ -979,6 +979,8 @@
V(V8x16AnyTrue) \
V(V8x16AllTrue) \
V(LoadTransform) \
V(PrefetchTemporal) \
V(PrefetchNonTemporal) \
V(LoadLane) \
V(StoreLane)

View File

@@ -4158,6 +4158,17 @@ Node* WasmGraphBuilder::LoadTransform(wasm::ValueType type, MachineType memtype,
return load;
}
Node* WasmGraphBuilder::Prefetch(Node* index, uint64_t offset,
uint32_t alignment, bool temporal) {
uintptr_t capped_offset = static_cast<uintptr_t>(offset);
const Operator* prefetchOp =
temporal ? mcgraph()->machine()->PrefetchTemporal()
: mcgraph()->machine()->PrefetchNonTemporal();
Node* prefetch = SetEffect(graph()->NewNode(
prefetchOp, MemBuffer(capped_offset), index, effect(), control()));
return prefetch;
}
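
Notice that, unlike the load path below, this builder performs no bounds check: the address is just the memory buffer at capped_offset plus the index, so an out-of-bounds index prefetches a harmless address rather than trapping — the behavior the OOB test in this CL asserts. A minimal sketch of that address arithmetic, with hypothetical names:

#include <cstdint>

// Hypothetical helper mirroring MemBuffer(capped_offset) + index.
uintptr_t PrefetchAddress(uintptr_t mem_start, uint64_t offset,
                          uint64_t index) {
  // No comparison against the memory size: prefetch is never a trap site.
  return mem_start + static_cast<uintptr_t>(offset) +
         static_cast<uintptr_t>(index);
}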
Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype,
Node* index, uint64_t offset,
uint32_t alignment,

View File

@@ -301,6 +301,8 @@ class WasmGraphBuilder {
Node* CurrentMemoryPages();
Node* TraceMemoryOperation(bool is_store, MachineRepresentation, Node* index,
uintptr_t offset, wasm::WasmCodePosition);
Node* Prefetch(Node* index, uint64_t offset, uint32_t alignment,
bool temporal);
Node* LoadMem(wasm::ValueType type, MachineType memtype, Node* index,
uint64_t offset, uint32_t alignment,
wasm::WasmCodePosition position);

View File

@@ -7,6 +7,8 @@
#include <stdio.h>
#include <string.h>
#include <bitset>
#if V8_TARGET_ARCH_ARM64
#include "src/base/platform/platform.h"
@@ -4266,12 +4268,19 @@ int DisassemblingDecoder::SubstitutePrefetchField(Instruction* instr,
USE(format);
int prefetch_mode = instr->PrefetchMode();
const std::array<std::string, 3> hints = {"ld", "li", "st"};
unsigned hint = instr->PrefetchHint();
unsigned target = instr->PrefetchTarget() + 1;
if (hint >= hints.size() || target > 3) {
std::bitset<5> prefetch_mode(instr->ImmPrefetchOperation());
AppendToOutput("#0b%s", prefetch_mode.to_string().c_str());
} else {
const char* ks = (prefetch_mode & 1) ? "strm" : "keep";
AppendToOutput("p%sl%d%s", hints[hint].c_str(), target, ks);
}
return 6;
}
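
The fallback branch above matters because the 5-bit space names only pld/pli/pst at levels L1 through L3: any other value (for example 0b00110, whose target bits decode to level 4) has no mnemonic and is printed as raw bits. A sketch of that path:

#include <bitset>
#include <cstdio>

int main() {
  // 0b00110: hint "ld", but target bits decode to level 4 -> unallocated.
  std::bitset<5> prefetch_mode(0x06);
  printf("prfm #0b%s, [x0, x1]\n", prefetch_mode.to_string().c_str());
  return 0;
}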

View File

@@ -1799,14 +1799,17 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
unsigned addr_reg = instr->Rn();
uintptr_t address = LoadStoreAddress(addr_reg, offset, addrmode);
uintptr_t stack = 0;
LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
{
base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
if (instr->IsLoad()) {
local_monitor_.NotifyLoad();
} else if (instr->IsStore()) {
local_monitor_.NotifyStore();
GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_processor_);
} else {
DCHECK_EQ(op, PRFM);
}
}
@@ -1825,7 +1828,6 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
stack = sp();
}
switch (op) {
// Use _no_log variants to suppress the register trace (LOG_REGS,
// LOG_VREGS). We will print a more detailed log.
@@ -1900,6 +1902,10 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
MemoryWrite<qreg_t>(address, qreg(srcdst));
break;
// Do nothing for prefetch.
case PRFM:
break;
default:
UNIMPLEMENTED();
}
@@ -1915,7 +1921,7 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
} else {
LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
}
} else if (instr->IsStore()) {
if ((op == STR_s) || (op == STR_d)) {
LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
} else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
@@ -1923,6 +1929,8 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
} else {
LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
}
} else {
DCHECK_EQ(op, PRFM);
}
// Handle the writeback for loads after the load to ensure safe pop // Handle the writeback for loads after the load to ensure safe pop

View File

@@ -2255,6 +2255,12 @@ class LiftoffCompiler {
return index;
}
void Prefetch(FullDecoder* decoder,
const MemoryAccessImmediate<validate>& imm,
const Value& index_val, bool temporal) {
unsupported(decoder, kSimd, "simd prefetch");
}
void LoadMem(FullDecoder* decoder, LoadType type,
const MemoryAccessImmediate<validate>& imm,
const Value& index_val, Value* result) {

View File

@@ -1001,6 +1001,8 @@ struct ControlBase : public PcForErrors<validate> {
F(LoadLane, LoadType type, const Value& value, const Value& index, \
const MemoryAccessImmediate<validate>& imm, const uint8_t laneidx, \
Value* result) \
F(Prefetch, const MemoryAccessImmediate<validate>& imm, const Value& index, \
bool temporal) \
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value) \
F(StoreLane, StoreType type, const MemoryAccessImmediate<validate>& imm, \
@@ -1760,7 +1762,8 @@ class WasmDecoder : public Decoder {
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE)
#undef DECLARE_OPCODE_CASE
case kExprPrefetchT:
case kExprPrefetchNT: {
MemoryAccessImmediate<validate> imm(decoder, pc + length,
UINT32_MAX);
return length + imm.length;
@@ -3506,6 +3509,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return opcode_length + 16;
}
uint32_t SimdPrefetch(uint32_t opcode_length, bool temporal) {
if (!CheckHasMemory()) return 0;
// Alignment doesn't matter, set to an arbitrary value.
uint32_t max_alignment = 4;
MemoryAccessImmediate<validate> imm(this, this->pc_ + opcode_length,
max_alignment);
ValueType index_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
Value index = Pop(0, index_type);
CALL_INTERFACE_IF_REACHABLE(Prefetch, imm, index, temporal);
return opcode_length + imm.length;
}
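
For reference, the MemoryAccessImmediate consumed here is a standard wasm memarg: two LEB128-encoded u32s, alignment first, then offset. A hypothetical standalone reader (V8 uses its own Decoder machinery) showing the byte-level shape:

#include <cstdint>
#include <cstdio>

// Minimal unsigned-LEB128 reader, for illustration only.
static uint32_t ReadLEB(const uint8_t** p) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t b;
  do {
    b = *(*p)++;
    result |= static_cast<uint32_t>(b & 0x7f) << shift;
    shift += 7;
  } while (b & 0x80);
  return result;
}

int main() {
  const uint8_t memarg[] = {0x00, 0x00};  // alignment = 0, offset = 0
  const uint8_t* p = memarg;
  uint32_t alignment = ReadLEB(&p);
  uint32_t offset = ReadLEB(&p);
  printf("alignment=%u offset=%u\n", alignment, offset);
  return 0;
}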
uint32_t DecodeSimdOpcode(WasmOpcode opcode, uint32_t opcode_length) {
// opcode_length is the number of bytes that this SIMD-specific opcode takes
// up in the LEB128 encoded form.
@@ -3610,6 +3625,12 @@ class WasmFullDecoder : public WasmDecoder<validate> {
}
case kExprS128Const:
return SimdConstOp(opcode_length);
case kExprPrefetchT: {
return SimdPrefetch(opcode_length, /*temporal=*/true);
}
case kExprPrefetchNT: {
return SimdPrefetch(opcode_length, /*temporal=*/false);
}
default: {
const FunctionSig* sig = WasmOpcodes::Signature(opcode);
if (!VALIDATE(sig != nullptr)) {

View File

@@ -419,6 +419,12 @@ class WasmGraphBuildingInterface {
SetEnv(if_block->false_env);
}
void Prefetch(FullDecoder* decoder,
const MemoryAccessImmediate<validate>& imm, const Value& index,
bool temporal) {
BUILD(Prefetch, index.node, imm.offset, imm.alignment, temporal);
}
void LoadMem(FullDecoder* decoder, LoadType type,
const MemoryAccessImmediate<validate>& imm, const Value& index,
Value* result) {

View File

@@ -360,6 +360,9 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(I32x4, ExtAddPairwiseI16x8, "extadd_pairwise_i16x8")
CASE_SIGN_OP(I16x8, ExtAddPairwiseI8x16, "extadd_pairwise_i8x6")
CASE_OP(PrefetchT, "prefetch_t")
CASE_OP(PrefetchNT, "prefetch_nt")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")

View File

@@ -470,7 +470,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(S128Store8Lane, 0xfd5c, v_is) \
V(S128Store16Lane, 0xfd5d, v_is) \
V(S128Store32Lane, 0xfd5e, v_is) \
V(S128Store64Lane, 0xfd5f, v_is) \
V(PrefetchT, 0xfdc5, v_i) \
V(PrefetchNT, 0xfdc6, v_i)
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
@@ -706,6 +708,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(d_f, kWasmF64, kWasmF32) \
V(d_i, kWasmF64, kWasmI32) \
V(d_l, kWasmF64, kWasmI64) \
V(v_i, kWasmStmt, kWasmI32) \
V(v_ii, kWasmStmt, kWasmI32, kWasmI32) \
V(v_id, kWasmStmt, kWasmI32, kWasmF64) \
V(d_id, kWasmF64, kWasmI32, kWasmF64) \

View File

@@ -6832,6 +6832,63 @@ TEST(ldr_literal_range_max_dist_no_emission_2) {
#endif
static const PrefetchOperation kPrfmOperations[] = {
PLDL1KEEP, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
PLIL1KEEP, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM,
PSTL1KEEP, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM};
TEST(prfm_regoffset_assem) {
INIT_V8();
SETUP();
START();
// The address used in prfm doesn't have to be valid.
__ Mov(x0, 0x0123456789abcdef);
CPURegList inputs(CPURegister::kRegister, kXRegSizeInBits, 10, 18);
__ Mov(x10, 0);
__ Mov(x11, 1);
__ Mov(x12, 8);
__ Mov(x13, 255);
__ Mov(x14, -0);
__ Mov(x15, -1);
__ Mov(x16, -8);
__ Mov(x17, -255);
__ Mov(x18, 0xfedcba9876543210);
for (int op = 0; op < (1 << ImmPrefetchOperation_width); op++) {
// Unallocated prefetch operations are ignored, so test all of them.
// We have to use the Assembler directly for this.
CPURegList loop = inputs;
while (!loop.IsEmpty()) {
__ prfm(op, MemOperand(x0, Register::Create(loop.PopLowestIndex().code(),
kXRegSizeInBits)));
}
}
for (PrefetchOperation op : kPrfmOperations) {
// Also test named operations.
CPURegList loop = inputs;
while (!loop.IsEmpty()) {
Register input =
Register::Create(loop.PopLowestIndex().code(), kXRegSizeInBits);
__ prfm(op, MemOperand(x0, input, UXTW));
__ prfm(op, MemOperand(x0, input, UXTW, 3));
__ prfm(op, MemOperand(x0, input, LSL));
__ prfm(op, MemOperand(x0, input, LSL, 3));
__ prfm(op, MemOperand(x0, input, SXTW));
__ prfm(op, MemOperand(x0, input, SXTW, 3));
__ prfm(op, MemOperand(x0, input, SXTX));
__ prfm(op, MemOperand(x0, input, SXTX, 3));
}
}
END();
RUN();
}
TEST(add_sub_imm) {
INIT_V8();
SETUP();

View File

@@ -1518,6 +1518,24 @@ TEST_(load_literal) {
CLEANUP();
}
TEST(prfm_regoffset) {
SET_UP_ASM();
COMPARE(prfm(PLIL1KEEP, MemOperand(x1, x2)), "prfm plil1keep, [x1, x2]");
COMPARE(prfm(PLIL1STRM, MemOperand(x3, w4, SXTW)),
"prfm plil1strm, [x3, w4, sxtw]");
COMPARE(prfm(PLIL2KEEP, MemOperand(x5, x6, LSL, 3)),
"prfm plil2keep, [x5, x6, lsl #3]");
COMPARE(prfm(PLIL2STRM, MemOperand(sp, xzr)), "prfm plil2strm, [sp, xzr]");
COMPARE(prfm(PLIL3KEEP, MemOperand(sp, wzr, SXTW)),
"prfm plil3keep, [sp, wzr, sxtw]");
COMPARE(prfm(PLIL3STRM, MemOperand(sp, xzr, LSL, 3)),
"prfm plil3strm, [sp, xzr, lsl #3]");
CLEANUP();
}
TEST_(cond_select) {
SET_UP_ASM();

View File

@@ -3565,6 +3565,60 @@ WASM_SIMD_TEST(SimdF32x4SetGlobal) {
CHECK_EQ(GetScalar(global, 3), 65.0f);
}
#if V8_TARGET_ARCH_ARM64
// TODO(v8:11168): Prototyping prefetch.
WASM_SIMD_TEST(SimdPrefetch) {
FLAG_SCOPE(wasm_simd_post_mvp);
{
// Test PrefetchT.
WasmRunner<int32_t> r(execution_tier, lower_simd);
int32_t* memory =
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
BUILD(r, WASM_ZERO, WASM_SIMD_OP(kExprPrefetchT), ZERO_ALIGNMENT,
ZERO_OFFSET,
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
FOR_INT32_INPUTS(i) {
r.builder().WriteMemory(&memory[0], i);
CHECK_EQ(i, r.Call());
}
}
{
// Test PrefetchNT.
WasmRunner<int32_t> r(execution_tier, lower_simd);
int32_t* memory =
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
BUILD(r, WASM_ZERO, WASM_SIMD_OP(kExprPrefetchNT), ZERO_ALIGNMENT,
ZERO_OFFSET,
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
FOR_INT32_INPUTS(i) {
r.builder().WriteMemory(&memory[0], i);
CHECK_EQ(i, r.Call());
}
}
{
// Test OOB.
WasmRunner<int32_t> r(execution_tier, lower_simd);
int32_t* memory =
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
// Prefetch kWasmPageSize+1 but still load from 0.
BUILD(r, WASM_I32V(kWasmPageSize + 1), WASM_SIMD_OP(kExprPrefetchNT),
ZERO_ALIGNMENT, ZERO_OFFSET,
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
FOR_INT32_INPUTS(i) {
r.builder().WriteMemory(&memory[0], i);
CHECK_EQ(i, r.Call());
}
}
}
#endif // V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST(SimdLoadStoreLoad) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
int32_t* memory =

View File

@@ -2801,6 +2801,16 @@ class WasmInterpreterInternals {
case kExprI16x8ExtAddPairwiseI8x16U: {
return DoSimdExtAddPairwise<int8, int16, uint16_t, uint8_t>();
}
case kExprPrefetchT:
case kExprPrefetchNT: {
// Max alignment doesn't matter, use an arbitrary value.
MemoryAccessImmediate<Decoder::kNoValidation> imm(
decoder, code->at(pc + *len), 4);
// Pop address and do nothing.
Pop().to<uint32_t>();
*len += imm.length;
return true;
}
default:
return false;
}