[wasm-simd][arm64] Prototype prefetch arm64
Prototype 2 prefetch instructions (temporal and non-temporal) on arm64 and interpreter. Add prfm to assembler, and use MiscField to encode the two versions. Small tweak to simulator to handle these new instructions (no-op). The implementation in the interpreter just pops the memory index and does nothing. Simple test cases added for these 2 new instructions, as well as a prefetch with OOB index, which should not trap. Bug: v8:11168 Change-Id: Ieced8081615d07f950d6d4c1128d1bc6a75839fd Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2543167 Reviewed-by: Bill Budge <bbudge@chromium.org> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#71353}
This commit is contained in:
parent
8f71a2675e
commit
7f770766d8
@ -1414,6 +1414,37 @@ void Assembler::stlxrh(const Register& rs, const Register& rt,
|
||||
Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
|
||||
}
|
||||
|
||||
// Prefetch memory (PRFM), taking the raw 5-bit prefetch operation immediate.
// Restricted support for prfm, only register offset.
// This can probably be merged with Assembler::LoadStore as we expand support.
void Assembler::prfm(int prfop, const MemOperand& addr) {
  DCHECK(addr.IsRegisterOffset());
  // The prefetch operation occupies the 5-bit Rt slot of the encoding.
  DCHECK(is_uint5(prfop));
  Instr memop = PRFM | prfop | RnSP(addr.base());

  Extend ext = addr.extend();
  Shift shift = addr.shift();
  unsigned shift_amount = addr.shift_amount();

  // LSL is encoded in the option field as UXTX.
  if (shift == LSL) {
    ext = UXTX;
  }

  // Shifts are encoded in one bit, indicating a left shift by the memory
  // access size.
  DCHECK((shift_amount == 0) ||
         (shift_amount == static_cast<unsigned>(CalcLSDataSize(PRFM))));
  Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
       ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
}
|
||||
|
||||
// Prefetch memory (PRFM) with a named prefetch operation (e.g. PLDL1KEEP).
// Restricted support for prfm, only register offset.
// This can probably be merged with Assembler::LoadStore as we expand support.
void Assembler::prfm(PrefetchOperation prfop, const MemOperand& addr) {
  // Delegate to the raw-immediate overload; the enum values are the 5-bit
  // prfop encodings.
  prfm(static_cast<int>(prfop), addr);
}
|
||||
|
||||
void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
|
||||
const VRegister& vm, NEON3DifferentOp vop) {
|
||||
DCHECK(AreSameFormat(vn, vm));
|
||||
|
@ -880,6 +880,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
// Store-release exclusive half-word.
|
||||
void stlxrh(const Register& rs, const Register& rt, const Register& rn);
|
||||
|
||||
void prfm(int prfop, const MemOperand& addr);
|
||||
void prfm(PrefetchOperation prfop, const MemOperand& addr);
|
||||
|
||||
// Move instructions. The default shift of -1 indicates that the move
|
||||
// instruction will calculate an appropriate 16-bit immediate and left shift
|
||||
// that is equal to the 64-bit immediate argument. If an explicit left shift
|
||||
|
@ -159,6 +159,9 @@ using float16 = uint16_t;
|
||||
/* store second source. */ \
|
||||
V_(Rs, 20, 16, Bits) /* Store-exclusive status */ \
|
||||
V_(PrefetchMode, 4, 0, Bits) \
|
||||
V_(PrefetchHint, 4, 3, Bits) \
|
||||
V_(PrefetchTarget, 2, 1, Bits) \
|
||||
V_(PrefetchStream, 0, 0, Bits) \
|
||||
\
|
||||
/* Common bits */ \
|
||||
V_(SixtyFourBits, 31, 31, Bits) \
|
||||
@ -216,6 +219,7 @@ using float16 = uint16_t;
|
||||
V_(LSOpc, 23, 22, Bits) \
|
||||
V_(LSVector, 26, 26, Bits) \
|
||||
V_(LSSize, 31, 30, Bits) \
|
||||
V_(ImmPrefetchOperation, 4, 0, Bits) \
|
||||
\
|
||||
/* NEON generic fields */ \
|
||||
V_(NEONQ, 30, 30, Bits) \
|
||||
@ -443,6 +447,27 @@ enum SystemRegister {
|
||||
ImmSystemRegister_offset
|
||||
};
|
||||
|
||||
// Named ARM64 prefetch operations, as encoded in the 5-bit prfop field of
// PRFM. Per the PrefetchHint/PrefetchTarget/PrefetchStream field definitions:
// bits 4:3 select the hint (PLD = prefetch for load, PLI = preload
// instructions, PST = prefetch for store), bits 2:1 select the cache level
// minus one (L1-L3), and bit 0 selects the policy (KEEP = retain/temporal,
// STRM = streaming/non-temporal).
enum PrefetchOperation {
  PLDL1KEEP = 0x00,
  PLDL1STRM = 0x01,
  PLDL2KEEP = 0x02,
  PLDL2STRM = 0x03,
  PLDL3KEEP = 0x04,
  PLDL3STRM = 0x05,
  PLIL1KEEP = 0x08,
  PLIL1STRM = 0x09,
  PLIL2KEEP = 0x0a,
  PLIL2STRM = 0x0b,
  PLIL3KEEP = 0x0c,
  PLIL3STRM = 0x0d,
  PSTL1KEEP = 0x10,
  PSTL1STRM = 0x11,
  PSTL2KEEP = 0x12,
  PSTL2STRM = 0x13,
  PSTL3KEEP = 0x14,
  PSTL3STRM = 0x15,
};
|
||||
|
||||
// Instruction enumerations.
|
||||
//
|
||||
// These are the masks that define a class of instructions, and the list of
|
||||
|
@ -1442,6 +1442,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kArm64Prfm: {
|
||||
__ prfm(MiscField::decode(opcode), i.MemoryOperand(0));
|
||||
break;
|
||||
}
|
||||
case kArm64Clz:
|
||||
__ Clz(i.OutputRegister64(), i.InputRegister64(0));
|
||||
break;
|
||||
|
@ -93,6 +93,7 @@ namespace compiler {
|
||||
V(Arm64Poke) \
|
||||
V(Arm64PokePair) \
|
||||
V(Arm64Peek) \
|
||||
V(Arm64Prfm) \
|
||||
V(Arm64Float32Cmp) \
|
||||
V(Arm64Float32Add) \
|
||||
V(Arm64Float32Sub) \
|
||||
|
@ -383,6 +383,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64StrCompressTagged:
|
||||
case kArm64DmbIsh:
|
||||
case kArm64DsbIsb:
|
||||
case kArm64Prfm:
|
||||
return kHasSideEffect;
|
||||
|
||||
case kArm64Word64AtomicLoadUint8:
|
||||
|
@ -617,6 +617,26 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
|
||||
selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
|
||||
}
|
||||
|
||||
// Selects a temporal prefetch: emits kArm64Prfm with the PLDL1KEEP operation
// for the (base, index) register pair of {node}.
void InstructionSelector::VisitPrefetchTemporal(Node* node) {
  Arm64OperandGenerator g(this);
  // kMode_MRR selects register+register addressing; MiscField carries the
  // prefetch operation for the code generator.
  InstructionCode opcode = kArm64Prfm |
                           AddressingModeField::encode(kMode_MRR) |
                           MiscField::encode(PLDL1KEEP);
  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0)),
                                 g.UseRegister(node->InputAt(1))};
  // Prefetch produces no value, so there are no outputs.
  Emit(opcode, 0, nullptr, arraysize(inputs), inputs);
}
|
||||
|
||||
// Selects a non-temporal prefetch: emits kArm64Prfm with the PLDL1STRM
// (streaming) operation for the (base, index) register pair of {node}.
void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
  Arm64OperandGenerator g(this);
  // kMode_MRR selects register+register addressing; MiscField carries the
  // prefetch operation for the code generator.
  InstructionCode opcode = kArm64Prfm |
                           AddressingModeField::encode(kMode_MRR) |
                           MiscField::encode(PLDL1STRM);
  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0)),
                                 g.UseRegister(node->InputAt(1))};
  // Prefetch produces no value, so there are no outputs.
  Emit(opcode, 0, nullptr, arraysize(inputs), inputs);
}
|
||||
|
||||
void InstructionSelector::VisitLoadTransform(Node* node) {
|
||||
LoadTransformParameters params = LoadTransformParametersOf(node->op());
|
||||
InstructionCode opcode = kArchNop;
|
||||
|
@ -1416,6 +1416,12 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
|
||||
return VisitLoadTransform(node);
|
||||
}
|
||||
case IrOpcode::kPrefetchTemporal: {
|
||||
return VisitPrefetchTemporal(node);
|
||||
}
|
||||
case IrOpcode::kPrefetchNonTemporal: {
|
||||
return VisitPrefetchNonTemporal(node);
|
||||
}
|
||||
case IrOpcode::kLoadLane: {
|
||||
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
|
||||
return VisitLoadLane(node);
|
||||
@ -2795,6 +2801,12 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
|
||||
void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// TODO(v8:11168): Prototyping prefetch.
// Generic fallback: prefetch is only implemented on arm64, so other backends
// must not reach this.
void InstructionSelector::VisitPrefetchTemporal(Node* node) { UNIMPLEMENTED(); }
|
||||
// Generic fallback: prefetch is only implemented on arm64, so other backends
// must not reach this.
void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
  UNIMPLEMENTED();
}
|
||||
#endif // !V8_TARGET_ARCH_ARM64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
|
@ -1160,6 +1160,18 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
|
||||
MACHINE_PURE_OP_LIST(PURE)
|
||||
#undef PURE
|
||||
|
||||
// Machine operator for a temporal prefetch. Callers pass {base, index} as the
// two value inputs (see WasmGraphBuilder::Prefetch); the node produces no
// value and is kept alive only through the effect chain.
// NOTE(review): the CachedOperator integer parameters appear to describe the
// input/output counts — confirm against the CachedOperator definition.
const Operator* MachineOperatorBuilder::PrefetchTemporal() {
  return GetCachedOperator<
      CachedOperator<IrOpcode::kPrefetchTemporal, 2, 1, 1, 0, 1, 0>>(
      Operator::kNoDeopt | Operator::kNoThrow, "PrefetchTemporal");
}
|
||||
|
||||
// Machine operator for a non-temporal (streaming) prefetch. Same shape as
// PrefetchTemporal: {base, index} value inputs, no value output, effectful.
const Operator* MachineOperatorBuilder::PrefetchNonTemporal() {
  return GetCachedOperator<
      CachedOperator<IrOpcode::kPrefetchNonTemporal, 2, 1, 1, 0, 1, 0>>(
      Operator::kNoDeopt | Operator::kNoThrow, "PrefetchNonTemporal");
}
|
||||
|
||||
const Operator* MachineOperatorBuilder::Load(LoadRepresentation rep) {
|
||||
#define LOAD(Type) \
|
||||
if (rep == MachineType::Type()) { \
|
||||
|
@ -828,6 +828,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* LoadTransform(MemoryAccessKind kind,
|
||||
LoadTransformation transform);
|
||||
|
||||
const Operator* PrefetchTemporal();
|
||||
const Operator* PrefetchNonTemporal();
|
||||
|
||||
// SIMD load: replace a specified lane with [base + index].
|
||||
const Operator* LoadLane(MemoryAccessKind kind, LoadRepresentation rep,
|
||||
uint8_t laneidx);
|
||||
|
@ -979,6 +979,8 @@
|
||||
V(V8x16AnyTrue) \
|
||||
V(V8x16AllTrue) \
|
||||
V(LoadTransform) \
|
||||
V(PrefetchTemporal) \
|
||||
V(PrefetchNonTemporal) \
|
||||
V(LoadLane) \
|
||||
V(StoreLane)
|
||||
|
||||
|
@ -4158,6 +4158,17 @@ Node* WasmGraphBuilder::LoadTransform(wasm::ValueType type, MachineType memtype,
|
||||
return load;
|
||||
}
|
||||
|
||||
// Builds a prefetch node for memory address {offset} + {index}. The node has
// no value output and is threaded onto the effect chain so it survives
// optimization. {temporal} selects the temporal vs. non-temporal machine
// operator; {alignment} is currently unused (prefetch has no alignment
// requirement).
Node* WasmGraphBuilder::Prefetch(Node* index, uint64_t offset,
                                 uint32_t alignment, bool temporal) {
  auto* machine = mcgraph()->machine();
  const Operator* op;
  if (temporal) {
    op = machine->PrefetchTemporal();
  } else {
    op = machine->PrefetchNonTemporal();
  }
  uintptr_t capped_offset = static_cast<uintptr_t>(offset);
  return SetEffect(graph()->NewNode(op, MemBuffer(capped_offset), index,
                                    effect(), control()));
}
|
||||
|
||||
Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype,
|
||||
Node* index, uint64_t offset,
|
||||
uint32_t alignment,
|
||||
|
@ -301,6 +301,8 @@ class WasmGraphBuilder {
|
||||
Node* CurrentMemoryPages();
|
||||
Node* TraceMemoryOperation(bool is_store, MachineRepresentation, Node* index,
|
||||
uintptr_t offset, wasm::WasmCodePosition);
|
||||
Node* Prefetch(Node* index, uint64_t offset, uint32_t alignment,
|
||||
bool temporal);
|
||||
Node* LoadMem(wasm::ValueType type, MachineType memtype, Node* index,
|
||||
uint64_t offset, uint32_t alignment,
|
||||
wasm::WasmCodePosition position);
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <bitset>
|
||||
|
||||
#if V8_TARGET_ARCH_ARM64
|
||||
|
||||
#include "src/base/platform/platform.h"
|
||||
@ -4266,12 +4268,19 @@ int DisassemblingDecoder::SubstitutePrefetchField(Instruction* instr,
|
||||
USE(format);
|
||||
|
||||
int prefetch_mode = instr->PrefetchMode();
|
||||
const std::array<std::string, 3> hints = {"ld", "li", "st"};
|
||||
unsigned hint = instr->PrefetchHint();
|
||||
unsigned target = instr->PrefetchTarget() + 1;
|
||||
|
||||
const char* ls = (prefetch_mode & 0x10) ? "st" : "ld";
|
||||
int level = (prefetch_mode >> 1) + 1;
|
||||
if (hint >= hints.size() || target > 3) {
|
||||
std::bitset<5> prefetch_mode(instr->ImmPrefetchOperation());
|
||||
AppendToOutput("#0b%s", prefetch_mode.to_string().c_str());
|
||||
} else {
|
||||
const char* ks = (prefetch_mode & 1) ? "strm" : "keep";
|
||||
|
||||
AppendToOutput("p%sl%d%s", ls, level, ks);
|
||||
AppendToOutput("p%sl%d%s", hints[hint].c_str(), target, ks);
|
||||
}
|
||||
|
||||
return 6;
|
||||
}
|
||||
|
||||
|
@ -1799,14 +1799,17 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
|
||||
unsigned addr_reg = instr->Rn();
|
||||
uintptr_t address = LoadStoreAddress(addr_reg, offset, addrmode);
|
||||
uintptr_t stack = 0;
|
||||
LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
|
||||
|
||||
{
|
||||
base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
|
||||
if (instr->IsLoad()) {
|
||||
local_monitor_.NotifyLoad();
|
||||
} else {
|
||||
} else if (instr->IsStore()) {
|
||||
local_monitor_.NotifyStore();
|
||||
GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_processor_);
|
||||
} else {
|
||||
DCHECK_EQ(op, PRFM);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1825,7 +1828,6 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
|
||||
stack = sp();
|
||||
}
|
||||
|
||||
LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
|
||||
switch (op) {
|
||||
// Use _no_log variants to suppress the register trace (LOG_REGS,
|
||||
// LOG_VREGS). We will print a more detailed log.
|
||||
@ -1900,6 +1902,10 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
|
||||
MemoryWrite<qreg_t>(address, qreg(srcdst));
|
||||
break;
|
||||
|
||||
// Do nothing for prefetch.
|
||||
case PRFM:
|
||||
break;
|
||||
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -1915,7 +1921,7 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
|
||||
} else {
|
||||
LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
|
||||
}
|
||||
} else {
|
||||
} else if (instr->IsStore()) {
|
||||
if ((op == STR_s) || (op == STR_d)) {
|
||||
LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
|
||||
} else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
|
||||
@ -1923,6 +1929,8 @@ void Simulator::LoadStoreHelper(Instruction* instr, int64_t offset,
|
||||
} else {
|
||||
LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
|
||||
}
|
||||
} else {
|
||||
DCHECK_EQ(op, PRFM);
|
||||
}
|
||||
|
||||
// Handle the writeback for loads after the load to ensure safe pop
|
||||
|
@ -2255,6 +2255,12 @@ class LiftoffCompiler {
|
||||
return index;
|
||||
}
|
||||
|
||||
// Prefetch is not implemented in Liftoff yet; bail out so the function is
// compiled by TurboFan instead.
void Prefetch(FullDecoder* decoder,
              const MemoryAccessImmediate<validate>& imm,
              const Value& index_val, bool temporal) {
  unsupported(decoder, kSimd, "simd prefetch");
}
|
||||
|
||||
void LoadMem(FullDecoder* decoder, LoadType type,
|
||||
const MemoryAccessImmediate<validate>& imm,
|
||||
const Value& index_val, Value* result) {
|
||||
|
@ -1001,6 +1001,8 @@ struct ControlBase : public PcForErrors<validate> {
|
||||
F(LoadLane, LoadType type, const Value& value, const Value& index, \
|
||||
const MemoryAccessImmediate<validate>& imm, const uint8_t laneidx, \
|
||||
Value* result) \
|
||||
F(Prefetch, const MemoryAccessImmediate<validate>& imm, const Value& index, \
|
||||
bool temporal) \
|
||||
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
|
||||
const Value& index, const Value& value) \
|
||||
F(StoreLane, StoreType type, const MemoryAccessImmediate<validate>& imm, \
|
||||
@ -1760,7 +1762,8 @@ class WasmDecoder : public Decoder {
|
||||
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
|
||||
FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE)
|
||||
#undef DECLARE_OPCODE_CASE
|
||||
{
|
||||
case kExprPrefetchT:
|
||||
case kExprPrefetchNT: {
|
||||
MemoryAccessImmediate<validate> imm(decoder, pc + length,
|
||||
UINT32_MAX);
|
||||
return length + imm.length;
|
||||
@ -3506,6 +3509,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
|
||||
return opcode_length + 16;
|
||||
}
|
||||
|
||||
uint32_t SimdPrefetch(uint32_t opcode_length, bool temporal) {
|
||||
if (!CheckHasMemory()) return 0;
|
||||
// Alignment doesn't matter, set to an arbitrary value.
|
||||
uint32_t max_alignment = 4;
|
||||
MemoryAccessImmediate<validate> imm(this, this->pc_ + opcode_length,
|
||||
max_alignment);
|
||||
ValueType index_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
|
||||
Value index = Pop(0, index_type);
|
||||
CALL_INTERFACE_IF_REACHABLE(Prefetch, imm, index, temporal);
|
||||
return opcode_length + imm.length;
|
||||
}
|
||||
|
||||
uint32_t DecodeSimdOpcode(WasmOpcode opcode, uint32_t opcode_length) {
|
||||
// opcode_length is the number of bytes that this SIMD-specific opcode takes
|
||||
// up in the LEB128 encoded form.
|
||||
@ -3610,6 +3625,12 @@ class WasmFullDecoder : public WasmDecoder<validate> {
|
||||
}
|
||||
case kExprS128Const:
|
||||
return SimdConstOp(opcode_length);
|
||||
case kExprPrefetchT: {
|
||||
return SimdPrefetch(opcode_length, /*temporal=*/true);
|
||||
}
|
||||
case kExprPrefetchNT: {
|
||||
return SimdPrefetch(opcode_length, /*temporal=*/false);
|
||||
}
|
||||
default: {
|
||||
const FunctionSig* sig = WasmOpcodes::Signature(opcode);
|
||||
if (!VALIDATE(sig != nullptr)) {
|
||||
|
@ -419,6 +419,12 @@ class WasmGraphBuildingInterface {
|
||||
SetEnv(if_block->false_env);
|
||||
}
|
||||
|
||||
// Lowers a decoded wasm prefetch to a graph node via
// WasmGraphBuilder::Prefetch.
void Prefetch(FullDecoder* decoder,
              const MemoryAccessImmediate<validate>& imm, const Value& index,
              bool temporal) {
  BUILD(Prefetch, index.node, imm.offset, imm.alignment, temporal);
}
|
||||
|
||||
void LoadMem(FullDecoder* decoder, LoadType type,
|
||||
const MemoryAccessImmediate<validate>& imm, const Value& index,
|
||||
Value* result) {
|
||||
|
@ -360,6 +360,9 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
||||
CASE_SIGN_OP(I32x4, ExtAddPairwiseI16x8, "extadd_pairwise_i16x8")
|
||||
CASE_SIGN_OP(I16x8, ExtAddPairwiseI8x16, "extadd_pairwise_i8x6")
|
||||
|
||||
CASE_OP(PrefetchT, "prefetch_t")
|
||||
CASE_OP(PrefetchNT, "prefetch_nt")
|
||||
|
||||
// Atomic operations.
|
||||
CASE_OP(AtomicNotify, "atomic.notify")
|
||||
CASE_INT_OP(AtomicWait, "atomic.wait")
|
||||
|
@ -470,7 +470,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
|
||||
V(S128Store8Lane, 0xfd5c, v_is) \
|
||||
V(S128Store16Lane, 0xfd5d, v_is) \
|
||||
V(S128Store32Lane, 0xfd5e, v_is) \
|
||||
V(S128Store64Lane, 0xfd5f, v_is)
|
||||
V(S128Store64Lane, 0xfd5f, v_is) \
|
||||
V(PrefetchT, 0xfdc5, v_i) \
|
||||
V(PrefetchNT, 0xfdc6, v_i)
|
||||
|
||||
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
|
||||
V(I8x16Mul, 0xfd75, s_ss) \
|
||||
@ -706,6 +708,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
|
||||
V(d_f, kWasmF64, kWasmF32) \
|
||||
V(d_i, kWasmF64, kWasmI32) \
|
||||
V(d_l, kWasmF64, kWasmI64) \
|
||||
V(v_i, kWasmStmt, kWasmI32) \
|
||||
V(v_ii, kWasmStmt, kWasmI32, kWasmI32) \
|
||||
V(v_id, kWasmStmt, kWasmI32, kWasmF64) \
|
||||
V(d_id, kWasmF64, kWasmI32, kWasmF64) \
|
||||
|
@ -6832,6 +6832,63 @@ TEST(ldr_literal_range_max_dist_no_emission_2) {
|
||||
|
||||
#endif
|
||||
|
||||
// All 18 architecturally-named prefetch operations:
// {PLD, PLI, PST} x {L1, L2, L3} x {KEEP, STRM}.
static const PrefetchOperation kPrfmOperations[] = {
    PLDL1KEEP, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,

    PLIL1KEEP, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM,

    PSTL1KEEP, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM};
|
||||
|
||||
// Assembles and runs prfm with every possible 5-bit prefetch operation
// (allocated and unallocated) over a range of register offsets and
// extend/shift modes. Prefetches never fault, so executing to completion is
// the pass condition — there are no result checks.
TEST(prfm_regoffset_assem) {
  INIT_V8();
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

  // Offset registers x10..x18, loaded with a mix of small, large, and
  // negative values.
  CPURegList inputs(CPURegister::kRegister, kXRegSizeInBits, 10, 18);
  __ Mov(x10, 0);
  __ Mov(x11, 1);
  __ Mov(x12, 8);
  __ Mov(x13, 255);
  __ Mov(x14, -0);
  __ Mov(x15, -1);
  __ Mov(x16, -8);
  __ Mov(x17, -255);
  __ Mov(x18, 0xfedcba9876543210);

  for (int op = 0; op < (1 << ImmPrefetchOperation_width); op++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    // We have to use the Assembler directly for this.
    CPURegList loop = inputs;
    while (!loop.IsEmpty()) {
      __ prfm(op, MemOperand(x0, Register::Create(loop.PopLowestIndex().code(),
                                                  kXRegSizeInBits)));
    }
  }

  for (PrefetchOperation op : kPrfmOperations) {
    // Also test named operations.
    CPURegList loop = inputs;
    while (!loop.IsEmpty()) {
      Register input =
          Register::Create(loop.PopLowestIndex().code(), kXRegSizeInBits);
      // Cover every supported extend/shift combination, with and without a
      // shift amount.
      __ prfm(op, MemOperand(x0, input, UXTW));
      __ prfm(op, MemOperand(x0, input, UXTW, 3));
      __ prfm(op, MemOperand(x0, input, LSL));
      __ prfm(op, MemOperand(x0, input, LSL, 3));
      __ prfm(op, MemOperand(x0, input, SXTW));
      __ prfm(op, MemOperand(x0, input, SXTW, 3));
      __ prfm(op, MemOperand(x0, input, SXTX));
      __ prfm(op, MemOperand(x0, input, SXTX, 3));
    }
  }

  END();
  RUN();
}
|
||||
|
||||
TEST(add_sub_imm) {
|
||||
INIT_V8();
|
||||
SETUP();
|
||||
|
@ -1518,6 +1518,24 @@ TEST_(load_literal) {
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Checks the disassembly of prfm with register-offset addressing, covering
// plain register, extended (sxtw), and shifted (lsl #3) offsets, including
// sp/xzr/wzr operands.
TEST(prfm_regoffset) {
  SET_UP_ASM();

  COMPARE(prfm(PLIL1KEEP, MemOperand(x1, x2)), "prfm plil1keep, [x1, x2]");
  COMPARE(prfm(PLIL1STRM, MemOperand(x3, w4, SXTW)),
          "prfm plil1strm, [x3, w4, sxtw]");
  COMPARE(prfm(PLIL2KEEP, MemOperand(x5, x6, LSL, 3)),
          "prfm plil2keep, [x5, x6, lsl #3]");

  COMPARE(prfm(PLIL2STRM, MemOperand(sp, xzr)), "prfm plil2strm, [sp, xzr]");
  COMPARE(prfm(PLIL3KEEP, MemOperand(sp, wzr, SXTW)),
          "prfm plil3keep, [sp, wzr, sxtw]");
  COMPARE(prfm(PLIL3STRM, MemOperand(sp, xzr, LSL, 3)),
          "prfm plil3strm, [sp, xzr, lsl #3]");

  CLEANUP();
}
|
||||
|
||||
TEST_(cond_select) {
|
||||
SET_UP_ASM();
|
||||
|
||||
|
@ -3565,6 +3565,60 @@ WASM_SIMD_TEST(SimdF32x4SetGlobal) {
|
||||
CHECK_EQ(GetScalar(global, 3), 65.0f);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM64
|
||||
// TODO(v8:11168): Prototyping prefetch.
|
||||
WASM_SIMD_TEST(SimdPrefetch) {
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
|
||||
{
|
||||
// Test PrefetchT.
|
||||
WasmRunner<int32_t> r(execution_tier, lower_simd);
|
||||
int32_t* memory =
|
||||
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
|
||||
BUILD(r, WASM_ZERO, WASM_SIMD_OP(kExprPrefetchT), ZERO_ALIGNMENT,
|
||||
ZERO_OFFSET,
|
||||
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
|
||||
|
||||
FOR_INT32_INPUTS(i) {
|
||||
r.builder().WriteMemory(&memory[0], i);
|
||||
CHECK_EQ(i, r.Call());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Test PrefetchNT.
|
||||
WasmRunner<int32_t> r(execution_tier, lower_simd);
|
||||
int32_t* memory =
|
||||
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
|
||||
BUILD(r, WASM_ZERO, WASM_SIMD_OP(kExprPrefetchNT), ZERO_ALIGNMENT,
|
||||
ZERO_OFFSET,
|
||||
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
|
||||
|
||||
FOR_INT32_INPUTS(i) {
|
||||
r.builder().WriteMemory(&memory[0], i);
|
||||
CHECK_EQ(i, r.Call());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Test OOB.
|
||||
WasmRunner<int32_t> r(execution_tier, lower_simd);
|
||||
int32_t* memory =
|
||||
r.builder().AddMemoryElems<int32_t>(kWasmPageSize / sizeof(int32_t));
|
||||
|
||||
// Prefetch kWasmPageSize+1 but still load from 0.
|
||||
BUILD(r, WASM_I32V(kWasmPageSize + 1), WASM_SIMD_OP(kExprPrefetchNT),
|
||||
ZERO_ALIGNMENT, ZERO_OFFSET,
|
||||
WASM_SIMD_I32x4_EXTRACT_LANE(0, WASM_SIMD_LOAD_MEM(WASM_ZERO)));
|
||||
|
||||
FOR_INT32_INPUTS(i) {
|
||||
r.builder().WriteMemory(&memory[0], i);
|
||||
CHECK_EQ(i, r.Call());
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM64
|
||||
|
||||
WASM_SIMD_TEST(SimdLoadStoreLoad) {
|
||||
WasmRunner<int32_t> r(execution_tier, lower_simd);
|
||||
int32_t* memory =
|
||||
|
@ -2801,6 +2801,16 @@ class WasmInterpreterInternals {
|
||||
case kExprI16x8ExtAddPairwiseI8x16U: {
|
||||
return DoSimdExtAddPairwise<int8, int16, uint16_t, uint8_t>();
|
||||
}
|
||||
case kExprPrefetchT:
|
||||
case kExprPrefetchNT: {
|
||||
// Max alignment doesn't matter, use an arbitrary value.
|
||||
MemoryAccessImmediate<Decoder::kNoValidation> imm(
|
||||
decoder, code->at(pc + *len), 4);
|
||||
// Pop address and do nothing.
|
||||
Pop().to<uint32_t>();
|
||||
*len += imm.length;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user