[wasm-simd] Implement remaining load_extend for x64
This implements the remaining load-extend instructions on x64:

- i32x4.load16x4_s
- i32x4.load16x4_u
- i64x2.load32x2_s
- i64x2.load32x2_u

Bug: v8:9886
Change-Id: I4649f77bae5224042a1628d9f0498c050b1e599d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1903812
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65017}
This commit is contained in:
parent
88a2d01148
commit
461b98f3e0
@ -118,9 +118,11 @@
|
||||
V(ptest, 66, 0F, 38, 17) \
|
||||
V(pmovsxbw, 66, 0F, 38, 20) \
|
||||
V(pmovsxwd, 66, 0F, 38, 23) \
|
||||
V(pmovsxdq, 66, 0F, 38, 25) \
|
||||
V(packusdw, 66, 0F, 38, 2B) \
|
||||
V(pmovzxbw, 66, 0F, 38, 30) \
|
||||
V(pmovzxwd, 66, 0F, 38, 33) \
|
||||
V(pmovzxdq, 66, 0F, 38, 35) \
|
||||
V(pminsb, 66, 0F, 38, 38) \
|
||||
V(pminsd, 66, 0F, 38, 39) \
|
||||
V(pminuw, 66, 0F, 38, 3A) \
|
||||
|
@ -3718,15 +3718,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Load8x8S: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Load8x8U: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I32x4Load16x4S: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I32x4Load16x4U: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I64x2Load32x2S: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I64x2Load32x2U: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64S32x4Swizzle: {
|
||||
DCHECK_EQ(2, instr->InputCount());
|
||||
ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
|
||||
|
@ -316,6 +316,10 @@ namespace compiler {
|
||||
V(X64S64x2LoadSplat) \
|
||||
V(X64I16x8Load8x8S) \
|
||||
V(X64I16x8Load8x8U) \
|
||||
V(X64I32x4Load16x4S) \
|
||||
V(X64I32x4Load16x4U) \
|
||||
V(X64I64x2Load32x2S) \
|
||||
V(X64I64x2Load32x2U) \
|
||||
V(X64S32x4Swizzle) \
|
||||
V(X64S32x4Shuffle) \
|
||||
V(X64S16x8Blend) \
|
||||
|
@ -367,6 +367,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64S64x2LoadSplat:
|
||||
case kX64I16x8Load8x8S:
|
||||
case kX64I16x8Load8x8U:
|
||||
case kX64I32x4Load16x4S:
|
||||
case kX64I32x4Load16x4U:
|
||||
case kX64I64x2Load32x2S:
|
||||
case kX64I64x2Load32x2U:
|
||||
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
|
||||
|
||||
case kX64Peek:
|
||||
|
@ -348,6 +348,18 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
|
||||
case LoadTransformation::kI16x8Load8x8U:
|
||||
opcode = kX64I16x8Load8x8U;
|
||||
break;
|
||||
case LoadTransformation::kI32x4Load16x4S:
|
||||
opcode = kX64I32x4Load16x4S;
|
||||
break;
|
||||
case LoadTransformation::kI32x4Load16x4U:
|
||||
opcode = kX64I32x4Load16x4U;
|
||||
break;
|
||||
case LoadTransformation::kI64x2Load32x2S:
|
||||
opcode = kX64I64x2Load32x2S;
|
||||
break;
|
||||
case LoadTransformation::kI64x2Load32x2U:
|
||||
opcode = kX64I64x2Load32x2U;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -62,6 +62,14 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
|
||||
return os << "kI16x8Load8x8S";
|
||||
case LoadTransformation::kI16x8Load8x8U:
|
||||
return os << "kI16x8Load8x8U";
|
||||
case LoadTransformation::kI32x4Load16x4S:
|
||||
return os << "kI32x4Load16x4S";
|
||||
case LoadTransformation::kI32x4Load16x4U:
|
||||
return os << "kI32x4Load16x4U";
|
||||
case LoadTransformation::kI64x2Load32x2S:
|
||||
return os << "kI64x2Load32x2S";
|
||||
case LoadTransformation::kI64x2Load32x2U:
|
||||
return os << "kI64x2Load32x2U";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
@ -523,7 +531,11 @@ MachineType AtomicOpType(Operator const* op) {
|
||||
V(S32x4LoadSplat) \
|
||||
V(S64x2LoadSplat) \
|
||||
V(I16x8Load8x8S) \
|
||||
V(I16x8Load8x8U)
|
||||
V(I16x8Load8x8U) \
|
||||
V(I32x4Load16x4S) \
|
||||
V(I32x4Load16x4U) \
|
||||
V(I64x2Load32x2S) \
|
||||
V(I64x2Load32x2U)
|
||||
|
||||
#define ATOMIC_U32_TYPE_LIST(V) \
|
||||
V(Uint8) \
|
||||
|
@ -67,6 +67,10 @@ enum class LoadTransformation {
|
||||
kS64x2LoadSplat,
|
||||
kI16x8Load8x8S,
|
||||
kI16x8Load8x8U,
|
||||
kI32x4Load16x4S,
|
||||
kI32x4Load16x4U,
|
||||
kI64x2Load32x2S,
|
||||
kI64x2Load32x2U,
|
||||
};
|
||||
|
||||
size_t hash_value(LoadTransformation);
|
||||
|
@ -3703,6 +3703,14 @@ LoadTransformation GetLoadTransformation(
|
||||
return LoadTransformation::kI16x8Load8x8S;
|
||||
} else if (memtype == MachineType::Uint8()) {
|
||||
return LoadTransformation::kI16x8Load8x8U;
|
||||
} else if (memtype == MachineType::Int16()) {
|
||||
return LoadTransformation::kI32x4Load16x4S;
|
||||
} else if (memtype == MachineType::Uint16()) {
|
||||
return LoadTransformation::kI32x4Load16x4U;
|
||||
} else if (memtype == MachineType::Int32()) {
|
||||
return LoadTransformation::kI64x2Load32x2S;
|
||||
} else if (memtype == MachineType::Uint32()) {
|
||||
return LoadTransformation::kI64x2Load32x2U;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2732,6 +2732,22 @@ class WasmFullDecoder : public WasmDecoder<validate> {
|
||||
len = DecodeLoadTransformMem(LoadType::kI32Load8U,
|
||||
LoadTransformationKind::kExtend);
|
||||
break;
|
||||
case kExprI32x4Load16x4S:
|
||||
len = DecodeLoadTransformMem(LoadType::kI32Load16S,
|
||||
LoadTransformationKind::kExtend);
|
||||
break;
|
||||
case kExprI32x4Load16x4U:
|
||||
len = DecodeLoadTransformMem(LoadType::kI32Load16U,
|
||||
LoadTransformationKind::kExtend);
|
||||
break;
|
||||
case kExprI64x2Load32x2S:
|
||||
len = DecodeLoadTransformMem(LoadType::kI64Load32S,
|
||||
LoadTransformationKind::kExtend);
|
||||
break;
|
||||
case kExprI64x2Load32x2U:
|
||||
len = DecodeLoadTransformMem(LoadType::kI64Load32U,
|
||||
LoadTransformationKind::kExtend);
|
||||
break;
|
||||
default: {
|
||||
FunctionSig* sig = WasmOpcodes::Signature(opcode);
|
||||
if (!VALIDATE(sig != nullptr)) {
|
||||
|
@ -329,6 +329,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
||||
CASE_S64x2_OP(LoadSplat, "load_splat")
|
||||
CASE_I16x8_OP(Load8x8S, "load8x8_s")
|
||||
CASE_I16x8_OP(Load8x8U, "load8x8_u")
|
||||
CASE_I32x4_OP(Load16x4S, "load16x4_s")
|
||||
CASE_I32x4_OP(Load16x4U, "load16x4_u")
|
||||
CASE_I64x2_OP(Load32x2S, "load32x2_s")
|
||||
CASE_I64x2_OP(Load32x2U, "load32x2_u")
|
||||
|
||||
// Atomic operations.
|
||||
CASE_OP(AtomicNotify, "atomic.notify")
|
||||
|
@ -441,6 +441,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
|
||||
V(I32x4UConvertI16x8High, 0xfdd1, s_s) \
|
||||
V(I16x8Load8x8S, 0xfdd2, s_s) \
|
||||
V(I16x8Load8x8U, 0xfdd3, s_s) \
|
||||
V(I32x4Load16x4S, 0xfdd4, s_s) \
|
||||
V(I32x4Load16x4U, 0xfdd5, s_s) \
|
||||
V(I64x2Load32x2S, 0xfdd6, s_s) \
|
||||
V(I64x2Load32x2U, 0xfdd7, s_s) \
|
||||
V(I16x8AddHoriz, 0xfdbd, s_ss) \
|
||||
V(I32x4AddHoriz, 0xfdbe, s_ss) \
|
||||
V(F32x4AddHoriz, 0xfdbf, s_ss) \
|
||||
|
@ -335,11 +335,21 @@ constexpr Vector<const int16_t> ValueHelper::GetVector() {
|
||||
return int16_vector();
|
||||
}
|
||||
|
||||
// Explicit specialization of ValueHelper::GetVector for uint16_t elements:
// forwards to uint16_vector().
template <>
|
||||
constexpr Vector<const uint16_t> ValueHelper::GetVector() {
|
||||
return uint16_vector();
|
||||
}
|
||||
|
||||
// Explicit specialization of ValueHelper::GetVector for int32_t elements:
// forwards to int32_vector().
template <>
|
||||
constexpr Vector<const int32_t> ValueHelper::GetVector() {
|
||||
return int32_vector();
|
||||
}
|
||||
|
||||
// Explicit specialization of ValueHelper::GetVector for uint32_t elements:
// forwards to uint32_vector().
template <>
|
||||
constexpr Vector<const uint32_t> ValueHelper::GetVector() {
|
||||
return uint32_vector();
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Vector<const int64_t> ValueHelper::GetVector() {
|
||||
return int64_vector();
|
||||
|
@ -3352,6 +3352,26 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) {
|
||||
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
|
||||
kExprI16x8Load8x8S);
|
||||
}
|
||||
|
||||
// Exercises kExprI32x4Load16x4U through the shared RunLoadExtendTest helper,
// instantiated with <uint16_t, uint32_t>.
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4U) {
|
||||
RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd,
|
||||
kExprI32x4Load16x4U);
|
||||
}
|
||||
|
||||
// Exercises kExprI32x4Load16x4S through the shared RunLoadExtendTest helper,
// instantiated with <int16_t, int32_t>.
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
|
||||
RunLoadExtendTest<int16_t, int32_t>(execution_tier, lower_simd,
|
||||
kExprI32x4Load16x4S);
|
||||
}
|
||||
|
||||
// Exercises kExprI64x2Load32x2U through the shared RunLoadExtendTest helper,
// instantiated with <uint32_t, uint64_t>.
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
|
||||
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
|
||||
kExprI64x2Load32x2U);
|
||||
}
|
||||
|
||||
// Exercises kExprI64x2Load32x2S through the shared RunLoadExtendTest helper,
// instantiated with <int32_t, int64_t>.
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
|
||||
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
|
||||
kExprI64x2Load32x2S);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
||||
|
Loading…
Reference in New Issue
Block a user