[wasm-simd] Implement remaining load_extend for x64
This implements the rest of the load extend instructions:

- i32x4.load16x4_s
- i32x4.load16x4_u
- i64x2.load32x2_s
- i64x2.load32x2_u

Bug: v8:9886
Change-Id: I4649f77bae5224042a1628d9f0498c050b1e599d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1903812
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65017}
This commit is contained in:
parent
88a2d01148
commit
461b98f3e0
@ -118,9 +118,11 @@
|
|||||||
V(ptest, 66, 0F, 38, 17) \
|
V(ptest, 66, 0F, 38, 17) \
|
||||||
V(pmovsxbw, 66, 0F, 38, 20) \
|
V(pmovsxbw, 66, 0F, 38, 20) \
|
||||||
V(pmovsxwd, 66, 0F, 38, 23) \
|
V(pmovsxwd, 66, 0F, 38, 23) \
|
||||||
|
V(pmovsxdq, 66, 0F, 38, 25) \
|
||||||
V(packusdw, 66, 0F, 38, 2B) \
|
V(packusdw, 66, 0F, 38, 2B) \
|
||||||
V(pmovzxbw, 66, 0F, 38, 30) \
|
V(pmovzxbw, 66, 0F, 38, 30) \
|
||||||
V(pmovzxwd, 66, 0F, 38, 33) \
|
V(pmovzxwd, 66, 0F, 38, 33) \
|
||||||
|
V(pmovzxdq, 66, 0F, 38, 35) \
|
||||||
V(pminsb, 66, 0F, 38, 38) \
|
V(pminsb, 66, 0F, 38, 38) \
|
||||||
V(pminsd, 66, 0F, 38, 39) \
|
V(pminsd, 66, 0F, 38, 39) \
|
||||||
V(pminuw, 66, 0F, 38, 3A) \
|
V(pminuw, 66, 0F, 38, 3A) \
|
||||||
|
@ -3718,15 +3718,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kX64I16x8Load8x8S: {
|
case kX64I16x8Load8x8S: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kX64I16x8Load8x8U: {
|
case kX64I16x8Load8x8U: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
__ pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
__ pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case kX64I32x4Load16x4S: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
|
__ pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kX64I32x4Load16x4U: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
|
__ pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kX64I64x2Load32x2S: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
|
__ pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kX64I64x2Load32x2U: {
|
||||||
|
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||||
|
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||||
|
__ pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
|
||||||
|
break;
|
||||||
|
}
|
||||||
case kX64S32x4Swizzle: {
|
case kX64S32x4Swizzle: {
|
||||||
DCHECK_EQ(2, instr->InputCount());
|
DCHECK_EQ(2, instr->InputCount());
|
||||||
ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
|
ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
|
||||||
|
@ -316,6 +316,10 @@ namespace compiler {
|
|||||||
V(X64S64x2LoadSplat) \
|
V(X64S64x2LoadSplat) \
|
||||||
V(X64I16x8Load8x8S) \
|
V(X64I16x8Load8x8S) \
|
||||||
V(X64I16x8Load8x8U) \
|
V(X64I16x8Load8x8U) \
|
||||||
|
V(X64I32x4Load16x4S) \
|
||||||
|
V(X64I32x4Load16x4U) \
|
||||||
|
V(X64I64x2Load32x2S) \
|
||||||
|
V(X64I64x2Load32x2U) \
|
||||||
V(X64S32x4Swizzle) \
|
V(X64S32x4Swizzle) \
|
||||||
V(X64S32x4Shuffle) \
|
V(X64S32x4Shuffle) \
|
||||||
V(X64S16x8Blend) \
|
V(X64S16x8Blend) \
|
||||||
|
@ -367,6 +367,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kX64S64x2LoadSplat:
|
case kX64S64x2LoadSplat:
|
||||||
case kX64I16x8Load8x8S:
|
case kX64I16x8Load8x8S:
|
||||||
case kX64I16x8Load8x8U:
|
case kX64I16x8Load8x8U:
|
||||||
|
case kX64I32x4Load16x4S:
|
||||||
|
case kX64I32x4Load16x4U:
|
||||||
|
case kX64I64x2Load32x2S:
|
||||||
|
case kX64I64x2Load32x2U:
|
||||||
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
|
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
|
||||||
|
|
||||||
case kX64Peek:
|
case kX64Peek:
|
||||||
|
@ -348,6 +348,18 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
|
|||||||
case LoadTransformation::kI16x8Load8x8U:
|
case LoadTransformation::kI16x8Load8x8U:
|
||||||
opcode = kX64I16x8Load8x8U;
|
opcode = kX64I16x8Load8x8U;
|
||||||
break;
|
break;
|
||||||
|
case LoadTransformation::kI32x4Load16x4S:
|
||||||
|
opcode = kX64I32x4Load16x4S;
|
||||||
|
break;
|
||||||
|
case LoadTransformation::kI32x4Load16x4U:
|
||||||
|
opcode = kX64I32x4Load16x4U;
|
||||||
|
break;
|
||||||
|
case LoadTransformation::kI64x2Load32x2S:
|
||||||
|
opcode = kX64I64x2Load32x2S;
|
||||||
|
break;
|
||||||
|
case LoadTransformation::kI64x2Load32x2U:
|
||||||
|
opcode = kX64I64x2Load32x2U;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,14 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
|
|||||||
return os << "kI16x8Load8x8S";
|
return os << "kI16x8Load8x8S";
|
||||||
case LoadTransformation::kI16x8Load8x8U:
|
case LoadTransformation::kI16x8Load8x8U:
|
||||||
return os << "kI16x8Load8x8U";
|
return os << "kI16x8Load8x8U";
|
||||||
|
case LoadTransformation::kI32x4Load16x4S:
|
||||||
|
return os << "kI32x4Load16x4S";
|
||||||
|
case LoadTransformation::kI32x4Load16x4U:
|
||||||
|
return os << "kI32x4Load16x4U";
|
||||||
|
case LoadTransformation::kI64x2Load32x2S:
|
||||||
|
return os << "kI64x2Load32x2S";
|
||||||
|
case LoadTransformation::kI64x2Load32x2U:
|
||||||
|
return os << "kI64x2Load32x2U";
|
||||||
}
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
@ -523,7 +531,11 @@ MachineType AtomicOpType(Operator const* op) {
|
|||||||
V(S32x4LoadSplat) \
|
V(S32x4LoadSplat) \
|
||||||
V(S64x2LoadSplat) \
|
V(S64x2LoadSplat) \
|
||||||
V(I16x8Load8x8S) \
|
V(I16x8Load8x8S) \
|
||||||
V(I16x8Load8x8U)
|
V(I16x8Load8x8U) \
|
||||||
|
V(I32x4Load16x4S) \
|
||||||
|
V(I32x4Load16x4U) \
|
||||||
|
V(I64x2Load32x2S) \
|
||||||
|
V(I64x2Load32x2U)
|
||||||
|
|
||||||
#define ATOMIC_U32_TYPE_LIST(V) \
|
#define ATOMIC_U32_TYPE_LIST(V) \
|
||||||
V(Uint8) \
|
V(Uint8) \
|
||||||
|
@ -67,6 +67,10 @@ enum class LoadTransformation {
|
|||||||
kS64x2LoadSplat,
|
kS64x2LoadSplat,
|
||||||
kI16x8Load8x8S,
|
kI16x8Load8x8S,
|
||||||
kI16x8Load8x8U,
|
kI16x8Load8x8U,
|
||||||
|
kI32x4Load16x4S,
|
||||||
|
kI32x4Load16x4U,
|
||||||
|
kI64x2Load32x2S,
|
||||||
|
kI64x2Load32x2U,
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t hash_value(LoadTransformation);
|
size_t hash_value(LoadTransformation);
|
||||||
|
@ -3703,6 +3703,14 @@ LoadTransformation GetLoadTransformation(
|
|||||||
return LoadTransformation::kI16x8Load8x8S;
|
return LoadTransformation::kI16x8Load8x8S;
|
||||||
} else if (memtype == MachineType::Uint8()) {
|
} else if (memtype == MachineType::Uint8()) {
|
||||||
return LoadTransformation::kI16x8Load8x8U;
|
return LoadTransformation::kI16x8Load8x8U;
|
||||||
|
} else if (memtype == MachineType::Int16()) {
|
||||||
|
return LoadTransformation::kI32x4Load16x4S;
|
||||||
|
} else if (memtype == MachineType::Uint16()) {
|
||||||
|
return LoadTransformation::kI32x4Load16x4U;
|
||||||
|
} else if (memtype == MachineType::Int32()) {
|
||||||
|
return LoadTransformation::kI64x2Load32x2S;
|
||||||
|
} else if (memtype == MachineType::Uint32()) {
|
||||||
|
return LoadTransformation::kI64x2Load32x2U;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2732,6 +2732,22 @@ class WasmFullDecoder : public WasmDecoder<validate> {
|
|||||||
len = DecodeLoadTransformMem(LoadType::kI32Load8U,
|
len = DecodeLoadTransformMem(LoadType::kI32Load8U,
|
||||||
LoadTransformationKind::kExtend);
|
LoadTransformationKind::kExtend);
|
||||||
break;
|
break;
|
||||||
|
case kExprI32x4Load16x4S:
|
||||||
|
len = DecodeLoadTransformMem(LoadType::kI32Load16S,
|
||||||
|
LoadTransformationKind::kExtend);
|
||||||
|
break;
|
||||||
|
case kExprI32x4Load16x4U:
|
||||||
|
len = DecodeLoadTransformMem(LoadType::kI32Load16U,
|
||||||
|
LoadTransformationKind::kExtend);
|
||||||
|
break;
|
||||||
|
case kExprI64x2Load32x2S:
|
||||||
|
len = DecodeLoadTransformMem(LoadType::kI64Load32S,
|
||||||
|
LoadTransformationKind::kExtend);
|
||||||
|
break;
|
||||||
|
case kExprI64x2Load32x2U:
|
||||||
|
len = DecodeLoadTransformMem(LoadType::kI64Load32U,
|
||||||
|
LoadTransformationKind::kExtend);
|
||||||
|
break;
|
||||||
default: {
|
default: {
|
||||||
FunctionSig* sig = WasmOpcodes::Signature(opcode);
|
FunctionSig* sig = WasmOpcodes::Signature(opcode);
|
||||||
if (!VALIDATE(sig != nullptr)) {
|
if (!VALIDATE(sig != nullptr)) {
|
||||||
|
@ -329,6 +329,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
|||||||
CASE_S64x2_OP(LoadSplat, "load_splat")
|
CASE_S64x2_OP(LoadSplat, "load_splat")
|
||||||
CASE_I16x8_OP(Load8x8S, "load8x8_s")
|
CASE_I16x8_OP(Load8x8S, "load8x8_s")
|
||||||
CASE_I16x8_OP(Load8x8U, "load8x8_u")
|
CASE_I16x8_OP(Load8x8U, "load8x8_u")
|
||||||
|
CASE_I32x4_OP(Load16x4S, "load16x4_s")
|
||||||
|
CASE_I32x4_OP(Load16x4U, "load16x4_u")
|
||||||
|
CASE_I64x2_OP(Load32x2S, "load32x2_s")
|
||||||
|
CASE_I64x2_OP(Load32x2U, "load32x2_u")
|
||||||
|
|
||||||
// Atomic operations.
|
// Atomic operations.
|
||||||
CASE_OP(AtomicNotify, "atomic.notify")
|
CASE_OP(AtomicNotify, "atomic.notify")
|
||||||
|
@ -441,6 +441,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
|
|||||||
V(I32x4UConvertI16x8High, 0xfdd1, s_s) \
|
V(I32x4UConvertI16x8High, 0xfdd1, s_s) \
|
||||||
V(I16x8Load8x8S, 0xfdd2, s_s) \
|
V(I16x8Load8x8S, 0xfdd2, s_s) \
|
||||||
V(I16x8Load8x8U, 0xfdd3, s_s) \
|
V(I16x8Load8x8U, 0xfdd3, s_s) \
|
||||||
|
V(I32x4Load16x4S, 0xfdd4, s_s) \
|
||||||
|
V(I32x4Load16x4U, 0xfdd5, s_s) \
|
||||||
|
V(I64x2Load32x2S, 0xfdd6, s_s) \
|
||||||
|
V(I64x2Load32x2U, 0xfdd7, s_s) \
|
||||||
V(I16x8AddHoriz, 0xfdbd, s_ss) \
|
V(I16x8AddHoriz, 0xfdbd, s_ss) \
|
||||||
V(I32x4AddHoriz, 0xfdbe, s_ss) \
|
V(I32x4AddHoriz, 0xfdbe, s_ss) \
|
||||||
V(F32x4AddHoriz, 0xfdbf, s_ss) \
|
V(F32x4AddHoriz, 0xfdbf, s_ss) \
|
||||||
|
@ -335,11 +335,21 @@ constexpr Vector<const int16_t> ValueHelper::GetVector() {
|
|||||||
return int16_vector();
|
return int16_vector();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr Vector<const uint16_t> ValueHelper::GetVector() {
|
||||||
|
return uint16_vector();
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
constexpr Vector<const int32_t> ValueHelper::GetVector() {
|
constexpr Vector<const int32_t> ValueHelper::GetVector() {
|
||||||
return int32_vector();
|
return int32_vector();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
constexpr Vector<const uint32_t> ValueHelper::GetVector() {
|
||||||
|
return uint32_vector();
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
constexpr Vector<const int64_t> ValueHelper::GetVector() {
|
constexpr Vector<const int64_t> ValueHelper::GetVector() {
|
||||||
return int64_vector();
|
return int64_vector();
|
||||||
|
@ -3352,6 +3352,26 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) {
|
|||||||
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
|
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
|
||||||
kExprI16x8Load8x8S);
|
kExprI16x8Load8x8S);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4U) {
|
||||||
|
RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd,
|
||||||
|
kExprI32x4Load16x4U);
|
||||||
|
}
|
||||||
|
|
||||||
|
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
|
||||||
|
RunLoadExtendTest<int16_t, int32_t>(execution_tier, lower_simd,
|
||||||
|
kExprI32x4Load16x4S);
|
||||||
|
}
|
||||||
|
|
||||||
|
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
|
||||||
|
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
|
||||||
|
kExprI64x2Load32x2U);
|
||||||
|
}
|
||||||
|
|
||||||
|
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
|
||||||
|
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
|
||||||
|
kExprI64x2Load32x2S);
|
||||||
|
}
|
||||||
#endif // V8_TARGET_ARCH_X64
|
#endif // V8_TARGET_ARCH_X64
|
||||||
|
|
||||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
||||||
|
Loading…
Reference in New Issue
Block a user