[wasm-simd] Implement remaining load_extend for x64

This implements the rest of the load extend instructions:

- i32x4.load16x4_s
- i32x4.load16x4_u
- i64x2.load32x2_s
- i64x2.load32x2_u

Bug: v8:9886
Change-Id: I4649f77bae5224042a1628d9f0498c050b1e599d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1903812
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65017}
This commit is contained in:
Ng Zhi An 2019-11-15 10:41:44 -08:00 committed by Commit Bot
parent 88a2d01148
commit 461b98f3e0
13 changed files with 127 additions and 1 deletion

View File

@@ -118,9 +118,11 @@
V(ptest, 66, 0F, 38, 17) \
V(pmovsxbw, 66, 0F, 38, 20) \
V(pmovsxwd, 66, 0F, 38, 23) \
V(pmovsxdq, 66, 0F, 38, 25) \
V(packusdw, 66, 0F, 38, 2B) \
V(pmovzxbw, 66, 0F, 38, 30) \
V(pmovzxwd, 66, 0F, 38, 33) \
V(pmovzxdq, 66, 0F, 38, 35) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \
V(pminuw, 66, 0F, 38, 3A) \

View File

@@ -3718,15 +3718,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8Load8x8S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I16x8Load8x8U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I32x4Load16x4S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I32x4Load16x4U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I64x2Load32x2S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I64x2Load32x2U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount());
ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,

View File

@@ -316,6 +316,10 @@ namespace compiler {
V(X64S64x2LoadSplat) \
V(X64I16x8Load8x8S) \
V(X64I16x8Load8x8U) \
V(X64I32x4Load16x4S) \
V(X64I32x4Load16x4U) \
V(X64I64x2Load32x2S) \
V(X64I64x2Load32x2U) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
V(X64S16x8Blend) \

View File

@@ -367,6 +367,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S64x2LoadSplat:
case kX64I16x8Load8x8S:
case kX64I16x8Load8x8U:
case kX64I32x4Load16x4S:
case kX64I32x4Load16x4U:
case kX64I64x2Load32x2S:
case kX64I64x2Load32x2U:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kX64Peek:

View File

@@ -348,6 +348,18 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
case LoadTransformation::kI16x8Load8x8U:
opcode = kX64I16x8Load8x8U;
break;
case LoadTransformation::kI32x4Load16x4S:
opcode = kX64I32x4Load16x4S;
break;
case LoadTransformation::kI32x4Load16x4U:
opcode = kX64I32x4Load16x4U;
break;
case LoadTransformation::kI64x2Load32x2S:
opcode = kX64I64x2Load32x2S;
break;
case LoadTransformation::kI64x2Load32x2U:
opcode = kX64I64x2Load32x2U;
break;
default:
UNREACHABLE();
}

View File

@@ -62,6 +62,14 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
return os << "kI16x8Load8x8S";
case LoadTransformation::kI16x8Load8x8U:
return os << "kI16x8Load8x8U";
case LoadTransformation::kI32x4Load16x4S:
return os << "kI32x4Load16x4S";
case LoadTransformation::kI32x4Load16x4U:
return os << "kI32x4Load16x4U";
case LoadTransformation::kI64x2Load32x2S:
return os << "kI64x2Load32x2S";
case LoadTransformation::kI64x2Load32x2U:
return os << "kI64x2Load32x2U";
}
UNREACHABLE();
}
@@ -523,7 +531,11 @@ MachineType AtomicOpType(Operator const* op) {
V(S32x4LoadSplat) \
V(S64x2LoadSplat) \
V(I16x8Load8x8S) \
V(I16x8Load8x8U)
V(I16x8Load8x8U) \
V(I32x4Load16x4S) \
V(I32x4Load16x4U) \
V(I64x2Load32x2S) \
V(I64x2Load32x2U)
#define ATOMIC_U32_TYPE_LIST(V) \
V(Uint8) \

View File

@@ -67,6 +67,10 @@ enum class LoadTransformation {
kS64x2LoadSplat,
kI16x8Load8x8S,
kI16x8Load8x8U,
kI32x4Load16x4S,
kI32x4Load16x4U,
kI64x2Load32x2S,
kI64x2Load32x2U,
};
size_t hash_value(LoadTransformation);

View File

@@ -3703,6 +3703,14 @@ LoadTransformation GetLoadTransformation(
return LoadTransformation::kI16x8Load8x8S;
} else if (memtype == MachineType::Uint8()) {
return LoadTransformation::kI16x8Load8x8U;
} else if (memtype == MachineType::Int16()) {
return LoadTransformation::kI32x4Load16x4S;
} else if (memtype == MachineType::Uint16()) {
return LoadTransformation::kI32x4Load16x4U;
} else if (memtype == MachineType::Int32()) {
return LoadTransformation::kI64x2Load32x2S;
} else if (memtype == MachineType::Uint32()) {
return LoadTransformation::kI64x2Load32x2U;
}
break;
}

View File

@@ -2732,6 +2732,22 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = DecodeLoadTransformMem(LoadType::kI32Load8U,
LoadTransformationKind::kExtend);
break;
case kExprI32x4Load16x4S:
len = DecodeLoadTransformMem(LoadType::kI32Load16S,
LoadTransformationKind::kExtend);
break;
case kExprI32x4Load16x4U:
len = DecodeLoadTransformMem(LoadType::kI32Load16U,
LoadTransformationKind::kExtend);
break;
case kExprI64x2Load32x2S:
len = DecodeLoadTransformMem(LoadType::kI64Load32S,
LoadTransformationKind::kExtend);
break;
case kExprI64x2Load32x2U:
len = DecodeLoadTransformMem(LoadType::kI64Load32U,
LoadTransformationKind::kExtend);
break;
default: {
FunctionSig* sig = WasmOpcodes::Signature(opcode);
if (!VALIDATE(sig != nullptr)) {

View File

@@ -329,6 +329,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S64x2_OP(LoadSplat, "load_splat")
CASE_I16x8_OP(Load8x8S, "load8x8_s")
CASE_I16x8_OP(Load8x8U, "load8x8_u")
CASE_I32x4_OP(Load16x4S, "load16x4_s")
CASE_I32x4_OP(Load16x4U, "load16x4_u")
CASE_I64x2_OP(Load32x2S, "load32x2_s")
CASE_I64x2_OP(Load32x2U, "load32x2_u")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")

View File

@@ -441,6 +441,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I32x4UConvertI16x8High, 0xfdd1, s_s) \
V(I16x8Load8x8S, 0xfdd2, s_s) \
V(I16x8Load8x8U, 0xfdd3, s_s) \
V(I32x4Load16x4S, 0xfdd4, s_s) \
V(I32x4Load16x4U, 0xfdd5, s_s) \
V(I64x2Load32x2S, 0xfdd6, s_s) \
V(I64x2Load32x2U, 0xfdd7, s_s) \
V(I16x8AddHoriz, 0xfdbd, s_ss) \
V(I32x4AddHoriz, 0xfdbe, s_ss) \
V(F32x4AddHoriz, 0xfdbf, s_ss) \

View File

@@ -335,11 +335,21 @@ constexpr Vector<const int16_t> ValueHelper::GetVector() {
return int16_vector();
}
template <>
constexpr Vector<const uint16_t> ValueHelper::GetVector() {
return uint16_vector();
}
template <>
constexpr Vector<const int32_t> ValueHelper::GetVector() {
return int32_vector();
}
template <>
constexpr Vector<const uint32_t> ValueHelper::GetVector() {
return uint32_vector();
}
template <>
constexpr Vector<const int64_t> ValueHelper::GetVector() {
return int64_vector();

View File

@@ -3352,6 +3352,26 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) {
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
kExprI16x8Load8x8S);
}
// i32x4.load16x4_u: loads four uint16_t lanes and extends each to uint32_t
// (unsigned variant, per the <uint16_t, uint32_t> template arguments).
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4U) {
RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd,
kExprI32x4Load16x4U);
}
// i32x4.load16x4_s: loads four int16_t lanes and extends each to int32_t
// (signed variant, per the <int16_t, int32_t> template arguments).
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
RunLoadExtendTest<int16_t, int32_t>(execution_tier, lower_simd,
kExprI32x4Load16x4S);
}
// i64x2.load32x2_u: loads two uint32_t lanes and extends each to uint64_t
// (unsigned variant, per the <uint32_t, uint64_t> template arguments).
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2U);
}
// i64x2.load32x2_s: loads two int32_t lanes and extends each to int64_t
// (signed variant, per the <int32_t, int64_t> template arguments).
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2S);
}
#endif // V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \