[wasm-simd] Implement load extend with 4 and 8 lanes on IA32

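This CL implements 4 of the 6 load extend operations on IA32. The added
opcodes are: I16x8Load8x8S, I16x8Load8x8U, I32x4Load16x4S and
I32x4Load16x4U. The remaining two load extend operations (I64x2Load32x2S,
I64x2Load32x2U) and the load splat operations are still unimplemented on
IA32.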

Bug: v8:9886
Change-Id: I9961f97325168e3a0036e1b282b769cc65b06ffb
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1981329
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65743}
This commit is contained in:
Zhou, Zhiguo 2020-01-14 09:25:16 +08:00 committed by Commit Bot
parent 8d511cbd20
commit 4648b83c7a
6 changed files with 87 additions and 6 deletions

View File

@ -3734,6 +3734,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(esp, tmp); __ mov(esp, tmp);
break; break;
} }
case kIA32I16x8Load8x8S: {
__ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I16x8Load8x8U: {
__ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I32x4Load16x4S: {
__ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I32x4Load16x4U: {
__ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32S32x4Swizzle: { case kIA32S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount()); DCHECK_EQ(2, instr->InputCount());
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1)); __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));

View File

@ -347,6 +347,10 @@ namespace compiler {
V(AVXS128Select) \ V(AVXS128Select) \
V(IA32S8x16Swizzle) \ V(IA32S8x16Swizzle) \
V(IA32S8x16Shuffle) \ V(IA32S8x16Shuffle) \
V(IA32I16x8Load8x8S) \
V(IA32I16x8Load8x8U) \
V(IA32I32x4Load16x4S) \
V(IA32I32x4Load16x4U) \
V(IA32S32x4Swizzle) \ V(IA32S32x4Swizzle) \
V(IA32S32x4Shuffle) \ V(IA32S32x4Shuffle) \
V(IA32S16x8Blend) \ V(IA32S16x8Blend) \

View File

@ -388,7 +388,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Movss: case kIA32Movss:
case kIA32Movsd: case kIA32Movsd:
case kIA32Movdqu: case kIA32Movdqu:
// Moves are used for memory load/store operations. // Moves are used for memory load/store operations.
case kIA32I16x8Load8x8S:
case kIA32I16x8Load8x8U:
case kIA32I32x4Load16x4S:
case kIA32I32x4Load16x4U:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect; return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kIA32Peek: case kIA32Peek:

View File

@ -336,6 +336,62 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), edx)); Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), edx));
} }
// Selects the IA32 instruction for a load-transform node. The 8x8 and 16x4
// load-extend transformations are mapped to their IA32 opcodes; the load-splat
// and 32x2 load-extend transformations currently hit UNIMPLEMENTED(). The
// chosen opcode is combined with the addressing mode computed for the node's
// memory operand and emitted with a single register output.
void InstructionSelector::VisitLoadTransform(Node* node) {
  const LoadTransformParameters transform_params =
      LoadTransformParametersOf(node->op());

  InstructionCode arch_opcode = kArchNop;
  switch (transform_params.transformation) {
    // Load-extend transformations that have an IA32 opcode.
    case LoadTransformation::kI16x8Load8x8S:
      arch_opcode = kIA32I16x8Load8x8S;
      break;
    case LoadTransformation::kI16x8Load8x8U:
      arch_opcode = kIA32I16x8Load8x8U;
      break;
    case LoadTransformation::kI32x4Load16x4S:
      arch_opcode = kIA32I32x4Load16x4S;
      break;
    case LoadTransformation::kI32x4Load16x4U:
      arch_opcode = kIA32I32x4Load16x4U;
      break;

    // TODO(zhiguo.zhou@intel.com): Implement the rest of load splat and load
    // extend operations.
    case LoadTransformation::kS8x16LoadSplat:
    case LoadTransformation::kS16x8LoadSplat:
    case LoadTransformation::kS32x4LoadSplat:
    case LoadTransformation::kS64x2LoadSplat:
    case LoadTransformation::kI64x2Load32x2S:
    case LoadTransformation::kI64x2Load32x2U:
      UNIMPLEMENTED();
      break;

    default:
      UNREACHABLE();
  }

  // IA32 supports unaligned loads.
  DCHECK_NE(transform_params.kind, LoadKind::kUnaligned);
  // Trap handler is not supported on IA32.
  DCHECK_NE(transform_params.kind, LoadKind::kProtected);

  IA32OperandGenerator g(this);

  // The output operand is defined before the address operands are collected,
  // matching the order in which the operand generator is called.
  InstructionOperand result_operands[] = {g.DefineAsRegister(node)};

  // GetEffectiveAddressMemoryOperand fills in the address inputs and reports
  // how many of the slots it used through address_operand_count.
  InstructionOperand address_operands[3];
  size_t address_operand_count = 0;
  const AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
      node, address_operands, &address_operand_count);

  const InstructionCode encoded_code =
      arch_opcode | AddressingModeField::encode(addressing_mode);
  Emit(encoded_code, 1, result_operands, address_operand_count,
       address_operands);
}
void InstructionSelector::VisitLoad(Node* node) { void InstructionSelector::VisitLoad(Node* node) {
LoadRepresentation load_rep = LoadRepresentationOf(node->op()); LoadRepresentation load_rep = LoadRepresentationOf(node->op());

View File

@ -2632,9 +2632,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_IA32 #if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32 #endif // !V8_TARGET_ARCH_IA32

View File

@ -3310,7 +3310,9 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
} }
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM #if !V8_TARGET_ARCH_IA32
// TODO(zhiguo.zhou@intel.com): Add the tests on IA32 once these operations are
// implemented.
template <typename T> template <typename T>
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) { WasmOpcode op) {
@ -3347,6 +3349,7 @@ WASM_SIMD_TEST_NO_LOWERING(S32x4LoadSplat) {
WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) { WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) {
RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kExprS64x2LoadSplat); RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kExprS64x2LoadSplat);
} }
#endif // !V8_TARGET_ARCH_IA32
template <typename S, typename T> template <typename S, typename T>
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
@ -3391,6 +3394,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
kExprI32x4Load16x4S); kExprI32x4Load16x4S);
} }
#if !V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) { WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd, RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2U); kExprI64x2Load32x2U);
@ -3400,7 +3404,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd, RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2S); kExprI64x2Load32x2S);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM #endif // !V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \ #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM V8_TARGET_ARCH_ARM