[wasm-simd] Implement load extend with 4 and 8 lanes on IA32
This CL implements 4 of the 6 load extend operations. The added opcodes include: I16x8Load8x8S, I16x8Load8x8U, I32x4Load16x4S, I32x4Load16x4U. Bug: v8:9886 Change-Id: I9961f97325168e3a0036e1b282b769cc65b06ffb Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1981329 Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Reviewed-by: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#65743}
This commit is contained in:
parent
8d511cbd20
commit
4648b83c7a
@ -3734,6 +3734,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ mov(esp, tmp);
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8Load8x8S: {
|
||||
__ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8Load8x8U: {
|
||||
__ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4Load16x4S: {
|
||||
__ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4Load16x4U: {
|
||||
__ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kIA32S32x4Swizzle: {
|
||||
DCHECK_EQ(2, instr->InputCount());
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
|
||||
|
@ -347,6 +347,10 @@ namespace compiler {
|
||||
V(AVXS128Select) \
|
||||
V(IA32S8x16Swizzle) \
|
||||
V(IA32S8x16Shuffle) \
|
||||
V(IA32I16x8Load8x8S) \
|
||||
V(IA32I16x8Load8x8U) \
|
||||
V(IA32I32x4Load16x4S) \
|
||||
V(IA32I32x4Load16x4U) \
|
||||
V(IA32S32x4Swizzle) \
|
||||
V(IA32S32x4Shuffle) \
|
||||
V(IA32S16x8Blend) \
|
||||
|
@ -389,6 +389,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32Movsd:
|
||||
case kIA32Movdqu:
|
||||
// Moves are used for memory load/store operations.
|
||||
case kIA32I16x8Load8x8S:
|
||||
case kIA32I16x8Load8x8U:
|
||||
case kIA32I32x4Load16x4S:
|
||||
case kIA32I32x4Load16x4U:
|
||||
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
|
||||
|
||||
case kIA32Peek:
|
||||
|
@ -336,6 +336,62 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
|
||||
Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), edx));
|
||||
}
|
||||
|
||||
// Instruction selection for a LoadTransform node on IA32.
//
// Maps the node's LoadTransformation to the matching kIA32* load-extend
// opcode and emits a single instruction whose only output is the node's
// result register and whose inputs and addressing mode are produced by
// GetEffectiveAddressMemoryOperand(). Transformations without an IA32 opcode
// yet hit UNIMPLEMENTED(); any transformation not listed hits UNREACHABLE().
void InstructionSelector::VisitLoadTransform(Node* node) {
  const LoadTransformParameters params = LoadTransformParametersOf(node->op());

  // kArchNop is only a placeholder; every path that gets past the switch has
  // overwritten it with a real opcode.
  InstructionCode opcode = kArchNop;
  switch (params.transformation) {
    // Load-extend operations that have an IA32 opcode.
    case LoadTransformation::kI16x8Load8x8S:
      opcode = kIA32I16x8Load8x8S;
      break;
    case LoadTransformation::kI16x8Load8x8U:
      opcode = kIA32I16x8Load8x8U;
      break;
    case LoadTransformation::kI32x4Load16x4S:
      opcode = kIA32I32x4Load16x4S;
      break;
    case LoadTransformation::kI32x4Load16x4U:
      opcode = kIA32I32x4Load16x4U;
      break;

    // TODO(zhiguo.zhou@intel.com): Implement the rest of load splat and load
    // extend operations.
    case LoadTransformation::kS8x16LoadSplat:
    case LoadTransformation::kS16x8LoadSplat:
    case LoadTransformation::kS32x4LoadSplat:
    case LoadTransformation::kS64x2LoadSplat:
    case LoadTransformation::kI64x2Load32x2S:
    case LoadTransformation::kI64x2Load32x2U:
      UNIMPLEMENTED();
      break;

    default:
      UNREACHABLE();
  }

  // IA32 supports unaligned loads.
  DCHECK_NE(params.kind, LoadKind::kUnaligned);
  // Trap handler is not supported on IA32.
  DCHECK_NE(params.kind, LoadKind::kProtected);

  IA32OperandGenerator g(this);
  // Single output: the loaded (and extended) value, defined in a register.
  InstructionOperand outputs[1] = {g.DefineAsRegister(node)};

  // The operand generator fills `inputs` / `input_count` with the address
  // operands and reports the addressing mode it chose for them.
  InstructionOperand inputs[3];
  size_t input_count = 0;
  const AddressingMode mode =
      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);

  // Fold the addressing mode into the instruction code before emitting.
  const InstructionCode code = opcode | AddressingModeField::encode(mode);
  Emit(code, 1, outputs, input_count, inputs);
}
|
||||
|
||||
void InstructionSelector::VisitLoad(Node* node) {
|
||||
LoadRepresentation load_rep = LoadRepresentationOf(node->op());
|
||||
|
||||
|
@ -2632,9 +2632,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Placeholder visitor: S128AndNot has no instruction selection here, so
// reaching it reports the operation as unimplemented.
void InstructionSelector::VisitS128AndNot(Node* node) {
  UNIMPLEMENTED();
}
|
||||
#if !V8_TARGET_ARCH_ARM
|
||||
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM
|
||||
#if !V8_TARGET_ARCH_IA32
|
||||
// Placeholder visitor for I64x2Mul, compiled only when the target is not IA32
// (see the enclosing #if); reaching it reports the operation as unimplemented.
void InstructionSelector::VisitI64x2Mul(Node* node) {
  UNIMPLEMENTED();
}
|
||||
#endif // !V8_TARGET_ARCH_IA32
|
||||
|
@ -3310,7 +3310,9 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
|
||||
}
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||
#if !V8_TARGET_ARCH_IA32
|
||||
// TODO(zhiguo.zhou@intel.com): Add the tests on IA32 once these operations are
|
||||
// implemented.
|
||||
template <typename T>
|
||||
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode op) {
|
||||
@ -3347,6 +3349,7 @@ WASM_SIMD_TEST_NO_LOWERING(S32x4LoadSplat) {
|
||||
// Exercises the S64x2LoadSplat opcode through the shared load-splat test
// driver, using int64_t as the lane type.
WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) {
  constexpr WasmOpcode kSplatOpcode = kExprS64x2LoadSplat;
  RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kSplatOpcode);
}
|
||||
#endif // !V8_TARGET_ARCH_IA32
|
||||
|
||||
template <typename S, typename T>
|
||||
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
@ -3391,6 +3394,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
|
||||
kExprI32x4Load16x4S);
|
||||
}
|
||||
|
||||
#if !V8_TARGET_ARCH_IA32
|
||||
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
|
||||
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
|
||||
kExprI64x2Load32x2U);
|
||||
@ -3400,7 +3404,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
|
||||
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
|
||||
kExprI64x2Load32x2S);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||
#endif // !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
||||
V8_TARGET_ARCH_ARM
|
||||
|
Loading…
Reference in New Issue
Block a user