[wasm] Add F32x4{Abs, Neg, AddHoriz}

- Remove redundant instruction from I16x8Splat
- Force F32x4Splat to use movss, as using MacroAssembler can mix SSE/AVX
  instructions

Bug: v8:6020
Change-Id: I781c22adecf892a79b6a38c3d83fc4022f9067de
Reviewed-on: https://chromium-review.googlesource.com/898429
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51123}
This commit is contained in:
parent
cf9b487355
commit
3363e51958
@ -2131,33 +2131,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64
|
||||
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
|
||||
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
|
||||
|
||||
@ -2170,7 +2152,17 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
|
||||
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64
|
||||
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
@ -2066,9 +2066,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kX64F32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Movss(dst, i.InputDoubleRegister(0));
|
||||
__ movss(dst, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Movss(dst, i.InputOperand(0));
|
||||
__ movss(dst, i.InputOperand(0));
|
||||
}
|
||||
__ shufps(dst, dst, 0x0);
|
||||
break;
|
||||
@ -2087,6 +2087,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Abs: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1);
|
||||
__ andps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
} else {
|
||||
__ pcmpeqd(dst, dst);
|
||||
__ psrld(dst, 1);
|
||||
__ andps(dst, i.InputSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Neg: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pslld(kScratchDoubleReg, 31);
|
||||
__ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
} else {
|
||||
__ pcmpeqd(dst, dst);
|
||||
__ pslld(dst, 31);
|
||||
__ xorps(dst, i.InputSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64F32x4RecipApprox: {
|
||||
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
@ -2100,6 +2128,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64F32x4AddHoriz: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Sub: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
@ -2273,7 +2306,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ movd(dst, i.InputRegister(0));
|
||||
__ pshuflw(dst, dst, 0x0);
|
||||
__ pshufhw(dst, dst, 0x0);
|
||||
__ pshufd(dst, dst, 0x0);
|
||||
break;
|
||||
}
|
||||
|
@ -149,9 +149,12 @@ namespace compiler {
|
||||
V(X64F32x4Splat) \
|
||||
V(X64F32x4ExtractLane) \
|
||||
V(X64F32x4ReplaceLane) \
|
||||
V(X64F32x4Abs) \
|
||||
V(X64F32x4Neg) \
|
||||
V(X64F32x4RecipApprox) \
|
||||
V(X64F32x4RecipSqrtApprox) \
|
||||
V(X64F32x4Add) \
|
||||
V(X64F32x4AddHoriz) \
|
||||
V(X64F32x4Sub) \
|
||||
V(X64F32x4Mul) \
|
||||
V(X64F32x4Min) \
|
||||
|
@ -128,7 +128,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64F32x4ReplaceLane:
|
||||
case kX64F32x4RecipApprox:
|
||||
case kX64F32x4RecipSqrtApprox:
|
||||
case kX64F32x4Abs:
|
||||
case kX64F32x4Neg:
|
||||
case kX64F32x4Add:
|
||||
case kX64F32x4AddHoriz:
|
||||
case kX64F32x4Sub:
|
||||
case kX64F32x4Mul:
|
||||
case kX64F32x4Min:
|
||||
|
@ -2374,6 +2374,7 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
|
||||
#define SIMD_BINOP_LIST(V) \
|
||||
V(F32x4Add) \
|
||||
V(F32x4AddHoriz) \
|
||||
V(F32x4Sub) \
|
||||
V(F32x4Mul) \
|
||||
V(F32x4Min) \
|
||||
@ -2435,6 +2436,8 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(S128Xor)
|
||||
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(I32x4Neg) \
|
||||
|
@ -494,16 +494,12 @@ void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
|
||||
}
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
WASM_SIMD_TEST(F32x4Abs) {
|
||||
RunF32x4UnOpTest(lower_simd, kExprF32x4Abs, std::abs);
|
||||
}
|
||||
WASM_SIMD_TEST(F32x4Neg) {
|
||||
RunF32x4UnOpTest(lower_simd, kExprF32x4Neg, Negate);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
|
||||
@ -1621,16 +1617,16 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
|
||||
RunBinaryLaneOpTest<int16_t>(lower_simd, kExprI16x8AddHoriz,
|
||||
{{1, 5, 9, 13, 17, 21, 25, 29}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
|
||||
RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
|
||||
{{1.0f, 5.0f, 9.0f, 13.0f}});
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
|
||||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64
|
||||
WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
|
||||
RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
|
||||
{{1.0f, 5.0f, 9.0f, 13.0f}});
|
||||
}
|
||||
|
||||
// Test some regular shuffles that may have special handling on some targets.
|
||||
// Test a normal and unary versions (where second operand isn't used).
|
||||
WASM_SIMD_COMPILED_TEST(S32x4Dup) {
|
||||
|
Loading…
Reference in New Issue
Block a user