[wasm] Add F32x4{Abs, Neg, AddHoriz}

- Remove redundant instruction from I16x8Splat
- Force F32x4Splat to use movss, as using the MacroAssembler can mix SSE/AVX
  instructions
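
For context: the capitalized MacroAssembler helpers pick the VEX-encoded AVX form of an instruction when AVX is available, while neighboring raw-SSE emissions (such as the shufps that follows the splat) stay legacy-encoded, and mixing the two encodings can incur SSE/AVX transition penalties. A minimal sketch of that dispatch pattern (simplified, not the literal V8 helper):

void MacroAssembler::Movss(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmovss(dst, dst, src);  // VEX-encoded AVX form
  } else {
    movss(dst, src);  // legacy SSE encoding
  }
}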

Bug: v8:6020
Change-Id: I781c22adecf892a79b6a38c3d83fc4022f9067de
Reviewed-on: https://chromium-review.googlesource.com/898429
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51123}
Authored by Deepti Gandluri on 2018-02-01 15:36:59 -08:00; committed by Commit Bot
commit 3363e51958 (parent cf9b487355)
6 changed files with 64 additions and 35 deletions

src/compiler/instruction-selector.cc

@@ -2131,33 +2131,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
@@ -2170,7 +2152,17 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
UNIMPLEMENTED();
}

src/compiler/x64/code-generator-x64.cc

@@ -2066,9 +2066,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
- __ Movss(dst, i.InputDoubleRegister(0));
+ __ movss(dst, i.InputDoubleRegister(0));
} else {
- __ Movss(dst, i.InputOperand(0));
+ __ movss(dst, i.InputOperand(0));
}
__ shufps(dst, dst, 0x0);
break;
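
As scalar semantics, the splat just replicates the input into all four lanes: movss loads lane 0, and shufps with imm8 0x0 broadcasts it. A lane-level sketch (illustrative C++, not V8 code):

#include <array>
// Model of kX64F32x4Splat.
std::array<float, 4> F32x4Splat(float x) {
  return {x, x, x, x};  // shufps(dst, dst, 0x0) copies lane 0 everywhere
}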
@@ -2087,6 +2087,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
break;
}
case kX64F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
// Build the 0x7FFFFFFF mask in the scratch register: pcmpeqd sets all
// bits, then psrld by 1 clears each lane's sign bit.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ andps(dst, kScratchDoubleReg);
} else {
// Build the mask directly in dst, then AND it with the source.
__ pcmpeqd(dst, dst);
__ psrld(dst, 1);
__ andps(dst, src);
}
break;
}
case kX64F32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
// Build the 0x80000000 mask: pcmpeqd sets all bits, then pslld by 31
// keeps only each lane's sign bit. XOR flips the signs.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pslld(kScratchDoubleReg, 31);
__ xorps(dst, kScratchDoubleReg);
} else {
__ pcmpeqd(dst, dst);
__ pslld(dst, 31);
__ xorps(dst, src);
}
break;
}
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
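
The Abs and Neg cases above synthesize their masks in registers instead of loading constants: pcmpeqd of a register with itself yields all-ones, psrld by 1 turns that into 0x7FFFFFFF per lane, and pslld by 31 into 0x80000000. A scalar model of both bit tricks (a sketch; the function names are illustrative, not V8 code):

#include <cstdint>
#include <cstring>
float F32Abs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7FFFFFFFu;  // clear the sign bit, as andps does per lane
  std::memcpy(&x, &bits, sizeof x);
  return x;
}
float F32Neg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= 0x80000000u;  // flip the sign bit, as xorps does per lane
  std::memcpy(&x, &bits, sizeof x);
  return x;
}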
@@ -2100,6 +2128,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
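
haddps computes pairwise sums of adjacent lanes, with the destination operand's pairs landing in the low half of the result, which is why the instruction can work in place on the first input. A lane-level sketch (illustrative C++):

#include <array>
std::array<float, 4> F32x4AddHoriz(const std::array<float, 4>& a,
                                   const std::array<float, 4>& b) {
  // haddps(a, b): a's adjacent pairs, then b's adjacent pairs.
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}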
@@ -2273,7 +2306,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
__ movd(dst, i.InputRegister(0));
__ pshuflw(dst, dst, 0x0);
- __ pshufhw(dst, dst, 0x0);
__ pshufd(dst, dst, 0x0);
break;
}
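
The dropped pshufhw was redundant: after pshuflw with imm8 0x0, words 0-3 all hold the splat value, so dword 0 already contains two copies, and pshufd 0x0 then broadcasts that dword across the whole register, covering words 4-7 as well. A lane-level sketch (illustrative C++):

#include <array>
#include <cstdint>
std::array<uint16_t, 8> I16x8Splat(uint16_t x) {
  std::array<uint16_t, 8> v{};
  v[0] = x;                                 // movd: value in word 0
  for (int i = 1; i < 4; ++i) v[i] = v[0];  // pshuflw 0x0: words 0-3
  for (int i = 1; i < 4; ++i) {             // pshufd 0x0: dword 0 everywhere
    v[2 * i] = v[0];
    v[2 * i + 1] = v[1];
  }
  return v;
}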

src/compiler/x64/instruction-codes-x64.h

@@ -149,9 +149,12 @@ namespace compiler {
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
V(X64F32x4Abs) \
V(X64F32x4Neg) \
V(X64F32x4RecipApprox) \
V(X64F32x4RecipSqrtApprox) \
V(X64F32x4Add) \
V(X64F32x4AddHoriz) \
V(X64F32x4Sub) \
V(X64F32x4Mul) \
V(X64F32x4Min) \

src/compiler/x64/instruction-scheduler-x64.cc

@@ -128,7 +128,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4ReplaceLane:
case kX64F32x4RecipApprox:
case kX64F32x4RecipSqrtApprox:
case kX64F32x4Abs:
case kX64F32x4Neg:
case kX64F32x4Add:
case kX64F32x4AddHoriz:
case kX64F32x4Sub:
case kX64F32x4Mul:
case kX64F32x4Min:

src/compiler/x64/instruction-selector-x64.cc

@@ -2374,6 +2374,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_BINOP_LIST(V) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Min) \
@@ -2435,6 +2436,8 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Xor)
#define SIMD_UNOP_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I32x4Neg) \

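Adding an opcode to these lists is enough to generate its visitor: the lists are consumed by X-macro expansion elsewhere in this file. A sketch of the unop expansion (simplified; the exact operand constraints in the real macro may differ):

#define VISIT_SIMD_UNOP(Opcode)                         \
  void InstructionSelector::Visit##Opcode(Node* node) { \
    X64OperandGenerator g(this);                        \
    Emit(kX64##Opcode, g.DefineAsRegister(node),        \
         g.UseRegister(node->InputAt(0)));              \
  }
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
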
test/cctest/wasm/test-run-wasm-simd.cc

@@ -494,16 +494,12 @@ void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
}
}
- #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
- V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Abs) {
RunF32x4UnOpTest(lower_simd, kExprF32x4Abs, std::abs);
}
WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(lower_simd, kExprF32x4Neg, Negate);
}
- #endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
- // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
@@ -1621,16 +1617,16 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
RunBinaryLaneOpTest<int16_t>(lower_simd, kExprI16x8AddHoriz,
{{1, 5, 9, 13, 17, 21, 25, 29}});
}
+ WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
+ RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
+ {{1.0f, 5.0f, 9.0f, 13.0f}});
+ }
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
- WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
- RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
- {{1.0f, 5.0f, 9.0f, 13.0f}});
- }
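
The expected lanes follow from the inputs RunBinaryLaneOpTest appears to use, namely operands filled with consecutive lane indices: with a = {0, 1, 2, 3} and b = {4, 5, 6, 7}, horizontal add yields {0+1, 2+3, 4+5, 6+7} = {1, 5, 9, 13}, and the I16x8AddHoriz expectation above is the same pattern over sixteen consecutive inputs.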
// Test some regular shuffles that may have special handling on some targets.
// Test normal and unary versions (where the second operand isn't used).
WASM_SIMD_COMPILED_TEST(S32x4Dup) {