[wasm-simd][liftoff][ia32] Move v128.select into macro-assembler
This allows us to reuse this optimized code sequence in Liftoff. This is
similar to the x64 implementation, except that the macro-assembler function
takes an additional scratch register.

Change-Id: Ieaa5899cd1be65abee1c6e0c0908a357777afcd9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2610510
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71996}
This commit is contained in:
parent aef1be398f
commit 2aa3e64f54
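For context on what the new helper computes: v128.select is a bitwise select, dst = (mask & src1) | (~mask & src2), which is what the vpandn/vpand/vpor (AVX) and andnps/andps/orps (SSE) sequences in the diff below implement. The standalone C++ sketch here only illustrates that semantics on a 128-bit value modeled as two 64-bit halves; the names (V128, S128SelectModel) are hypothetical and this is not V8 code.

#include <cassert>
#include <cstdint>

// Hypothetical 128-bit value modeled as two 64-bit halves (illustration only).
struct V128 {
  uint64_t lo;
  uint64_t hi;
};

// Bitwise select: for each bit, take src1 where the mask bit is 1 and src2
// where it is 0. This mirrors what the S128Select macro-assembler helper
// emits via vpandn/vpand/vpor (AVX) or andnps/andps/orps (SSE).
V128 S128SelectModel(V128 mask, V128 src1, V128 src2) {
  return {(mask.lo & src1.lo) | (~mask.lo & src2.lo),
          (mask.hi & src1.hi) | (~mask.hi & src2.hi)};
}

int main() {
  V128 mask = {0xFFFFFFFF00000000ull, 0x0ull};
  V128 a = {0x1111111111111111ull, 0x2222222222222222ull};
  V128 b = {0x3333333333333333ull, 0x4444444444444444ull};
  V128 r = S128SelectModel(mask, a, b);
  assert(r.lo == 0x1111111133333333ull);  // upper 32 bits from a, lower from b
  assert(r.hi == 0x4444444444444444ull);  // mask is all-zero, so all from b
  return 0;
}

Note that the SSE path of S128Select requires dst == mask (see the DCHECK_EQ in the diff), because andnps/andps overwrite their first operand; this is why the Liftoff caller copies the mask into dst with movdqu when they differ.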
@@ -720,6 +720,24 @@ void TurboAssembler::I16x8ExtMul(XMMRegister dst, XMMRegister src1,
   }
 }
 
+void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
+                                XMMRegister src1, XMMRegister src2,
+                                XMMRegister scratch) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpandn(scratch, mask, src2);
+    vpand(dst, src1, mask);
+    vpor(dst, dst, scratch);
+  } else {
+    DCHECK_EQ(dst, mask);
+    // Use float ops as they are 1 byte shorter than int ops.
+    movaps(scratch, dst);
+    andnps(scratch, src2);
+    andps(dst, src1);
+    orps(dst, scratch);
+  }
+}
+
 void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
   DCHECK_GE(63, shift);
   if (shift >= 32) {
@@ -621,6 +621,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                    XMMRegister scratch, bool low, bool is_signed);
   void I16x8ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                    XMMRegister scratch, bool low, bool is_signed);
+  // Requires dst == mask when AVX is not supported.
+  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
+                  XMMRegister src2, XMMRegister scratch);
 
   void Push(Register src) { push(src); }
   void Push(Operand src) { push(src); }
@@ -6,6 +6,7 @@
 #include "src/codegen/assembler-inl.h"
 #include "src/codegen/callable.h"
 #include "src/codegen/ia32/assembler-ia32.h"
+#include "src/codegen/ia32/register-ia32.h"
 #include "src/codegen/macro-assembler.h"
 #include "src/codegen/optimized-compilation-info.h"
 #include "src/compiler/backend/code-generator-impl.h"
@@ -3835,24 +3836,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                i.InputOperand(1));
       break;
     }
-    case kSSES128Select: {
-      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
-      // Mask used here is stored in dst.
-      XMMRegister dst = i.OutputSimd128Register();
-      // Use float ops as they are 1 byte shorter than int ops.
-      __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
-      __ andnps(kScratchDoubleReg, i.InputSimd128Register(2));
-      __ andps(dst, i.InputSimd128Register(1));
-      __ orps(dst, kScratchDoubleReg);
-      break;
-    }
-    case kAVXS128Select: {
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister mask = i.InputSimd128Register(0);
-      __ vpandn(kScratchDoubleReg, mask, i.InputSimd128Register(2));
-      __ vpand(dst, i.InputSimd128Register(1), mask);
-      __ vpor(dst, dst, kScratchDoubleReg);
+    case kIA32S128Select: {
+      __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                    i.InputSimd128Register(1), i.InputSimd128Register(2),
+                    kScratchDoubleReg);
       break;
     }
     case kIA32S128AndNot: {
@@ -362,8 +362,7 @@ namespace compiler {
   V(AVXS128Or) \
   V(SSES128Xor) \
   V(AVXS128Xor) \
-  V(SSES128Select) \
-  V(AVXS128Select) \
+  V(IA32S128Select) \
   V(IA32S128AndNot) \
   V(IA32I8x16Swizzle) \
   V(IA32I8x16Shuffle) \
@@ -344,8 +344,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXS128Or:
     case kSSES128Xor:
     case kAVXS128Xor:
-    case kSSES128Select:
-    case kAVXS128Select:
+    case kIA32S128Select:
     case kIA32S128AndNot:
     case kIA32I8x16Swizzle:
     case kIA32I8x16Shuffle:
@@ -2476,16 +2476,10 @@ void InstructionSelector::VisitS128Zero(Node* node) {
 
 void InstructionSelector::VisitS128Select(Node* node) {
   IA32OperandGenerator g(this);
-  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
-  InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
-  InstructionOperand operand2 = g.UseRegister(node->InputAt(2));
-  if (IsSupported(AVX)) {
-    Emit(kAVXS128Select, g.DefineAsRegister(node), operand0, operand1,
-         operand2);
-  } else {
-    Emit(kSSES128Select, g.DefineSameAsFirst(node), operand0, operand1,
-         operand2);
-  }
+  InstructionOperand dst =
+      IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
+  Emit(kIA32S128Select, dst, g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
 }
 
 void InstructionSelector::VisitS128AndNot(Node* node) {
@@ -3147,17 +3147,16 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
                                         LiftoffRegister mask) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vxorps(liftoff::kScratchDoubleReg, src1.fp(), src2.fp());
-    vandps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, mask.fp());
-    vxorps(dst.fp(), liftoff::kScratchDoubleReg, src2.fp());
+  // Ensure that we don't overwrite any inputs with the movdqu below.
+  DCHECK_NE(dst, src1);
+  DCHECK_NE(dst, src2);
+  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
+    movdqu(dst.fp(), mask.fp());
+    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
+               liftoff::kScratchDoubleReg);
   } else {
-    movaps(liftoff::kScratchDoubleReg, src1.fp());
-    xorps(liftoff::kScratchDoubleReg, src2.fp());
-    andps(liftoff::kScratchDoubleReg, mask.fp());
-    if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
-    xorps(dst.fp(), liftoff::kScratchDoubleReg);
+    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
+               liftoff::kScratchDoubleReg);
   }
 }
 