[wasm-simd] Enhance Shufps to copy src to dst
Extract Shufps to handle both the AVX and SSE cases; in the SSE case it copies src to dst if they are not the same register. This allows us to use it in Liftoff as well, without the extra copy when AVX is supported. In other places the Shufps macro is unnecessary, since those call sites are already inside a branch that is only taken when AVX is not supported, so we can simply use the raw shufps instruction (non-macro-assembler). Bug: v8:9561 Change-Id: Icb043d7a43397c1b0810ece2666be567f0f5986c Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2513866 Reviewed-by: Clemens Backes <clemensb@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#70911}
This commit is contained in:
parent
d988237e7a
commit
14570fe0c9
@ -1727,6 +1727,18 @@ void TurboAssembler::RetpolineJump(Register reg) {
|
||||
ret(0);
|
||||
}
|
||||
|
||||
// Shuffles the four 32-bit lanes of |src| into |dst| as selected by |imm8|,
// i.e. the two-operand equivalent of vshufps(dst, src, src, imm8).
//
// With AVX the three-operand form is emitted directly and no copy is needed.
// Without AVX, shufps reads its two low result lanes from the destination
// register, so |src| is first copied into |dst| when the registers differ.
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vshufps(dst, src, src, imm8);
    return;
  }

  if (dst != src) {
    // Copy the full 128 bits: any lane of src may be selected by imm8, and a
    // register-to-register movss would only transfer the low lane.
    movaps(dst, src);
  }
  // Forward the caller's selector so both code paths produce the same result.
  shufps(dst, src, imm8);
}
|
||||
|
||||
void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
|
||||
if (imm8 == 0) {
|
||||
Movd(dst, src);
|
||||
|
@ -224,7 +224,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Divpd, divpd)
|
||||
AVX_OP(Maxps, maxps)
|
||||
AVX_OP(Maxpd, maxpd)
|
||||
AVX_OP(Shufps, shufps)
|
||||
AVX_OP(Cvtdq2ps, cvtdq2ps)
|
||||
AVX_OP(Rcpps, rcpps)
|
||||
AVX_OP(Rsqrtps, rsqrtps)
|
||||
@ -519,6 +518,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void Trap() override;
|
||||
void DebugBreak() override;
|
||||
|
||||
// Emits vshufps(dst, src, src, imm8) when AVX is supported. Otherwise src is
// first moved into dst (only when the two registers differ) and a non-AVX
// shufps on dst is emitted instead.
|
||||
void Shufps(XMMRegister dst, XMMRegister src, byte imm8);
|
||||
|
||||
// Non-SSE2 instructions.
|
||||
void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
|
||||
|
||||
|
@ -2491,15 +2491,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputDoubleRegister(0);
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vshufps(dst, src, src, byte{0x0});
|
||||
} else {
|
||||
DCHECK_EQ(dst, src);
|
||||
__ Shufps(dst, dst, byte{0x0});
|
||||
}
|
||||
__ Shufps(i.OutputSimd128Register(), i.InputDoubleRegister(0), 0);
|
||||
break;
|
||||
}
|
||||
case kX64F32x4ExtractLane: {
|
||||
@ -3663,8 +3655,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
} else {
|
||||
__ Movss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
|
||||
__ movss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
|
||||
byte{0});
|
||||
}
|
||||
break;
|
||||
|
@ -2309,8 +2309,8 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
|
||||
CpuFeatureScope avx_scope(this, AVX);
|
||||
vbroadcastss(dst.fp(), src_op);
|
||||
} else {
|
||||
Movss(dst.fp(), src_op);
|
||||
Shufps(dst.fp(), dst.fp(), byte{0});
|
||||
movss(dst.fp(), src_op);
|
||||
shufps(dst.fp(), dst.fp(), byte{0});
|
||||
}
|
||||
} else if (memtype == MachineType::Int64()) {
|
||||
Movddup(dst.fp(), src_op);
|
||||
@ -2419,10 +2419,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
|
||||
|
||||
// Broadcasts lane 0 of |src| into all four 32-bit lanes of |dst| (shuffle
// selector 0 picks lane 0 for every result lane).
//
// No explicit register move is emitted here: Shufps itself moves src into dst
// on the non-AVX path when the registers differ, and the AVX path does not
// need a copy at all.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Shufps(dst.fp(), src.fp(), 0);
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
|
||||
|
Loading…
Reference in New Issue
Block a user