[wasm-simd][ia32][x64] Optimize i64x2.extend_high_i32x4_{s,u}

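- I64x2SConvertI32x4High (signed), SSE4.1 path: use movhlps instead of
  pshufd if dst == src; pshufd is still used when dst != src.
- I64x2UConvertI32x4High (unsigned), SSE path: use xorps (to zero the
  scratch register) with punpckhdq instead of two shuffles
  (pshufd + pmovzxdq); src is first copied to dst with movaps if dst != src.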

Bug: v8:11470
Change-Id: I50f77b8591372cccca48b9a6078fc3132fe48a2f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2773902
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73527}
This commit is contained in:
Ng Zhi An 2021-03-18 13:56:14 -07:00 committed by Commit Bot
parent b1c09571cc
commit 2969e67e3c
2 changed files with 20 additions and 8 deletions

View File

@ -799,7 +799,11 @@ void TurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src) {
vpmovsxdq(dst, dst);
} else {
CpuFeatureScope sse_scope(this, SSE4_1);
pshufd(dst, src, 0xEE);
if (dst == src) {
movhlps(dst, src);
} else {
pshufd(dst, src, 0xEE);
}
pmovsxdq(dst, dst);
}
}
@ -811,9 +815,11 @@ void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
vpxor(scratch, scratch, scratch);
vpunpckhdq(dst, src, scratch);
} else {
CpuFeatureScope sse_scope(this, SSE4_1);
pshufd(dst, src, 0xEE);
pmovzxdq(dst, dst);
if (dst != src) {
movaps(dst, src);
}
xorps(scratch, scratch);
punpckhdq(dst, scratch);
}
}

View File

@ -2227,7 +2227,11 @@ void TurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src) {
vpmovsxdq(dst, dst);
} else {
CpuFeatureScope sse_scope(this, SSE4_1);
pshufd(dst, src, 0xEE);
if (dst == src) {
movhlps(dst, src);
} else {
pshufd(dst, src, 0xEE);
}
pmovsxdq(dst, dst);
}
}
@ -2238,9 +2242,11 @@ void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src) {
vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
vpunpckhdq(dst, src, kScratchDoubleReg);
} else {
CpuFeatureScope sse_scope(this, SSE4_1);
pshufd(dst, src, 0xEE);
pmovzxdq(dst, dst);
if (dst != src) {
movaps(dst, src);
}
xorps(kScratchDoubleReg, kScratchDoubleReg);
punpckhdq(dst, kScratchDoubleReg);
}
}