[wasm-simd][ia32] Fix ext add codegen

The main problem here is that the macro-assembler for Pmaddubsw and
Pmaddwd expects dst == src1 when AVX is not supported.

For Pmaddwd, we use an existing macro to define the function.

For Pmaddubsw, we do the AVX check inline and use movaps if not
supported because it requires a SSSE3 scope, and we don't have an
existing macro to do this (we can find other uses and clean up as
necessary in the future).

Bug: v8:11086
Change-Id: I97bd29cd93456744414d28e5f1ffcbc875c3ab22
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2716740
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73111}
This commit is contained in:
Ng Zhi An 2021-02-26 13:25:04 -08:00 committed by Commit Bot
parent fcf29e4815
commit 39f9936c15
2 changed files with 7 additions and 8 deletions

View File

@ -1160,19 +1160,20 @@ void TurboAssembler::I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
Register scratch) {
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmaddubsw(dst, src, op);
} else {
CpuFeatureScope sse_scope(this, SSSE3);
movaps(dst, src);
pmaddubsw(dst, op);
}
Pmaddubsw(dst, src, op);
}
void TurboAssembler::I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch) {
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
}
// pmaddwd multiplies signed words in src and op, producing
// signed doublewords, then adds pairwise.
// src = |a|b|c|d|e|f|g|h|

View File

@ -442,7 +442,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubd, psubd)
@ -491,6 +490,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
}
AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE
// Non-SSE2 instructions.
@ -564,8 +564,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_XO_SSE4(Pminsb, pminsb)
AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, Operand, SSSE3)
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE