[wasm-simd] Share extadd pairwise implementation
Bug: v8:11589 Change-Id: I7c97920d8ab94408b5cde4e90e7ff1aa9bcaeeba Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3119995 Reviewed-by: Adam Klein <adamk@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/main@{#76511}
This commit is contained in:
parent
24af48d6e4
commit
eaf3044073
@ -701,95 +701,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                                               XMMRegister tmp,
                                               Register scratch) {
  // Emits the signed i8x16 -> i16x8 pairwise add as a pmaddubsw against the
  // i8x16_splat_0x01 external constant.
  //   dst:     receives the result.
  //   src:     input vector.
  //   tmp:     XMM temporary holding the constant (AVX) or the partial result
  //            (SSSE3 when dst aliases src).
  //   scratch: general-purpose register handed to ExternalReferenceAsOperand.
  //
  // pmaddubsw treats its first operand as unsigned, so the constant is passed
  // as the first operand and src as the second.
  Operand splat_ones = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vmovdqa(tmp, splat_ones);
    vpmaddubsw(dst, tmp, src);
    return;
  }

  CpuFeatureScope sse_scope(this, SSSE3);
  // The two-operand form overwrites its first operand. When dst aliases src
  // the constant cannot be loaded into dst, so accumulate in tmp and copy the
  // result over afterwards.
  const bool dst_aliases_src = dst == src;
  XMMRegister acc = dst_aliases_src ? tmp : dst;
  movaps(acc, splat_ones);
  pmaddubsw(acc, src);
  if (dst_aliases_src) {
    movaps(dst, acc);
  }
}
|
||||
|
||||
void TurboAssembler::I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
                                               Register scratch) {
  // Emits the unsigned i8x16 -> i16x8 pairwise add as a pmaddubsw of src
  // against the i8x16_splat_0x01 external constant.
  //   dst:     receives the result.
  //   src:     input vector; used as the first pmaddubsw operand, which the
  //            instruction treats as unsigned.
  //   scratch: general-purpose register handed to ExternalReferenceAsOperand.
  Operand op = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmaddubsw(dst, src, op);
  } else {
    CpuFeatureScope sse_scope(this, SSSE3);
    // The SSE form is destructive on its first operand, so src must be in dst
    // first. Skip the copy when the registers already coincide to avoid
    // emitting a useless self-move.
    if (dst != src) {
      movaps(dst, src);
    }
    pmaddubsw(dst, op);
  }
}
|
||||
|
||||
void TurboAssembler::I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
                                               Register scratch) {
  // Signed i16x8 -> i32x4 pairwise add. pmaddwd multiplies the signed words of
  // src by the words of the i16x8_splat_0x0001 constant, producing signed
  // doublewords, and then adds adjacent products:
  //   src = |a|b|c|d|e|f|g|h|
  //   dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
  // scratch is the general-purpose register handed to
  // ExternalReferenceAsOperand to form the constant's operand.
  Pmaddwd(dst, src,
          ExternalReferenceAsOperand(
              ExternalReference::address_of_wasm_i16x8_splat_0x0001(),
              scratch));
}
|
||||
|
||||
void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
                                               XMMRegister tmp) {
  // Unsigned i16x8 -> i32x4 pairwise add: split every 32-bit lane into its
  // zero-extended high and low 16-bit halves and add the two.
  //   dst: receives the result.
  //   src: input vector.
  //   tmp: XMM temporary; overwritten on every path.
  ASM_CODE_COMMENT(this);

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // tmp = |0|a|0|c|0|e|0|g|
    vpsrld(tmp, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, tmp, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, tmp, dst);
    return;
  }

  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative constants,
    // see https://github.com/WebAssembly/simd/pull/380.
    // Same sequence as the AVX path, with explicit copies because the SSE
    // instructions overwrite their first operand.
    movaps(tmp, src);
    psrld(tmp, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, tmp, 0xAA);
    paddd(dst, tmp);
    return;
  }

  // Fallback without pblendw: isolate the low halves with a mask instead.
  // src = |a|b|c|d|e|f|g|h|
  // tmp = i32x4.splat(0x0000FFFF)
  pcmpeqd(tmp, tmp);
  psrld(tmp, byte{16});
  // tmp = |0|b|0|d|0|f|0|h|
  andps(tmp, src);
  // dst = |0|a|0|c|0|e|0|g|
  if (dst != src) {
    movaps(dst, src);
  }
  psrld(dst, byte{16});
  // dst = |a+b|c+d|e+f|g+h|
  paddd(dst, tmp);
}
|
||||
|
||||
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister mask, XMMRegister scratch,
|
||||
Register tmp, bool omit_add) {
|
||||
|
@ -326,7 +326,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
|
||||
AVX_OP3_WITH_MOVE(Cmpeqps, cmpeqps, XMMRegister, Operand)
|
||||
AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand)
|
||||
AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand)
|
||||
AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
|
||||
#undef AVX_OP3_WITH_MOVE
|
||||
|
||||
// TODO(zhin): Remove after moving more definitions into SharedTurboAssembler.
|
||||
@ -397,14 +396,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
|
||||
// Defined here to allow usage on both TurboFan and Liftoff.
|
||||
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1,
|
||||
XMMRegister tmp2, Register scratch);
|
||||
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister tmp, Register scratch);
|
||||
void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
|
||||
Register scratch);
|
||||
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
|
||||
Register scratch);
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister tmp);
|
||||
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
|
||||
XMMRegister scratch, Register tmp, bool omit_add = false);
|
||||
|
||||
|
@ -588,6 +588,47 @@ void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
|
||||
Pxor(dst, scratch);
|
||||
}
|
||||
|
||||
void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
                                                     XMMRegister src,
                                                     XMMRegister tmp) {
  // Unsigned i16x8 -> i32x4 pairwise add: each 32-bit lane of src is split
  // into its zero-extended high and low 16-bit halves, which are then added.
  //   dst: receives the result.
  //   src: input vector.
  //   tmp: XMM temporary; overwritten on every path.
  ASM_CODE_COMMENT(this);

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // tmp = |0|a|0|c|0|e|0|g|
    vpsrld(tmp, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, tmp, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, tmp, dst);
    return;
  }

  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative
    // constants, see https://github.com/WebAssembly/simd/pull/380.
    // Mirrors the AVX sequence; the copies are needed because the SSE
    // instructions overwrite their first operand.
    movaps(tmp, src);
    psrld(tmp, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, tmp, 0xAA);
    paddd(dst, tmp);
    return;
  }

  // Fallback without pblendw: mask out the low halves instead of blending.
  // src = |a|b|c|d|e|f|g|h|
  // tmp = i32x4.splat(0x0000FFFF)
  pcmpeqd(tmp, tmp);
  psrld(tmp, byte{16});
  // tmp = |0|b|0|d|0|f|0|h|
  andps(tmp, src);
  // dst = |0|a|0|c|0|e|0|g|
  if (dst != src) {
    movaps(dst, src);
  }
  psrld(dst, byte{16});
  // dst = |a+b|c+d|e+f|g+h|
  paddd(dst, tmp);
}
|
||||
|
||||
// 1. Multiply low word into scratch.
|
||||
// 2. Multiply high word (can be signed or unsigned) into dst.
|
||||
// 3. Unpack and interleave scratch and dst into dst.
|
||||
|
@ -235,6 +235,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP(Pinsrw, pinsrw)
|
||||
AVX_OP(Pmaddwd, pmaddwd)
|
||||
AVX_OP(Pmaxsw, pmaxsw)
|
||||
AVX_OP(Pmaxub, pmaxub)
|
||||
AVX_OP(Pminsw, pminsw)
|
||||
@ -361,6 +362,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
// Will move src1 to dst if AVX is not supported.
|
||||
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister scratch);
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister tmp);
|
||||
// Requires that dst == src1 if AVX is not supported.
|
||||
void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister scratch, bool low, bool is_signed);
|
||||
@ -515,6 +518,63 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
|
||||
}
|
||||
}
|
||||
|
||||
// Signed i16x8 -> i32x4 pairwise add.
//   dst:     receives the result.
//   src:     input vector.
//   scratch: general-purpose register handed to ExternalReferenceAsOperand
//            to form the operand for the i16x8_splat_0x0001 constant.
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
                               Register scratch) {
  // Annotate the emitted code like the other ext-add-pairwise helpers do.
  ASM_CODE_COMMENT(this);
  Operand op = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
  // pmaddwd multiplies signed words in src and op, producing
  // signed doublewords, then adds pairwise.
  // src = |a|b|c|d|e|f|g|h|
  // dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
  // Without AVX, bring src into dst up front and continue with dst as the
  // source, so that Pmaddwd is invoked with dst == src.
  if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
    movaps(dst, src);
    src = dst;
  }

  Pmaddwd(dst, src, op);
}
|
||||
|
||||
// Signed i8x16 -> i16x8 pairwise add, emitted as a pmaddubsw against the
// i8x16_splat_0x01 external constant.
//   dst:     receives the result.
//   src:     input vector.
//   scratch: XMM temporary holding the constant (AVX) or the partial result
//            (SSSE3 when dst aliases src).
//   tmp:     general-purpose register handed to ExternalReferenceAsOperand.
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                               XMMRegister scratch, Register tmp) {
  ASM_CODE_COMMENT(this);
  // pmaddubsw treats the first operand as unsigned, so the external constant
  // goes first and src second.
  Operand splat_ones = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), tmp);

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vmovdqa(scratch, splat_ones);
    vpmaddubsw(dst, scratch, src);
    return;
  }

  CpuFeatureScope sse_scope(this, SSSE3);
  // The two-operand form overwrites its first operand. If dst aliases src the
  // constant cannot be loaded into dst, so build the result in scratch and
  // copy it over afterwards.
  const bool dst_aliases_src = dst == src;
  XMMRegister acc = dst_aliases_src ? scratch : dst;
  movaps(acc, splat_ones);
  pmaddubsw(acc, src);
  if (dst_aliases_src) {
    movaps(dst, acc);
  }
}
|
||||
|
||||
// Unsigned i8x16 -> i16x8 pairwise add, emitted as a pmaddubsw of src against
// the i8x16_splat_0x01 external constant.
//   dst:     receives the result.
//   src:     input vector; passed as the first pmaddubsw operand.
//   scratch: general-purpose register handed to ExternalReferenceAsOperand.
void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
                               Register scratch) {
  ASM_CODE_COMMENT(this);
  Operand splat_ones = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmaddubsw(dst, src, splat_ones);
    return;
  }

  CpuFeatureScope sse_scope(this, SSSE3);
  // The SSE form overwrites its first operand, so src has to be in dst
  // before the multiply-add.
  if (dst != src) {
    movaps(dst, src);
  }
  pmaddubsw(dst, splat_ones);
}
|
||||
|
||||
private:
|
||||
// All implementation-specific methods must be called through this.
|
||||
Impl* impl() { return static_cast<Impl*>(this); }
|
||||
|
@ -2004,31 +2004,6 @@ void TurboAssembler::JumpCodeTObject(Register code, JumpMode jump_mode) {
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1, Operand src2) {
  // Emits pmaddwd with a memory second source. Uses the three-operand AVX
  // form when available; otherwise moves src1 into dst (if they differ) and
  // applies the two-operand SSE form in place.
  if (!CpuFeatures::IsSupported(AVX)) {
    if (dst != src1) {
      movaps(dst, src1);
    }
    pmaddwd(dst, src2);
    return;
  }

  CpuFeatureScope avx_scope(this, AVX);
  vpmaddwd(dst, src1, src2);
}
|
||||
|
||||
void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1,
                             XMMRegister src2) {
  // Emits pmaddwd with a register second source. Uses the three-operand AVX
  // form when available; otherwise moves src1 into dst (if they differ) and
  // applies the two-operand SSE form in place.
  if (!CpuFeatures::IsSupported(AVX)) {
    if (dst != src1) {
      movaps(dst, src1);
    }
    pmaddwd(dst, src2);
    return;
  }

  CpuFeatureScope avx_scope(this, AVX);
  vpmaddwd(dst, src1, src2);
}
|
||||
|
||||
void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
|
||||
Operand src2) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
@ -2302,68 +2277,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst,
                                               XMMRegister src) {
  // Signed i8x16 -> i16x8 pairwise add, emitted as a pmaddubsw against the
  // i8x16_splat_0x01 external constant. pmaddubsw treats its first operand as
  // unsigned, so the constant is passed as the first operand and src as the
  // second.
  Operand splat_ones = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01());

  if (dst != src) {
    // dst is free to hold the constant, so no temporary is needed.
    Movdqa(dst, splat_ones);
    Pmaddubsw(dst, dst, src);
    return;
  }

  // dst aliases src: stage the constant in kScratchDoubleReg instead.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vmovdqa(kScratchDoubleReg, splat_ones);
    vpmaddubsw(dst, kScratchDoubleReg, src);
    return;
  }

  CpuFeatureScope sse_scope(this, SSSE3);
  movaps(kScratchDoubleReg, splat_ones);
  pmaddubsw(kScratchDoubleReg, src);
  movaps(dst, kScratchDoubleReg);
}
|
||||
|
||||
void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
                                               XMMRegister src) {
  // Unsigned i16x8 -> i32x4 pairwise add: each 32-bit lane of src is split
  // into its zero-extended high and low 16-bit halves, which are then added.
  // kScratchDoubleReg is overwritten on every path.
  const XMMRegister tmp = kScratchDoubleReg;

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // tmp = |0|a|0|c|0|e|0|g|
    vpsrld(tmp, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, tmp, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, tmp, dst);
    return;
  }

  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative constants,
    // see https://github.com/WebAssembly/simd/pull/380.
    // Mirrors the AVX sequence; the copies are needed because the SSE
    // instructions overwrite their first operand.
    movaps(tmp, src);
    psrld(tmp, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, tmp, 0xAA);
    paddd(dst, tmp);
    return;
  }

  // Fallback without pblendw: mask out the low halves instead of blending.
  // src = |a|b|c|d|e|f|g|h|
  // tmp = i32x4.splat(0x0000FFFF)
  pcmpeqd(tmp, tmp);
  psrld(tmp, byte{16});
  // tmp = |0|b|0|d|0|f|0|h|
  andps(tmp, src);
  // dst = |0|a|0|c|0|e|0|g|
  if (dst != src) {
    movaps(dst, src);
  }
  psrld(dst, byte{16});
  // dst = |a+b|c+d|e+f|g+h|
  paddd(dst, tmp);
}
|
||||
|
||||
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister mask, bool omit_add) {
|
||||
if (omit_add) {
|
||||
|
@ -447,8 +447,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
|
||||
void DebugBreak();
|
||||
|
||||
// Will move src1 to dst if dst != src1.
|
||||
void Pmaddwd(XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
void Pmaddwd(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
|
||||
@ -481,9 +479,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
|
||||
// Defined here to allow usage on both TurboFan and Liftoff.
|
||||
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp);
|
||||
|
||||
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
|
||||
bool omit_add = false);
|
||||
|
||||
|
@ -3167,21 +3167,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kX64I32x4ExtAddPairwiseI16x8S: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src1 = i.InputSimd128Register(0);
|
||||
// pmaddwd multiplies signed words in src1 and src2, producing signed
|
||||
// doublewords, then adds pairwise.
|
||||
// src1 = |a|b|c|d|e|f|g|h|
|
||||
// src2 = |1|1|1|1|1|1|1|1|
|
||||
// dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
|
||||
Operand src2 = __ ExternalReferenceAsOperand(
|
||||
ExternalReference::address_of_wasm_i16x8_splat_0x0001());
|
||||
__ Pmaddwd(dst, src1, src2);
|
||||
__ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0), kScratchRegister);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4ExtAddPairwiseI16x8U: {
|
||||
__ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0));
|
||||
i.InputSimd128Register(0),
|
||||
kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kX64S128Const: {
|
||||
@ -3394,15 +3387,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kX64I16x8ExtAddPairwiseI8x16S: {
|
||||
__ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0));
|
||||
i.InputSimd128Register(0), kScratchDoubleReg,
|
||||
kScratchRegister);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8ExtAddPairwiseI8x16U: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src1 = i.InputSimd128Register(0);
|
||||
Operand src2 = __ ExternalReferenceAsOperand(
|
||||
ExternalReference::address_of_wasm_i8x16_splat_0x01());
|
||||
__ Pmaddubsw(dst, src1, src2);
|
||||
__ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0), kScratchRegister);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Q15MulRSatS: {
|
||||
|
@ -3131,14 +3131,13 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
|
||||
|
||||
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp());
|
||||
I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
|
||||
kScratchRegister);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Operand op = ExternalReferenceAsOperand(
|
||||
ExternalReference::address_of_wasm_i8x16_splat_0x01());
|
||||
Pmaddubsw(dst.fp(), src.fp(), op);
|
||||
I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
|
||||
@ -3287,14 +3286,12 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Operand op = ExternalReferenceAsOperand(
|
||||
ExternalReference::address_of_wasm_i16x8_splat_0x0001());
|
||||
Pmaddwd(dst.fp(), src.fp(), op);
|
||||
I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp());
|
||||
I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
namespace liftoff {
|
||||
|
Loading…
Reference in New Issue
Block a user