[wasm-simd][ia32][liftoff] Implement extended multiply

For i64x2.ext_mul and i16x8.ext_mul, we can simply call the
macro-assembler functions.
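
As a point of reference, each ext_mul variant widens half of the input
lanes and multiplies pairwise: "low"/"high" selects which half, and the
_s/_u suffix selects sign- or zero-extension. A scalar model of
i16x8.extmul_low_i8x16_s (illustrative only, not V8 code; the function
name is made up for this sketch):

  #include <cstdint>

  // Widen the low 8 lanes of each i8x16 input to i16, then multiply.
  // The product of two sign-extended 8-bit values always fits in 16 bits.
  void ExtMulLowI8x16S(const int8_t src1[16], const int8_t src2[16],
                       int16_t dst[8]) {
    for (int i = 0; i < 8; ++i) {
      dst[i] = static_cast<int16_t>(src1[i]) * static_cast<int16_t>(src2[i]);
    }
  }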

For i32x4.ext_mul, the macro-assembler requires that dst == src1 when AVX
is not supported, so we add a helper function that performs this check and
ensures dst == src1 before calling into the macro-assembler, as sketched
below.
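
The helper's dispatch, in outline: emit directly when AVX is available or
dst already aliases src1; otherwise move a source into dst first, swapping
the operands (ext_mul is commutative) when dst aliases src2. A toy
standalone sketch of that case analysis, with registers modeled as plain
ints and mnemonics printed rather than assembled (all names here are
illustrative, not V8 API):

  #include <cstdio>

  using Reg = int;  // toy stand-in for an XMM register id

  void EmitI32x4ExtMul(Reg dst, Reg src1, Reg src2, bool has_avx) {
    if (has_avx || dst == src1) {
      // AVX has a non-destructive destination, or dst already aliases src1.
      std::printf("extmul xmm%d, xmm%d, xmm%d\n", dst, src1, src2);
    } else if (dst != src2) {
      // dst aliases neither source: copy src1 into dst, multiply in place.
      std::printf("movaps xmm%d, xmm%d\n", dst, src1);
      std::printf("extmul xmm%d, xmm%d, xmm%d\n", dst, dst, src2);
    } else {
      // dst == src2: extended multiply is commutative, so swap the operands.
      std::printf("movaps xmm%d, xmm%d\n", dst, src2);
      std::printf("extmul xmm%d, xmm%d, xmm%d\n", dst, dst, src1);
    }
  }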

This is the same implementation as x64 (https://crrev.com/c/2603765).

Bug: v8:11262
Change-Id: I85ebe3e033903aa1cb6a7a0840f2e93210cce65a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2610506
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71921}
@@ -3542,25 +3542,29 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
+  I16x8ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/true, /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
+  I16x8ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/true, /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
+  I16x8ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/false, /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
+  I16x8ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/false, /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
@@ -3680,28 +3684,59 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
       this, dst, lhs, rhs);
 }
 
+namespace liftoff {
+// Helper function that checks for register aliasing and AVX support, and
+// moves registers around before calling the actual macro-assembler function.
+inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
+                              XMMRegister src1, XMMRegister src2, bool low,
+                              bool is_signed) {
+  // I32x4ExtMul requires dst == src1 if AVX is not supported.
+  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
+    assm->I32x4ExtMul(dst, src1, src2, liftoff::kScratchDoubleReg, low,
+                      is_signed);
+  } else if (dst != src2) {
+    // dst != src1 && dst != src2
+    assm->movaps(dst, src1);
+    assm->I32x4ExtMul(dst, dst, src2, liftoff::kScratchDoubleReg, low,
+                      is_signed);
+  } else {
+    // dst == src2, and extended multiplication is commutative, so swap
+    // the operands and multiply into dst.
+    assm->movaps(dst, src2);
+    assm->I32x4ExtMul(dst, dst, src1, liftoff::kScratchDoubleReg, low,
+                      is_signed);
+  }
+}
+}  // namespace liftoff
+
 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
+  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
+                             /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
+  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
+                             /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
+  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
+                             /*low=*/false,
+                             /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
+  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
+                             /*low=*/false,
+                             /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
@@ -3831,25 +3866,29 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
+  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/true, /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
-  bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
+  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/true, /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
+  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/false, /*is_signed=*/true);
 }
 
 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                       LiftoffRegister src1,
                                                       LiftoffRegister src2) {
-  bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
+  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
+              /*low=*/false, /*is_signed=*/false);
 }
 
 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,