[wasm-simd][arm64][liftoff] Implement extended multiply

The codegen is the same as on TurboFan, using Smull, Umull, Smull2, and
Umull2. The rest of the changes are adding skeleton functions to the
different archs, which bailout for now. Actual codegen will come in
future patches.

Bug: v8:11262
Change-Id: I06f0b018b85b5cca29382ab730510959e1a81ab6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2589064
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71859}
This commit is contained in:
Zhi An Ng 2020-12-14 08:03:40 +00:00 committed by Commit Bot
parent 20feaf9a00
commit 1215f2a83b
6 changed files with 360 additions and 0 deletions

View File

@ -2851,6 +2851,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
@ -3016,6 +3040,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
vpadd(Neon32, dest.high(), scratch.low(), scratch.high());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp());
@ -3201,6 +3249,30 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
imm_lane_idx);
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,

View File

@ -1981,6 +1981,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W());
@ -2122,6 +2146,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
Addp(dst.fp().V4S(), tmp1, tmp2);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V8H(), src.gp().W());
@ -2800,6 +2848,30 @@ void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
Abs(dst.fp().V8H(), src.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V4S(), src.fp().V4S());

View File

@ -3539,6 +3539,30 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
@ -3656,6 +3680,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
DoubleRegister reg =
@ -3780,6 +3828,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Paddq(dst.fp(), dst.fp(), tmp2.fp());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {

View File

@ -1020,6 +1020,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_v32x4_anytrue(LiftoffRegister dst, LiftoffRegister src);
inline void emit_v32x4_alltrue(LiftoffRegister dst, LiftoffRegister src);
@ -1052,6 +1064,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i32x4_dot_i16x8_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
@ -1071,6 +1095,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src);

View File

@ -2876,6 +2876,18 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_s);
case wasm::kExprI16x8MaxU:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_u);
case wasm::kExprI16x8ExtMulLowI8x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s);
case wasm::kExprI16x8ExtMulLowI8x16U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u);
case wasm::kExprI16x8ExtMulHighI8x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s);
case wasm::kExprI16x8ExtMulHighI8x16U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u);
case wasm::kExprI32x4Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_neg);
case wasm::kExprV32x4AnyTrue:
@ -2910,6 +2922,18 @@ class LiftoffCompiler {
case wasm::kExprI32x4DotI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_dot_i16x8_s);
case wasm::kExprI32x4ExtMulLowI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s);
case wasm::kExprI32x4ExtMulLowI16x8U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u);
case wasm::kExprI32x4ExtMulHighI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s);
case wasm::kExprI32x4ExtMulHighI16x8U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u);
case wasm::kExprI64x2Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_neg);
case wasm::kExprI64x2Shl:
@ -2927,6 +2951,18 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_sub);
case wasm::kExprI64x2Mul:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_mul);
case wasm::kExprI64x2ExtMulLowI32x4S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s);
case wasm::kExprI64x2ExtMulLowI32x4U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u);
case wasm::kExprI64x2ExtMulHighI32x4S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s);
case wasm::kExprI64x2ExtMulHighI32x4U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u);
case wasm::kExprF32x4Abs:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_abs);
case wasm::kExprF32x4Neg:

View File

@ -3156,6 +3156,30 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
@ -3273,6 +3297,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
DoubleRegister reg = dst.fp() == src.fp() ? kScratchDoubleReg : dst.fp();
@ -3362,6 +3410,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Paddq(dst.fp(), tmp2.fp());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {