[wasm-simd] [liftoff] Implement replace_lane on x64 and ia32
Bug: v8:9909
Change-Id: Ia830b2fc00751abfb4dadb61651a252f1da48a1f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2108299
Commit-Queue: Fanchen Kong <fanchen.kong@intel.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66837}
Parent: 855b4945d3
Commit: e5b4cb4567
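For context: replace_lane takes a 128-bit vector, an immediate lane index, and a scalar, and yields the same vector with only that lane overwritten. A minimal scalar model of the f32x4 case (illustration only, not V8 code; the function name and std::array representation are choices made for this sketch):

#include <array>
#include <cstdint>

// Scalar model of f32x4.replace_lane: copy the input vector, then overwrite
// exactly one lane with the scalar operand.
std::array<float, 4> F32x4ReplaceLane(std::array<float, 4> vec,
                                      uint8_t imm_lane_idx, float value) {
  vec[imm_lane_idx] = value;  // every other lane passes through unchanged
  return vec;
}

The hunks below wire the operation into Liftoff: the arm and arm64 assemblers gain replace_lane emitters that still bail out, the shared LiftoffAssembler declarations and the LiftoffCompiler dispatch add the plumbing, and the ia32 and x64 assemblers get real SSE4.1/AVX code paths.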
@@ -1566,6 +1566,13 @@ void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
              imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "f64x2replacelane");
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
@@ -1590,6 +1597,13 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
              liftoff::GetSimd128Register(lhs.low_fp()), imm_lane_idx);
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "f32x4replacelane");
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(liftoff::GetSimd128Register(dst.low_fp()),
@@ -1621,6 +1635,13 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
              imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i64x2replacelane");
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon64, liftoff::GetSimd128Register(dst.low_fp()),
@@ -1647,6 +1668,13 @@ void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i32x4replacelane");
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon32, liftoff::GetSimd128Register(dst.low_fp()),
@@ -1694,6 +1722,13 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i16x8replacelane");
}

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon8, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
@@ -1713,6 +1748,13 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i8x16replacelane");
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon8, liftoff::GetSimd128Register(dst.low_fp()),
@@ -1095,6 +1095,13 @@ void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
  Mov(dst.fp().D(), lhs.fp().V2D(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "f64x2replacelane");
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
@@ -1116,6 +1123,13 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
  Mov(dst.fp().S(), lhs.fp().V4S(), imm_lane_idx);
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "f32x4replacelane");
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
@@ -1137,6 +1151,13 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
  Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i64x2replacelane");
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
@@ -1158,6 +1179,13 @@ void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
  Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i32x4replacelane");
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
@@ -1185,6 +1213,13 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
  Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i16x8replacelane");
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
@@ -1212,6 +1247,13 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
  Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  bailout(kSimd, "i8x16replacelane");
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
@@ -1978,6 +1978,33 @@ void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // TODO(fanchenk): Use movlhps and blendpd
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    if (imm_lane_idx == 0) {
      vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00000000);
      vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01010000);
    } else {
      vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00100000);
      vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01110000);
    }
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    if (imm_lane_idx == 0) {
      insertps(dst.fp(), src2.fp(), 0b00000000);
      insertps(dst.fp(), src2.fp(), 0b01010000);
    } else {
      insertps(dst.fp(), src2.fp(), 0b00100000);
      insertps(dst.fp(), src2.fp(), 0b01110000);
    }
  }
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
@@ -2015,6 +2042,20 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
@@ -2041,6 +2082,22 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
  Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
    vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
    pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  }
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
@@ -2065,6 +2122,20 @@ void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
@@ -2097,6 +2168,19 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
  movsx_w(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
@@ -2129,6 +2213,20 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
  movsx_b(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
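A note on the insertps-based paths above (ia32 f64x2/f32x4; the x64 f32x4 code further down uses the same immediate): the SSE4.1 insertps immediate packs three fields, with bits 7:6 selecting the source lane of src2, bits 5:4 the destination lane in dst, and bits 3:0 a zero mask. The helper below is not part of the CL; it is a sketch (the name InsertpsImm is assumed) showing how the constants used above decompose:

#include <cstdint>

// Sketch only: assemble an INSERTPS immediate from its three fields.
constexpr uint8_t InsertpsImm(uint8_t src_lane, uint8_t dst_lane,
                              uint8_t zero_mask = 0) {
  return static_cast<uint8_t>((src_lane << 6) | (dst_lane << 4) | zero_mask);
}

// f32x4.replace_lane uses (imm_lane_idx << 4) & 0x30: source lane 0 of the
// scalar register, destination lane imm_lane_idx, nothing zeroed.
static_assert(InsertpsImm(0, 3) == 0b00110000, "f32x4 lane 3");
// The ia32 f64x2 path writes the double as two 32-bit halves; for lane 1 it
// moves src2 lane 0 into dst lane 2 and src2 lane 1 into dst lane 3.
static_assert(InsertpsImm(0, 2) == 0b00100000, "f64x2 lane 1, low half");
static_assert(InsertpsImm(1, 3) == 0b01110000, "f64x2 lane 1, high half");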
@@ -714,6 +714,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_f64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -721,6 +724,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_f32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -728,6 +734,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_i64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -735,6 +744,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_i32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -746,6 +758,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i16x8_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -757,6 +772,9 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i8x16_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2412,6 +2412,26 @@ class LiftoffCompiler {
    __ PushRegister(ValueType(result_type), dst);
  }

  template <ValueType::Kind src2_type, typename EmitFn>
  void EmitSimdReplaceLaneOp(EmitFn fn,
                             const SimdLaneImmediate<validate>& imm) {
    static constexpr RegClass src1_rc = reg_class_for(ValueType::kS128);
    static constexpr RegClass src2_rc = reg_class_for(src2_type);
    static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
    LiftoffRegister src2 = __ PopToRegister();
    LiftoffRegister src1 = src1_rc == src2_rc
                               ? __ PopToRegister(LiftoffRegList::ForRegs(src2))
                               : __ PopToRegister();
    LiftoffRegister dst =
        src2_rc == result_rc
            ? __ GetUnusedRegister(result_rc, {src1},
                                   LiftoffRegList::ForRegs(src2))
            : __ GetUnusedRegister(result_rc, {src1});
    fn(dst, src1, src2, imm.lane);
    __ PushRegister(kWasmS128, dst);
  }

  void SimdLaneOp(FullDecoder* decoder, WasmOpcode opcode,
                  const SimdLaneImmediate<validate>& imm,
                  const Vector<Value> inputs, Value* result) {
@@ -2436,6 +2456,22 @@ class LiftoffCompiler {
      CASE_SIMD_EXTRACT_LANE_OP(I8x16ExtractLaneU, I32, i8x16_extract_lane_u)
      CASE_SIMD_EXTRACT_LANE_OP(I8x16ExtractLaneS, I32, i8x16_extract_lane_s)
#undef CASE_SIMD_EXTRACT_LANE_OP
#define CASE_SIMD_REPLACE_LANE_OP(opcode, type, fn)                          \
  case wasm::kExpr##opcode:                                                  \
    EmitSimdReplaceLaneOp<ValueType::k##type>(                               \
        [=](LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, \
            uint8_t imm_lane_idx) {                                          \
          __ emit_##fn(dst, src1, src2, imm_lane_idx);                       \
        },                                                                   \
        imm);                                                                \
    break;
      CASE_SIMD_REPLACE_LANE_OP(F64x2ReplaceLane, F64, f64x2_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(F32x4ReplaceLane, F32, f32x4_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I64x2ReplaceLane, I64, i64x2_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I32x4ReplaceLane, I32, i32x4_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I16x8ReplaceLane, I32, i16x8_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I8x16ReplaceLane, I32, i8x16_replace_lane)
#undef CASE_SIMD_REPLACE_LANE_OP
      default:
        unsupported(decoder, kSimd, "simd");
    }
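To make the new dispatch concrete, here is one arm of CASE_SIMD_REPLACE_LANE_OP expanded by hand (hand expansion for illustration only; the source keeps the macro form shown above):

case wasm::kExprF32x4ReplaceLane:
  // EmitSimdReplaceLaneOp pops the scalar and the vector, picks a destination
  // register, and forwards everything to the platform hook via the lambda.
  EmitSimdReplaceLaneOp<ValueType::kF32>(
      [=](LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2,
          uint8_t imm_lane_idx) {
        __ emit_f32x4_replace_lane(dst, src1, src2, imm_lane_idx);
      },
      imm);
  break;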
@@ -1923,6 +1923,28 @@ void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
  Movq(dst.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    if (imm_lane_idx == 0) {
      vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111);
    } else {
      vmovlhps(dst.fp(), src1.fp(), src2.fp());
    }
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    if (imm_lane_idx == 0) {
      pblendw(dst.fp(), src2.fp(), 0b00001111);
    } else {
      movlhps(dst.fp(), src2.fp());
    }
  }
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
@@ -1955,6 +1977,20 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
@@ -1979,6 +2015,20 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
  Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
@@ -2003,6 +2053,20 @@ void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
@@ -2035,6 +2099,19 @@ void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
  movsxwl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
@@ -2067,6 +2144,20 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
  movsxbl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
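One detail of the x64 f64x2 path above: for lane 0, pblendw with immediate 0b00001111 takes the four low 16-bit words (the low 64 bits, i.e. lane 0) from src2 and keeps the upper half from src1; for lane 1, movlhps copies the low 64 bits of src2 (where the scalar double lives) into the high 64 bits of the destination. A scalar model, purely illustrative (the function and type names are assumptions of this sketch):

#include <array>
#include <cstdint>

using F64x2 = std::array<double, 2>;

// Scalar model of the two x64 f64x2.replace_lane code paths shown above.
F64x2 F64x2ReplaceLaneModel(F64x2 src1, double src2, uint8_t imm_lane_idx) {
  F64x2 dst = src1;
  if (imm_lane_idx == 0) {
    dst[0] = src2;  // pblendw dst, src2, 0b00001111: low quadword from src2
  } else {
    dst[1] = src2;  // movlhps dst, src2: src2's low quadword into dst's high
  }
  return dst;
}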