[wasm-simd][liftoff] Implement S8x16Shuffle on x64 and ia32
Bug: v8:9909
Change-Id: I99c599ac1d872a8b4e7c154a942026b52ecb0bd5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2219688
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68220}
This commit is contained in:
parent
de8763314b
commit
df2ab0f076
@ -321,6 +321,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
|
||||
// Por is called by the emit_s8x16_shuffle implementations in this change to
// merge the two partial shuffle results.
// NOTE(review): AVX_OP3_XO is defined outside this view; presumably it
// declares a TurboAssembler helper named by the first argument that wraps the
// instruction named by the second -- confirm against the macro definition.
AVX_OP3_XO(Por, por)
|
||||
AVX_OP3_XO(Psubb, psubb)
|
||||
AVX_OP3_XO(Psubw, psubw)
|
||||
AVX_OP3_XO(Psubd, psubd)
|
||||
|
@ -2866,6 +2866,13 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
|
||||
imm_lane_idx);
|
||||
}
|
||||
|
||||
// s8x16 shuffle is not implemented for this backend: no code is emitted for
// |dst|, |lhs|, |rhs| or |shuffle|; the function only reports a bailout.
// NOTE(review): the neighbouring code in this hunk uses NEON (vdup), so this
// is presumably the 32-bit ARM Liftoff backend -- confirm from the file path.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]) {
|
||||
// Report a kSimd bailout tagged with the name of the unsupported operation.
bailout(kSimd, "s8x16_shuffle");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
|
||||
|
@ -1882,6 +1882,13 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
|
||||
Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
|
||||
}
|
||||
|
||||
// s8x16 shuffle is not implemented for this backend: no code is emitted for
// |dst|, |lhs|, |rhs| or |shuffle|; the function only reports a bailout.
// NOTE(review): the neighbouring code in this hunk uses V8H()/V16B() vector
// views, so this is presumably the arm64 Liftoff backend -- confirm from the
// file path.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]) {
|
||||
// Report a kSimd bailout tagged with the name of the unsupported operation.
bailout(kSimd, "s8x16_shuffle");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Dup(dst.fp().V16B(), src.gp().W());
|
||||
|
@ -2195,6 +2195,44 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code for a 16-byte shuffle of |lhs| and |rhs| into |dst|, controlled
// by the compile-time index array |shuffle|. Indices below kSimd128Size pick a
// byte from |lhs|; all other indices pick byte (index & 0x0F) from |rhs|.
// The shuffle is done as two pshufb operations (one per source) whose results
// are OR-ed together. The control masks are pushed onto the machine stack, so
// esp is saved first and restored at the end.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]) {
|
||||
// General-purpose register that holds the original esp across the sequence.
LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
|
||||
// Prepare 16 byte aligned buffer for shuffle control mask.
|
||||
mov(tmp.gp(), esp);
|
||||
// Round esp down to a multiple of 16; each mask below is pushed as exactly
// 16 bytes, so esp stays 16-byte aligned after each group of pushes.
and_(esp, -16);
|
||||
// Work on a copy of lhs in the scratch register so lhs itself is not
// modified by the first pshufb.
movups(liftoff::kScratchDoubleReg, lhs.fp());
|
||||
|
||||
// Build the lhs control mask four bytes at a time. The word for lanes 12..15
// is pushed first and the word for lanes 0..3 last, so the mask for lane 0
// ends up at [esp].
for (int i = 3; i >= 0; i--) {
|
||||
uint32_t mask = 0;
|
||||
// Iterate j downwards so that lane i*4 lands in the lowest byte of the word.
for (int j = 3; j >= 0; j--) {
|
||||
uint8_t lane = shuffle[i * 4 + j];
|
||||
mask <<= 8;
|
||||
// Lanes that come from rhs get 0x80: pshufb writes zero for a control byte
// whose top bit is set, leaving that byte free for the later OR.
mask |= lane < kSimd128Size ? lane : 0x80;
|
||||
}
|
||||
push(Immediate(mask));
|
||||
}
|
||||
// Scratch now holds the bytes selected from lhs, zero elsewhere.
Pshufb(liftoff::kScratchDoubleReg, Operand(esp, 0));
|
||||
|
||||
// Build the complementary rhs control mask in a second 16-byte buffer pushed
// below the first one.
for (int i = 3; i >= 0; i--) {
|
||||
uint32_t mask = 0;
|
||||
for (int j = 3; j >= 0; j--) {
|
||||
uint8_t lane = shuffle[i * 4 + j];
|
||||
mask <<= 8;
|
||||
// Lanes that come from rhs keep only their low four bits as the byte index;
// lanes that came from lhs are zeroed here via 0x80.
mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
|
||||
}
|
||||
push(Immediate(mask));
|
||||
}
|
||||
// Shuffle rhs in dst; skip the copy when dst already is rhs.
if (dst.fp() != rhs.fp()) {
|
||||
movups(dst.fp(), rhs.fp());
|
||||
}
|
||||
Pshufb(dst.fp(), Operand(esp, 0));
|
||||
// Merge the lhs-selected bytes with the rhs-selected bytes.
Por(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// Restore the saved stack pointer, which also drops both mask buffers.
mov(esp, tmp.gp());
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
|
@ -748,6 +748,9 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
Register offset_reg, uint32_t offset_imm,
|
||||
LoadType type, LoadTransformationKind transform,
|
||||
uint32_t* protected_load_pc);
|
||||
// Emits a byte shuffle of |lhs| and |rhs| into |dst| using the 16 constant
// indices in |shuffle|. In the ia32/x64 implementations in this change,
// indices below kSimd128Size select a byte of |lhs| and the remaining indices
// select byte (index & 0x0F) of |rhs|; the other backends shown bail out.
inline void emit_s8x16_shuffle(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]);
|
||||
inline void emit_s8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
|
||||
|
@ -2941,8 +2941,15 @@ class LiftoffCompiler {
|
||||
const Simd8x16ShuffleImmediate<validate>& imm,
|
||||
const Value& input0, const Value& input1,
|
||||
Value* result) {
|
||||
unsupported(decoder, kSimd, "simd");
|
||||
static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
|
||||
LiftoffRegister rhs = __ PopToRegister();
|
||||
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs));
|
||||
LiftoffRegister dst = __ GetUnusedRegister(result_rc, {lhs, rhs}, {});
|
||||
|
||||
__ LiftoffAssembler::emit_s8x16_shuffle(dst, lhs, rhs, imm.shuffle);
|
||||
__ PushRegister(kWasmS128, dst);
|
||||
}
|
||||
|
||||
void Throw(FullDecoder* decoder, const ExceptionIndexImmediate<validate>&,
|
||||
const Vector<Value>& args) {
|
||||
unsupported(decoder, kExceptionHandling, "throw");
|
||||
|
@ -2240,6 +2240,44 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code for a 16-byte shuffle of |lhs| and |rhs| into |dst|, controlled
// by the compile-time index array |shuffle|. Indices below kSimd128Size pick a
// byte from |lhs|; all other indices pick byte (index & 0x0F) from |rhs|.
// The shuffle is done as two pshufb operations (one per source) whose results
// are OR-ed together. The control masks are materialized in a temporary SIMD
// register from two 64-bit immediates each.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]) {
|
||||
// Temporary FP/SIMD register for the control masks.
// NOTE(review): the register list passed here presumably excludes dst, lhs
// and rhs from being chosen -- confirm against GetUnusedRegister.
LiftoffRegister tmp_simd =
|
||||
GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs, rhs));
|
||||
// Work on a copy of lhs in the scratch register so lhs itself is not
// modified by the first pshufb.
Movups(kScratchDoubleReg, lhs.fp());
|
||||
|
||||
// Control mask for lhs, split into two 64-bit halves: mask1[0] covers lanes
// 0..7 and mask1[1] covers lanes 8..15.
uint64_t mask1[2] = {};
|
||||
// Iterate downwards so the lowest lane of each half lands in the lowest byte.
for (int i = 15; i >= 0; i--) {
|
||||
uint8_t lane = shuffle[i];
|
||||
// Selects the half: 0 for lanes 0..7, 1 for lanes 8..15.
int j = i >> 3;
|
||||
mask1[j] <<= 8;
|
||||
// Lanes that come from rhs get 0x80: pshufb writes zero for a control byte
// whose top bit is set, leaving that byte free for the later OR.
mask1[j] |= lane < kSimd128Size ? lane : 0x80;
|
||||
}
|
||||
// Load the low half, then insert the high half as quadword 1 via the
// general-purpose scratch register.
TurboAssembler::Move(tmp_simd.fp(), mask1[0]);
|
||||
movq(kScratchRegister, mask1[1]);
|
||||
Pinsrq(tmp_simd.fp(), kScratchRegister, int8_t{1});
|
||||
// Scratch now holds the bytes selected from lhs, zero elsewhere.
Pshufb(kScratchDoubleReg, tmp_simd.fp());
|
||||
|
||||
// Complementary control mask for rhs, built the same way.
uint64_t mask2[2] = {};
|
||||
for (int i = 15; i >= 0; i--) {
|
||||
uint8_t lane = shuffle[i];
|
||||
int j = i >> 3;
|
||||
mask2[j] <<= 8;
|
||||
// Lanes that come from rhs keep only their low four bits as the byte index;
// lanes that came from lhs are zeroed here via 0x80.
mask2[j] |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
|
||||
}
|
||||
TurboAssembler::Move(tmp_simd.fp(), mask2[0]);
|
||||
movq(kScratchRegister, mask2[1]);
|
||||
Pinsrq(tmp_simd.fp(), kScratchRegister, int8_t{1});
|
||||
|
||||
// Shuffle rhs in dst; skip the copy when dst already is rhs.
if (dst.fp() != rhs.fp()) {
|
||||
Movups(dst.fp(), rhs.fp());
|
||||
}
|
||||
Pshufb(dst.fp(), tmp_simd.fp());
|
||||
// Merge the lhs-selected bytes with the rhs-selected bytes.
Por(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
|
Loading…
Reference in New Issue
Block a user