[wasm-simd][liftoff][ia32] Prototype load lane

Prototype load lane instructions on ia32 Liftoff.

We generalize the pinsr* macro-assembler functions to take an extra
source input, following the 3-operand + 1-immediate form of the AVX
instructions.
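
As a sketch of the semantics (pseudocode, not V8 source): SSE's pinsr*
is destructive, with the destination doubling as the first source,
while AVX's vpinsr* takes a separate source register. The generalized
functions expose the AVX shape and emulate it on SSE with an extra move:

  // Pinsr*(dst, src1, src2, imm8), pseudocode only:
  //   dst = src1;             // copy all lanes
  //   dst.lane[imm8] = src2;  // replace the selected lane
  // AVX encodes this directly: vpinsrb dst, src1, src2, imm8.
  // SSE only has the destructive pinsrb dst, src2, imm8, so the
  // macro assembler emits movdqu dst, src1 first when dst != src1.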

Bug: v8:10975
Change-Id: I3fa10d149b011b62edd58372148446b663f3dc3c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2619417
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72179}
Zhi An Ng 2021-01-15 02:29:43 +00:00 committed by Commit Bot
parent 5208063b40
commit 857823fd68
3 changed files with 60 additions and 11 deletions

src/codegen/ia32/macro-assembler-ia32.cc

@@ -1936,28 +1936,40 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
 }
 void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
+  Pinsrb(dst, dst, src, imm8);
+}
+
+void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2,
+                            int8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrb(dst, dst, src, imm8);
+    vpinsrb(dst, src1, src2, imm8);
     return;
   }
   if (CpuFeatures::IsSupported(SSE4_1)) {
     CpuFeatureScope sse_scope(this, SSE4_1);
-    pinsrb(dst, src, imm8);
+    if (dst != src1) {
+      movdqu(dst, src1);
+    }
+    pinsrb(dst, src2, imm8);
     return;
   }
   FATAL("no AVX or SSE4.1 support");
 }

-void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
+void TurboAssembler::Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
+                            uint8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrd(dst, dst, src, imm8);
+    vpinsrd(dst, src1, src2, imm8);
     return;
   }
+  if (dst != src1) {
+    movdqu(dst, src1);
+  }
   if (CpuFeatures::IsSupported(SSE4_1)) {
     CpuFeatureScope sse_scope(this, SSE4_1);
-    pinsrd(dst, src, imm8);
+    pinsrd(dst, src2, imm8);
     return;
   }
   // Without AVX or SSE, we can only have 64-bit values in xmm registers.
@@ -1968,10 +1980,10 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
   // Write original content of {dst} to the stack.
   movsd(Operand(esp, 0), dst);
   // Overwrite the portion specified in {imm8}.
-  if (src.is_reg_only()) {
-    mov(Operand(esp, imm8 * kUInt32Size), src.reg());
+  if (src2.is_reg_only()) {
+    mov(Operand(esp, imm8 * kUInt32Size), src2.reg());
   } else {
-    movss(dst, src);
+    movss(dst, src2);
     movss(Operand(esp, imm8 * kUInt32Size), dst);
   }
   // Load back the full value into {dst}.
@@ -1979,13 +1991,25 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
   add(esp, Immediate(kDoubleSize));
 }
+
+void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
+  Pinsrd(dst, dst, src, imm8);
+}

 void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
+  Pinsrw(dst, dst, src, imm8);
+}
+
+void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2,
+                            int8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrw(dst, dst, src, imm8);
+    vpinsrw(dst, src1, src2, imm8);
     return;
   } else {
-    pinsrw(dst, src, imm8);
+    if (dst != src1) {
+      movdqu(dst, src1);
+    }
+    pinsrw(dst, src2, imm8);
     return;
   }
 }
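
A usage sketch of the new three-operand overload (hypothetical
registers and address, not taken from this CL):

  // Insert the byte at [eax] into lane 5 of a copy of xmm1.
  // AVX emits a single vpinsrb xmm0, xmm1, [eax], 5; SSE4.1 emits
  // movdqu xmm0, xmm1 followed by pinsrb xmm0, [eax], 5.
  Pinsrb(xmm0, xmm1, Operand(eax, 0), 5);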

src/codegen/ia32/macro-assembler-ia32.h

@@ -567,14 +567,20 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
     Pinsrb(dst, Operand(src), imm8);
   }
   void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
   void Pinsrd(XMMRegister dst, Register src, uint8_t imm8) {
     Pinsrd(dst, Operand(src), imm8);
   }
   void Pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);
   void Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
     Pinsrw(dst, Operand(src), imm8);
   }
   void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
   void Vbroadcastss(XMMRegister dst, Operand src);
   void Extractps(Operand dst, XMMRegister src, uint8_t imm8);

src/wasm/baseline/ia32/liftoff-assembler-ia32.h

@@ -2763,7 +2763,26 @@ void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
                                 Register addr, Register offset_reg,
                                 uintptr_t offset_imm, LoadType type,
                                 uint8_t laneidx, uint32_t* protected_load_pc) {
-  bailout(kSimd, "loadlane");
+  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
+  Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
+  *protected_load_pc = pc_offset();
+
+  MachineType mem_type = type.mem_type();
+  if (mem_type == MachineType::Int8()) {
+    Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
+  } else if (mem_type == MachineType::Int16()) {
+    Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
+  } else if (mem_type == MachineType::Int32()) {
+    Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
+  } else {
+    DCHECK_EQ(MachineType::Int64(), mem_type);
+    if (laneidx == 0) {
+      Movlps(dst.fp(), src.fp(), src_op);
+    } else {
+      DCHECK_EQ(1, laneidx);
+      Movhps(dst.fp(), src.fp(), src_op);
+    }
+  }
 }

 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
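
The dispatch in LoadLane maps each lane width onto one of the
generalized inserts; a summary sketch (the pinsrq remark is background
knowledge, not from this CL):

  // Lane width -> instruction emitted:
  //   8 bits:  Pinsrb (pinsrb / vpinsrb)
  //   16 bits: Pinsrw (pinsrw / vpinsrw)
  //   32 bits: Pinsrd (pinsrd / vpinsrd)
  //   64 bits: Movlps for lane 0, Movhps for lane 1, since the 64-bit
  //            insert pinsrq needs a REX.W prefix and is therefore
  //            unavailable in 32-bit mode.

The store to *protected_load_pc records the code offset of the load so
that the wasm trap handler can recognize an out-of-bounds access at
this instruction.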