[wasm-simd][liftoff][ia32] Prototype load lane
Prototype load lane instructions on ia32 Liftoff. We generalize the pinsr*
macro-assembler functions to take an extra input, following the
3-operand + 1-immediate form of the AVX instructions.

Bug: v8:10975
Change-Id: I3fa10d149b011b62edd58372148446b663f3dc3c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2619417
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72179}
This commit is contained in:
parent
5208063b40
commit
857823fd68
@ -1936,28 +1936,40 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
|
||||
}
|
||||
|
||||
// Inserts the byte {src} into lane {imm8} of {dst} in place, by delegating
// to the three-operand form with {dst} as both source and destination.
void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
  Pinsrb(dst, dst, src, imm8);
}
// Inserts the byte {src2} into lane {imm8} of {src1}, writing the result to
// {dst} (AVX three-operand form). Without AVX, the SSE4.1 pinsrb is
// destructive (two-operand), so {src1} is first copied into {dst} when they
// differ. There is no pre-SSE4.1 fallback for byte inserts.
void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2,
                            int8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst, src1, src2, imm8);
    return;
  }
  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    if (dst != src1) {
      movdqu(dst, src1);
    }
    pinsrb(dst, src2, imm8);
    return;
  }
  FATAL("no AVX or SSE4.1 support");
}
void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
|
||||
void TurboAssembler::Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
|
||||
uint8_t imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpinsrd(dst, dst, src, imm8);
|
||||
vpinsrd(dst, src1, src2, imm8);
|
||||
return;
|
||||
}
|
||||
if (dst != src1) {
|
||||
movdqu(dst, src1);
|
||||
}
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope sse_scope(this, SSE4_1);
|
||||
pinsrd(dst, src, imm8);
|
||||
pinsrd(dst, src2, imm8);
|
||||
return;
|
||||
}
|
||||
// Without AVX or SSE, we can only have 64-bit values in xmm registers.
|
||||
@ -1968,10 +1980,10 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
|
||||
// Write original content of {dst} to the stack.
|
||||
movsd(Operand(esp, 0), dst);
|
||||
// Overwrite the portion specified in {imm8}.
|
||||
if (src.is_reg_only()) {
|
||||
mov(Operand(esp, imm8 * kUInt32Size), src.reg());
|
||||
if (src2.is_reg_only()) {
|
||||
mov(Operand(esp, imm8 * kUInt32Size), src2.reg());
|
||||
} else {
|
||||
movss(dst, src);
|
||||
movss(dst, src2);
|
||||
movss(Operand(esp, imm8 * kUInt32Size), dst);
|
||||
}
|
||||
// Load back the full value into {dst}.
|
||||
@ -1979,13 +1991,25 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
|
||||
add(esp, Immediate(kDoubleSize));
|
||||
}
|
||||
|
||||
// Inserts the 32-bit value {src} into lane {imm8} of {dst} in place, by
// delegating to the three-operand form with {dst} as both source and
// destination.
void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
  Pinsrd(dst, dst, src, imm8);
}
// Inserts the 16-bit value {src} into lane {imm8} of {dst} in place, by
// delegating to the three-operand form with {dst} as both source and
// destination.
void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
  Pinsrw(dst, dst, src, imm8);
}
// Inserts the 16-bit value {src2} into lane {imm8} of {src1}, writing the
// result to {dst} (AVX three-operand form). Without AVX, the SSE pinsrw is
// destructive (two-operand), so {src1} is first copied into {dst} when they
// differ. pinsrw needs no SSE4.1 check — it is available in baseline SSE2.
void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2,
                            int8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst, src1, src2, imm8);
    return;
  }
  if (dst != src1) {
    movdqu(dst, src1);
  }
  pinsrw(dst, src2, imm8);
}
|
@ -567,14 +567,20 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
Pinsrb(dst, Operand(src), imm8);
|
||||
}
|
||||
void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
|
||||
// Moves src1 to dst if AVX is not supported.
|
||||
void Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
|
||||
// Convenience overload: wraps a general-purpose register in an Operand and
// forwards to the Operand-based Pinsrd.
void Pinsrd(XMMRegister dst, Register src, uint8_t imm8) {
  Pinsrd(dst, Operand(src), imm8);
}
void Pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
|
||||
// Moves src1 to dst if AVX is not supported.
|
||||
void Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);
|
||||
// Convenience overload: wraps a general-purpose register in an Operand and
// forwards to the Operand-based Pinsrw.
void Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
  Pinsrw(dst, Operand(src), imm8);
}
void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
|
||||
// Moves src1 to dst if AVX is not supported.
|
||||
void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
|
||||
void Vbroadcastss(XMMRegister dst, Operand src);
|
||||
void Extractps(Operand dst, XMMRegister src, uint8_t imm8);
|
||||
|
||||
|
@ -2763,7 +2763,26 @@ void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
|
||||
Register addr, Register offset_reg,
|
||||
uintptr_t offset_imm, LoadType type,
|
||||
uint8_t laneidx, uint32_t* protected_load_pc) {
|
||||
bailout(kSimd, "loadlane");
|
||||
DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
|
||||
Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
|
||||
*protected_load_pc = pc_offset();
|
||||
|
||||
MachineType mem_type = type.mem_type();
|
||||
if (mem_type == MachineType::Int8()) {
|
||||
Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
|
||||
} else if (mem_type == MachineType::Int16()) {
|
||||
Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
|
||||
} else if (mem_type == MachineType::Int32()) {
|
||||
Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
|
||||
} else {
|
||||
DCHECK_EQ(MachineType::Int64(), mem_type);
|
||||
if (laneidx == 0) {
|
||||
Movlps(dst.fp(), src.fp(), src_op);
|
||||
} else {
|
||||
DCHECK_EQ(1, laneidx);
|
||||
Movhps(dst.fp(), src.fp(), src_op);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
|
||||
|
Loading…
Reference in New Issue
Block a user