[ia32][wasm] Support AVX instructions for I32x4Splat/ReplaceLane/ExtractLane
Add vpinsrd, vpextrd, vmovd. Supplement pshufd and add vpshufd. Add Movd and Pshufd macro. Add AVX support to Pextrd. Fix disassembler to recognize "pinsrd/pextrd xmm,[mem],imm8". BUG= Review-Url: https://codereview.chromium.org/2916093002 Cr-Commit-Position: refs/heads/master@{#45833}
This commit is contained in:
parent
dc3de67047
commit
6d18ae2589
@ -1892,16 +1892,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kIA32I32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ movd(dst, i.InputOperand(0));
|
||||
__ pshufd(dst, dst, 0x0);
|
||||
__ Movd(dst, i.InputOperand(0));
|
||||
__ Pshufd(dst, dst, 0x0);
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4ExtractLane: {
|
||||
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4ReplaceLane: {
|
||||
__ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
case kSSEI32x4ReplaceLane: {
|
||||
CpuFeatureScope sse_scope(masm(), SSE4_1);
|
||||
__ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kSSEI32x4Add: {
|
||||
@ -1912,6 +1913,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXI32x4ReplaceLane: {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kAVXI32x4Add: {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
|
@ -113,9 +113,10 @@ namespace compiler {
|
||||
V(IA32StackCheck) \
|
||||
V(IA32I32x4Splat) \
|
||||
V(IA32I32x4ExtractLane) \
|
||||
V(IA32I32x4ReplaceLane) \
|
||||
V(SSEI32x4ReplaceLane) \
|
||||
V(SSEI32x4Add) \
|
||||
V(SSEI32x4Sub) \
|
||||
V(AVXI32x4ReplaceLane) \
|
||||
V(AVXI32x4Add) \
|
||||
V(AVXI32x4Sub)
|
||||
|
||||
|
@ -99,9 +99,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32BitcastIF:
|
||||
case kIA32I32x4Splat:
|
||||
case kIA32I32x4ExtractLane:
|
||||
case kIA32I32x4ReplaceLane:
|
||||
case kSSEI32x4ReplaceLane:
|
||||
case kSSEI32x4Add:
|
||||
case kSSEI32x4Sub:
|
||||
case kAVXI32x4ReplaceLane:
|
||||
case kAVXI32x4Add:
|
||||
case kAVXI32x4Sub:
|
||||
return (instr->addressing_mode() == kMode_None)
|
||||
|
@ -1906,10 +1906,16 @@ void InstructionSelector::VisitI32x4ExtractLane(Node* node) {
|
||||
|
||||
void InstructionSelector::VisitI32x4ReplaceLane(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
int32_t lane = OpParameter<int32_t>(node);
|
||||
Emit(kIA32I32x4ReplaceLane, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
|
||||
g.Use(node->InputAt(1)));
|
||||
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
|
||||
InstructionOperand operand1 = g.UseImmediate(OpParameter<int32_t>(node));
|
||||
InstructionOperand operand2 = g.Use(node->InputAt(1));
|
||||
if (IsSupported(AVX)) {
|
||||
Emit(kAVXI32x4ReplaceLane, g.DefineAsRegister(node), operand0, operand1,
|
||||
operand2);
|
||||
} else {
|
||||
Emit(kSSEI32x4ReplaceLane, g.DefineSameAsFirst(node), operand0, operand1,
|
||||
operand2);
|
||||
}
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
|
||||
|
@ -2683,8 +2683,7 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
|
||||
void Assembler::pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
|
||||
EnsureSpace ensure_space(this);
|
||||
EMIT(0x66);
|
||||
EMIT(0x0F);
|
||||
@ -2884,6 +2883,22 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
EMIT(imm8);
|
||||
}
|
||||
|
||||
void Assembler::vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
|
||||
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
|
||||
EMIT(shuffle);
|
||||
}
|
||||
|
||||
void Assembler::vpextrd(const Operand& dst, XMMRegister src, int8_t offset) {
|
||||
vinstr(0x16, src, xmm0, dst, k66, k0F3A, kWIG);
|
||||
EMIT(offset);
|
||||
}
|
||||
|
||||
void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
|
||||
int8_t offset) {
|
||||
vinstr(0x22, dst, src1, src2, k66, k0F3A, kWIG);
|
||||
EMIT(offset);
|
||||
}
|
||||
|
||||
void Assembler::bmi1(byte op, Register reg, Register vreg, const Operand& rm) {
|
||||
DCHECK(IsEnabled(BMI1));
|
||||
EnsureSpace ensure_space(this);
|
||||
|
@ -1127,7 +1127,11 @@ class Assembler : public AssemblerBase {
|
||||
void psllq(XMMRegister dst, XMMRegister src);
|
||||
void psrlq(XMMRegister reg, int8_t shift);
|
||||
void psrlq(XMMRegister dst, XMMRegister src);
|
||||
void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
|
||||
|
||||
void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
|
||||
pshufd(dst, Operand(src), shuffle);
|
||||
}
|
||||
void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
|
||||
void pextrd(Register dst, XMMRegister src, int8_t offset) {
|
||||
pextrd(Operand(dst), src, offset);
|
||||
}
|
||||
@ -1384,6 +1388,21 @@ class Assembler : public AssemblerBase {
|
||||
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||
|
||||
void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
|
||||
vpshufd(dst, Operand(src), shuffle);
|
||||
}
|
||||
void vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
|
||||
void vpextrd(Register dst, XMMRegister src, int8_t offset) {
|
||||
vpextrd(Operand(dst), src, offset);
|
||||
}
|
||||
void vpextrd(const Operand& dst, XMMRegister src, int8_t offset);
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
|
||||
int8_t offset) {
|
||||
vpinsrd(dst, src1, Operand(src2), offset);
|
||||
}
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
|
||||
int8_t offset);
|
||||
|
||||
void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
|
||||
vcvtdq2ps(dst, Operand(src));
|
||||
}
|
||||
@ -1397,6 +1416,15 @@ class Assembler : public AssemblerBase {
|
||||
vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
|
||||
}
|
||||
|
||||
void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
|
||||
void vmovd(XMMRegister dst, const Operand& src) {
|
||||
vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
|
||||
}
|
||||
// Register-destination form: forward to the Operand overload of vmovd so the
// VEX-encoded instruction (opcode 0x7E via vinstr) is emitted, matching the
// other vmovd overloads, instead of falling back to the legacy movd encoding.
void vmovd(Register dst, XMMRegister src) { vmovd(Operand(dst), src); }
|
||||
void vmovd(const Operand& dst, XMMRegister src) {
|
||||
vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
|
||||
}
|
||||
|
||||
// BMI instruction
|
||||
void andn(Register dst, Register src1, Register src2) {
|
||||
andn(dst, src1, Operand(src2));
|
||||
|
@ -817,6 +817,27 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
} else if (vex_66() && vex_0f3a()) {
|
||||
int mod, regop, rm, vvvv = vex_vreg();
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
switch (opcode) {
|
||||
case 0x16:
|
||||
AppendToBuffer("vpextrd ");
|
||||
current += PrintRightOperand(current);
|
||||
AppendToBuffer(",%s,%d", NameOfXMMRegister(regop),
|
||||
*reinterpret_cast<int8_t*>(current));
|
||||
current++;
|
||||
break;
|
||||
case 0x22:
|
||||
AppendToBuffer("vpinsrd %s,%s,", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightOperand(current);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
|
||||
current++;
|
||||
break;
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
} else if (vex_f2() && vex_0f()) {
|
||||
int mod, regop, rm, vvvv = vex_vreg();
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
@ -1101,6 +1122,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x6E:
|
||||
AppendToBuffer("vmovd %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightOperand(current);
|
||||
break;
|
||||
case 0x70:
|
||||
AppendToBuffer("vpshufd %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
|
||||
current++;
|
||||
break;
|
||||
case 0x71:
|
||||
AppendToBuffer("vps%sw %s,%s", sf_str[regop / 2],
|
||||
NameOfXMMRegister(vvvv), NameOfXMMRegister(rm));
|
||||
@ -1113,6 +1144,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
current++;
|
||||
AppendToBuffer(",%u", *current++);
|
||||
break;
|
||||
case 0x7E:
|
||||
AppendToBuffer("vmovd ");
|
||||
current += PrintRightOperand(current);
|
||||
AppendToBuffer(",%s", NameOfXMMRegister(regop));
|
||||
break;
|
||||
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
|
||||
case 0x##opcode: { \
|
||||
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
|
||||
@ -1850,13 +1886,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
} else if (*data == 0x16) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, &rm, &regop);
|
||||
int8_t imm8 = static_cast<int8_t>(data[1]);
|
||||
AppendToBuffer("pextrd %s,%s,%d",
|
||||
NameOfCPURegister(regop),
|
||||
NameOfXMMRegister(rm),
|
||||
static_cast<int>(imm8));
|
||||
data += 2;
|
||||
get_modrm(*data, &mod, &regop, &rm);
|
||||
AppendToBuffer("pextrd ");
|
||||
data += PrintRightOperand(data);
|
||||
AppendToBuffer(",%s,%d", NameOfXMMRegister(regop),
|
||||
*reinterpret_cast<int8_t*>(data));
|
||||
data++;
|
||||
} else if (*data == 0x17) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
@ -1871,12 +1906,10 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, &regop, &rm);
|
||||
int8_t imm8 = static_cast<int8_t>(data[1]);
|
||||
AppendToBuffer("pinsrd %s,%s,%d",
|
||||
NameOfXMMRegister(regop),
|
||||
NameOfCPURegister(rm),
|
||||
static_cast<int>(imm8));
|
||||
data += 2;
|
||||
AppendToBuffer("pinsrd %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightOperand(data);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
|
||||
data++;
|
||||
} else {
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
@ -1942,12 +1975,10 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, &regop, &rm);
|
||||
int8_t imm8 = static_cast<int8_t>(data[1]);
|
||||
AppendToBuffer("pshufd %s,%s,%d",
|
||||
NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(rm),
|
||||
static_cast<int>(imm8));
|
||||
data += 2;
|
||||
AppendToBuffer("pshufd %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightXMMOperand(data);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
|
||||
data++;
|
||||
} else if (*data == 0x90) {
|
||||
data++;
|
||||
AppendToBuffer("nop"); // 2 byte nop.
|
||||
|
@ -2136,10 +2136,24 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) {
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::Pshufd(XMMRegister dst, const Operand& src,
|
||||
uint8_t shuffle) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpshufd(dst, src, shuffle);
|
||||
} else {
|
||||
pshufd(dst, src, shuffle);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
|
||||
if (imm8 == 0) {
|
||||
movd(dst, src);
|
||||
Movd(dst, src);
|
||||
return;
|
||||
}
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpextrd(dst, src, imm8);
|
||||
return;
|
||||
}
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
|
@ -715,6 +715,29 @@ class MacroAssembler: public Assembler {
|
||||
void PushReturnAddressFrom(Register src) { push(src); }
|
||||
void PopReturnAddressTo(Register dst) { pop(dst); }
|
||||
|
||||
// SSE/SSE2 instructions with AVX version.
|
||||
#define AVX_OP2_WITH_TYPE(macro_name, name, dst_type, src_type) \
|
||||
void macro_name(dst_type dst, src_type src) { \
|
||||
if (CpuFeatures::IsSupported(AVX)) { \
|
||||
CpuFeatureScope scope(this, AVX); \
|
||||
v##name(dst, src); \
|
||||
} else { \
|
||||
name(dst, src); \
|
||||
} \
|
||||
}
|
||||
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Register)
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, const Operand&)
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, const Operand&, XMMRegister)
|
||||
|
||||
#undef AVX_OP2_WITH_TYPE
|
||||
|
||||
void Pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
|
||||
Pshufd(dst, Operand(src), shuffle);
|
||||
}
|
||||
void Pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
|
||||
|
||||
// Non-SSE2 instructions.
|
||||
void Pextrd(Register dst, XMMRegister src, int8_t imm8);
|
||||
void Pinsrd(XMMRegister dst, Register src, int8_t imm8,
|
||||
|
@ -462,6 +462,11 @@ TEST(DisasmIa320) {
|
||||
__ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
|
||||
__ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
|
||||
|
||||
__ movd(xmm0, edi);
|
||||
__ movd(xmm0, Operand(ebx, ecx, times_4, 10000));
|
||||
__ movd(eax, xmm1);
|
||||
__ movd(Operand(ebx, ecx, times_4, 10000), xmm1);
|
||||
|
||||
__ addsd(xmm1, xmm0);
|
||||
__ addsd(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
__ mulsd(xmm1, xmm0);
|
||||
@ -490,6 +495,8 @@ TEST(DisasmIa320) {
|
||||
__ psrlq(xmm0, 17);
|
||||
__ psrlq(xmm0, xmm1);
|
||||
|
||||
__ pshufd(xmm5, xmm1, 5);
|
||||
__ pshufd(xmm5, Operand(edx, 4), 5);
|
||||
__ pinsrw(xmm5, edx, 5);
|
||||
__ pinsrw(xmm5, Operand(edx, 4), 5);
|
||||
|
||||
@ -525,7 +532,9 @@ TEST(DisasmIa320) {
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope scope(&assm, SSE4_1);
|
||||
__ pextrd(eax, xmm0, 1);
|
||||
__ pextrd(Operand(edx, 4), xmm0, 1);
|
||||
__ pinsrd(xmm1, eax, 0);
|
||||
__ pinsrd(xmm1, Operand(edx, 4), 0);
|
||||
__ extractps(eax, xmm1, 0);
|
||||
|
||||
#define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
|
||||
@ -621,11 +630,22 @@ TEST(DisasmIa320) {
|
||||
__ vpsraw(xmm0, xmm7, 21);
|
||||
__ vpsrad(xmm0, xmm7, 21);
|
||||
|
||||
__ vpshufd(xmm5, xmm1, 5);
|
||||
__ vpshufd(xmm5, Operand(edx, 4), 5);
|
||||
__ vpextrd(eax, xmm0, 1);
|
||||
__ vpextrd(Operand(edx, 4), xmm0, 1);
|
||||
__ vpinsrd(xmm0, xmm1, eax, 0);
|
||||
__ vpinsrd(xmm0, xmm1, Operand(edx, 4), 0);
|
||||
|
||||
__ vcvtdq2ps(xmm1, xmm0);
|
||||
__ vcvtdq2ps(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
__ vcvttps2dq(xmm1, xmm0);
|
||||
__ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
|
||||
__ vmovd(xmm0, edi);
|
||||
__ vmovd(xmm0, Operand(ebx, ecx, times_4, 10000));
|
||||
__ vmovd(eax, xmm1);
|
||||
__ vmovd(Operand(ebx, ecx, times_4, 10000), xmm1);
|
||||
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
|
||||
__ v##instruction(xmm7, xmm5, xmm1); \
|
||||
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
|
||||
|
Loading…
Reference in New Issue
Block a user