[ia32][wasm] Support AVX instructions for I32x4Splat/ReplaceLane/ExtractLane

Add vpinsrd, vpextrd and vmovd. Add an Operand overload to pshufd and add vpshufd.
Add Movd and Pshufd macros. Add AVX support to Pextrd.
Fix the disassembler to recognize "pinsrd xmm,[mem],imm8" and "pextrd [mem],xmm,imm8".

BUG=

Review-Url: https://codereview.chromium.org/2916093002
Cr-Commit-Position: refs/heads/master@{#45833}
Author: jing.bao, 2017-06-11 18:57:05 -07:00 (committed by Commit Bot)
Parent: dc3de67047
Commit: 6d18ae2589
10 changed files with 179 additions and 33 deletions
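A note on the lowering (annotation, not from the commit itself): the new Movd and Pshufd macro-assembler helpers test CpuFeatures::IsSupported(AVX) at code-generation time and emit the VEX form (vmovd/vpshufd) when available, falling back to the legacy SSE encodings otherwise. For I32x4Splat, the shuffle immediate 0x0 selects source lane 0 for every destination lane, which is what makes the two-instruction sequence a broadcast:

    // Illustrative sketch only; names follow the code-generator hunk below.
    __ Movd(dst, i.InputOperand(0));  // lane 0 <- GP input (vmovd or movd)
    __ Pshufd(dst, dst, 0x0);         // 0b00'00'00'00: copy lane 0 to all lanes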

src/compiler/ia32/code-generator-ia32.cc

@@ -1892,16 +1892,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kIA32I32x4Splat: {
       XMMRegister dst = i.OutputSimd128Register();
-      __ movd(dst, i.InputOperand(0));
-      __ pshufd(dst, dst, 0x0);
+      __ Movd(dst, i.InputOperand(0));
+      __ Pshufd(dst, dst, 0x0);
       break;
     }
     case kIA32I32x4ExtractLane: {
       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
-    case kIA32I32x4ReplaceLane: {
-      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+    case kSSEI32x4ReplaceLane: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kSSEI32x4Add: {
@@ -1912,6 +1913,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
       break;
     }
+    case kAVXI32x4ReplaceLane: {
+      CpuFeatureScope avx_scope(masm(), AVX);
+      __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
     case kAVXI32x4Add: {
       CpuFeatureScope avx_scope(masm(), AVX);
       __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),

src/compiler/ia32/instruction-codes-ia32.h

@@ -113,9 +113,10 @@ namespace compiler {
   V(IA32StackCheck)        \
   V(IA32I32x4Splat)        \
   V(IA32I32x4ExtractLane)  \
-  V(IA32I32x4ReplaceLane)  \
+  V(SSEI32x4ReplaceLane)   \
   V(SSEI32x4Add)           \
   V(SSEI32x4Sub)           \
+  V(AVXI32x4ReplaceLane)   \
   V(AVXI32x4Add)           \
   V(AVXI32x4Sub)

src/compiler/ia32/instruction-scheduler-ia32.cc

@@ -99,9 +99,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32BitcastIF:
     case kIA32I32x4Splat:
     case kIA32I32x4ExtractLane:
-    case kIA32I32x4ReplaceLane:
+    case kSSEI32x4ReplaceLane:
     case kSSEI32x4Add:
     case kSSEI32x4Sub:
+    case kAVXI32x4ReplaceLane:
     case kAVXI32x4Add:
     case kAVXI32x4Sub:
       return (instr->addressing_mode() == kMode_None)

src/compiler/ia32/instruction-selector-ia32.cc

@@ -1906,10 +1906,16 @@ void InstructionSelector::VisitI32x4ExtractLane(Node* node) {
 void InstructionSelector::VisitI32x4ReplaceLane(Node* node) {
   IA32OperandGenerator g(this);
-  int32_t lane = OpParameter<int32_t>(node);
-  Emit(kIA32I32x4ReplaceLane, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
-       g.Use(node->InputAt(1)));
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseImmediate(OpParameter<int32_t>(node));
+  InstructionOperand operand2 = g.Use(node->InputAt(1));
+  if (IsSupported(AVX)) {
+    Emit(kAVXI32x4ReplaceLane, g.DefineAsRegister(node), operand0, operand1,
+         operand2);
+  } else {
+    Emit(kSSEI32x4ReplaceLane, g.DefineSameAsFirst(node), operand0, operand1,
+         operand2);
+  }
 }
 
 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
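Why the two opcodes get different output constraints (annotation, not from the commit itself): SSE4.1 pinsrd is destructive, reading and writing its first operand, so the SSE path must allocate the output in the same register as input 0 (DefineSameAsFirst). The VEX-encoded vpinsrd takes a separate source in the vvvv field and writes an independent destination, so the AVX path can place the result in any register (DefineAsRegister):

    // Illustrative sketch only; operand names are placeholders.
    __ pinsrd(dst, val, lane);         // SSE: dst <- dst with one lane replaced
    __ vpinsrd(dst, src1, val, lane);  // AVX: dst <- src1 with one lane replaced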

src/ia32/assembler-ia32.cc

@@ -2683,8 +2683,7 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
   emit_sse_operand(dst, src);
 }
 
-void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
+void Assembler::pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
   EnsureSpace ensure_space(this);
   EMIT(0x66);
   EMIT(0x0F);
@@ -2884,6 +2883,22 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
   EMIT(imm8);
 }
 
+void Assembler::vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
+  vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
+  EMIT(shuffle);
+}
+
+void Assembler::vpextrd(const Operand& dst, XMMRegister src, int8_t offset) {
+  vinstr(0x16, src, xmm0, dst, k66, k0F3A, kWIG);
+  EMIT(offset);
+}
+
+void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
+                        int8_t offset) {
+  vinstr(0x22, dst, src1, src2, k66, k0F3A, kWIG);
+  EMIT(offset);
+}
+
 void Assembler::bmi1(byte op, Register reg, Register vreg, const Operand& rm) {
   DCHECK(IsEnabled(BMI1));
   EnsureSpace ensure_space(this);

src/ia32/assembler-ia32.h

@@ -1127,7 +1127,11 @@ class Assembler : public AssemblerBase {
   void psllq(XMMRegister dst, XMMRegister src);
   void psrlq(XMMRegister reg, int8_t shift);
   void psrlq(XMMRegister dst, XMMRegister src);
-  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
+  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
+    pshufd(dst, Operand(src), shuffle);
+  }
+  void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
   void pextrd(Register dst, XMMRegister src, int8_t offset) {
     pextrd(Operand(dst), src, offset);
   }
@@ -1384,6 +1388,21 @@ class Assembler : public AssemblerBase {
   void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8);
   void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8);
+  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
+    vpshufd(dst, Operand(src), shuffle);
+  }
+  void vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
+  void vpextrd(Register dst, XMMRegister src, int8_t offset) {
+    vpextrd(Operand(dst), src, offset);
+  }
+  void vpextrd(const Operand& dst, XMMRegister src, int8_t offset);
+  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
+               int8_t offset) {
+    vpinsrd(dst, src1, Operand(src2), offset);
+  }
+  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
+               int8_t offset);
   void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
     vcvtdq2ps(dst, Operand(src));
   }
@@ -1397,6 +1416,15 @@ class Assembler : public AssemblerBase {
     vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
   }
+  void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
+  void vmovd(XMMRegister dst, const Operand& src) {
+    vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
+  }
+  void vmovd(Register dst, XMMRegister src) { vmovd(Operand(dst), src); }
+  void vmovd(const Operand& dst, XMMRegister src) {
+    vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
+  }
+
   // BMI instruction
   void andn(Register dst, Register src1, Register src2) {
     andn(dst, src1, Operand(src2));

src/ia32/disasm-ia32.cc

@@ -817,6 +817,27 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
       default:
         UnimplementedInstruction();
     }
+  } else if (vex_66() && vex_0f3a()) {
+    int mod, regop, rm, vvvv = vex_vreg();
+    get_modrm(*current, &mod, &regop, &rm);
+    switch (opcode) {
+      case 0x16:
+        AppendToBuffer("vpextrd ");
+        current += PrintRightOperand(current);
+        AppendToBuffer(",%s,%d", NameOfXMMRegister(regop),
+                       *reinterpret_cast<int8_t*>(current));
+        current++;
+        break;
+      case 0x22:
+        AppendToBuffer("vpinsrd %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightOperand(current);
+        AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
+        current++;
+        break;
+      default:
+        UnimplementedInstruction();
+    }
   } else if (vex_f2() && vex_0f()) {
     int mod, regop, rm, vvvv = vex_vreg();
     get_modrm(*current, &mod, &regop, &rm);
@@ -1101,6 +1122,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
                        NameOfXMMRegister(vvvv));
         current += PrintRightXMMOperand(current);
         break;
+      case 0x6E:
+        AppendToBuffer("vmovd %s,", NameOfXMMRegister(regop));
+        current += PrintRightOperand(current);
+        break;
+      case 0x70:
+        AppendToBuffer("vpshufd %s,", NameOfXMMRegister(regop));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
+        current++;
+        break;
       case 0x71:
         AppendToBuffer("vps%sw %s,%s", sf_str[regop / 2],
                        NameOfXMMRegister(vvvv), NameOfXMMRegister(rm));
@@ -1113,6 +1144,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
         current++;
         AppendToBuffer(",%u", *current++);
         break;
+      case 0x7E:
+        AppendToBuffer("vmovd ");
+        current += PrintRightOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(regop));
+        break;
 #define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
   case 0x##opcode: {                                                      \
     AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop),  \
@@ -1850,13 +1886,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         } else if (*data == 0x16) {
           data++;
           int mod, regop, rm;
-          get_modrm(*data, &mod, &rm, &regop);
-          int8_t imm8 = static_cast<int8_t>(data[1]);
-          AppendToBuffer("pextrd %s,%s,%d",
-                         NameOfCPURegister(regop),
-                         NameOfXMMRegister(rm),
-                         static_cast<int>(imm8));
-          data += 2;
+          get_modrm(*data, &mod, &regop, &rm);
+          AppendToBuffer("pextrd ");
+          data += PrintRightOperand(data);
+          AppendToBuffer(",%s,%d", NameOfXMMRegister(regop),
+                         *reinterpret_cast<int8_t*>(data));
+          data++;
         } else if (*data == 0x17) {
           data++;
           int mod, regop, rm;
@@ -1871,12 +1906,10 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
           data++;
           int mod, regop, rm;
           get_modrm(*data, &mod, &regop, &rm);
-          int8_t imm8 = static_cast<int8_t>(data[1]);
-          AppendToBuffer("pinsrd %s,%s,%d",
-                         NameOfXMMRegister(regop),
-                         NameOfCPURegister(rm),
-                         static_cast<int>(imm8));
-          data += 2;
+          AppendToBuffer("pinsrd %s,", NameOfXMMRegister(regop));
+          data += PrintRightOperand(data);
+          AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
+          data++;
         } else {
           UnimplementedInstruction();
         }
@@ -1942,12 +1975,10 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
       data++;
       int mod, regop, rm;
       get_modrm(*data, &mod, &regop, &rm);
-      int8_t imm8 = static_cast<int8_t>(data[1]);
-      AppendToBuffer("pshufd %s,%s,%d",
-                     NameOfXMMRegister(regop),
-                     NameOfXMMRegister(rm),
-                     static_cast<int>(imm8));
-      data += 2;
+      AppendToBuffer("pshufd %s,", NameOfXMMRegister(regop));
+      data += PrintRightXMMOperand(data);
+      AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
+      data++;
     } else if (*data == 0x90) {
       data++;
       AppendToBuffer("nop");  // 2 byte nop.

src/ia32/macro-assembler-ia32.cc

@@ -2136,10 +2136,24 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) {
   }
 }
 
+void MacroAssembler::Pshufd(XMMRegister dst, const Operand& src,
+                            uint8_t shuffle) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpshufd(dst, src, shuffle);
+  } else {
+    pshufd(dst, src, shuffle);
+  }
+}
+
 void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
   if (imm8 == 0) {
-    movd(dst, src);
+    Movd(dst, src);
     return;
   }
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpextrd(dst, src, imm8);
+    return;
+  }
   if (CpuFeatures::IsSupported(SSE4_1)) {
src/ia32/macro-assembler-ia32.h

@@ -715,6 +715,29 @@ class MacroAssembler: public Assembler {
   void PushReturnAddressFrom(Register src) { push(src); }
   void PopReturnAddressTo(Register dst) { pop(dst); }
 
+  // SSE/SSE2 instructions with AVX version.
+#define AVX_OP2_WITH_TYPE(macro_name, name, dst_type, src_type) \
+  void macro_name(dst_type dst, src_type src) {                 \
+    if (CpuFeatures::IsSupported(AVX)) {                        \
+      CpuFeatureScope scope(this, AVX);                         \
+      v##name(dst, src);                                        \
+    } else {                                                    \
+      name(dst, src);                                           \
+    }                                                           \
+  }
+
+  AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Register)
+  AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, const Operand&)
+  AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
+  AVX_OP2_WITH_TYPE(Movd, movd, const Operand&, XMMRegister)
+#undef AVX_OP2_WITH_TYPE
+
+  void Pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
+    Pshufd(dst, Operand(src), shuffle);
+  }
+  void Pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
+
   // Non-SSE2 instructions.
   void Pextrd(Register dst, XMMRegister src, int8_t imm8);
   void Pinsrd(XMMRegister dst, Register src, int8_t imm8,
test/cctest/test-disasm-ia32.cc

@@ -462,6 +462,11 @@ TEST(DisasmIa320) {
     __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
     __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
     __ movd(xmm0, edi);
+    __ movd(xmm0, Operand(ebx, ecx, times_4, 10000));
+    __ movd(eax, xmm1);
+    __ movd(Operand(ebx, ecx, times_4, 10000), xmm1);
     __ addsd(xmm1, xmm0);
     __ addsd(xmm1, Operand(ebx, ecx, times_4, 10000));
     __ mulsd(xmm1, xmm0);
@@ -490,6 +495,8 @@ TEST(DisasmIa320) {
     __ psrlq(xmm0, 17);
     __ psrlq(xmm0, xmm1);
     __ pshufd(xmm5, xmm1, 5);
+    __ pshufd(xmm5, Operand(edx, 4), 5);
     __ pinsrw(xmm5, edx, 5);
     __ pinsrw(xmm5, Operand(edx, 4), 5);
@@ -525,7 +532,9 @@ TEST(DisasmIa320) {
   if (CpuFeatures::IsSupported(SSE4_1)) {
     CpuFeatureScope scope(&assm, SSE4_1);
     __ pextrd(eax, xmm0, 1);
+    __ pextrd(Operand(edx, 4), xmm0, 1);
     __ pinsrd(xmm1, eax, 0);
+    __ pinsrd(xmm1, Operand(edx, 4), 0);
     __ extractps(eax, xmm1, 0);
 
 #define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
@@ -621,11 +630,22 @@ TEST(DisasmIa320) {
     __ vpsraw(xmm0, xmm7, 21);
     __ vpsrad(xmm0, xmm7, 21);
+    __ vpshufd(xmm5, xmm1, 5);
+    __ vpshufd(xmm5, Operand(edx, 4), 5);
+    __ vpextrd(eax, xmm0, 1);
+    __ vpextrd(Operand(edx, 4), xmm0, 1);
+    __ vpinsrd(xmm0, xmm1, eax, 0);
+    __ vpinsrd(xmm0, xmm1, Operand(edx, 4), 0);
     __ vcvtdq2ps(xmm1, xmm0);
     __ vcvtdq2ps(xmm1, Operand(ebx, ecx, times_4, 10000));
     __ vcvttps2dq(xmm1, xmm0);
     __ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
+    __ vmovd(xmm0, edi);
+    __ vmovd(xmm0, Operand(ebx, ecx, times_4, 10000));
+    __ vmovd(eax, xmm1);
+    __ vmovd(Operand(ebx, ecx, times_4, 10000), xmm1);
 
 #define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
   __ v##instruction(xmm7, xmm5, xmm1);                                \
   __ v##instruction(xmm7, xmm5, Operand(edx, 4));