From d598386db8d405ef3ed6217e1191f1404827c0c1 Mon Sep 17 00:00:00 2001 From: "jing.bao" Date: Tue, 14 Mar 2017 19:01:05 -0700 Subject: [PATCH] [ia32] Add minps/maxps and AVX v_ps/v_pd for add/sub/mul/div/min/max Also use vinstr for vps/vpd/vss/vsd BUG= Review-Url: https://codereview.chromium.org/2747103002 Cr-Commit-Position: refs/heads/master@{#43809} --- src/ia32/assembler-ia32.cc | 37 ++++++++++---------- src/ia32/assembler-ia32.h | 13 ++++++- src/ia32/disasm-ia32.cc | 60 +++++++++++++++++++++++++++++++++ test/cctest/test-disasm-ia32.cc | 28 +++++++++++++++ 4 files changed, 117 insertions(+), 21 deletions(-) diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc index 4dcf50ff85..4da15342c2 100644 --- a/src/ia32/assembler-ia32.cc +++ b/src/ia32/assembler-ia32.cc @@ -2261,6 +2261,19 @@ void Assembler::divps(XMMRegister dst, const Operand& src) { emit_sse_operand(dst, src); } +void Assembler::minps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x5D); + emit_sse_operand(dst, src); +} + +void Assembler::maxps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x5F); + emit_sse_operand(dst, src); +} void Assembler::sqrtsd(XMMRegister dst, const Operand& src) { EnsureSpace ensure_space(this); @@ -2759,41 +2772,25 @@ void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1, void Assembler::vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) { - DCHECK(IsEnabled(AVX)); - EnsureSpace ensure_space(this); - emit_vex_prefix(src1, kLIG, kF2, k0F, kWIG); - EMIT(op); - emit_sse_operand(dst, src2); + vinstr(op, dst, src1, src2, kF2, k0F, kWIG); } void Assembler::vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) { - DCHECK(IsEnabled(AVX)); - EnsureSpace ensure_space(this); - emit_vex_prefix(src1, kLIG, kF3, k0F, kWIG); - EMIT(op); - emit_sse_operand(dst, src2); + vinstr(op, dst, src1, src2, kF3, k0F, kWIG); } void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) { - DCHECK(IsEnabled(AVX)); - EnsureSpace ensure_space(this); - emit_vex_prefix(src1, kL128, kNone, k0F, kWIG); - EMIT(op); - emit_sse_operand(dst, src2); + vinstr(op, dst, src1, src2, kNone, k0F, kWIG); } void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) { - DCHECK(IsEnabled(AVX)); - EnsureSpace ensure_space(this); - emit_vex_prefix(src1, kL128, k66, k0F, kWIG); - EMIT(op); - emit_sse_operand(dst, src2); + vinstr(op, dst, src1, src2, k66, k0F, kWIG); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) { diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h index 6fd334723f..d405a91a7d 100644 --- a/src/ia32/assembler-ia32.h +++ b/src/ia32/assembler-ia32.h @@ -979,6 +979,11 @@ class Assembler : public AssemblerBase { void divps(XMMRegister dst, const Operand& src); void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); } + void minps(XMMRegister dst, const Operand& src); + void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); } + void maxps(XMMRegister dst, const Operand& src); + void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); } + // SSE2 instructions void cvttss2si(Register dst, const Operand& src); void cvttss2si(Register dst, XMMRegister src) { @@ -1403,7 +1408,13 @@ class Assembler : public AssemblerBase { #define PACKED_OP_LIST(V) \ V(and, 0x54) \ - V(xor, 0x57) + V(xor, 0x57) \ + V(add, 0x58) \ + V(mul, 0x59) \ + V(sub, 0x5c) \ + V(min, 0x5d) \ + V(div, 0x5e) \ + V(max, 0x5f) #define AVX_PACKED_OP_DECLARE(name, opcode) \ void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ diff --git a/src/ia32/disasm-ia32.cc b/src/ia32/disasm-ia32.cc index 6d1c1727ff..789e7ba9fe 100644 --- a/src/ia32/disasm-ia32.cc +++ b/src/ia32/disasm-ia32.cc @@ -998,6 +998,36 @@ int DisassemblerIA32::AVXInstruction(byte* data) { NameOfXMMRegister(vvvv)); current += PrintRightXMMOperand(current); break; + case 0x58: + AppendToBuffer("vaddps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x59: + AppendToBuffer("vmulps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5C: + AppendToBuffer("vsubps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5D: + AppendToBuffer("vminps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5E: + AppendToBuffer("vdivps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5F: + AppendToBuffer("vmaxps %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; default: UnimplementedInstruction(); } @@ -1015,6 +1045,36 @@ int DisassemblerIA32::AVXInstruction(byte* data) { NameOfXMMRegister(vvvv)); current += PrintRightXMMOperand(current); break; + case 0x58: + AppendToBuffer("vaddpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x59: + AppendToBuffer("vmulpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5C: + AppendToBuffer("vsubpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5D: + AppendToBuffer("vminpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5E: + AppendToBuffer("vdivpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; + case 0x5F: + AppendToBuffer("vmaxpd %s,%s,", NameOfXMMRegister(regop), + NameOfXMMRegister(vvvv)); + current += PrintRightXMMOperand(current); + break; case 0x71: AppendToBuffer("vps%sw %s,%s", sf_str[regop / 2], NameOfXMMRegister(vvvv), NameOfXMMRegister(rm)); diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc index 292740ff19..3331521df9 100644 --- a/test/cctest/test-disasm-ia32.cc +++ b/test/cctest/test-disasm-ia32.cc @@ -424,6 +424,10 @@ TEST(DisasmIa320) { __ mulps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ divps(xmm1, xmm0); __ divps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ minps(xmm1, xmm0); + __ minps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ maxps(xmm1, xmm0); + __ maxps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ ucomiss(xmm0, xmm1); __ ucomiss(xmm0, Operand(ebx, ecx, times_4, 10000)); @@ -550,11 +554,35 @@ TEST(DisasmIa320) { __ vandps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); __ vxorps(xmm0, xmm1, xmm2); __ vxorps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vaddps(xmm0, xmm1, xmm2); + __ vaddps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vmulps(xmm0, xmm1, xmm2); + __ vmulps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vsubps(xmm0, xmm1, xmm2); + __ vsubps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vminps(xmm0, xmm1, xmm2); + __ vminps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vdivps(xmm0, xmm1, xmm2); + __ vdivps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vmaxps(xmm0, xmm1, xmm2); + __ vmaxps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); __ vandpd(xmm0, xmm1, xmm2); __ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); __ vxorpd(xmm0, xmm1, xmm2); __ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vaddpd(xmm0, xmm1, xmm2); + __ vaddpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vmulpd(xmm0, xmm1, xmm2); + __ vmulpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vsubpd(xmm0, xmm1, xmm2); + __ vsubpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vminpd(xmm0, xmm1, xmm2); + __ vminpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vdivpd(xmm0, xmm1, xmm2); + __ vdivpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); + __ vmaxpd(xmm0, xmm1, xmm2); + __ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); __ vpsllw(xmm0, xmm7, 21); __ vpslld(xmm0, xmm7, 21);