[ia32] Add minps/maxps and AVX v_ps/v_pd for add/sub/mul/div/min/max

Also use vinstr for vps/vpd/vss/vsd

BUG=

Review-Url: https://codereview.chromium.org/2747103002
Cr-Commit-Position: refs/heads/master@{#43809}
This commit is contained in:
jing.bao 2017-03-14 19:01:05 -07:00 committed by Commit bot
parent 2e002b9e20
commit d598386db8
4 changed files with 117 additions and 21 deletions

View File

@ -2261,6 +2261,19 @@ void Assembler::divps(XMMRegister dst, const Operand& src) {
emit_sse_operand(dst, src);
}
// Emits the SSE MINPS instruction: the opcode bytes 0F 5D followed by the
// operand encoding that emit_sse_operand() produces for (dst, src).
// The statement order below is the byte order of the encoding.
void Assembler::minps(XMMRegister dst, const Operand& src) {
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
// Two-byte opcode: 0F escape byte, then 5D.
EMIT(0x0F);
EMIT(0x5D);
// Operand bytes for the destination register and the source operand.
emit_sse_operand(dst, src);
}
// Emits the SSE MAXPS instruction: the opcode bytes 0F 5F followed by the
// operand encoding that emit_sse_operand() produces for (dst, src).
// The statement order below is the byte order of the encoding.
void Assembler::maxps(XMMRegister dst, const Operand& src) {
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
// Two-byte opcode: 0F escape byte, then 5F.
EMIT(0x0F);
EMIT(0x5F);
// Operand bytes for the destination register and the source operand.
emit_sse_operand(dst, src);
}
void Assembler::sqrtsd(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this);
@ -2759,41 +2772,25 @@ void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
// Emits one VEX-encoded instruction with opcode byte |op|, destination |dst|,
// first source |src1| and second source |src2|, using the kF2 SIMD-prefix
// selector in the k0F opcode map with kWIG.
//
// The encoding is produced by vinstr() only. The body previously also wrote
// the VEX prefix, opcode and operand by hand before calling vinstr(), which
// encoded the instruction twice.
void Assembler::vsd(byte op, XMMRegister dst, XMMRegister src1,
                    const Operand& src2) {
  // Both guards are kept at this level because vinstr()'s own checks are not
  // visible from here.
  // NOTE(review): drop them if vinstr() already performs them.
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
}
// Emits one VEX-encoded instruction with opcode byte |op|, destination |dst|,
// first source |src1| and second source |src2|, using the kF3 SIMD-prefix
// selector in the k0F opcode map with kWIG.
//
// The encoding is produced by vinstr() only. The body previously also wrote
// the VEX prefix, opcode and operand by hand before calling vinstr(), which
// encoded the instruction twice.
void Assembler::vss(byte op, XMMRegister dst, XMMRegister src1,
                    const Operand& src2) {
  // Both guards are kept at this level because vinstr()'s own checks are not
  // visible from here.
  // NOTE(review): drop them if vinstr() already performs them.
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  vinstr(op, dst, src1, src2, kF3, k0F, kWIG);
}
// Emits one VEX-encoded instruction with opcode byte |op|, destination |dst|,
// first source |src1| and second source |src2|, with no SIMD prefix (kNone)
// in the k0F opcode map with kWIG.
//
// The encoding is produced by vinstr() only. The body previously also wrote
// the VEX prefix, opcode and operand by hand before calling vinstr(), which
// encoded the instruction twice.
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
                    const Operand& src2) {
  // Both guards are kept at this level because vinstr()'s own checks are not
  // visible from here.
  // NOTE(review): drop them if vinstr() already performs them. The hand-written
  // path passed kL128 explicitly; confirm vinstr() selects the same vector
  // length.
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  vinstr(op, dst, src1, src2, kNone, k0F, kWIG);
}
// Emits one VEX-encoded instruction with opcode byte |op|, destination |dst|,
// first source |src1| and second source |src2|, using the k66 SIMD-prefix
// selector in the k0F opcode map with kWIG.
//
// The encoding is produced by vinstr() only. The body previously also wrote
// the VEX prefix, opcode and operand by hand before calling vinstr(), which
// encoded the instruction twice.
void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1,
                    const Operand& src2) {
  // Both guards are kept at this level because vinstr()'s own checks are not
  // visible from here.
  // NOTE(review): drop them if vinstr() already performs them. The hand-written
  // path passed kL128 explicitly; confirm vinstr() selects the same vector
  // length.
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  vinstr(op, dst, src1, src2, k66, k0F, kWIG);
}
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {

View File

@ -979,6 +979,11 @@ class Assembler : public AssemblerBase {
// divps taking the source as an Operand; the body is defined out of line.
void divps(XMMRegister dst, const Operand& src);
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the overload above.
void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); }
// minps taking the source as an Operand; the body is defined out of line.
void minps(XMMRegister dst, const Operand& src);
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the overload above.
void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); }
// maxps taking the source as an Operand; the body is defined out of line.
void maxps(XMMRegister dst, const Operand& src);
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the overload above.
void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); }
// SSE2 instructions
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src) {
@ -1403,7 +1408,13 @@ class Assembler : public AssemblerBase {
// X-macro list of packed operations. Each entry is V(name, opcode), where
// |name| is the mnemonic stem (pasted into an identifier by the macro passed
// as V) and |opcode| is the instruction's opcode byte.
//
// Every entry except the last must end with a line continuation. A missing
// continuation silently ends the macro and leaves the remaining entries as
// stray tokens outside it. Each operation is listed exactly once.
#define PACKED_OP_LIST(V) \
  V(and, 0x54)            \
  V(xor, 0x57)            \
  V(add, 0x58)            \
  V(mul, 0x59)            \
  V(sub, 0x5c)            \
  V(min, 0x5d)            \
  V(div, 0x5e)            \
  V(max, 0x5f)
#define AVX_PACKED_OP_DECLARE(name, opcode) \
void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \

View File

@ -998,6 +998,36 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x59:
AppendToBuffer("vmulps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5C:
AppendToBuffer("vsubps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5D:
AppendToBuffer("vminps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5E:
AppendToBuffer("vdivps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5F:
AppendToBuffer("vmaxps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default:
UnimplementedInstruction();
}
@ -1015,6 +1045,36 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x59:
AppendToBuffer("vmulpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5C:
AppendToBuffer("vsubpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5D:
AppendToBuffer("vminpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5E:
AppendToBuffer("vdivpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5F:
AppendToBuffer("vmaxpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x71:
AppendToBuffer("vps%sw %s,%s", sf_str[regop / 2],
NameOfXMMRegister(vvvv), NameOfXMMRegister(rm));

View File

@ -424,6 +424,10 @@ TEST(DisasmIa320) {
__ mulps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ divps(xmm1, xmm0);
__ divps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ minps(xmm1, xmm0);
__ minps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ maxps(xmm1, xmm0);
__ maxps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ ucomiss(xmm0, xmm1);
__ ucomiss(xmm0, Operand(ebx, ecx, times_4, 10000));
@ -550,11 +554,35 @@ TEST(DisasmIa320) {
__ vandps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm2);
__ vxorps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vaddps(xmm0, xmm1, xmm2);
__ vaddps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmulps(xmm0, xmm1, xmm2);
__ vmulps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsubps(xmm0, xmm1, xmm2);
__ vsubps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vminps(xmm0, xmm1, xmm2);
__ vminps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vdivps(xmm0, xmm1, xmm2);
__ vdivps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmaxps(xmm0, xmm1, xmm2);
__ vmaxps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vandpd(xmm0, xmm1, xmm2);
__ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorpd(xmm0, xmm1, xmm2);
__ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vaddpd(xmm0, xmm1, xmm2);
__ vaddpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmulpd(xmm0, xmm1, xmm2);
__ vmulpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsubpd(xmm0, xmm1, xmm2);
__ vsubpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vminpd(xmm0, xmm1, xmm2);
__ vminpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vdivpd(xmm0, xmm1, xmm2);
__ vdivpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmaxpd(xmm0, xmm1, xmm2);
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vpsllw(xmm0, xmm7, 21);
__ vpslld(xmm0, xmm7, 21);