[ia32] Add psignb/w/d and their AVX versions

Reimplement pshufb using the SSSE3 instruction-list macro

Bug: 
Change-Id: I5556ce1108378fc7a7658443cd09c3f676c16aa7
Reviewed-on: https://chromium-review.googlesource.com/603907
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47228}
This commit is contained in:
jing.bao 2017-08-07 21:51:30 +08:00 committed by Commit Bot
parent fc574b168b
commit 9359dc4de1
5 changed files with 58 additions and 54 deletions

View File

@ -2707,16 +2707,6 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
emit_sse_operand(dst, src);
}
// Emits pshufb dst, src: the bytes 66 0F 38 00 followed by the operand
// encoding written by emit_sse_operand. SSSE3 must be enabled.
void Assembler::pshufb(XMMRegister dst, const Operand& src) {
  DCHECK(IsEnabled(SSSE3));
  EnsureSpace ensure_space(this);
  // Prefix, two escape bytes and the opcode, in emission order.
  const uint8_t kLeadingBytes[] = {0x66, 0x0F, 0x38, 0x00};
  for (uint8_t leading_byte : kLeadingBytes) {
    EMIT(leading_byte);
  }
  emit_sse_operand(dst, src);
}
void Assembler::pshuflw(XMMRegister dst, const Operand& src, uint8_t shuffle) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
@ -3070,6 +3060,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
emit_sse_operand(dst, src);
}
// Emits an SSSE3 instruction: |prefix|, |escape1|, |escape2| and |opcode|
// are written in that order, followed by the operand encoding for dst/src.
// SSSE3 must be enabled.
void Assembler::ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
                            byte escape1, byte escape2, byte opcode) {
  DCHECK(IsEnabled(SSSE3));
  EnsureSpace ensure_space(this);
  // Keep the bytes in emission order; the loop below writes them as-is.
  const byte leading_bytes[] = {prefix, escape1, escape2, opcode};
  for (byte leading_byte : leading_bytes) {
    EMIT(leading_byte);
  }
  emit_sse_operand(dst, src);
}
void Assembler::sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode) {
DCHECK(IsEnabled(SSE4_1));

View File

@ -1141,9 +1141,6 @@ class Assembler : public AssemblerBase {
void psrlq(XMMRegister reg, int8_t shift);
void psrlq(XMMRegister dst, XMMRegister src);
// pshufb is an SSSE3 instruction. The register/register overload wraps src
// in an Operand and forwards to the Operand overload, which is defined out
// of line.
void pshufb(XMMRegister dst, XMMRegister src) { pshufb(dst, Operand(src)); }
void pshufb(XMMRegister dst, const Operand& src);
// Register form of pshuflw: wraps src in an Operand and forwards to the
// Operand overload, passing |shuffle| through unchanged.
void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
pshuflw(dst, Operand(src), shuffle);
}
@ -1423,12 +1420,6 @@ class Assembler : public AssemblerBase {
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8);
// Three-operand vpshufb. The all-register overload wraps src2 in an Operand
// and forwards to the Operand overload.
void vpshufb(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vpshufb(dst, src1, Operand(src2));
}
// Encodes through vinstr with opcode 0x00 and the k66 / k0F38 / kW0
// selectors.
void vpshufb(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vinstr(0x00, dst, src1, src2, k66, k0F38, kW0);
}
// Register form of vpshuflw: wraps src in an Operand and forwards to the
// Operand overload, passing |shuffle| through unchanged.
void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
vpshuflw(dst, Operand(src), shuffle);
}
@ -1647,6 +1638,18 @@ class Assembler : public AssemblerBase {
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION
// Declares two overloads for every entry of SSSE3_INSTRUCTION_LIST: a
// register/register form that wraps src in an Operand, and a
// register/Operand form that calls ssse3_instr. The list supplies prefix,
// escape1, escape2 and opcode as bare hex digits, so each one is pasted onto
// "0x" to form the byte literal. The helper macro is #undef'd right after
// the list has been expanded.
#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
}
SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
@ -1659,8 +1662,8 @@ class Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION
#define DECLARE_SSE4_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \
} \
@ -1669,8 +1672,9 @@ class Assembler : public AssemblerBase {
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_AVX_INSTRUCTION)
#undef DECLARE_SSE4_AVX_INSTRUCTION
SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
@ -1800,6 +1804,8 @@ class Assembler : public AssemblerBase {
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode);
// Emits |prefix|, |escape1|, |escape2| and |opcode| in that order, followed
// by the operand encoding for dst/src. DCHECKs that SSSE3 is enabled.
void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode);
void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,

View File

@ -738,11 +738,6 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x00:
AppendToBuffer("vpshufb %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
@ -817,6 +812,7 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
break; \
}
SSSE3_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default:
@ -1885,24 +1881,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
case 0x00:
AppendToBuffer("pshufb %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
break;
case 0x17:
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
break;
#define SSE4_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
}
SSE4_INSTRUCTION_LIST(SSE4_DIS_CASE)
#undef SSE4_DIS_CASE
SSSE3_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSE4_INSTRUCTION_LIST(SSE34_DIS_CASE)
#undef SSE34_DIS_CASE
default:
UnimplementedInstruction();
}

View File

@ -43,6 +43,12 @@
V(punpckldq, 66, 0F, 62) \
V(pxor, 66, 0F, EF)
// SSSE3 instructions as V(name, prefix, escape1, escape2, opcode). The
// encoding bytes are written as bare hex digits without a 0x prefix.
#define SSSE3_INSTRUCTION_LIST(V) \
V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \
V(psignd, 66, 0F, 38, 0A)
#define SSE4_INSTRUCTION_LIST(V) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \

View File

@ -530,11 +530,14 @@ TEST(DisasmIa320) {
__ cmov(greater, eax, Operand(edx, 3));
}
// Emits both the register/register and the register/memory form of
// |instruction|. Only the instruction name is used; the four encoding
// columns of the instruction lists are ignored.
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
{
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope scope(&assm, SSSE3);
__ pshufb(xmm5, xmm1);
__ pshufb(xmm5, Operand(edx, 4));
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
}
@ -553,14 +556,10 @@ TEST(DisasmIa320) {
__ pinsrd(xmm1, Operand(edx, 4), 0);
__ extractps(eax, xmm1, 0);
#define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_INSTR)
#undef EMIT_SSE4_INSTR
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
}
#undef EMIT_SSE34_INSTR
// AVX instruction
{
@ -646,8 +645,6 @@ TEST(DisasmIa320) {
__ vpsraw(xmm0, xmm7, 21);
__ vpsrad(xmm0, xmm7, 21);
__ vpshufb(xmm5, xmm0, xmm1);
__ vpshufb(xmm5, xmm0, Operand(edx, 4));
__ vpshuflw(xmm5, xmm1, 5);
__ vpshuflw(xmm5, Operand(edx, 4), 5);
__ vpshufd(xmm5, xmm1, 5);
@ -681,13 +678,14 @@ TEST(DisasmIa320) {
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
#define EMIT_SSE4_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm7, xmm5, xmm1); \
#define EMIT_SSE34_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_AVXINSTR)
#undef EMIT_SSE4_AVXINSTR
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
SSE4_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
#undef EMIT_SSE34_AVXINSTR
}
}