Add phaddd/vphaddd, phaddw/vphaddw and vhaddps to ia32
Also add vhaddps to x64. Fix haddps for SSE3 scope and disassembler on ia32/x64. Change-Id: If511e6428fa1ce034b4281943dfee1405c9d4ffc Reviewed-on: https://chromium-review.googlesource.com/939265 Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Jing Bao <jing.bao@intel.com> Cr-Commit-Position: refs/heads/master@{#51642}
This commit is contained in:
parent
a4b615eba1
commit
36ddd0755f
@ -2169,6 +2169,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kX64F32x4AddHoriz: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE3);
|
||||
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
|
@ -2364,6 +2364,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
|
||||
}
|
||||
|
||||
void Assembler::haddps(XMMRegister dst, Operand src) {
|
||||
DCHECK(IsEnabled(SSE3));
|
||||
EnsureSpace ensure_space(this);
|
||||
EMIT(0xF2);
|
||||
EMIT(0x0F);
|
||||
|
@ -1396,6 +1396,12 @@ class Assembler : public AssemblerBase {
|
||||
void vrsqrtps(XMMRegister dst, Operand src) {
|
||||
vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
|
||||
}
|
||||
// Register-source convenience overload: wraps src2 in an Operand and forwards
// to the Operand overload of vhaddps.
void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vhaddps(dst, src1, Operand(src2));
|
||||
}
|
||||
// Operand-source form of vhaddps. Delegates the encoding to vinstr with
// opcode byte 0x7C and the kF2 / k0F / kWIG selectors.
void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
|
||||
vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
|
||||
}
|
||||
void vmovaps(XMMRegister dst, XMMRegister src) {
|
||||
vps(0x28, dst, xmm0, Operand(src));
|
||||
}
|
||||
|
@ -904,6 +904,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
|
||||
current++;
|
||||
break;
|
||||
case 0x7C:
|
||||
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
|
@ -44,6 +44,8 @@
|
||||
V(pxor, 66, 0F, EF)
|
||||
|
||||
#define SSSE3_INSTRUCTION_LIST(V) \
|
||||
V(phaddd, 66, 0F, 38, 02) \
|
||||
V(phaddw, 66, 0F, 38, 01) \
|
||||
V(pshufb, 66, 0F, 38, 00) \
|
||||
V(psignb, 66, 0F, 38, 08) \
|
||||
V(psignw, 66, 0F, 38, 09) \
|
||||
|
@ -3949,6 +3949,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
|
||||
}
|
||||
|
||||
void Assembler::haddps(XMMRegister dst, XMMRegister src) {
|
||||
DCHECK(IsEnabled(SSE3));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF2);
|
||||
emit_optional_rex_32(dst, src);
|
||||
@ -3958,6 +3959,7 @@ void Assembler::haddps(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::haddps(XMMRegister dst, Operand src) {
|
||||
DCHECK(IsEnabled(SSE3));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF2);
|
||||
emit_optional_rex_32(dst, src);
|
||||
|
@ -1460,6 +1460,7 @@ class Assembler : public AssemblerBase {
|
||||
AVX_P_3(vor, 0x56);
|
||||
AVX_P_3(vxor, 0x57);
|
||||
AVX_3(vcvtsd2ss, 0x5a, vsd);
|
||||
AVX_3(vhaddps, 0x7c, vsd);
|
||||
|
||||
#undef AVX_3
|
||||
#undef AVX_S_3
|
||||
|
@ -1156,6 +1156,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x7C:
|
||||
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
@ -1980,7 +1985,7 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("haddps %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightOperand(current);
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else {
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
|
@ -521,7 +521,6 @@ TEST(AssemblerIa32SSE) {
|
||||
__ mulps(xmm2, xmm1);
|
||||
__ subps(xmm2, xmm0);
|
||||
__ divps(xmm2, xmm1);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ cvttss2si(eax, xmm2);
|
||||
__ ret(0);
|
||||
}
|
||||
@ -539,6 +538,38 @@ TEST(AssemblerIa32SSE) {
|
||||
CHECK_EQ(2, f(1.0, 2.0));
|
||||
}
|
||||
|
||||
// Assembles a small haddps routine under an SSE3 feature scope, runs it, and
// checks the integer result. Returns early when SSE3 is not supported.
TEST(AssemblerIa32SSE3) {
|
||||
CcTest::InitializeVM();
|
||||
// Nothing to test on CPUs without SSE3.
if (!CpuFeatures::IsSupported(SSE3)) return;
|
||||
|
||||
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
|
||||
HandleScope scope(isolate);
|
||||
v8::internal::byte buffer[256];
|
||||
MacroAssembler assm(isolate, buffer, sizeof(buffer),
|
||||
v8::internal::CodeObjectRequired::kYes);
|
||||
{
|
||||
// haddps DCHECKs that SSE3 is enabled, so emit it inside a feature scope.
CpuFeatureScope fscope(&assm, SSE3);
|
||||
// Load the two scalar inputs from the stack slots at esp + kPointerSize and
// esp + 2 * kPointerSize.
// NOTE(review): presumably these are the two float arguments of F8; its
// typedef is outside this view - confirm.
__ movss(xmm0, Operand(esp, kPointerSize));
|
||||
__ movss(xmm1, Operand(esp, 2 * kPointerSize));
|
||||
// shufps with selector 0x0 replicates lane 0 into every lane.
__ shufps(xmm0, xmm0, 0x0);
|
||||
__ shufps(xmm1, xmm1, 0x0);
|
||||
// Horizontal add: the low lane of xmm1 becomes xmm1[0] + xmm1[1].
__ haddps(xmm1, xmm0);
|
||||
// Truncate the low lane of xmm1 to an integer in eax.
__ cvttss2si(eax, xmm1);
|
||||
__ ret(0);
|
||||
}
|
||||
|
||||
CodeDesc desc;
|
||||
assm.GetCode(isolate, &desc);
|
||||
Handle<Code> code =
|
||||
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
|
||||
#ifdef OBJECT_PRINT
|
||||
OFStream os(stdout);
|
||||
code->Print(os);
|
||||
#endif
|
||||
|
||||
F8 f = FUNCTION_CAST<F8>(code->entry());
|
||||
// With the second input (2.0) replicated across xmm1, the low lane after
// haddps is 2.0 + 2.0, hence the expected result of 4.
CHECK_EQ(4, f(1.0, 2.0));
|
||||
}
|
||||
|
||||
typedef int (*F9)(double x, double y, double z);
|
||||
TEST(AssemblerX64FMA_sd) {
|
||||
|
@ -908,7 +908,6 @@ TEST(AssemblerX64SSE) {
|
||||
__ subps(xmm2, xmm0);
|
||||
__ divps(xmm2, xmm1);
|
||||
__ cvttss2si(rax, xmm2);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ ret(0);
|
||||
}
|
||||
|
||||
@ -925,6 +924,36 @@ TEST(AssemblerX64SSE) {
|
||||
CHECK_EQ(2, f(1.0, 2.0));
|
||||
}
|
||||
|
||||
// Assembles a small haddps routine under an SSE3 feature scope, runs it, and
// checks the integer result. Returns early when SSE3 is not supported.
TEST(AssemblerX64SSE3) {
|
||||
CcTest::InitializeVM();
|
||||
// Nothing to test on CPUs without SSE3.
if (!CpuFeatures::IsSupported(SSE3)) return;
|
||||
|
||||
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
|
||||
HandleScope scope(isolate);
|
||||
v8::internal::byte buffer[256];
|
||||
MacroAssembler masm(isolate, buffer, sizeof(buffer),
|
||||
v8::internal::CodeObjectRequired::kYes);
|
||||
{
|
||||
// haddps DCHECKs that SSE3 is enabled, so emit it inside a feature scope.
CpuFeatureScope fscope(&masm, SSE3);
|
||||
__ shufps(xmm0, xmm0, 0x0);  // broadcast first argument
|
||||
__ shufps(xmm1, xmm1, 0x0);  // broadcast second argument
|
||||
// Horizontal add: the low lane of xmm1 becomes xmm1[0] + xmm1[1].
__ haddps(xmm1, xmm0);
|
||||
// Truncate the low lane of xmm1 to an integer in rax.
__ cvttss2si(rax, xmm1);
|
||||
__ ret(0);
|
||||
}
|
||||
|
||||
CodeDesc desc;
|
||||
masm.GetCode(isolate, &desc);
|
||||
Handle<Code> code =
|
||||
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
|
||||
#ifdef OBJECT_PRINT
|
||||
OFStream os(stdout);
|
||||
code->Print(os);
|
||||
#endif
|
||||
|
||||
// NOTE(review): F6 is declared outside this view; presumably it takes the
// two floats that arrive in xmm0 and xmm1 - confirm.
F6 f = FUNCTION_CAST<F6>(code->entry());
|
||||
// With the second argument (2.0) replicated across xmm1, the low lane after
// haddps is 2.0 + 2.0, hence the expected result of 4.
CHECK_EQ(4, f(1.0, 2.0));
|
||||
}
|
||||
|
||||
typedef int (*F7)(double x, double y, double z);
|
||||
TEST(AssemblerX64FMA_sd) {
|
||||
|
@ -500,8 +500,6 @@ TEST(DisasmIa320) {
|
||||
__ maxsd(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
__ ucomisd(xmm0, xmm1);
|
||||
__ cmpltsd(xmm0, xmm1);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
|
||||
__ andpd(xmm0, xmm1);
|
||||
|
||||
@ -551,6 +549,14 @@ TEST(DisasmIa320) {
|
||||
__ cmov(greater, eax, Operand(edx, 3));
|
||||
}
|
||||
|
||||
{
|
||||
if (CpuFeatures::IsSupported(SSE3)) {
|
||||
CpuFeatureScope scope(&assm, SSE3);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
}
|
||||
}
|
||||
|
||||
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
|
||||
__ instruction(xmm5, xmm1); \
|
||||
__ instruction(xmm5, Operand(edx, 4));
|
||||
@ -637,6 +643,8 @@ TEST(DisasmIa320) {
|
||||
__ vmovaps(xmm0, xmm1);
|
||||
__ vshufps(xmm0, xmm1, xmm2, 3);
|
||||
__ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
|
||||
__ vhaddps(xmm0, xmm1, xmm2);
|
||||
__ vhaddps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
|
||||
__ vcmpeqps(xmm5, xmm4, xmm1);
|
||||
__ vcmpeqps(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
|
||||
|
@ -475,8 +475,6 @@ TEST(DisasmX64) {
|
||||
__ maxsd(xmm1, xmm0);
|
||||
__ maxsd(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ ucomisd(xmm0, xmm1);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ andpd(xmm0, xmm1);
|
||||
__ andpd(xmm0, Operand(rbx, rcx, times_4, 10000));
|
||||
@ -530,6 +528,8 @@ TEST(DisasmX64) {
|
||||
{
|
||||
if (CpuFeatures::IsSupported(SSE3)) {
|
||||
CpuFeatureScope scope(&assm, SSE3);
|
||||
__ haddps(xmm1, xmm0);
|
||||
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ lddqu(xmm1, Operand(rdx, 4));
|
||||
}
|
||||
}
|
||||
@ -696,6 +696,8 @@ TEST(DisasmX64) {
|
||||
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vxorps(xmm0, xmm1, xmm9);
|
||||
__ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vhaddps(xmm0, xmm1, xmm9);
|
||||
__ vhaddps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vandpd(xmm0, xmm9, xmm2);
|
||||
__ vandpd(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
Loading…
Reference in New Issue
Block a user