[ia32][wasm] Add I32x4Neg and I32x4 CompareOp

I32x4 Neg, Eq/Ne/GtS/GeS/GtU/GeU
Reconstruct RO/RRO macro instructions with AVX_OP3_WITH_TYPE

Bug: 
Change-Id: I3e391f489717db3456b884c6df0b1756497a1846
Reviewed-on: https://chromium-review.googlesource.com/616463
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#47385}
Author: jing.bao <jing.bao@intel.com>
Committed by: Commit Bot
Date: 2017-08-16 11:16:15 +08:00
Parent: e08cdc277b
Commit: f464961b2c
8 changed files with 185 additions and 33 deletions

@@ -1904,6 +1904,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
case kIA32I32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
Register ireg = {dst.code()};
if (src.is_reg(ireg)) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psignd(dst, kScratchDoubleReg);
} else {
__ Pxor(dst, dst);
__ Psubd(dst, src);
}
break;
}
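Note (editor's sketch, not part of the diff): kIA32I32x4Neg picks one of two sequences. When the source aliases the destination register, pcmpeqd produces an all-ones (-1) mask and psignd negates every lane whose mask lane is negative; otherwise the destination is zeroed with pxor and the source is subtracted with psubd. A minimal lane-wise reference model, with hypothetical names and wrapping arithmetic to match the SSE semantics:

// Hypothetical reference model for the two kIA32I32x4Neg paths above.
#include <array>
#include <cstdint>

using I32x4 = std::array<int32_t, 4>;

I32x4 I32x4NegModel(const I32x4& src) {
  I32x4 dst;
  for (int i = 0; i < 4; ++i) {
    // psignd with a -1 mask, and pxor + psubd, both compute 0 - lane,
    // wrapping on INT32_MIN just like the hardware instructions.
    dst[i] = static_cast<int32_t>(0u - static_cast<uint32_t>(src[i]));
  }
  return dst;
}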
case kSSEI32x4Shl: {
__ pslld(i.OutputSimd128Register(), i.InputInt8(1));
break;
@@ -1977,6 +1990,57 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI32x4Eq: {
__ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI32x4Eq: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI32x4Ne: {
__ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kAVXI32x4Ne: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
kScratchDoubleReg);
break;
}
case kSSEI32x4GtS: {
__ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI32x4GtS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI32x4GeS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(1);
__ pminsd(dst, src);
__ pcmpeqd(dst, src);
break;
}
case kAVXI32x4GeS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister src1 = i.InputSimd128Register(0);
Operand src2 = i.InputOperand(1);
__ vpminsd(kScratchDoubleReg, src1, src2);
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
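Note (editor's sketch, not part of the diff): SSE/AVX have no packed Ne or GeS instruction, so Ne is computed as Eq followed by an XOR with an all-ones mask, and GeS as pminsd followed by pcmpeqd, since min(a, b) == b holds exactly when a >= b. A hedged lane-wise model of those two sequences:

// Hypothetical reference model; comparisons yield all-ones (-1) for true
// and zero for false, as pcmpeqd/pcmpgtd do.
#include <algorithm>
#include <array>
#include <cstdint>

using I32x4 = std::array<int32_t, 4>;

I32x4 I32x4NeModel(const I32x4& a, const I32x4& b) {
  I32x4 r;
  for (int i = 0; i < 4; ++i) {
    int32_t eq = (a[i] == b[i]) ? -1 : 0;  // pcmpeqd
    r[i] = ~eq;                            // pxor with all-ones flips the result
  }
  return r;
}

I32x4 I32x4GeSModel(const I32x4& a, const I32x4& b) {
  I32x4 r;
  for (int i = 0; i < 4; ++i) {
    // pminsd then pcmpeqd: min(a, b) == b  <=>  a >= b
    r[i] = (std::min(a[i], b[i]) == b[i]) ? -1 : 0;
  }
  return r;
}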
case kSSEI32x4ShrU: {
__ psrld(i.OutputSimd128Register(), i.InputInt8(1));
break;
@@ -2009,6 +2073,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI32x4GtU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(1);
__ pmaxud(dst, src);
__ pcmpeqd(dst, src);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
break;
}
case kAVXI32x4GtU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src1 = i.InputSimd128Register(0);
Operand src2 = i.InputOperand(1);
__ vpmaxud(kScratchDoubleReg, src1, src2);
__ vpcmpeqd(dst, kScratchDoubleReg, src2);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpxor(dst, dst, kScratchDoubleReg);
break;
}
case kSSEI32x4GeU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(1);
__ pminud(dst, src);
__ pcmpeqd(dst, src);
break;
}
case kAVXI32x4GeU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister src1 = i.InputSimd128Register(0);
Operand src2 = i.InputOperand(1);
__ vpminud(kScratchDoubleReg, src1, src2);
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
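Note (editor's sketch, not part of the diff): there is no packed unsigned compare instruction, so GtU and GeU are derived from the SSE4.1 unsigned min/max: max(a, b) == b means a <= b, which is then inverted to get a > b, while min(a, b) == b gives a >= b directly. A hedged lane-wise model:

// Hypothetical reference model for the unsigned compares above.
#include <algorithm>
#include <array>
#include <cstdint>

using U32x4 = std::array<uint32_t, 4>;
using I32x4 = std::array<int32_t, 4>;

I32x4 I32x4GtUModel(const U32x4& a, const U32x4& b) {
  I32x4 r;
  for (int i = 0; i < 4; ++i) {
    int32_t le = (std::max(a[i], b[i]) == b[i]) ? -1 : 0;  // pmaxud + pcmpeqd: a <= b
    r[i] = ~le;                                            // pxor with all-ones: a > b
  }
  return r;
}

I32x4 I32x4GeUModel(const U32x4& a, const U32x4& b) {
  I32x4 r;
  for (int i = 0; i < 4; ++i) {
    r[i] = (std::min(a[i], b[i]) == b[i]) ? -1 : 0;  // pminud + pcmpeqd: a >= b
  }
  return r;
}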
case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));

@@ -115,6 +115,7 @@ namespace compiler {
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
V(AVXI32x4ReplaceLane) \
V(IA32I32x4Neg) \
V(SSEI32x4Shl) \
V(AVXI32x4Shl) \
V(SSEI32x4ShrS) \
@@ -129,12 +130,24 @@ namespace compiler {
V(AVXI32x4MinS) \
V(SSEI32x4MaxS) \
V(AVXI32x4MaxS) \
V(SSEI32x4Eq) \
V(AVXI32x4Eq) \
V(SSEI32x4Ne) \
V(AVXI32x4Ne) \
V(SSEI32x4GtS) \
V(AVXI32x4GtS) \
V(SSEI32x4GeS) \
V(AVXI32x4GeS) \
V(SSEI32x4ShrU) \
V(AVXI32x4ShrU) \
V(SSEI32x4MinU) \
V(AVXI32x4MinU) \
V(SSEI32x4MaxU) \
V(AVXI32x4MaxU) \
V(SSEI32x4GtU) \
V(AVXI32x4GtU) \
V(SSEI32x4GeU) \
V(AVXI32x4GeU) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLane) \
V(SSEI16x8ReplaceLane) \

@@ -101,6 +101,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
case kAVXI32x4ReplaceLane:
case kIA32I32x4Neg:
case kSSEI32x4Shl:
case kAVXI32x4Shl:
case kSSEI32x4ShrS:
@@ -115,12 +116,24 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4MinS:
case kSSEI32x4MaxS:
case kAVXI32x4MaxS:
case kSSEI32x4Eq:
case kAVXI32x4Eq:
case kSSEI32x4Ne:
case kAVXI32x4Ne:
case kSSEI32x4GtS:
case kAVXI32x4GtS:
case kSSEI32x4GeS:
case kAVXI32x4GeS:
case kSSEI32x4ShrU:
case kAVXI32x4ShrU:
case kSSEI32x4MinU:
case kAVXI32x4MinU:
case kSSEI32x4MaxU:
case kAVXI32x4MaxU:
case kSSEI32x4GtU:
case kAVXI32x4GtU:
case kSSEI32x4GeU:
case kAVXI32x4GeU:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLane:
case kSSEI16x8ReplaceLane:

@@ -1904,8 +1904,16 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4Mul) \
V(I32x4MinS) \
V(I32x4MaxS) \
V(I32x4Eq) \
V(I32x4Ne) \
V(I32x4GtS) \
V(I32x4GeS) \
V(I32x4MinU) \
V(I32x4MaxU)
V(I32x4MaxU) \
V(I32x4GtU) \
V(I32x4GeU)
#define SIMD_UNOP_LIST(V) V(I32x4Neg)
#define SIMD_SHIFT_OPCODES(V) \
V(I32x4Shl) \
@@ -1960,6 +1968,14 @@ SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
#undef VISIT_SIMD_SHIFT
#define VISIT_SIMD_UNOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
IA32OperandGenerator g(this); \
Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
}
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
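Note (editor's sketch, not part of the diff): VISIT_SIMD_UNOP stamps out one visitor per entry in SIMD_UNOP_LIST. For I32x4Neg the expansion is roughly:

void InstructionSelector::VisitI32x4Neg(Node* node) {
  IA32OperandGenerator g(this);
  Emit(kIA32I32x4Neg, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
}

Using g.Use (rather than g.UseRegister) lets the input arrive as either a register or a memory operand, which is why the kIA32I32x4Neg case in the code generator handles both an aliasing register and a general Operand.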
#define VISIT_SIMD_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \

@@ -2132,20 +2132,11 @@ void InstructionSelector::VisitI32x4Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MaxS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MinS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Ne(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); }
@@ -2206,7 +2197,7 @@ void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
!V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4GtS(Node* node) { UNIMPLEMENTED(); }
@@ -2216,11 +2207,7 @@ void InstructionSelector::VisitI32x4GeS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4GeU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI16x8Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8ExtractLane(Node* node) { UNIMPLEMENTED(); }

@@ -1472,15 +1472,6 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
}
}
void TurboAssembler::Pxor(XMMRegister dst, const Operand& src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpxor(dst, dst, src);
} else {
pxor(dst, src);
}
}
void TurboAssembler::Pshuflw(XMMRegister dst, const Operand& src,
uint8_t shuffle) {
if (CpuFeatures::IsSupported(AVX)) {
@@ -1501,6 +1492,20 @@ void TurboAssembler::Pshufd(XMMRegister dst, const Operand& src,
}
}
void TurboAssembler::Psignd(XMMRegister dst, const Operand& src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsignd(dst, dst, src);
return;
}
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope sse_scope(this, SSSE3);
psignd(dst, src);
return;
}
UNREACHABLE();
}
void TurboAssembler::Pshufb(XMMRegister dst, const Operand& src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);

@@ -202,9 +202,6 @@ class TurboAssembler : public Assembler {
// may be bigger than 2^16 - 1. Requires a scratch register.
void Ret(int bytes_dropped, Register scratch);
void Pxor(XMMRegister dst, XMMRegister src) { Pxor(dst, Operand(src)); }
void Pxor(XMMRegister dst, const Operand& src);
void Pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
Pshuflw(dst, Operand(src), shuffle);
}
@@ -232,9 +229,33 @@
#undef AVX_OP2_WITH_TYPE
// Only use these macros when non-destructive source of AVX version is not
// needed.
#define AVX_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
void macro_name(dst_type dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, dst, src); \
} else { \
name(dst, src); \
} \
}
#define AVX_OP3_XO(macro_name, name) \
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, const Operand&)
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
AVX_OP3_XO(Psubd, psubd)
AVX_OP3_XO(Pxor, pxor)
#undef AVX_OP3_XO
#undef AVX_OP3_WITH_TYPE
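Note (editor's sketch, not part of the diff): AVX_OP3_WITH_TYPE replaces the hand-written Pxor helper deleted from the .cc file with generated overloads that dispatch at runtime. For example, AVX_OP3_XO(Pxor, pxor) expands to roughly:

void Pxor(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpxor(dst, dst, src);  // three-operand AVX form, used destructively (dst doubles as src1)
  } else {
    pxor(dst, src);        // two-operand SSE2 form
  }
}
// ...plus an identical overload taking const Operand& src.

Pcmpeqd and Psubd get the same pair of overloads, which is what the kIA32I32x4Neg case in the code generator relies on.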
// Non-SSE2 instructions.
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
void Pshufb(XMMRegister dst, const Operand& src);
void Psignd(XMMRegister dst, XMMRegister src) { Psignd(dst, Operand(src)); }
void Psignd(XMMRegister dst, const Operand& src);
void Pextrb(Register dst, XMMRegister src, int8_t imm8);
void Pextrw(Register dst, XMMRegister src, int8_t imm8);

@@ -923,8 +923,6 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET || \
V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
void RunI32x4UnOpTest(WasmOpcode simd_op, Int32UnOp expected_op) {
WasmRunner<int32_t, int32_t, int32_t> r(kExecuteCompiled);
byte a = 0;
@@ -938,8 +936,6 @@ void RunI32x4UnOpTest(WasmOpcode simd_op, Int32UnOp expected_op) {
}
WASM_SIMD_TEST(I32x4Neg) { RunI32x4UnOpTest(kExprI32x4Neg, Negate); }
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET ||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET || \
V8_TARGET_ARCH_X64
@@ -990,6 +986,8 @@ WASM_SIMD_TEST(S128And) { RunI32x4BinOpTest(kExprS128And, And); }
WASM_SIMD_TEST(S128Or) { RunI32x4BinOpTest(kExprS128Or, Or); }
WASM_SIMD_TEST(S128Xor) { RunI32x4BinOpTest(kExprS128Xor, Xor); }
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// SIMD_LOWERING_TARGET || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void RunI32x4CompareOpTest(WasmOpcode simd_op, Int32CompareOp expected_op) {
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteCompiled);
@@ -1034,8 +1032,6 @@ WASM_SIMD_TEST(I32x4GtU) {
WASM_SIMD_TEST(I32x4GeU) {
RunI32x4CompareOpTest(kExprI32x4GeU, UnsignedGreaterEqual);
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// SIMD_LOWERING_TARGET || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void RunI32x4ShiftOpTest(WasmOpcode simd_op, Int32ShiftOp expected_op,
int shift) {