[ia32][wasm] Add I32x4Neg and I32x4 CompareOp
I32x4 Neg, Eq/Ne/GtS/GeS/GtU/GeU
Reconstruct ro/RRO macro instructions with AVX_OP3_WITH_TYPE

Bug:
Change-Id: I3e391f489717db3456b884c6df0b1756497a1846
Reviewed-on: https://chromium-review.googlesource.com/616463
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#47385}
parent e08cdc277b
commit f464961b2c
@@ -1904,6 +1904,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                  i.InputOperand(2), i.InputInt8(1));
       break;
     }
+    case kIA32I32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      Register ireg = {dst.code()};
+      if (src.is_reg(ireg)) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignd(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubd(dst, src);
+      }
+      break;
+    }
     case kSSEI32x4Shl: {
       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
       break;
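A note on the kIA32I32x4Neg case above: when the destination register aliases the input, the code cannot zero the destination first, so it builds an all-ones mask (pcmpeqd x, x sets every bit) and negates through psignd; otherwise it computes 0 - x with pxor/psubd. A minimal scalar model of the two paths, for illustration only (not part of the commit):

#include <cstdint>

// Path 1 (dst aliases src): all-ones mask + psignd. Each mask lane is -1,
// so every lane takes the negate branch. (psignd is three-way: a negative
// mask lane negates, zero zeroes, positive copies.)
void neg_in_place(int32_t lanes[4]) {
  const int32_t mask = -1;  // a lane of kScratchDoubleReg after pcmpeqd
  for (int i = 0; i < 4; ++i) {
    lanes[i] = (mask < 0) ? -lanes[i] : (mask == 0) ? 0 : lanes[i];
  }
}

// Path 2 (dst is a fresh register): zero dst (pxor dst, dst), then
// subtract src lane-wise (psubd), i.e. 0 - x.
void neg_into(int32_t dst[4], const int32_t src[4]) {
  for (int i = 0; i < 4; ++i) dst[i] = 0 - src[i];
}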
@@ -1977,6 +1990,57 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                  i.InputOperand(1));
       break;
     }
+    case kSSEI32x4Eq: {
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4Eq: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4Ne: {
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kAVXI32x4Ne: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+               kScratchDoubleReg);
+      break;
+    }
+    case kSSEI32x4GtS: {
+      __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4GtS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4GeS: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminsd(dst, src);
+      __ pcmpeqd(dst, src);
+      break;
+    }
+    case kAVXI32x4GeS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminsd(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
     case kSSEI32x4ShrU: {
       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
       break;
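SSE2 offers equality (pcmpeqd) and signed greater-than (pcmpgtd) directly, but no Ne or GeS instruction, so the cases above derive them: Ne is Eq followed by a bitwise invert, and GeS relies on min(a, b) == b being equivalent to a >= b, which is why kSSEI32x4GeS needs the SSE4_1 scope for pminsd. A scalar sketch of both identities (illustrative, not V8 code):

#include <cstdint>

// Ne: pcmpeqd produces -1 (all ones) or 0 per lane; pxor with an
// all-ones mask inverts the result.
int32_t lane_ne(int32_t a, int32_t b) {
  int32_t eq = (a == b) ? -1 : 0;
  return eq ^ -1;
}

// GeS: a >= b  <=>  min(a, b) == b   (pminsd, then pcmpeqd against b)
int32_t lane_ge_s(int32_t a, int32_t b) {
  int32_t m = (a < b) ? a : b;
  return (m == b) ? -1 : 0;
}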
@@ -2009,6 +2073,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                  i.InputOperand(1));
       break;
     }
+    case kSSEI32x4GtU: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pmaxud(dst, src);
+      __ pcmpeqd(dst, src);
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXI32x4GtU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpmaxud(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSEI32x4GeU: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminud(dst, src);
+      __ pcmpeqd(dst, src);
+      break;
+    }
+    case kAVXI32x4GeU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminud(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
     case kIA32I16x8Splat: {
       XMMRegister dst = i.OutputSimd128Register();
       __ Movd(dst, i.InputOperand(0));
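There is no unsigned doubleword compare in SSE/AVX at all, so the unsigned cases above go through unsigned min/max (pminud/pmaxud, also SSE4.1): GeU tests min(a, b) == b, and GtU computes a <= b via max(a, b) == b and then inverts the mask. A scalar sketch (illustrative only):

#include <cstdint>

// GeU: pminud + pcmpeqd.  min(a, b) == b  <=>  b <= a  <=>  a >= b
int32_t lane_ge_u(uint32_t a, uint32_t b) {
  uint32_t m = (a < b) ? a : b;
  return (m == b) ? -1 : 0;
}

// GtU: pmaxud + pcmpeqd gives a <= b; pxor with all-ones inverts to a > b.
int32_t lane_gt_u(uint32_t a, uint32_t b) {
  uint32_t m = (a > b) ? a : b;
  int32_t le = (m == b) ? -1 : 0;  // max(a, b) == b  <=>  a <= b
  return le ^ -1;
}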
@@ -115,6 +115,7 @@ namespace compiler {
   V(IA32I32x4ExtractLane)  \
   V(SSEI32x4ReplaceLane)   \
   V(AVXI32x4ReplaceLane)   \
+  V(IA32I32x4Neg)          \
   V(SSEI32x4Shl)           \
   V(AVXI32x4Shl)           \
   V(SSEI32x4ShrS)          \
@@ -129,12 +130,24 @@ namespace compiler {
   V(AVXI32x4MinS)          \
   V(SSEI32x4MaxS)          \
   V(AVXI32x4MaxS)          \
+  V(SSEI32x4Eq)            \
+  V(AVXI32x4Eq)            \
+  V(SSEI32x4Ne)            \
+  V(AVXI32x4Ne)            \
+  V(SSEI32x4GtS)           \
+  V(AVXI32x4GtS)           \
+  V(SSEI32x4GeS)           \
+  V(AVXI32x4GeS)           \
   V(SSEI32x4ShrU)          \
   V(AVXI32x4ShrU)          \
   V(SSEI32x4MinU)          \
   V(AVXI32x4MinU)          \
   V(SSEI32x4MaxU)          \
   V(AVXI32x4MaxU)          \
+  V(SSEI32x4GtU)           \
+  V(AVXI32x4GtU)           \
+  V(SSEI32x4GeU)           \
+  V(AVXI32x4GeU)           \
   V(IA32I16x8Splat)        \
   V(IA32I16x8ExtractLane)  \
   V(SSEI16x8ReplaceLane)   \
@@ -101,6 +101,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I32x4ExtractLane:
     case kSSEI32x4ReplaceLane:
     case kAVXI32x4ReplaceLane:
+    case kIA32I32x4Neg:
     case kSSEI32x4Shl:
     case kAVXI32x4Shl:
     case kSSEI32x4ShrS:
@@ -115,12 +116,24 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXI32x4MinS:
     case kSSEI32x4MaxS:
     case kAVXI32x4MaxS:
+    case kSSEI32x4Eq:
+    case kAVXI32x4Eq:
+    case kSSEI32x4Ne:
+    case kAVXI32x4Ne:
+    case kSSEI32x4GtS:
+    case kAVXI32x4GtS:
+    case kSSEI32x4GeS:
+    case kAVXI32x4GeS:
     case kSSEI32x4ShrU:
     case kAVXI32x4ShrU:
     case kSSEI32x4MinU:
     case kAVXI32x4MinU:
     case kSSEI32x4MaxU:
     case kAVXI32x4MaxU:
+    case kSSEI32x4GtU:
+    case kAVXI32x4GtU:
+    case kSSEI32x4GeU:
+    case kAVXI32x4GeU:
     case kIA32I16x8Splat:
     case kIA32I16x8ExtractLane:
     case kSSEI16x8ReplaceLane:
@@ -1904,8 +1904,16 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I32x4Mul)  \
   V(I32x4MinS) \
   V(I32x4MaxS) \
+  V(I32x4Eq)   \
+  V(I32x4Ne)   \
+  V(I32x4GtS)  \
+  V(I32x4GeS)  \
   V(I32x4MinU) \
-  V(I32x4MaxU)
+  V(I32x4MaxU) \
+  V(I32x4GtU)  \
+  V(I32x4GeU)
 
+#define SIMD_UNOP_LIST(V) V(I32x4Neg)
+
 #define SIMD_SHIFT_OPCODES(V) \
   V(I32x4Shl)                 \
@@ -1960,6 +1968,14 @@ SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
 SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
 #undef VISIT_SIMD_SHIFT
 
+#define VISIT_SIMD_UNOP(Opcode)                                             \
+  void InstructionSelector::Visit##Opcode(Node* node) {                     \
+    IA32OperandGenerator g(this);                                           \
+    Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
+  }
+SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
+#undef VISIT_SIMD_UNOP
+
 #define VISIT_SIMD_BINOP(Opcode)                           \
   void InstructionSelector::Visit##Opcode(Node* node) {    \
     VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
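The new VISIT_SIMD_UNOP macro generates one visitor per SIMD_UNOP_LIST entry; with I32x4Neg as the only entry, it expands to exactly the following (expansion reproduced for illustration, whitespace reflowed):

// Selects the kIA32I32x4Neg instruction for a Wasm I32x4Neg node,
// defining the output in a register and using the input as-is.
void InstructionSelector::VisitI32x4Neg(Node* node) {
  IA32OperandGenerator g(this);
  Emit(kIA32I32x4Neg, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
}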
@@ -2132,20 +2132,11 @@ void InstructionSelector::VisitI32x4Mul(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI32x4MaxS(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI32x4MinS(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64
 
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4Eq(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI32x4Ne(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); }
@@ -2206,7 +2197,7 @@ void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
         // && !V8_TARGET_ARCH_MIPS64
 
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
+    !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4Neg(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI32x4GtS(Node* node) { UNIMPLEMENTED(); }
@@ -2216,11 +2207,7 @@ void InstructionSelector::VisitI32x4GeS(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI32x4GtU(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI32x4GeU(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI16x8Splat(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitI16x8ExtractLane(Node* node) { UNIMPLEMENTED(); }
@@ -1472,15 +1472,6 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
   }
 }
 
-void TurboAssembler::Pxor(XMMRegister dst, const Operand& src) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vpxor(dst, dst, src);
-  } else {
-    pxor(dst, src);
-  }
-}
-
 void TurboAssembler::Pshuflw(XMMRegister dst, const Operand& src,
                              uint8_t shuffle) {
   if (CpuFeatures::IsSupported(AVX)) {
@@ -1501,6 +1492,20 @@ void TurboAssembler::Pshufd(XMMRegister dst, const Operand& src,
   }
 }
 
+void TurboAssembler::Psignd(XMMRegister dst, const Operand& src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpsignd(dst, dst, src);
+    return;
+  }
+  if (CpuFeatures::IsSupported(SSSE3)) {
+    CpuFeatureScope sse_scope(this, SSSE3);
+    psignd(dst, src);
+    return;
+  }
+  UNREACHABLE();
+}
+
 void TurboAssembler::Pshufb(XMMRegister dst, const Operand& src) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
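Note that, unlike the helpers generated below for pcmpeqd/psubd/pxor, Psignd has no SSE2 fallback: psignd is an SSSE3 instruction, hence the UNREACHABLE() when neither AVX nor SSSE3 is available. Its per-lane semantics, as a reference sketch (not V8 code):

#include <cstdint>

// psignd dst, src (per 32-bit lane): negate, zero, or pass through dst
// depending on the sign of src. The I32x4Neg lowering only ever exercises
// the negative (all-ones mask) case.
int32_t psignd_lane(int32_t dst, int32_t src) {
  if (src < 0) return -dst;
  if (src == 0) return 0;
  return dst;
}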
@@ -202,9 +202,6 @@ class TurboAssembler : public Assembler {
   // may be bigger than 2^16 - 1. Requires a scratch register.
   void Ret(int bytes_dropped, Register scratch);
 
-  void Pxor(XMMRegister dst, XMMRegister src) { Pxor(dst, Operand(src)); }
-  void Pxor(XMMRegister dst, const Operand& src);
-
   void Pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
     Pshuflw(dst, Operand(src), shuffle);
   }
@@ -232,9 +229,33 @@ class TurboAssembler : public Assembler {
 
 #undef AVX_OP2_WITH_TYPE
 
+  // Only use these macros when non-destructive source of AVX version is not
+  // needed.
+#define AVX_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
+  void macro_name(dst_type dst, src_type src) {                 \
+    if (CpuFeatures::IsSupported(AVX)) {                        \
+      CpuFeatureScope scope(this, AVX);                         \
+      v##name(dst, dst, src);                                   \
+    } else {                                                    \
+      name(dst, src);                                           \
+    }                                                           \
+  }
+#define AVX_OP3_XO(macro_name, name)                            \
+  AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
+  AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, const Operand&)
+
+  AVX_OP3_XO(Pcmpeqd, pcmpeqd)
+  AVX_OP3_XO(Psubd, psubd)
+  AVX_OP3_XO(Pxor, pxor)
+
+#undef AVX_OP3_XO
+#undef AVX_OP3_WITH_TYPE
+
   // Non-SSE2 instructions.
   void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
   void Pshufb(XMMRegister dst, const Operand& src);
+  void Psignd(XMMRegister dst, XMMRegister src) { Psignd(dst, Operand(src)); }
+  void Psignd(XMMRegister dst, const Operand& src);
 
   void Pextrb(Register dst, XMMRegister src, int8_t imm8);
   void Pextrw(Register dst, XMMRegister src, int8_t imm8);
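For example, AVX_OP3_XO(Pxor, pxor) expands to the two overloads sketched below, which is why the hand-written TurboAssembler::Pxor definitions could be deleted from the .cc file in the hunk above (expansion shown for illustration, reformatted):

void Pxor(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpxor(dst, dst, src);  // AVX three-operand form, dst doubles as src1
  } else {
    pxor(dst, src);        // destructive SSE2 form
  }
}
// plus an identical overload taking const Operand& as the source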
@@ -923,8 +923,6 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
 #endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
         // V8_TARGET_ARCH_MIPS64
 
-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET || \
-    V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
 void RunI32x4UnOpTest(WasmOpcode simd_op, Int32UnOp expected_op) {
   WasmRunner<int32_t, int32_t, int32_t> r(kExecuteCompiled);
   byte a = 0;
@@ -938,8 +936,6 @@ void RunI32x4UnOpTest(WasmOpcode simd_op, Int32UnOp expected_op) {
 }
 
 WASM_SIMD_TEST(I32x4Neg) { RunI32x4UnOpTest(kExprI32x4Neg, Negate); }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET ||
-        // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
 
 #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || SIMD_LOWERING_TARGET || \
     V8_TARGET_ARCH_X64
@@ -990,6 +986,8 @@ WASM_SIMD_TEST(S128And) { RunI32x4BinOpTest(kExprS128And, And); }
 WASM_SIMD_TEST(S128Or) { RunI32x4BinOpTest(kExprS128Or, Or); }
 
 WASM_SIMD_TEST(S128Xor) { RunI32x4BinOpTest(kExprS128Xor, Xor); }
+#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
+        // SIMD_LOWERING_TARGET || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 
 void RunI32x4CompareOpTest(WasmOpcode simd_op, Int32CompareOp expected_op) {
   WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteCompiled);
@@ -1034,8 +1032,6 @@ WASM_SIMD_TEST(I32x4GtU) {
 WASM_SIMD_TEST(I32x4GeU) {
   RunI32x4CompareOpTest(kExprI32x4GeU, UnsignedGreaterEqual);
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
-        // SIMD_LOWERING_TARGET || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 
 void RunI32x4ShiftOpTest(WasmOpcode simd_op, Int32ShiftOp expected_op,
                          int shift) {
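The compare tests pass scalar reference functions such as UnsignedGreaterEqual to RunI32x4CompareOpTest. A Wasm SIMD compare produces an all-ones lane (-1) for true and all-zeros for false, so the reference functions follow the same convention; the sketch below illustrates it (the actual helper bodies are outside this diff, so treat the exact definitions as an assumption):

#include <cstdint>

// Illustrative reference functions in the tests' -1/0 mask convention.
// Bodies are assumptions; the real helpers live elsewhere in the test file.
int32_t Negate(int32_t a) { return -a; }  // used by RunI32x4UnOpTest

int32_t UnsignedGreaterEqual(int32_t a, int32_t b) {
  return static_cast<uint32_t>(a) >= static_cast<uint32_t>(b) ? -1 : 0;
}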