[ia32][wasm] Add AnyTrue and AllTrue.
Complete ptest and add vptest. Add Pcmpeqb, Pcmpeqw, and Ptest macros.
Change-Id: I060aa5228d50f2dca7ed3e24324a08c04ec1a028
Reviewed-on: https://chromium-review.googlesource.com/1013236
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#52687}
This commit is contained in:
parent
a0d4005c99
commit
87d8895494
@ -3108,6 +3108,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
|
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case kIA32S1x4AnyTrue:
|
||||||
|
case kIA32S1x8AnyTrue:
|
||||||
|
case kIA32S1x16AnyTrue: {
|
||||||
|
Register dst = i.OutputRegister();
|
||||||
|
XMMRegister src = i.InputSimd128Register(0);
|
||||||
|
Register tmp = i.TempRegister(0);
|
||||||
|
__ xor_(tmp, tmp);
|
||||||
|
__ mov(dst, Immediate(-1));
|
||||||
|
__ Ptest(src, src);
|
||||||
|
__ cmov(zero, dst, tmp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kIA32S1x4AllTrue:
|
||||||
|
case kIA32S1x8AllTrue:
|
||||||
|
case kIA32S1x16AllTrue: {
|
||||||
|
Register dst = i.OutputRegister();
|
||||||
|
Operand src = i.InputOperand(0);
|
||||||
|
Register tmp = i.TempRegister(0);
|
||||||
|
__ mov(tmp, Immediate(-1));
|
||||||
|
__ xor_(dst, dst);
|
||||||
|
// Compare all src lanes to false.
|
||||||
|
__ Pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||||
|
if (arch_opcode == kIA32S1x4AllTrue) {
|
||||||
|
__ Pcmpeqd(kScratchDoubleReg, src);
|
||||||
|
} else if (arch_opcode == kIA32S1x8AllTrue) {
|
||||||
|
__ Pcmpeqw(kScratchDoubleReg, src);
|
||||||
|
} else {
|
||||||
|
__ Pcmpeqb(kScratchDoubleReg, src);
|
||||||
|
}
|
||||||
|
// If kScratchDoubleReg is all zero, none of src lanes are false.
|
||||||
|
__ Ptest(kScratchDoubleReg, kScratchDoubleReg);
|
||||||
|
__ cmov(zero, dst, tmp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case kIA32StackCheck: {
|
case kIA32StackCheck: {
|
||||||
ExternalReference const stack_limit =
|
ExternalReference const stack_limit =
|
||||||
ExternalReference::address_of_stack_limit(__ isolate());
|
ExternalReference::address_of_stack_limit(__ isolate());
|
||||||
|
@ -293,7 +293,13 @@ namespace compiler {
|
|||||||
V(SSES128Select) \
|
V(SSES128Select) \
|
||||||
V(AVXS128Select) \
|
V(AVXS128Select) \
|
||||||
V(IA32S8x16Shuffle) \
|
V(IA32S8x16Shuffle) \
|
||||||
V(IA32S32x4Swizzle)
|
V(IA32S32x4Swizzle) \
|
||||||
|
V(IA32S1x4AnyTrue) \
|
||||||
|
V(IA32S1x4AllTrue) \
|
||||||
|
V(IA32S1x8AnyTrue) \
|
||||||
|
V(IA32S1x8AllTrue) \
|
||||||
|
V(IA32S1x16AnyTrue) \
|
||||||
|
V(IA32S1x16AllTrue)
|
||||||
|
|
||||||
// Addressing modes represent the "shape" of inputs to an instruction.
|
// Addressing modes represent the "shape" of inputs to an instruction.
|
||||||
// Many instructions support multiple addressing modes. Addressing modes
|
// Many instructions support multiple addressing modes. Addressing modes
|
||||||
|
@ -276,6 +276,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kAVXS128Select:
|
case kAVXS128Select:
|
||||||
case kIA32S8x16Shuffle:
|
case kIA32S8x16Shuffle:
|
||||||
case kIA32S32x4Swizzle:
|
case kIA32S32x4Swizzle:
|
||||||
|
case kIA32S1x4AnyTrue:
|
||||||
|
case kIA32S1x4AllTrue:
|
||||||
|
case kIA32S1x8AnyTrue:
|
||||||
|
case kIA32S1x8AllTrue:
|
||||||
|
case kIA32S1x16AnyTrue:
|
||||||
|
case kIA32S1x16AllTrue:
|
||||||
return (instr->addressing_mode() == kMode_None)
|
return (instr->addressing_mode() == kMode_None)
|
||||||
? kNoOpcodeFlags
|
? kNoOpcodeFlags
|
||||||
: kIsLoadOperation | kHasSideEffect;
|
: kIsLoadOperation | kHasSideEffect;
|
||||||
|
@ -1808,6 +1808,16 @@ VISIT_ATOMIC_BINOP(Xor)
|
|||||||
V(F32x4Neg) \
|
V(F32x4Neg) \
|
||||||
V(S128Not)
|
V(S128Not)
|
||||||
|
|
||||||
|
// List macro naming the three AnyTrue opcodes (S1x4, S1x8, S1x16). It is
// applied to VISIT_SIMD_ANYTRUE to generate one InstructionSelector visitor
// per entry, and is #undef'd right after that expansion.
#define SIMD_ANYTRUE_LIST(V) \
|
||||||
|
V(S1x4AnyTrue) \
|
||||||
|
V(S1x8AnyTrue) \
|
||||||
|
V(S1x16AnyTrue)
|
||||||
|
|
||||||
|
// List macro naming the three AllTrue opcodes (S1x4, S1x8, S1x16). It is
// applied to VISIT_SIMD_ALLTRUE to generate one InstructionSelector visitor
// per entry, and is #undef'd right after that expansion.
#define SIMD_ALLTRUE_LIST(V) \
|
||||||
|
V(S1x4AllTrue) \
|
||||||
|
V(S1x8AllTrue) \
|
||||||
|
V(S1x16AllTrue)
|
||||||
|
|
||||||
#define SIMD_SHIFT_OPCODES(V) \
|
#define SIMD_SHIFT_OPCODES(V) \
|
||||||
V(I32x4Shl) \
|
V(I32x4Shl) \
|
||||||
V(I32x4ShrS) \
|
V(I32x4ShrS) \
|
||||||
@ -1967,6 +1977,28 @@ SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
|
|||||||
#undef VISIT_SIMD_UNOP_PREFIX
|
#undef VISIT_SIMD_UNOP_PREFIX
|
||||||
#undef SIMD_UNOP_PREFIX_LIST
|
#undef SIMD_UNOP_PREFIX_LIST
|
||||||
|
|
||||||
|
// Defines InstructionSelector::Visit<Opcode> for an AnyTrue operation.
// The visitor emits kIA32<Opcode> with:
//   - the node's result defined as a register,
//   - input 0 requested via UseRegister (the code generator reads it with
//     InputSimd128Register(0)),
//   - one temp register (the code generator reads it with TempRegister(0)).
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
||||||
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
|
IA32OperandGenerator g(this); \
|
||||||
|
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||||
|
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
|
||||||
|
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
|
||||||
|
}
|
||||||
|
SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
|
||||||
|
#undef VISIT_SIMD_ANYTRUE
|
||||||
|
#undef SIMD_ANYTRUE_LIST
|
||||||
|
|
||||||
|
// Defines InstructionSelector::Visit<Opcode> for an AllTrue operation.
// The visitor emits kIA32<Opcode> with:
//   - the node's result defined as a register,
//   - input 0 requested via g.Use (the code generator reads it with
//     InputOperand(0); presumably this permits a non-register operand --
//     TODO confirm against IA32OperandGenerator),
//   - one temp register (the code generator reads it with TempRegister(0)).
#define VISIT_SIMD_ALLTRUE(Opcode) \
|
||||||
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
|
IA32OperandGenerator g(this); \
|
||||||
|
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||||
|
Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), \
|
||||||
|
arraysize(temps), temps); \
|
||||||
|
}
|
||||||
|
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
|
||||||
|
#undef VISIT_SIMD_ALLTRUE
|
||||||
|
#undef SIMD_ALLTRUE_LIST
|
||||||
|
|
||||||
#define VISIT_SIMD_BINOP(Opcode) \
|
#define VISIT_SIMD_BINOP(Opcode) \
|
||||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
|
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||||
|
@ -2422,11 +2422,7 @@ void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
|
|||||||
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
|
||||||
|
|
||||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
|
||||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
|
||||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
|
||||||
|
|
||||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
|
||||||
!V8_TARGET_ARCH_MIPS64
|
|
||||||
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||||
|
|
||||||
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||||
@ -2439,7 +2435,7 @@ void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
|||||||
|
|
||||||
void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||||
// && !V8_TARGET_ARCH_MIPS64
|
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||||
|
|
||||||
|
@ -2605,7 +2605,7 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
|
|||||||
EMIT(imm8);
|
EMIT(imm8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
|
void Assembler::ptest(XMMRegister dst, Operand src) {
|
||||||
DCHECK(IsEnabled(SSE4_1));
|
DCHECK(IsEnabled(SSE4_1));
|
||||||
EnsureSpace ensure_space(this);
|
EnsureSpace ensure_space(this);
|
||||||
EMIT(0x66);
|
EMIT(0x66);
|
||||||
|
@ -1109,7 +1109,8 @@ class Assembler : public AssemblerBase {
|
|||||||
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
|
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
|
||||||
void extractps(Register dst, XMMRegister src, byte imm8);
|
void extractps(Register dst, XMMRegister src, byte imm8);
|
||||||
|
|
||||||
void ptest(XMMRegister dst, XMMRegister src);
|
void ptest(XMMRegister dst, XMMRegister src) { ptest(dst, Operand(src)); }
|
||||||
|
void ptest(XMMRegister dst, Operand src);
|
||||||
|
|
||||||
void psllw(XMMRegister reg, int8_t shift);
|
void psllw(XMMRegister reg, int8_t shift);
|
||||||
void pslld(XMMRegister reg, int8_t shift);
|
void pslld(XMMRegister reg, int8_t shift);
|
||||||
@ -1428,6 +1429,10 @@ class Assembler : public AssemblerBase {
|
|||||||
}
|
}
|
||||||
void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
|
void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
|
||||||
|
|
||||||
|
// Register-register form of vptest: wraps src in an Operand and forwards to
// the Operand overload below.
void vptest(XMMRegister dst, XMMRegister src) { vptest(dst, Operand(src)); }
|
||||||
|
// Operand form of vptest: encodes the instruction through vinstr with opcode
// 0x17 and the k66 / k0F38 / kWIG selectors.
void vptest(XMMRegister dst, Operand src) {
|
||||||
|
// NOTE(review): xmm0 is passed in vinstr's second register slot; presumably
// a placeholder because vptest has no second source register -- confirm
// against vinstr's parameter order.
vinstr(0x17, dst, xmm0, src, k66, k0F38, kWIG);
|
||||||
|
}
|
||||||
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||||
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
|
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||||
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||||
|
@ -735,6 +735,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
|||||||
int mod, regop, rm, vvvv = vex_vreg();
|
int mod, regop, rm, vvvv = vex_vreg();
|
||||||
get_modrm(*current, &mod, &regop, &rm);
|
get_modrm(*current, &mod, &regop, &rm);
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
|
case 0x17:
|
||||||
|
AppendToBuffer("vptest %s,", NameOfXMMRegister(regop));
|
||||||
|
current += PrintRightXMMOperand(current);
|
||||||
|
break;
|
||||||
case 0x99:
|
case 0x99:
|
||||||
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
|
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
|
||||||
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
|
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
|
||||||
@ -1931,9 +1935,8 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
|||||||
get_modrm(*data, &mod, &regop, &rm);
|
get_modrm(*data, &mod, &regop, &rm);
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case 0x17:
|
case 0x17:
|
||||||
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
|
AppendToBuffer("ptest %s,", NameOfXMMRegister(regop));
|
||||||
NameOfXMMRegister(rm));
|
data += PrintRightXMMOperand(data);
|
||||||
data++;
|
|
||||||
break;
|
break;
|
||||||
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
|
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
|
||||||
case 0x##opcode: { \
|
case 0x##opcode: { \
|
||||||
|
@ -1323,6 +1323,20 @@ void TurboAssembler::Psignd(XMMRegister dst, Operand src) {
|
|||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a packed test of dst against src, choosing the encoding from the
// CPU features reported by CpuFeatures::IsSupported:
//   - AVX supported: emits vptest,
//   - otherwise, SSE4_1 supported: emits ptest,
//   - neither: hits UNREACHABLE().
void TurboAssembler::Ptest(XMMRegister dst, Operand src) {
|
||||||
|
// AVX is checked first, so vptest is chosen whenever AVX is available.
if (CpuFeatures::IsSupported(AVX)) {
|
||||||
|
CpuFeatureScope scope(this, AVX);
|
||||||
|
vptest(dst, src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Assembler::ptest DCHECKs IsEnabled(SSE4_1), hence the feature scope here.
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||||
|
CpuFeatureScope sse_scope(this, SSE4_1);
|
||||||
|
ptest(dst, src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Reached only when neither AVX nor SSE4_1 is supported.
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
void TurboAssembler::Pshufb(XMMRegister dst, Operand src) {
|
void TurboAssembler::Pshufb(XMMRegister dst, Operand src) {
|
||||||
if (CpuFeatures::IsSupported(AVX)) {
|
if (CpuFeatures::IsSupported(AVX)) {
|
||||||
CpuFeatureScope scope(this, AVX);
|
CpuFeatureScope scope(this, AVX);
|
||||||
|
@ -250,6 +250,8 @@ class TurboAssembler : public Assembler {
|
|||||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
|
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
|
||||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
|
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
|
||||||
|
|
||||||
|
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
|
||||||
|
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
|
||||||
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
|
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
|
||||||
AVX_OP3_XO(Psubb, psubb)
|
AVX_OP3_XO(Psubb, psubb)
|
||||||
AVX_OP3_XO(Psubw, psubw)
|
AVX_OP3_XO(Psubw, psubw)
|
||||||
@ -266,6 +268,9 @@ class TurboAssembler : public Assembler {
|
|||||||
#undef AVX_OP3_WITH_TYPE
|
#undef AVX_OP3_WITH_TYPE
|
||||||
|
|
||||||
// Non-SSE2 instructions.
|
// Non-SSE2 instructions.
|
||||||
|
void Ptest(XMMRegister dst, XMMRegister src) { Ptest(dst, Operand(src)); }
|
||||||
|
void Ptest(XMMRegister dst, Operand src);
|
||||||
|
|
||||||
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
|
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
|
||||||
void Pshufb(XMMRegister dst, Operand src);
|
void Pshufb(XMMRegister dst, Operand src);
|
||||||
|
|
||||||
|
@ -568,6 +568,8 @@ TEST(DisasmIa320) {
|
|||||||
__ pinsrd(xmm1, eax, 0);
|
__ pinsrd(xmm1, eax, 0);
|
||||||
__ pinsrd(xmm1, Operand(edx, 4), 0);
|
__ pinsrd(xmm1, Operand(edx, 4), 0);
|
||||||
__ extractps(eax, xmm1, 0);
|
__ extractps(eax, xmm1, 0);
|
||||||
|
__ ptest(xmm5, xmm1);
|
||||||
|
__ ptest(xmm5, Operand(edx, 4));
|
||||||
|
|
||||||
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
|
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
|
||||||
}
|
}
|
||||||
@ -660,6 +662,9 @@ TEST(DisasmIa320) {
|
|||||||
__ vmaxpd(xmm0, xmm1, xmm2);
|
__ vmaxpd(xmm0, xmm1, xmm2);
|
||||||
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||||
|
|
||||||
|
__ vptest(xmm5, xmm1);
|
||||||
|
__ vptest(xmm5, Operand(edx, 4));
|
||||||
|
|
||||||
__ vpsllw(xmm0, xmm7, 21);
|
__ vpsllw(xmm0, xmm7, 21);
|
||||||
__ vpslld(xmm0, xmm7, 21);
|
__ vpslld(xmm0, xmm7, 21);
|
||||||
__ vpsrlw(xmm0, xmm7, 21);
|
__ vpsrlw(xmm0, xmm7, 21);
|
||||||
|
@ -1868,11 +1868,7 @@ WASM_SIMD_TEST(S8x16Concat) {
|
|||||||
RunBinaryLaneOpTest(lower_simd, kExprS8x16Shuffle, expected);
|
RunBinaryLaneOpTest(lower_simd, kExprS8x16Shuffle, expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
|
||||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
|
||||||
|
|
||||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
|
||||||
V8_TARGET_ARCH_MIPS64
|
|
||||||
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
|
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
|
||||||
// result. Use relational ops on numeric vectors to create the boolean vector
|
// result. Use relational ops on numeric vectors to create the boolean vector
|
||||||
// test inputs. Test inputs with all true, all false, one true, and one false.
|
// test inputs. Test inputs with all true, all false, one true, and one false.
|
||||||
@ -1960,7 +1956,7 @@ WASM_SIMD_TEST(SimdI32x4ExtractWithF32x4) {
|
|||||||
CHECK_EQ(1, r.Call());
|
CHECK_EQ(1, r.Call());
|
||||||
}
|
}
|
||||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||||
// V8_TARGET_ARCH_MIPS64
|
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
WASM_SIMD_TEST(SimdF32x4ExtractWithI32x4) {
|
WASM_SIMD_TEST(SimdF32x4ExtractWithI32x4) {
|
||||||
WasmRunner<int32_t> r(kExecuteTurbofan, lower_simd);
|
WasmRunner<int32_t> r(kExecuteTurbofan, lower_simd);
|
||||||
|
Loading…
Reference in New Issue
Block a user