[ia32][wasm] Add AnyTrue and AllTrue.
Complete ptest and add vptest.
Add Pcmpeqb, Pcmpeqw, Ptest macro.

Change-Id: I060aa5228d50f2dca7ed3e24324a08c04ec1a028
Reviewed-on: https://chromium-review.googlesource.com/1013236
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#52687}
This commit is contained in:
parent
a0d4005c99
commit
87d8895494
@ -3108,6 +3108,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32S1x4AnyTrue:
|
||||
case kIA32S1x8AnyTrue:
|
||||
case kIA32S1x16AnyTrue: {
|
||||
Register dst = i.OutputRegister();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
Register tmp = i.TempRegister(0);
|
||||
__ xor_(tmp, tmp);
|
||||
__ mov(dst, Immediate(-1));
|
||||
__ Ptest(src, src);
|
||||
__ cmov(zero, dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kIA32S1x4AllTrue:
|
||||
case kIA32S1x8AllTrue:
|
||||
case kIA32S1x16AllTrue: {
|
||||
Register dst = i.OutputRegister();
|
||||
Operand src = i.InputOperand(0);
|
||||
Register tmp = i.TempRegister(0);
|
||||
__ mov(tmp, Immediate(-1));
|
||||
__ xor_(dst, dst);
|
||||
// Compare all src lanes to false.
|
||||
__ Pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
if (arch_opcode == kIA32S1x4AllTrue) {
|
||||
__ Pcmpeqd(kScratchDoubleReg, src);
|
||||
} else if (arch_opcode == kIA32S1x8AllTrue) {
|
||||
__ Pcmpeqw(kScratchDoubleReg, src);
|
||||
} else {
|
||||
__ Pcmpeqb(kScratchDoubleReg, src);
|
||||
}
|
||||
// If kScratchDoubleReg is all zero, none of src lanes are false.
|
||||
__ Ptest(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ cmov(zero, dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kIA32StackCheck: {
|
||||
ExternalReference const stack_limit =
|
||||
ExternalReference::address_of_stack_limit(__ isolate());
|
||||
|
@ -293,7 +293,13 @@ namespace compiler {
|
||||
V(SSES128Select) \
|
||||
V(AVXS128Select) \
|
||||
V(IA32S8x16Shuffle) \
|
||||
V(IA32S32x4Swizzle)
|
||||
V(IA32S32x4Swizzle) \
|
||||
V(IA32S1x4AnyTrue) \
|
||||
V(IA32S1x4AllTrue) \
|
||||
V(IA32S1x8AnyTrue) \
|
||||
V(IA32S1x8AllTrue) \
|
||||
V(IA32S1x16AnyTrue) \
|
||||
V(IA32S1x16AllTrue)
|
||||
|
||||
// Addressing modes represent the "shape" of inputs to an instruction.
|
||||
// Many instructions support multiple addressing modes. Addressing modes
|
||||
|
@ -276,6 +276,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXS128Select:
|
||||
case kIA32S8x16Shuffle:
|
||||
case kIA32S32x4Swizzle:
|
||||
case kIA32S1x4AnyTrue:
|
||||
case kIA32S1x4AllTrue:
|
||||
case kIA32S1x8AnyTrue:
|
||||
case kIA32S1x8AllTrue:
|
||||
case kIA32S1x16AnyTrue:
|
||||
case kIA32S1x16AllTrue:
|
||||
return (instr->addressing_mode() == kMode_None)
|
||||
? kNoOpcodeFlags
|
||||
: kIsLoadOperation | kHasSideEffect;
|
||||
|
@ -1808,6 +1808,16 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(F32x4Neg) \
|
||||
V(S128Not)
|
||||
|
||||
#define SIMD_ANYTRUE_LIST(V) \
|
||||
V(S1x4AnyTrue) \
|
||||
V(S1x8AnyTrue) \
|
||||
V(S1x16AnyTrue)
|
||||
|
||||
#define SIMD_ALLTRUE_LIST(V) \
|
||||
V(S1x4AllTrue) \
|
||||
V(S1x8AllTrue) \
|
||||
V(S1x16AllTrue)
|
||||
|
||||
#define SIMD_SHIFT_OPCODES(V) \
|
||||
V(I32x4Shl) \
|
||||
V(I32x4ShrS) \
|
||||
@ -1967,6 +1977,28 @@ SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
|
||||
#undef VISIT_SIMD_UNOP_PREFIX
|
||||
#undef SIMD_UNOP_PREFIX_LIST
|
||||
|
||||
// Generates InstructionSelector::Visit<Opcode> for every opcode in
// SIMD_ANYTRUE_LIST. Each visitor emits the matching kIA32<Opcode>
// instruction with:
//   - the result defined as a register,
//   - the single input forced into a register (UseRegister), and
//   - one temp register made available to the code generator.
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
IA32OperandGenerator g(this); \
|
||||
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
|
||||
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
|
||||
}
|
||||
SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
|
||||
#undef VISIT_SIMD_ANYTRUE
|
||||
#undef SIMD_ANYTRUE_LIST
|
||||
|
||||
// Generates InstructionSelector::Visit<Opcode> for every opcode in
// SIMD_ALLTRUE_LIST. Each visitor emits the matching kIA32<Opcode>
// instruction with the result defined as a register and one temp register.
// Unlike the AnyTrue visitors, the input is passed with g.Use() rather than
// g.UseRegister(); the code generator reads it through i.InputOperand(0).
#define VISIT_SIMD_ALLTRUE(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
IA32OperandGenerator g(this); \
|
||||
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||
Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), \
|
||||
arraysize(temps), temps); \
|
||||
}
|
||||
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
|
||||
#undef VISIT_SIMD_ALLTRUE
|
||||
#undef SIMD_ALLTRUE_LIST
|
||||
|
||||
#define VISIT_SIMD_BINOP(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||
|
@ -2422,11 +2422,7 @@ void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64
|
||||
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
@ -2439,7 +2435,7 @@ void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
|
@ -2605,7 +2605,7 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
|
||||
EMIT(imm8);
|
||||
}
|
||||
|
||||
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::ptest(XMMRegister dst, Operand src) {
|
||||
DCHECK(IsEnabled(SSE4_1));
|
||||
EnsureSpace ensure_space(this);
|
||||
EMIT(0x66);
|
||||
|
@ -1109,7 +1109,8 @@ class Assembler : public AssemblerBase {
|
||||
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
|
||||
void extractps(Register dst, XMMRegister src, byte imm8);
|
||||
|
||||
void ptest(XMMRegister dst, XMMRegister src);
|
||||
void ptest(XMMRegister dst, XMMRegister src) { ptest(dst, Operand(src)); }
|
||||
void ptest(XMMRegister dst, Operand src);
|
||||
|
||||
void psllw(XMMRegister reg, int8_t shift);
|
||||
void pslld(XMMRegister reg, int8_t shift);
|
||||
@ -1428,6 +1429,10 @@ class Assembler : public AssemblerBase {
|
||||
}
|
||||
void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
|
||||
|
||||
// AVX form of ptest. The register-register overload wraps |src| in an
// Operand and forwards to the Operand overload below.
void vptest(XMMRegister dst, XMMRegister src) { vptest(dst, Operand(src)); }
|
||||
// Emits the instruction through vinstr with opcode 0x17 and the
// k66 / k0F38 / kWIG encoding selectors.
// NOTE(review): xmm0 is passed as the middle register argument, presumably
// as a placeholder because vptest takes only two operands -- confirm against
// vinstr's parameter meaning.
void vptest(XMMRegister dst, Operand src) {
|
||||
vinstr(0x17, dst, xmm0, src, k66, k0F38, kWIG);
|
||||
}
|
||||
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8);
|
||||
|
@ -735,6 +735,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
int mod, regop, rm, vvvv = vex_vreg();
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
switch (opcode) {
|
||||
case 0x17:
|
||||
AppendToBuffer("vptest %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x99:
|
||||
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
|
||||
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
|
||||
@ -1931,9 +1935,8 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
get_modrm(*data, &mod, &regop, &rm);
|
||||
switch (op) {
|
||||
case 0x17:
|
||||
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(rm));
|
||||
data++;
|
||||
AppendToBuffer("ptest %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightXMMOperand(data);
|
||||
break;
|
||||
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
|
||||
case 0x##opcode: { \
|
||||
|
@ -1323,6 +1323,20 @@ void TurboAssembler::Psignd(XMMRegister dst, Operand src) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Macro-assembler wrapper that emits a ptest of |dst| against |src| using
// the best encoding the current CPU supports:
//   - AVX available    -> vptest
//   - SSE4_1 available -> ptest
// Reaching the end means neither feature is supported, which is treated as
// a programming error (UNREACHABLE).
void TurboAssembler::Ptest(XMMRegister dst, Operand src) {
|
||||
// Prefer the AVX (VEX-encoded) form when it is available.
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vptest(dst, src);
|
||||
return;
|
||||
}
|
||||
// Otherwise fall back to the SSE4.1 encoding.
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope sse_scope(this, SSE4_1);
|
||||
ptest(dst, src);
|
||||
return;
|
||||
}
|
||||
// No supported encoding: callers must not use Ptest without SSE4_1 or AVX.
UNREACHABLE();
|
||||
}
|
||||
|
||||
void TurboAssembler::Pshufb(XMMRegister dst, Operand src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
|
@ -250,6 +250,8 @@ class TurboAssembler : public Assembler {
|
||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
|
||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
|
||||
|
||||
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP3_XO(Psubb, psubb)
|
||||
AVX_OP3_XO(Psubw, psubw)
|
||||
@ -266,6 +268,9 @@ class TurboAssembler : public Assembler {
|
||||
#undef AVX_OP3_WITH_TYPE
|
||||
|
||||
// Non-SSE2 instructions.
|
||||
void Ptest(XMMRegister dst, XMMRegister src) { Ptest(dst, Operand(src)); }
|
||||
void Ptest(XMMRegister dst, Operand src);
|
||||
|
||||
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
|
||||
void Pshufb(XMMRegister dst, Operand src);
|
||||
|
||||
|
@ -568,6 +568,8 @@ TEST(DisasmIa320) {
|
||||
__ pinsrd(xmm1, eax, 0);
|
||||
__ pinsrd(xmm1, Operand(edx, 4), 0);
|
||||
__ extractps(eax, xmm1, 0);
|
||||
__ ptest(xmm5, xmm1);
|
||||
__ ptest(xmm5, Operand(edx, 4));
|
||||
|
||||
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
|
||||
}
|
||||
@ -660,6 +662,9 @@ TEST(DisasmIa320) {
|
||||
__ vmaxpd(xmm0, xmm1, xmm2);
|
||||
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
|
||||
__ vptest(xmm5, xmm1);
|
||||
__ vptest(xmm5, Operand(edx, 4));
|
||||
|
||||
__ vpsllw(xmm0, xmm7, 21);
|
||||
__ vpslld(xmm0, xmm7, 21);
|
||||
__ vpsrlw(xmm0, xmm7, 21);
|
||||
|
@ -1868,11 +1868,7 @@ WASM_SIMD_TEST(S8x16Concat) {
|
||||
RunBinaryLaneOpTest(lower_simd, kExprS8x16Shuffle, expected);
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64
|
||||
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
|
||||
// result. Use relational ops on numeric vectors to create the boolean vector
|
||||
// test inputs. Test inputs with all true, all false, one true, and one false.
|
||||
@ -1960,7 +1956,7 @@ WASM_SIMD_TEST(SimdI32x4ExtractWithF32x4) {
|
||||
CHECK_EQ(1, r.Call());
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
WASM_SIMD_TEST(SimdF32x4ExtractWithI32x4) {
|
||||
WasmRunner<int32_t> r(kExecuteTurbofan, lower_simd);
|
||||
|
Loading…
Reference in New Issue
Block a user