[ia32][wasm] Add AnyTrue and AllTrue.

Complete ptest and add vptest
Add Pcmpeqb, Pcmpeqw, Ptest macro

Change-Id: I060aa5228d50f2dca7ed3e24324a08c04ec1a028
Reviewed-on: https://chromium-review.googlesource.com/1013236
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#52687}
This commit is contained in:
jing.bao 2018-04-19 13:29:29 +08:00 committed by Commit Bot
parent a0d4005c99
commit 87d8895494
12 changed files with 118 additions and 16 deletions

View File

@ -3108,6 +3108,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
break;
}
// AnyTrue reduction, shared by all three lane widths: leaves -1 in the output
// register unless the ptest below reports zero, in which case it leaves 0.
case kIA32S1x4AnyTrue:
case kIA32S1x8AnyTrue:
case kIA32S1x16AnyTrue: {
Register dst = i.OutputRegister();
XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.TempRegister(0);
// tmp = 0 (the "false" result); dst is preloaded with -1 (the "true" result).
// Both are set up before Ptest so the flags it produces reach the cmov.
__ xor_(tmp, tmp);
__ mov(dst, Immediate(-1));
// Testing src against itself: the zero condition holds only when src is all
// zero bits.
__ Ptest(src, src);
// On zero, overwrite the preloaded -1 with the 0 held in tmp.
__ cmov(zero, dst, tmp);
break;
}
// AllTrue reduction: leaves -1 in the output register when no lane of the
// input compares equal to zero, and 0 otherwise. The lane width only affects
// which packed-compare instruction is emitted.
case kIA32S1x4AllTrue:
case kIA32S1x8AllTrue:
case kIA32S1x16AllTrue: {
Register dst = i.OutputRegister();
Operand src = i.InputOperand(0);
Register tmp = i.TempRegister(0);
// tmp = -1 (the "true" result); dst is preloaded with 0 (the "false" result).
__ mov(tmp, Immediate(-1));
__ xor_(dst, dst);
// Compare all src lanes to false.
// The scratch register is zeroed first so the packed compare below checks
// each src lane for equality with zero.
__ Pxor(kScratchDoubleReg, kScratchDoubleReg);
if (arch_opcode == kIA32S1x4AllTrue) {
__ Pcmpeqd(kScratchDoubleReg, src);
} else if (arch_opcode == kIA32S1x8AllTrue) {
__ Pcmpeqw(kScratchDoubleReg, src);
} else {
__ Pcmpeqb(kScratchDoubleReg, src);
}
// If kScratchDoubleReg is all zero, none of src lanes are false.
__ Ptest(kScratchDoubleReg, kScratchDoubleReg);
// On zero (no false lane found), overwrite the preloaded 0 with -1 from tmp.
__ cmov(zero, dst, tmp);
break;
}
case kIA32StackCheck: {
ExternalReference const stack_limit =
ExternalReference::address_of_stack_limit(__ isolate());

View File

@ -293,7 +293,13 @@ namespace compiler {
V(SSES128Select) \
V(AVXS128Select) \
V(IA32S8x16Shuffle) \
V(IA32S32x4Swizzle)
V(IA32S32x4Swizzle) \
V(IA32S1x4AnyTrue) \
V(IA32S1x4AllTrue) \
V(IA32S1x8AnyTrue) \
V(IA32S1x8AllTrue) \
V(IA32S1x16AnyTrue) \
V(IA32S1x16AllTrue)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes

View File

@ -276,6 +276,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS128Select:
case kIA32S8x16Shuffle:
case kIA32S32x4Swizzle:
case kIA32S1x4AnyTrue:
case kIA32S1x4AllTrue:
case kIA32S1x8AnyTrue:
case kIA32S1x8AllTrue:
case kIA32S1x16AnyTrue:
case kIA32S1x16AllTrue:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;

View File

@ -1808,6 +1808,16 @@ VISIT_ATOMIC_BINOP(Xor)
V(F32x4Neg) \
V(S128Not)
// X-macro list of the AnyTrue opcodes; applies V to each entry. Used with
// VISIT_SIMD_ANYTRUE to generate the corresponding Visit methods.
#define SIMD_ANYTRUE_LIST(V) \
V(S1x4AnyTrue) \
V(S1x8AnyTrue) \
V(S1x16AnyTrue)
// X-macro list of the AllTrue opcodes; applies V to each entry. Used with
// VISIT_SIMD_ALLTRUE to generate the corresponding Visit methods.
#define SIMD_ALLTRUE_LIST(V) \
V(S1x4AllTrue) \
V(S1x8AllTrue) \
V(S1x16AllTrue)
#define SIMD_SHIFT_OPCODES(V) \
V(I32x4Shl) \
V(I32x4ShrS) \
@ -1967,6 +1977,28 @@ SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
#undef VISIT_SIMD_UNOP_PREFIX
#undef SIMD_UNOP_PREFIX_LIST
// Defines InstructionSelector::Visit<Opcode> for an AnyTrue opcode. The
// generated method emits kIA32<Opcode> with the node defined as a register,
// input 0 used as a register (the code generator reads it via
// InputSimd128Register), and one temp register for the code generator.
// The macro and its opcode list are undefined right after instantiation.
#define VISIT_SIMD_ANYTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister()}; \
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
}
SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
#undef VISIT_SIMD_ANYTRUE
#undef SIMD_ANYTRUE_LIST
// Defines InstructionSelector::Visit<Opcode> for an AllTrue opcode. The
// generated method emits kIA32<Opcode> with the node defined as a register,
// input 0 passed through g.Use() rather than g.UseRegister() (the code
// generator reads it as a generic Operand via InputOperand), and one temp
// register for the code generator.
// The macro and its opcode list are undefined right after instantiation.
#define VISIT_SIMD_ALLTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister()}; \
Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)), \
arraysize(temps), temps); \
}
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE
#undef SIMD_ALLTRUE_LIST
#define VISIT_SIMD_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \

View File

@ -2422,11 +2422,7 @@ void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
@ -2439,7 +2435,7 @@ void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

View File

@ -2605,7 +2605,7 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
EMIT(imm8);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
void Assembler::ptest(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);

View File

@ -1109,7 +1109,8 @@ class Assembler : public AssemblerBase {
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
void extractps(Register dst, XMMRegister src, byte imm8);
void ptest(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, XMMRegister src) { ptest(dst, Operand(src)); }
void ptest(XMMRegister dst, Operand src);
void psllw(XMMRegister reg, int8_t shift);
void pslld(XMMRegister reg, int8_t shift);
@ -1428,6 +1429,10 @@ class Assembler : public AssemblerBase {
}
void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
// Register-register vptest: wraps src in an Operand and forwards to the
// Operand overload.
void vptest(XMMRegister dst, XMMRegister src) { vptest(dst, Operand(src)); }
// Emits vptest dst, src through vinstr with opcode 0x17 and the k66 / k0F38 /
// kWIG encoding selectors.
// NOTE(review): xmm0 is presumably a placeholder for the unused second source
// register slot of vinstr — confirm against vinstr's definition.
void vptest(XMMRegister dst, Operand src) {
vinstr(0x17, dst, xmm0, src, k66, k0F38, kWIG);
}
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8);

View File

@ -735,6 +735,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
// Opcode 0x17 is vptest: print the XMM register named by the ModRM regop
// field, then the right-hand XMM operand, advancing `current` by the value
// PrintRightXMMOperand returns.
case 0x17:
AppendToBuffer("vptest %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
@ -1931,9 +1935,8 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
case 0x17:
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
AppendToBuffer("ptest %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
break;
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \

View File

@ -1323,6 +1323,20 @@ void TurboAssembler::Psignd(XMMRegister dst, Operand src) {
UNREACHABLE();
}
// Emits a packed test of |dst| against |src|, picking the encoding from the
// CPU features available: vptest when AVX is supported, otherwise ptest when
// SSE4.1 is supported. Reaching this function with neither feature is a bug.
void TurboAssembler::Ptest(XMMRegister dst, Operand src) {
  if (CpuFeatures::IsSupported(AVX)) {
    // Prefer the VEX-encoded form whenever AVX is available.
    CpuFeatureScope avx_scope(this, AVX);
    vptest(dst, src);
  } else if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse4_scope(this, SSE4_1);
    ptest(dst, src);
  } else {
    // There is no fallback for ptest below SSE4.1.
    UNREACHABLE();
  }
}
void TurboAssembler::Pshufb(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);

View File

@ -250,6 +250,8 @@ class TurboAssembler : public Assembler {
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
AVX_OP3_XO(Psubb, psubb)
AVX_OP3_XO(Psubw, psubw)
@ -266,6 +268,9 @@ class TurboAssembler : public Assembler {
#undef AVX_OP3_WITH_TYPE
// Non-SSE2 instructions.
// Ptest: the register-register form wraps src in an Operand and forwards to
// the Operand overload, which is defined out of line.
void Ptest(XMMRegister dst, XMMRegister src) { Ptest(dst, Operand(src)); }
void Ptest(XMMRegister dst, Operand src);
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
void Pshufb(XMMRegister dst, Operand src);

View File

@ -568,6 +568,8 @@ TEST(DisasmIa320) {
__ pinsrd(xmm1, eax, 0);
__ pinsrd(xmm1, Operand(edx, 4), 0);
__ extractps(eax, xmm1, 0);
__ ptest(xmm5, xmm1);
__ ptest(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
@ -660,6 +662,9 @@ TEST(DisasmIa320) {
__ vmaxpd(xmm0, xmm1, xmm2);
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vptest(xmm5, xmm1);
__ vptest(xmm5, Operand(edx, 4));
__ vpsllw(xmm0, xmm7, 21);
__ vpslld(xmm0, xmm7, 21);
__ vpsrlw(xmm0, xmm7, 21);

View File

@ -1868,11 +1868,7 @@ WASM_SIMD_TEST(S8x16Concat) {
RunBinaryLaneOpTest(lower_simd, kExprS8x16Shuffle, expected);
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
// result. Use relational ops on numeric vectors to create the boolean vector
// test inputs. Test inputs with all true, all false, one true, and one false.
@ -1960,7 +1956,7 @@ WASM_SIMD_TEST(SimdI32x4ExtractWithF32x4) {
CHECK_EQ(1, r.Call());
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(SimdF32x4ExtractWithI32x4) {
WasmRunner<int32_t> r(kExecuteTurbofan, lower_simd);