[x64] Wasm SIMD x64 Conversion, AllTrue/AnyTrue operations
This CL mirrors the ia32 SIMD conversion and AllTrue/AnyTrue operations, with minor cleanups to use TempRegisters instead of ScratchRegisters.

Change-Id: I84d3e148200dd611a72380b24404b75c73c5352d
Reviewed-on: https://chromium-review.googlesource.com/1174096
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#56297}
This commit is contained in:
parent
02a6727bb8
commit
caf93c9f6f
@ -2427,18 +2427,6 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS64
|
||||
void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
@ -2467,61 +2455,6 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4SConvertI16x8High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4UConvertI16x8Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4UConvertI16x8High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8SConvertI8x16Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8SConvertI8x16High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16SConvertI16x8(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
|
||||
@ -2531,18 +2464,6 @@ void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x8AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x8AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
|
@ -2137,6 +2137,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
|
||||
break;
|
||||
}
|
||||
case kX64F32x4SConvertI32x4: {
|
||||
__ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64F32x4UConvertI32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
|
||||
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
|
||||
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
|
||||
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
|
||||
__ psrld(dst, 1); // divide by 2 to get in unsigned range
|
||||
__ cvtdq2ps(dst, dst); // convert hi exactly
|
||||
__ addps(dst, dst); // double hi, exactly
|
||||
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Abs: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
@ -2245,6 +2264,36 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64I32x4SConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// NAN->0
|
||||
__ movaps(kScratchDoubleReg, dst);
|
||||
__ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pand(dst, kScratchDoubleReg);
|
||||
// Set top bit if >= 0 (but not -0.0!)
|
||||
__ pxor(kScratchDoubleReg, dst);
|
||||
// Convert
|
||||
__ cvttps2dq(dst, dst);
|
||||
// Set top bit if >=0 is now < 0
|
||||
__ pand(kScratchDoubleReg, dst);
|
||||
__ psrad(kScratchDoubleReg, 31);
|
||||
// Set positive overflow lanes to 0x7FFFFFFF
|
||||
__ pxor(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4SConvertI16x8Low: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64I32x4SConvertI16x8High: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ palignr(dst, i.InputSimd128Register(0), 8);
|
||||
__ pmovsxwd(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4Neg: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
@ -2316,6 +2365,46 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ pcmpeqd(dst, src);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4UConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
|
||||
// NAN->0, negative->0
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ maxps(dst, kScratchDoubleReg);
|
||||
// scratch: float representation of max_signed
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1); // 0x7fffffff
|
||||
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
|
||||
// tmp: convert (src-max_signed).
|
||||
// Positive overflow lanes -> 0x7FFFFFFF
|
||||
// Negative lanes -> 0
|
||||
__ movaps(tmp, dst);
|
||||
__ subps(tmp, kScratchDoubleReg);
|
||||
__ cmpleps(kScratchDoubleReg, tmp);
|
||||
__ cvttps2dq(tmp, tmp);
|
||||
__ pxor(tmp, kScratchDoubleReg);
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pmaxsd(tmp, kScratchDoubleReg);
|
||||
// convert. Overflow lanes above max_signed will be 0x80000000
|
||||
__ cvttps2dq(dst, dst);
|
||||
// Add (src-max_signed) for overflow lanes.
|
||||
__ paddd(dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4UConvertI16x8Low: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64I32x4UConvertI16x8High: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ palignr(dst, i.InputSimd128Register(0), 8);
|
||||
__ pmovzxwd(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4ShrU: {
|
||||
__ psrld(i.OutputSimd128Register(), i.InputInt8(1));
|
||||
break;
|
||||
@ -2377,6 +2466,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64I16x8SConvertI8x16Low: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8SConvertI8x16High: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ palignr(dst, i.InputSimd128Register(0), 8);
|
||||
__ pmovsxbw(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Neg: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
@ -2398,6 +2499,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ psraw(i.OutputSimd128Register(), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8SConvertI32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Add: {
|
||||
__ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
@ -2456,10 +2562,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ pcmpeqw(dst, src);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8UConvertI8x16Low: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8UConvertI8x16High: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ palignr(dst, i.InputSimd128Register(0), 8);
|
||||
__ pmovzxbw(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8ShrU: {
|
||||
__ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8UConvertI32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// Change negative lanes to 0x7FFFFFFF
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1);
|
||||
__ pminud(dst, kScratchDoubleReg);
|
||||
__ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
|
||||
__ packusdw(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kX64I16x8AddSaturateU: {
|
||||
__ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
@ -2521,6 +2651,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64I8x16SConvertI16x8: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64I8x16Neg: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
@ -2582,6 +2717,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ pcmpeqb(dst, src);
|
||||
break;
|
||||
}
|
||||
case kX64I8x16UConvertI16x8: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// Change negative lanes to 0x7FFF
|
||||
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrlw(kScratchDoubleReg, 1);
|
||||
__ pminuw(dst, kScratchDoubleReg);
|
||||
__ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
|
||||
__ packuswb(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kX64I8x16AddSaturateU: {
|
||||
__ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
@ -2653,6 +2800,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ xorps(dst, i.InputSimd128Register(2));
|
||||
break;
|
||||
}
|
||||
case kX64S1x4AnyTrue:
|
||||
case kX64S1x8AnyTrue:
|
||||
case kX64S1x16AnyTrue: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
Register dst = i.OutputRegister();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
Register tmp = i.TempRegister(0);
|
||||
__ xorq(tmp, tmp);
|
||||
__ movq(dst, Immediate(-1));
|
||||
__ ptest(src, src);
|
||||
__ cmovq(zero, dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kX64S1x4AllTrue:
|
||||
case kX64S1x8AllTrue:
|
||||
case kX64S1x16AllTrue: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
Register dst = i.OutputRegister();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
Register tmp = i.TempRegister(0);
|
||||
__ movq(tmp, Immediate(-1));
|
||||
__ xorq(dst, dst);
|
||||
// Compare all src lanes to false.
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
if (arch_opcode == kX64S1x4AllTrue) {
|
||||
__ pcmpeqd(kScratchDoubleReg, src);
|
||||
} else if (arch_opcode == kX64S1x8AllTrue) {
|
||||
__ pcmpeqw(kScratchDoubleReg, src);
|
||||
} else {
|
||||
__ pcmpeqb(kScratchDoubleReg, src);
|
||||
}
|
||||
// If kScratchDoubleReg is all zero, none of src lanes are false.
|
||||
__ ptest(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ cmovq(zero, dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kX64StackCheck:
|
||||
__ CompareRoot(rsp, RootIndex::kStackLimit);
|
||||
break;
|
||||
|
@ -151,6 +151,8 @@ namespace compiler {
|
||||
V(X64F32x4Splat) \
|
||||
V(X64F32x4ExtractLane) \
|
||||
V(X64F32x4ReplaceLane) \
|
||||
V(X64F32x4SConvertI32x4) \
|
||||
V(X64F32x4UConvertI32x4) \
|
||||
V(X64F32x4Abs) \
|
||||
V(X64F32x4Neg) \
|
||||
V(X64F32x4RecipApprox) \
|
||||
@ -168,6 +170,9 @@ namespace compiler {
|
||||
V(X64I32x4Splat) \
|
||||
V(X64I32x4ExtractLane) \
|
||||
V(X64I32x4ReplaceLane) \
|
||||
V(X64I32x4SConvertF32x4) \
|
||||
V(X64I32x4SConvertI16x8Low) \
|
||||
V(X64I32x4SConvertI16x8High) \
|
||||
V(X64I32x4Neg) \
|
||||
V(X64I32x4Shl) \
|
||||
V(X64I32x4ShrS) \
|
||||
@ -181,6 +186,9 @@ namespace compiler {
|
||||
V(X64I32x4Ne) \
|
||||
V(X64I32x4GtS) \
|
||||
V(X64I32x4GeS) \
|
||||
V(X64I32x4UConvertF32x4) \
|
||||
V(X64I32x4UConvertI16x8Low) \
|
||||
V(X64I32x4UConvertI16x8High) \
|
||||
V(X64I32x4ShrU) \
|
||||
V(X64I32x4MinU) \
|
||||
V(X64I32x4MaxU) \
|
||||
@ -189,9 +197,12 @@ namespace compiler {
|
||||
V(X64I16x8Splat) \
|
||||
V(X64I16x8ExtractLane) \
|
||||
V(X64I16x8ReplaceLane) \
|
||||
V(X64I16x8SConvertI8x16Low) \
|
||||
V(X64I16x8SConvertI8x16High) \
|
||||
V(X64I16x8Neg) \
|
||||
V(X64I16x8Shl) \
|
||||
V(X64I16x8ShrS) \
|
||||
V(X64I16x8SConvertI32x4) \
|
||||
V(X64I16x8Add) \
|
||||
V(X64I16x8AddSaturateS) \
|
||||
V(X64I16x8AddHoriz) \
|
||||
@ -204,7 +215,10 @@ namespace compiler {
|
||||
V(X64I16x8Ne) \
|
||||
V(X64I16x8GtS) \
|
||||
V(X64I16x8GeS) \
|
||||
V(X64I16x8UConvertI8x16Low) \
|
||||
V(X64I16x8UConvertI8x16High) \
|
||||
V(X64I16x8ShrU) \
|
||||
V(X64I16x8UConvertI32x4) \
|
||||
V(X64I16x8AddSaturateU) \
|
||||
V(X64I16x8SubSaturateU) \
|
||||
V(X64I16x8MinU) \
|
||||
@ -214,6 +228,7 @@ namespace compiler {
|
||||
V(X64I8x16Splat) \
|
||||
V(X64I8x16ExtractLane) \
|
||||
V(X64I8x16ReplaceLane) \
|
||||
V(X64I8x16SConvertI16x8) \
|
||||
V(X64I8x16Neg) \
|
||||
V(X64I8x16Add) \
|
||||
V(X64I8x16AddSaturateS) \
|
||||
@ -225,6 +240,7 @@ namespace compiler {
|
||||
V(X64I8x16Ne) \
|
||||
V(X64I8x16GtS) \
|
||||
V(X64I8x16GeS) \
|
||||
V(X64I8x16UConvertI16x8) \
|
||||
V(X64I8x16AddSaturateU) \
|
||||
V(X64I8x16SubSaturateU) \
|
||||
V(X64I8x16MinU) \
|
||||
@ -237,6 +253,12 @@ namespace compiler {
|
||||
V(X64S128Not) \
|
||||
V(X64S128Select) \
|
||||
V(X64S128Zero) \
|
||||
V(X64S1x4AnyTrue) \
|
||||
V(X64S1x4AllTrue) \
|
||||
V(X64S1x8AnyTrue) \
|
||||
V(X64S1x8AllTrue) \
|
||||
V(X64S1x16AnyTrue) \
|
||||
V(X64S1x16AllTrue) \
|
||||
V(X64Word64AtomicLoadUint8) \
|
||||
V(X64Word64AtomicLoadUint16) \
|
||||
V(X64Word64AtomicLoadUint32) \
|
||||
|
@ -128,6 +128,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64F32x4Splat:
|
||||
case kX64F32x4ExtractLane:
|
||||
case kX64F32x4ReplaceLane:
|
||||
case kX64F32x4SConvertI32x4:
|
||||
case kX64F32x4UConvertI32x4:
|
||||
case kX64F32x4RecipApprox:
|
||||
case kX64F32x4RecipSqrtApprox:
|
||||
case kX64F32x4Abs:
|
||||
@ -145,6 +147,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I32x4Splat:
|
||||
case kX64I32x4ExtractLane:
|
||||
case kX64I32x4ReplaceLane:
|
||||
case kX64I32x4SConvertF32x4:
|
||||
case kX64I32x4SConvertI16x8Low:
|
||||
case kX64I32x4SConvertI16x8High:
|
||||
case kX64I32x4Neg:
|
||||
case kX64I32x4Shl:
|
||||
case kX64I32x4ShrS:
|
||||
@ -158,6 +163,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I32x4Ne:
|
||||
case kX64I32x4GtS:
|
||||
case kX64I32x4GeS:
|
||||
case kX64I32x4UConvertF32x4:
|
||||
case kX64I32x4UConvertI16x8Low:
|
||||
case kX64I32x4UConvertI16x8High:
|
||||
case kX64I32x4ShrU:
|
||||
case kX64I32x4MinU:
|
||||
case kX64I32x4MaxU:
|
||||
@ -166,9 +174,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I16x8Splat:
|
||||
case kX64I16x8ExtractLane:
|
||||
case kX64I16x8ReplaceLane:
|
||||
case kX64I16x8SConvertI8x16Low:
|
||||
case kX64I16x8SConvertI8x16High:
|
||||
case kX64I16x8Neg:
|
||||
case kX64I16x8Shl:
|
||||
case kX64I16x8ShrS:
|
||||
case kX64I16x8SConvertI32x4:
|
||||
case kX64I16x8Add:
|
||||
case kX64I16x8AddSaturateS:
|
||||
case kX64I16x8AddHoriz:
|
||||
@ -181,6 +192,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I16x8Ne:
|
||||
case kX64I16x8GtS:
|
||||
case kX64I16x8GeS:
|
||||
case kX64I16x8UConvertI8x16Low:
|
||||
case kX64I16x8UConvertI8x16High:
|
||||
case kX64I16x8UConvertI32x4:
|
||||
case kX64I16x8ShrU:
|
||||
case kX64I16x8AddSaturateU:
|
||||
case kX64I16x8SubSaturateU:
|
||||
@ -191,6 +205,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I8x16Splat:
|
||||
case kX64I8x16ExtractLane:
|
||||
case kX64I8x16ReplaceLane:
|
||||
case kX64I8x16SConvertI16x8:
|
||||
case kX64I8x16Neg:
|
||||
case kX64I8x16Add:
|
||||
case kX64I8x16AddSaturateS:
|
||||
@ -202,6 +217,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I8x16Ne:
|
||||
case kX64I8x16GtS:
|
||||
case kX64I8x16GeS:
|
||||
case kX64I8x16UConvertI16x8:
|
||||
case kX64I8x16AddSaturateU:
|
||||
case kX64I8x16SubSaturateU:
|
||||
case kX64I8x16MinU:
|
||||
@ -214,6 +230,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64S128Not:
|
||||
case kX64S128Select:
|
||||
case kX64S128Zero:
|
||||
case kX64S1x4AnyTrue:
|
||||
case kX64S1x4AllTrue:
|
||||
case kX64S1x8AnyTrue:
|
||||
case kX64S1x8AllTrue:
|
||||
case kX64S1x16AnyTrue:
|
||||
case kX64S1x16AllTrue:
|
||||
return (instr->addressing_mode() == kMode_None)
|
||||
? kNoOpcodeFlags
|
||||
: kIsLoadOperation | kHasSideEffect;
|
||||
|
@ -2496,6 +2496,7 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(I32x4MaxU) \
|
||||
V(I32x4GtU) \
|
||||
V(I32x4GeU) \
|
||||
V(I16x8SConvertI32x4) \
|
||||
V(I16x8Add) \
|
||||
V(I16x8AddSaturateS) \
|
||||
V(I16x8AddHoriz) \
|
||||
@ -2514,6 +2515,7 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(I16x8MaxU) \
|
||||
V(I16x8GtU) \
|
||||
V(I16x8GeU) \
|
||||
V(I8x16SConvertI16x8) \
|
||||
V(I8x16Add) \
|
||||
V(I8x16AddSaturateS) \
|
||||
V(I8x16Sub) \
|
||||
@ -2534,14 +2536,23 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(S128Or) \
|
||||
V(S128Xor)
|
||||
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(I32x4Neg) \
|
||||
V(I16x8Neg) \
|
||||
V(I8x16Neg) \
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F32x4SConvertI32x4) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(I32x4SConvertI16x8Low) \
|
||||
V(I32x4SConvertI16x8High) \
|
||||
V(I32x4Neg) \
|
||||
V(I32x4UConvertI16x8Low) \
|
||||
V(I32x4UConvertI16x8High) \
|
||||
V(I16x8SConvertI8x16Low) \
|
||||
V(I16x8SConvertI8x16High) \
|
||||
V(I16x8Neg) \
|
||||
V(I16x8UConvertI8x16Low) \
|
||||
V(I16x8UConvertI8x16High) \
|
||||
V(I8x16Neg) \
|
||||
V(S128Not)
|
||||
|
||||
#define SIMD_SHIFT_OPCODES(V) \
|
||||
@ -2552,6 +2563,16 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(I16x8ShrS) \
|
||||
V(I16x8ShrU)
|
||||
|
||||
// X-macro list of the AnyTrue opcodes (S1x4, S1x8 and S1x16 variants).
// V is applied once per opcode name; the list is consumed by
// VISIT_SIMD_ANYTRUE to generate the matching visitors.
#define SIMD_ANYTRUE_LIST(V) \
|
||||
V(S1x4AnyTrue) \
|
||||
V(S1x8AnyTrue) \
|
||||
V(S1x16AnyTrue)
|
||||
|
||||
// X-macro list of the AllTrue opcodes (S1x4, S1x8 and S1x16 variants).
// V is applied once per opcode name; the list is consumed by
// VISIT_SIMD_ALLTRUE to generate the matching visitors.
#define SIMD_ALLTRUE_LIST(V) \
|
||||
V(S1x4AllTrue) \
|
||||
V(S1x8AllTrue) \
|
||||
V(S1x16AllTrue)
|
||||
|
||||
void InstructionSelector::VisitS128Zero(Node* node) {
|
||||
X64OperandGenerator g(this);
|
||||
Emit(kX64S128Zero, g.DefineAsRegister(node), g.DefineAsRegister(node));
|
||||
@ -2596,6 +2617,7 @@ SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
|
||||
}
|
||||
SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
|
||||
#undef VISIT_SIMD_SHIFT
|
||||
#undef SIMD_SHIFT_OPCODES
|
||||
|
||||
#define VISIT_SIMD_UNOP(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
@ -2605,6 +2627,7 @@ SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
|
||||
}
|
||||
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
|
||||
#undef VISIT_SIMD_UNOP
|
||||
#undef SIMD_UNOP_LIST
|
||||
|
||||
#define VISIT_SIMD_BINOP(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
@ -2614,10 +2637,30 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
|
||||
}
|
||||
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
|
||||
#undef VISIT_SIMD_BINOP
|
||||
#undef SIMD_TYPES
|
||||
#undef SIMD_BINOP_LIST
|
||||
#undef SIMD_UNOP_LIST
|
||||
#undef SIMD_SHIFT_OPCODES
|
||||
|
||||
// Generates InstructionSelector::Visit<Opcode> for one AnyTrue opcode.
// The emitted kX64<Opcode> instruction defines the node in a register, takes
// input 0 in a unique register, and reserves one temp register; the code
// generator's AnyTrue case reads that temp through i.TempRegister(0).
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
X64OperandGenerator g(this); \
|
||||
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||
Emit(kX64##Opcode, g.DefineAsRegister(node), \
|
||||
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
|
||||
}
|
||||
// Instantiate one visitor per entry of SIMD_ANYTRUE_LIST, then retire both
// helper macros so they do not leak past this point.
SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
|
||||
#undef VISIT_SIMD_ANYTRUE
|
||||
#undef SIMD_ANYTRUE_LIST
|
||||
|
||||
// Generates InstructionSelector::Visit<Opcode> for one AllTrue opcode.
// The emitted kX64<Opcode> instruction defines the node in a register, takes
// input 0 in a unique register, and reserves one temp register; the code
// generator's AllTrue case reads that temp through i.TempRegister(0).
#define VISIT_SIMD_ALLTRUE(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
X64OperandGenerator g(this); \
|
||||
InstructionOperand temps[] = {g.TempRegister()}; \
|
||||
Emit(kX64##Opcode, g.DefineAsRegister(node), \
|
||||
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
|
||||
}
|
||||
// Instantiate one visitor per entry of SIMD_ALLTRUE_LIST, then retire both
// helper macros so they do not leak past this point.
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
|
||||
#undef VISIT_SIMD_ALLTRUE
|
||||
#undef SIMD_ALLTRUE_LIST
|
||||
#undef SIMD_TYPES
|
||||
|
||||
void InstructionSelector::VisitS128Select(Node* node) {
|
||||
X64OperandGenerator g(this);
|
||||
@ -2626,6 +2669,36 @@ void InstructionSelector::VisitS128Select(Node* node) {
|
||||
g.UseRegister(node->InputAt(2)));
|
||||
}
|
||||
|
||||
// Lowers an F32x4UConvertI32x4 node to kX64F32x4UConvertI32x4. The result is
// constrained to the same register as the single input; the code generator
// DCHECKs that constraint and performs the conversion in place.
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
  X64OperandGenerator operand_gen(this);
  InstructionOperand result = operand_gen.DefineSameAsFirst(node);
  InstructionOperand source = operand_gen.UseRegister(node->InputAt(0));
  Emit(kX64F32x4UConvertI32x4, result, source);
}
|
||||
|
||||
// Lowers an I32x4SConvertF32x4 node to kX64I32x4SConvertF32x4. The result is
// constrained to the same register as the single input; the code generator
// DCHECKs that constraint and rewrites the register in place.
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
  X64OperandGenerator operand_gen(this);
  InstructionOperand result = operand_gen.DefineSameAsFirst(node);
  InstructionOperand source = operand_gen.UseRegister(node->InputAt(0));
  Emit(kX64I32x4SConvertF32x4, result, source);
}
|
||||
|
||||
// Lowers an I32x4UConvertF32x4 node to kX64I32x4UConvertF32x4.
//
// The result is constrained to the same register as the single input (the
// code generator DCHECKs this). The code generator additionally reads
// instr->TempAt(0) as a SIMD register to hold the (src - max_signed)
// intermediate, so exactly one SIMD temp has to be reserved here; emitting
// the instruction without temps would make that TempAt(0) access invalid.
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
  X64OperandGenerator g(this);
  // NOTE(review): TempSimd128Register() is assumed to be provided by the
  // shared OperandGenerator, as used by the ia32 selector this CL mirrors.
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node),
       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
}
|
||||
|
||||
// Lowers an I16x8UConvertI32x4 node to kX64I16x8UConvertI32x4. The result is
// constrained to the same register as the first input (DCHECKed by the code
// generator); the second input is supplied in its own register.
void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
  X64OperandGenerator operand_gen(this);
  InstructionOperand result = operand_gen.DefineSameAsFirst(node);
  InstructionOperand first = operand_gen.UseRegister(node->InputAt(0));
  InstructionOperand second = operand_gen.UseRegister(node->InputAt(1));
  Emit(kX64I16x8UConvertI32x4, result, first, second);
}
|
||||
|
||||
// Lowers an I8x16UConvertI16x8 node to kX64I8x16UConvertI16x8. The result is
// constrained to the same register as the first input (DCHECKed by the code
// generator); the second input is supplied in its own register.
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
  X64OperandGenerator operand_gen(this);
  InstructionOperand result = operand_gen.DefineSameAsFirst(node);
  InstructionOperand first = operand_gen.UseRegister(node->InputAt(0));
  InstructionOperand second = operand_gen.UseRegister(node->InputAt(1));
  Emit(kX64I8x16UConvertI16x8, result, first, second);
}
|
||||
|
||||
// Int32AbsWithOverflow is not expected to reach this instruction selector;
// the visitor exists only to satisfy the interface and hits UNREACHABLE()
// if it is ever invoked.
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -83,7 +83,10 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
|
||||
// Only use statically determined features for cross compile (snapshot).
|
||||
if (cross_compile) return;
|
||||
|
||||
if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1;
|
||||
if (cpu.has_sse41() && FLAG_enable_sse4_1) {
|
||||
supported_ |= 1u << SSE4_1;
|
||||
supported_ |= 1u << SSSE3;
|
||||
}
|
||||
if (cpu.has_ssse3() && FLAG_enable_ssse3) supported_ |= 1u << SSSE3;
|
||||
if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3;
|
||||
// SAHF is not generally available in long mode.
|
||||
@ -458,6 +461,9 @@ Assembler::Assembler(const AssemblerOptions& options, void* buffer,
|
||||
|
||||
ReserveCodeTargetSpace(100);
|
||||
reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
EnableCpuFeature(SSSE3);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
|
||||
|
@ -1080,8 +1080,6 @@ WASM_SIMD_TEST(I32x4ShrU) {
|
||||
LogicalShiftRight);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Tests both signed and unsigned conversion from I8x16 (unpacking).
|
||||
WASM_SIMD_TEST(I16x8ConvertI8x16) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_tier,
|
||||
@ -1124,8 +1122,6 @@ WASM_SIMD_TEST(I16x8ConvertI8x16) {
|
||||
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
void RunI16x8UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode simd_op, Int16UnOp expected_op) {
|
||||
@ -1144,8 +1140,6 @@ WASM_SIMD_TEST(I16x8Neg) {
|
||||
RunI16x8UnOpTest(execution_tier, lower_simd, kExprI16x8Neg, Negate);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Tests both signed and unsigned conversion from I32x4 (packing).
|
||||
WASM_SIMD_TEST(I16x8ConvertI32x4) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t> r(
|
||||
@ -1190,8 +1184,6 @@ WASM_SIMD_TEST(I16x8ConvertI32x4) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
void RunI16x8BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode simd_op, Int16BinOp expected_op) {
|
||||
@ -1374,8 +1366,6 @@ WASM_SIMD_TEST(I8x16Neg) {
|
||||
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Neg, Negate);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Tests both signed and unsigned conversion from I16x8 (packing).
|
||||
WASM_SIMD_TEST(I8x16ConvertI16x8) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, int32_t> r(
|
||||
@ -1422,8 +1412,6 @@ WASM_SIMD_TEST(I8x16ConvertI16x8) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
void RunI8x16BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode simd_op, Int8BinOp expected_op) {
|
||||
@ -2012,6 +2000,8 @@ WASM_SIMD_COMPILED_TEST(S8x16MultiShuffleFuzz) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
|
||||
// result. Use relational ops on numeric vectors to create the boolean vector
|
||||
@ -2099,8 +2089,6 @@ WASM_SIMD_TEST(SimdI32x4ExtractWithF32x4) {
|
||||
WASM_I32V(1), WASM_I32V(0)));
|
||||
CHECK_EQ(1, r.Call());
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
WASM_SIMD_TEST(SimdF32x4ExtractWithI32x4) {
|
||||
WasmRunner<int32_t> r(execution_tier, lower_simd);
|
||||
|
Loading…
Reference in New Issue
Block a user