diff --git a/src/codegen/ia32/assembler-ia32.cc b/src/codegen/ia32/assembler-ia32.cc
index 36a9e30648..2bf778ef87 100644
--- a/src/codegen/ia32/assembler-ia32.cc
+++ b/src/codegen/ia32/assembler-ia32.cc
@@ -2405,6 +2405,16 @@ void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) {
   EMIT(imm8);
 }
 
+void Assembler::shufpd(XMMRegister dst, XMMRegister src, byte imm8) {
+  DCHECK(is_uint8(imm8));
+  EnsureSpace ensure_space(this);
+  EMIT(0x66);
+  EMIT(0x0F);
+  EMIT(0xC6);
+  emit_sse_operand(dst, src);
+  EMIT(imm8);
+}
+
 void Assembler::movdqa(Operand dst, XMMRegister src) {
   EnsureSpace ensure_space(this);
   EMIT(0x66);
@@ -2818,6 +2828,13 @@ void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
   vinstr(op, dst, src1, src2, k66, k0F, kWIG);
 }
 
+void Assembler::vshufpd(XMMRegister dst, XMMRegister src1, Operand src2,
+                        byte imm8) {
+  DCHECK(is_uint8(imm8));
+  vpd(0xC6, dst, src1, src2);
+  EMIT(imm8);
+}
+
 void Assembler::vcmpps(XMMRegister dst, XMMRegister src1, Operand src2,
                        uint8_t cmp) {
   vps(0xC2, dst, src1, src2);
diff --git a/src/codegen/ia32/assembler-ia32.h b/src/codegen/ia32/assembler-ia32.h
index 9068920b8c..df69f65205 100644
--- a/src/codegen/ia32/assembler-ia32.h
+++ b/src/codegen/ia32/assembler-ia32.h
@@ -850,6 +850,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void movups(XMMRegister dst, Operand src);
   void movups(Operand dst, XMMRegister src);
   void shufps(XMMRegister dst, XMMRegister src, byte imm8);
+  void shufpd(XMMRegister dst, XMMRegister src, byte imm8);
 
   void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
   void maxss(XMMRegister dst, Operand src);
@@ -1319,12 +1320,18 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   }
   void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
   void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
+  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
+  void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
   void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
   void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
   void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
     vshufps(dst, src1, Operand(src2), imm8);
   }
   void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
+  void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
+    vshufpd(dst, src1, Operand(src2), imm8);
+  }
+  void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
 
   void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
   void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
index fe380f5d1e..5943c90f94 100644
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -1825,6 +1825,79 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
+    case kSSEF64x2Splat: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      __ shufpd(dst, dst, 0x0);
+      break;
+    }
+    case kAVXF64x2Splat: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src = i.InputDoubleRegister(0);
+      __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
+      break;
+    }
+    case kSSEF64x2ExtractLane: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputDoubleRegister();
+      int8_t lane = i.InputInt8(1);
+      if (lane != 0) {
+        DCHECK_LT(lane, 2);
+        __ shufpd(dst, dst, lane);
+      }
+      break;
+    }
+    case kAVXF64x2ExtractLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputDoubleRegister();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1);
+      if (lane == 0) {
+        if (dst != src) __ vmovapd(dst, src);
+      } else {
+        DCHECK_LT(lane, 2);
+        __ vshufpd(dst, src, src, lane);
+      }
+      break;
+    }
+    case kSSEF64x2ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      int8_t lane = i.InputInt8(1);
+      DoubleRegister rep = i.InputDoubleRegister(2);
+
+      // insertps takes a mask which contains (high to low):
+      // - 2 bits specifying the source float element to copy
+      // - 2 bits specifying the destination float element to write to
+      // - 4 bits specifying which elements of the destination to zero
+      DCHECK_LT(lane, 2);
+      if (lane == 0) {
+        __ insertps(dst, rep, 0b00000000);
+        __ insertps(dst, rep, 0b01010000);
+      } else {
+        __ insertps(dst, rep, 0b00100000);
+        __ insertps(dst, rep, 0b01110000);
+      }
+      break;
+    }
+    case kAVXF64x2ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1);
+      DoubleRegister rep = i.InputDoubleRegister(2);
+
+      DCHECK_LT(lane, 2);
+      if (lane == 0) {
+        __ vinsertps(dst, src, rep, 0b00000000);
+        __ vinsertps(dst, dst, rep, 0b01010000);
+      } else {
+        __ vinsertps(dst, src, rep, 0b00100000);
+        __ vinsertps(dst, dst, rep, 0b01110000);
+      }
+      break;
+    }
     case kSSEF32x4Splat: {
       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       XMMRegister dst = i.OutputSimd128Register();
diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h
index 0a22e25d30..81e73f034a 100644
--- a/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -116,6 +116,12 @@ namespace compiler {
   V(IA32PushSimd128)         \
   V(IA32Poke)                \
   V(IA32Peek)                \
+  V(SSEF64x2Splat)           \
+  V(AVXF64x2Splat)           \
+  V(SSEF64x2ExtractLane)     \
+  V(AVXF64x2ExtractLane)     \
+  V(SSEF64x2ReplaceLane)     \
+  V(AVXF64x2ReplaceLane)     \
   V(SSEF32x4Splat)           \
   V(AVXF32x4Splat)           \
   V(SSEF32x4ExtractLane)     \
diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index 268ce4028f..bb74cf4281 100644
--- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -97,6 +97,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXFloat32Neg:
     case kIA32BitcastFI:
     case kIA32BitcastIF:
+    case kSSEF64x2Splat:
+    case kAVXF64x2Splat:
+    case kSSEF64x2ExtractLane:
+    case kAVXF64x2ExtractLane:
+    case kSSEF64x2ReplaceLane:
+    case kAVXF64x2ReplaceLane:
     case kSSEF32x4Splat:
    case kAVXF32x4Splat:
     case kSSEF32x4ExtractLane:
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
index 4395113ba2..e49451f626 100644
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -2000,6 +2000,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
   V(I8x16ShrS)               \
   V(I8x16ShrU)
 
+void InstructionSelector::VisitF64x2Splat(Node* node) {
+  VisitRRSimd(this, node, kAVXF64x2Splat, kSSEF64x2Splat);
+}
+
+void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
+  VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
+}
+
 void InstructionSelector::VisitF32x4Splat(Node* node) {
   VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat);
 }
@@ -2091,6 +2099,28 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
 #undef VISIT_SIMD_REPLACE_LANE
 #undef SIMD_INT_TYPES
 
+// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
+// operand2 to be UseRegister, because the codegen relies on insertps using
+// registers.
+// TODO(v8:9764) Remove this UseRegister requirement.
+#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type)                             \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) {        \
+    IA32OperandGenerator g(this);                                         \
+    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));        \
+    InstructionOperand operand1 =                                         \
+        g.UseImmediate(OpParameter<int32_t>(node->op()));                 \
+    InstructionOperand operand2 = g.UseRegister(node->InputAt(1));        \
+    if (IsSupported(AVX)) {                                               \
+      Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0,   \
+           operand1, operand2);                                           \
+    } else {                                                              \
+      Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0,  \
+           operand1, operand2);                                           \
+    }                                                                     \
+  }
+VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
+#undef VISIT_SIMD_REPLACE_LANE_USE_REG
+
 #define VISIT_SIMD_SHIFT(Opcode)                                 \
   void InstructionSelector::Visit##Opcode(Node* node) {          \
     VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode);   \
diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc
index a59e356d03..570646b5f0 100644
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2621,9 +2621,11 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
 
 #if !V8_TARGET_ARCH_X64
 #if !V8_TARGET_ARCH_ARM64
+#if !V8_TARGET_ARCH_IA32
 void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_IA32
 void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
diff --git a/src/diagnostics/ia32/disasm-ia32.cc b/src/diagnostics/ia32/disasm-ia32.cc
index 49947ee317..96813e2483 100644
--- a/src/diagnostics/ia32/disasm-ia32.cc
+++ b/src/diagnostics/ia32/disasm-ia32.cc
@@ -1216,6 +1216,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
         AppendToBuffer(",%d", Imm8(current));
         current++;
         break;
+      case 0xC6:
+        AppendToBuffer("vshufpd %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%d", Imm8(current));
+        current++;
+        break;
 #define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode)  \
   case 0x##opcode: {                                                       \
     AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop),   \
@@ -2269,6 +2276,15 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         data += PrintRightOperand(data);
         AppendToBuffer(",%d", Imm8(data));
         data++;
+      } else if (*data == 0xC6) {
+        // shufpd xmm, xmm/m128, imm8
+        data++;
+        int mod, regop, rm;
+        get_modrm(*data, &mod, &regop, &rm);
+        AppendToBuffer("shufpd %s,", NameOfXMMRegister(regop));
+        data += PrintRightXMMOperand(data);
+        AppendToBuffer(",%d", Imm8(data));
+        data++;
      } else if (*data == 0xE7) {
        data++;
        int mod, regop, rm;
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index 12f96de376..1e7f39a526 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -877,6 +877,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Qfms) {
 }
 #endif  // V8_TARGET_ARCH_X64
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
   WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -1072,6 +1073,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GeU) {
   RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeU,
                     UnsignedGreaterEqual);
 }
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 
 WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
   WasmRunner<int32_t, double> r(execution_tier, lower_simd);
@@ -1095,6 +1097,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
   }
 }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
   WasmRunner<int64_t> r(execution_tier, lower_simd);
   BUILD(r, WASM_IF_ELSE_L(
@@ -1104,6 +1107,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
               WASM_I64V(1), WASM_I64V(0)));
   CHECK_EQ(1, r.Call());
 }
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 
 WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
   WasmRunner<double, double> r(execution_tier, lower_simd);
@@ -1127,6 +1131,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
   }
 }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
   WasmRunner<int64_t> r(execution_tier, lower_simd);
   BUILD(r, WASM_IF_ELSE_L(
@@ -1136,6 +1141,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
              WASM_I64V(1), WASM_I64V(0)));
   CHECK_EQ(1, r.Call());
 }
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 
 WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
   WasmRunner<int32_t> r(execution_tier, lower_simd);
@@ -1156,6 +1162,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
   }
 }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 bool IsExtreme(double x) {
   double abs_x = std::fabs(x);
   const double kSmallFloatThreshold = 1.0e-298;
@@ -1278,11 +1285,9 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) {
   RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate);
 }
 
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
   RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 
 void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                        WasmOpcode opcode, DoubleBinOp expected_op) {
@@ -1413,6 +1418,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Mul) {
   RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Mul,
                     base::MulWithWraparound);
 }
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 
 #if V8_TARGET_ARCH_X64
 WASM_SIMD_TEST_NO_LOWERING(I64x2MinS) {
@@ -1481,7 +1487,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Qfms) {
   }
 }
 #endif  // V8_TARGET_ARCH_X64
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 
 WASM_SIMD_TEST(I32x4Splat) {
   WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
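
Note on the lane arithmetic above: what follows is a minimal standalone sketch (not part of the CL) that mirrors the emitted instruction sequences with SSE2/SSE4.1 intrinsics; the f64x2_* helper names and test values are illustrative only, V8 emits the raw instructions through its Assembler. The key trick is in replace-lane: insertps only moves 32-bit elements, so each 64-bit lane is written as two float-sized inserts, with the imm8 encoding (high to low) 2 bits of source element, 2 bits of destination element, and a 4-bit zero mask. Build with -msse4.1.

// f64x2_lanes.cc: standalone mirror of the shufpd/insertps lowering above.
// Hypothetical helpers, not V8 code. Build: g++ -msse4.1 f64x2_lanes.cc
#include <smmintrin.h>  // SSE4.1 intrinsics (insertps); pulls in SSE2 (shufpd)
#include <cassert>
#include <cstdio>

// F64x2Splat: shufpd dst,dst,0 copies lane 0 into both lanes.
static __m128d f64x2_splat(__m128d v) { return _mm_shuffle_pd(v, v, 0); }

// F64x2ExtractLane: a nonzero lane is first shuffled into position 0
// (shufpd dst,dst,1), then read out as a scalar.
static double f64x2_extract_lane(__m128d v, int lane) {
  assert(0 <= lane && lane < 2);
  if (lane != 0) v = _mm_shuffle_pd(v, v, 1);
  return _mm_cvtsd_f64(v);
}

// F64x2ReplaceLane: two insertps writes, one per 32-bit half of the double.
// imm8, high to low: 2-bit source elem | 2-bit dest elem | 4-bit zero mask.
static __m128d f64x2_replace_lane(__m128d v, int lane, double rep) {
  __m128 dst = _mm_castpd_ps(v);
  __m128 r = _mm_castpd_ps(_mm_set_sd(rep));  // rep sits in float elems 0..1
  if (lane == 0) {
    dst = _mm_insert_ps(dst, r, 0b00000000);  // dst[0] = r[0]
    dst = _mm_insert_ps(dst, r, 0b01010000);  // dst[1] = r[1]
  } else {
    dst = _mm_insert_ps(dst, r, 0b00100000);  // dst[2] = r[0]
    dst = _mm_insert_ps(dst, r, 0b01110000);  // dst[3] = r[1]
  }
  return _mm_castps_pd(dst);
}

int main() {
  __m128d v = _mm_set_pd(2.0, 1.0);  // lanes: [0]=1.0, [1]=2.0
  assert(f64x2_extract_lane(f64x2_splat(v), 1) == 1.0);
  assert(f64x2_extract_lane(v, 1) == 2.0);
  __m128d w = f64x2_replace_lane(v, 1, 5.0);
  assert(f64x2_extract_lane(w, 0) == 1.0);
  assert(f64x2_extract_lane(w, 1) == 5.0);
  puts("ok");
  return 0;
}

This two-insert sequence addresses the replacement double as an XMM register twice, which is why the VISIT_SIMD_REPLACE_LANE_USE_REG macro in the selector forces operand 2 into a register; TODO(v8:9764) tracks lifting that restriction.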