From df54d51148b4914b838a485d58d8892ca8bab2c1 Mon Sep 17 00:00:00 2001
From: Ng Zhi An
Date: Wed, 26 Jun 2019 10:31:38 -0700
Subject: [PATCH] [wasm simd] Implement I64x2Splat on x64

Bug: v8:8460
Change-Id: Id159c81cd2d25924be96e49c64073e154ef32e6a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1667867
Reviewed-by: Bill Budge
Reviewed-by: Deepti Gandluri
Reviewed-by: Michael Starzinger
Auto-Submit: Zhi An Ng
Commit-Queue: Zhi An Ng
Cr-Commit-Position: refs/heads/master@{#62475}
---
 src/codegen/x64/assembler-x64.cc              | 14 +++++++++++++
 src/codegen/x64/assembler-x64.h               |  1 +
 src/compiler/backend/instruction-selector.cc  |  6 ++++++
 .../backend/x64/code-generator-x64.cc         | 10 +++++++++
 .../backend/x64/instruction-codes-x64.h       |  1 +
 .../backend/x64/instruction-scheduler-x64.cc  |  1 +
 .../backend/x64/instruction-selector-x64.cc   |  1 +
 src/compiler/machine-operator.cc              |  1 +
 src/compiler/machine-operator.h               |  1 +
 src/compiler/opcodes.h                        |  1 +
 src/compiler/simd-scalar-lowering.cc          | 14 ++++++++++++-
 src/compiler/simd-scalar-lowering.h           |  8 ++++++-
 src/compiler/wasm-compiler.cc                 |  2 ++
 src/wasm/wasm-interpreter.cc                  |  1 +
 src/wasm/wasm-opcodes.cc                      |  2 ++
 src/wasm/wasm-opcodes.h                       |  2 ++
 src/wasm/wasm-value.h                         |  1 +
 test/cctest/wasm/test-run-wasm-simd.cc        | 21 +++++++++++++++++++
 18 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/src/codegen/x64/assembler-x64.cc b/src/codegen/x64/assembler-x64.cc
index 3236b0f52c..6d42c18e67 100644
--- a/src/codegen/x64/assembler-x64.cc
+++ b/src/codegen/x64/assembler-x64.cc
@@ -2883,6 +2883,18 @@ void Assembler::movd(Register dst, XMMRegister src) {
 }
 
 void Assembler::movq(XMMRegister dst, Register src) {
+  // Mixing AVX and non-AVX is expensive, catch those cases
+  DCHECK(!IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit(0x66);
+  emit_rex_64(dst, src);
+  emit(0x0F);
+  emit(0x6E);
+  emit_sse_operand(dst, src);
+}
+
+void Assembler::movq(XMMRegister dst, Operand src) {
+  // Mixing AVX and non-AVX is expensive, catch those cases
   DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
@@ -2893,6 +2905,7 @@ void Assembler::movq(XMMRegister dst, Register src) {
 }
 
 void Assembler::movq(Register dst, XMMRegister src) {
+  // Mixing AVX and non-AVX is expensive, catch those cases
   DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
@@ -2903,6 +2916,7 @@ void Assembler::movq(Register dst, XMMRegister src) {
 }
 
 void Assembler::movq(XMMRegister dst, XMMRegister src) {
+  // Mixing AVX and non-AVX is expensive, catch those cases
   DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   if (dst.low_bits() == 4) {
diff --git a/src/codegen/x64/assembler-x64.h b/src/codegen/x64/assembler-x64.h
index dc6acb67f4..0ec9a13046 100644
--- a/src/codegen/x64/assembler-x64.h
+++ b/src/codegen/x64/assembler-x64.h
@@ -969,6 +969,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void movd(XMMRegister dst, Operand src);
   void movd(Register dst, XMMRegister src);
   void movq(XMMRegister dst, Register src);
+  void movq(XMMRegister dst, Operand src);
   void movq(Register dst, XMMRegister src);
   void movq(XMMRegister dst, XMMRegister src);
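Note on the encoding above: 66 REX.W 0F 6E is the SSE2 form of MOVQ xmm, r/m64, and the new Operand overload reuses the same byte sequence so the splat can read its input straight from memory. As a rough reference, here is a standalone C++ model of the register-source emit sequence; the 0-15 integer register codes and the ModRM construction are simplified stand-ins for V8's XMMRegister/Register types:

    #include <cstdint>
    #include <vector>

    // Models Assembler::movq(XMMRegister, Register): 66 REX.W 0F 6E /r.
    // `xmm` and `gp` are assumed 0-15 hardware register codes.
    std::vector<uint8_t> EncodeMovqXmmFromGp(int xmm, int gp) {
      std::vector<uint8_t> code;
      code.push_back(0x66);       // operand-size prefix (selects the SSE form)
      uint8_t rex = 0x48;         // REX.W: 64-bit move, like emit_rex_64
      if (xmm >= 8) rex |= 0x04;  // REX.R extends the ModRM reg field
      if (gp >= 8) rex |= 0x01;   // REX.B extends the ModRM r/m field
      code.push_back(rex);
      code.push_back(0x0F);
      code.push_back(0x6E);       // MOVD/MOVQ; REX.W picks the 64-bit form
      // ModRM with mod=11 (register direct), reg=xmm, r/m=gp, mirroring
      // emit_sse_operand for a register operand.
      code.push_back(0xC0 | ((xmm & 7) << 3) | (gp & 7));
      return code;
    }

For example, EncodeMovqXmmFromGp(0, 0) yields 66 48 0F 6E C0, i.e. movq xmm0, rax.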
diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc
index d7fa926854..a8868ec6e8 100644
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -1849,6 +1849,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitF32x4Lt(node);
     case IrOpcode::kF32x4Le:
       return MarkAsSimd128(node), VisitF32x4Le(node);
+    case IrOpcode::kI64x2Splat:
+      return MarkAsSimd128(node), VisitI64x2Splat(node);
     case IrOpcode::kI32x4Splat:
       return MarkAsSimd128(node), VisitI32x4Splat(node);
     case IrOpcode::kI32x4ExtractLane:
@@ -2492,6 +2494,10 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC
         // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390
 
+#if !V8_TARGET_ARCH_X64
+void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_X64
+
 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
 
 void InstructionSelector::VisitParameter(Node* node) {
diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc
index 04ccf7d391..74034c3a13 100644
--- a/src/compiler/backend/x64/code-generator-x64.cc
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -2400,6 +2400,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64I64x2Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (instr->InputAt(0)->IsRegister()) {
+        __ movq(dst, i.InputRegister(0));
+      } else {
+        __ movq(dst, i.InputOperand(0));
+      }
+      __ pshufd(dst, dst, 0x44);
+      break;
+    }
     case kX64I32x4Splat: {
       XMMRegister dst = i.OutputSimd128Register();
       if (instr->InputAt(0)->IsRegister()) {
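The kX64I64x2Splat case first uses movq to place the 64-bit input in the low quadword of dst (zeroing the upper half), then duplicates that quadword with pshufd and the shuffle immediate 0x44 (binary 01 00 01 00, i.e. dword lanes 0,1,0,1). A scalar sketch of what the two instructions compute, assuming a little-endian host and a plain struct in place of an XMM register:

    #include <cstdint>
    #include <cstring>

    struct Vec128 { uint32_t lane[4]; };  // stand-in for an XMM register

    Vec128 I64x2SplatModel(uint64_t x) {
      Vec128 dst = {{0, 0, 0, 0}};
      std::memcpy(dst.lane, &x, sizeof(x));  // movq: x into lanes 0-1, rest zero
      const int imm = 0x44;                  // pshufd selector: lanes 0,1,0,1
      Vec128 out;
      for (int i = 0; i < 4; i++) {
        out.lane[i] = dst.lane[(imm >> (2 * i)) & 3];
      }
      return out;  // both 64-bit halves now hold x
    }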
diff --git a/src/compiler/backend/x64/instruction-codes-x64.h b/src/compiler/backend/x64/instruction-codes-x64.h
index 57ef26dbd7..bf92cef51a 100644
--- a/src/compiler/backend/x64/instruction-codes-x64.h
+++ b/src/compiler/backend/x64/instruction-codes-x64.h
@@ -177,6 +177,7 @@ namespace compiler {
   V(X64F32x4Ne)                    \
   V(X64F32x4Lt)                    \
   V(X64F32x4Le)                    \
+  V(X64I64x2Splat)                 \
   V(X64I32x4Splat)                 \
   V(X64I32x4ExtractLane)           \
   V(X64I32x4ReplaceLane)           \
diff --git a/src/compiler/backend/x64/instruction-scheduler-x64.cc b/src/compiler/backend/x64/instruction-scheduler-x64.cc
index 9d48e9175a..5670fa3b67 100644
--- a/src/compiler/backend/x64/instruction-scheduler-x64.cc
+++ b/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -143,6 +143,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4Ne:
     case kX64F32x4Lt:
     case kX64F32x4Le:
+    case kX64I64x2Splat:
     case kX64I32x4Splat:
     case kX64I32x4ExtractLane:
     case kX64I32x4ReplaceLane:
diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc
index 8e6336f576..ead00b6f71 100644
--- a/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -2668,6 +2668,7 @@ void InstructionSelector::VisitS128Zero(Node* node) {
          g.Use(node->InputAt(0)));                       \
   }
 SIMD_TYPES(VISIT_SIMD_SPLAT)
+VISIT_SIMD_SPLAT(I64x2)
 #undef VISIT_SIMD_SPLAT
 
 #define VISIT_SIMD_EXTRACT_LANE(Type) \
diff --git a/src/compiler/machine-operator.cc b/src/compiler/machine-operator.cc
index 2fc8c9a542..58b251e69a 100644
--- a/src/compiler/machine-operator.cc
+++ b/src/compiler/machine-operator.cc
@@ -262,6 +262,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(F32x4Ne, Operator::kCommutative, 2, 0, 1)                 \
   V(F32x4Lt, Operator::kNoProperties, 2, 0, 1)                \
   V(F32x4Le, Operator::kNoProperties, 2, 0, 1)                \
+  V(I64x2Splat, Operator::kNoProperties, 1, 0, 1)             \
   V(I32x4Splat, Operator::kNoProperties, 1, 0, 1)             \
   V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1)     \
   V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1)  \
diff --git a/src/compiler/machine-operator.h b/src/compiler/machine-operator.h
index ceab67decd..b843b6fa41 100644
--- a/src/compiler/machine-operator.h
+++ b/src/compiler/machine-operator.h
@@ -489,6 +489,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* F32x4Lt();
   const Operator* F32x4Le();
 
+  const Operator* I64x2Splat();
   const Operator* I32x4Splat();
   const Operator* I32x4ExtractLane(int32_t);
   const Operator* I32x4ReplaceLane(int32_t);
diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h
index ccc69f7ef7..74869f4f71 100644
--- a/src/compiler/opcodes.h
+++ b/src/compiler/opcodes.h
@@ -745,6 +745,7 @@
   V(F32x4Le)                 \
   V(F32x4Gt)                 \
   V(F32x4Ge)                 \
+  V(I64x2Splat)              \
   V(I32x4Splat)              \
   V(I32x4ExtractLane)        \
   V(I32x4ReplaceLane)        \
diff --git a/src/compiler/simd-scalar-lowering.cc b/src/compiler/simd-scalar-lowering.cc
index 862f363e63..3db19a6a6d 100644
--- a/src/compiler/simd-scalar-lowering.cc
+++ b/src/compiler/simd-scalar-lowering.cc
@@ -16,6 +16,7 @@ namespace internal {
 namespace compiler {
 namespace {
 
+static const int kNumLanes64 = 2;
 static const int kNumLanes32 = 4;
 static const int kNumLanes16 = 8;
 static const int kNumLanes8 = 16;
@@ -76,6 +77,8 @@ void SimdScalarLowering::LowerGraph() {
   }
 }
 
+#define FOREACH_INT64X2_OPCODE(V) V(I64x2Splat)
+
 #define FOREACH_INT32X4_OPCODE(V) \
   V(I32x4Splat)                   \
   V(I32x4ExtractLane)             \
@@ -208,6 +211,8 @@ void SimdScalarLowering::LowerGraph() {
 MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
   switch (simdType) {
+    case SimdType::kInt64x2:
+      return MachineType::Int64();
     case SimdType::kFloat32x4:
       return MachineType::Float32();
     case SimdType::kInt32x4:
@@ -223,6 +228,10 @@ MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
 void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
   switch (node->opcode()) {
 #define CASE_STMT(name) case IrOpcode::k##name:
+    FOREACH_INT64X2_OPCODE(CASE_STMT) {
+      replacements_[node->id()].type = SimdType::kInt64x2;
+      break;
+    }
     FOREACH_INT32X4_OPCODE(CASE_STMT)
     case IrOpcode::kReturn:
     case IrOpcode::kParameter:
@@ -326,7 +335,9 @@ static int GetReturnCountAfterLoweringSimd128(
 
 int SimdScalarLowering::NumLanes(SimdType type) {
   int num_lanes = 0;
-  if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
+  if (type == SimdType::kInt64x2) {
+    num_lanes = kNumLanes64;
+  } else if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
     num_lanes = kNumLanes32;
   } else if (type == SimdType::kInt16x8) {
     num_lanes = kNumLanes16;
@@ -1223,6 +1234,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
       LowerUnaryOp(node, SimdType::kInt32x4, machine()->RoundUint32ToFloat32());
       break;
     }
+    case IrOpcode::kI64x2Splat:
     case IrOpcode::kI32x4Splat:
     case IrOpcode::kF32x4Splat:
     case IrOpcode::kI16x8Splat:
diff --git a/src/compiler/simd-scalar-lowering.h b/src/compiler/simd-scalar-lowering.h
index 01ea195bdc..6de48ebccf 100644
--- a/src/compiler/simd-scalar-lowering.h
+++ b/src/compiler/simd-scalar-lowering.h
@@ -32,7 +32,13 @@ class SimdScalarLowering {
 
  private:
   enum class State : uint8_t { kUnvisited, kOnStack, kVisited };
-  enum class SimdType : uint8_t { kFloat32x4, kInt32x4, kInt16x8, kInt8x16 };
+  enum class SimdType : uint8_t {
+    kInt64x2,
+    kFloat32x4,
+    kInt32x4,
+    kInt16x8,
+    kInt8x16
+  };
 
 #if defined(V8_TARGET_BIG_ENDIAN)
   static constexpr int kLaneOffsets[16] = {15, 14, 13, 12, 11, 10, 9, 8,
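On targets without SIMD support, SimdScalarLowering rewrites each s128 value into kNumLanes64 = 2 scalar int64 nodes (per the NumLanes and MachineTypeFrom changes above); for a splat, every replacement is simply the scalar input. A rough value-level stand-in for what LowerNode produces for kI64x2Splat — the real code wires up graph Node* replacements rather than values:

    #include <array>
    #include <cstdint>

    constexpr int kNumLanes64 = 2;

    // Value-level sketch: an i64x2 splat lowers to two identical int64 "lanes".
    std::array<int64_t, kNumLanes64> LowerI64x2SplatModel(int64_t input) {
      std::array<int64_t, kNumLanes64> replacements;
      for (int64_t& lane : replacements) lane = input;
      return replacements;
    }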
diff --git a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc
index 0b212c9fb1..027f2b928e 100644
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@@ -4055,6 +4055,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
     case wasm::kExprF32x4Ge:
       return graph()->NewNode(mcgraph()->machine()->F32x4Le(), inputs[1],
                               inputs[0]);
+    case wasm::kExprI64x2Splat:
+      return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
     case wasm::kExprI32x4Splat:
       return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
     case wasm::kExprI32x4SConvertF32x4:
diff --git a/src/wasm/wasm-interpreter.cc b/src/wasm/wasm-interpreter.cc
index 35f769bb28..c6ec2c77ea 100644
--- a/src/wasm/wasm-interpreter.cc
+++ b/src/wasm/wasm-interpreter.cc
@@ -2134,6 +2134,7 @@ class ThreadImpl {
         Push(WasmValue(Simd128(s)));            \
         return true;                            \
       }
+      SPLAT_CASE(I64x2, int2, int64_t, 2)
       SPLAT_CASE(I32x4, int4, int32_t, 4)
       SPLAT_CASE(F32x4, float4, float, 4)
       SPLAT_CASE(I16x8, int8, int32_t, 8)
diff --git a/src/wasm/wasm-opcodes.cc b/src/wasm/wasm-opcodes.cc
index 88b9e90381..3b55561a9b 100644
--- a/src/wasm/wasm-opcodes.cc
+++ b/src/wasm/wasm-opcodes.cc
@@ -25,6 +25,7 @@ namespace wasm {
 #define CASE_REF_OP(name, str) CASE_OP(Ref##name, "ref." str)
 #define CASE_F32x4_OP(name, str) CASE_OP(F32x4##name, "f32x4." str)
 #define CASE_I32x4_OP(name, str) CASE_OP(I32x4##name, "i32x4." str)
+#define CASE_I64x2_OP(name, str) CASE_OP(I64x2##name, "i64x2." str)
 #define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
 #define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
 #define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
@@ -252,6 +253,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
     CASE_SIGN_OP(SIMDI, Ge, "ge")
     CASE_SIGN_OP(SIMDI, Shr, "shr")
     CASE_SIMDI_OP(Shl, "shl")
+    CASE_I64x2_OP(Splat, "splat")
     CASE_I32x4_OP(AddHoriz, "add_horizontal")
     CASE_I16x8_OP(AddHoriz, "add_horizontal")
     CASE_SIGN_OP(I16x8, AddSaturate, "add_saturate")
diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h
index 3f58b72636..ccbe6b3810 100644
--- a/src/wasm/wasm-opcodes.h
+++ b/src/wasm/wasm-opcodes.h
@@ -272,6 +272,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
   V(I8x16Splat, 0xfd04, s_i)    \
   V(I16x8Splat, 0xfd08, s_i)    \
   V(I32x4Splat, 0xfd0c, s_i)    \
+  V(I64x2Splat, 0xfd0f, s_l)    \
   V(F32x4Splat, 0xfd12, s_f)    \
   V(I8x16Eq, 0xfd18, s_ss)      \
   V(I8x16Ne, 0xfd19, s_ss)      \
@@ -560,6 +561,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
   V(s_f, kWasmS128, kWasmF32)              \
   V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
   V(s_i, kWasmS128, kWasmI32)              \
+  V(s_l, kWasmS128, kWasmI64)              \
   V(s_si, kWasmS128, kWasmS128, kWasmI32)  \
   V(i_s, kWasmI32, kWasmS128)              \
   V(v_is, kWasmStmt, kWasmI32, kWasmS128)  \
diff --git a/src/wasm/wasm-value.h b/src/wasm/wasm-value.h
index 8c7571b72f..457b2fefd8 100644
--- a/src/wasm/wasm-value.h
+++ b/src/wasm/wasm-value.h
@@ -17,6 +17,7 @@ namespace wasm {
 
 #define FOREACH_SIMD_TYPE(V)  \
   V(float, float4, f32x4, 4)  \
+  V(int64_t, int2, i64x2, 2)  \
   V(int32_t, int4, i32x4, 4)  \
   V(int16_t, int8, i16x8, 8)  \
   V(int8_t, int16, i8x16, 16)
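Per the opcode table above, i64x2.splat is 0xfd0f with the new s_l signature (one i64 in, one s128 out). Assuming 0xfd is the SIMD prefix byte and 0x0f the sub-opcode, the body of a function that splats its first i64 local would be encoded roughly as:

    #include <cstdint>

    // Hypothetical wire bytes for: (func (param i64) (result v128)
    //                                 local.get 0
    //                                 i64x2.splat)
    const uint8_t kI64x2SplatBody[] = {
        0x20, 0x00,  // local.get 0   -- push the i64 parameter
        0xfd, 0x0f,  // i64x2.splat   -- 0xfd prefix, 0x0f sub-opcode
    };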
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index 74a401725e..9325e00978 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -289,6 +289,7 @@ T Sqrt(T a) {
 #define WASM_SIMD_F32x4_REPLACE_LANE(lane, x, y) \
   x, y, WASM_SIMD_OP(kExprF32x4ReplaceLane), TO_BYTE(lane)
 
+#define WASM_SIMD_I64x2_SPLAT(x) WASM_SIMD_SPLAT(I64x2, x)
 #define WASM_SIMD_I32x4_SPLAT(x) WASM_SIMD_SPLAT(I32x4, x)
 #define WASM_SIMD_I32x4_EXTRACT_LANE(lane, x) \
   x, WASM_SIMD_OP(kExprI32x4ExtractLane), TO_BYTE(lane)
@@ -680,6 +681,26 @@ WASM_SIMD_TEST(F32x4Le) {
   RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
 }
 
+#if V8_TARGET_ARCH_X64
+WASM_SIMD_TEST(I64x2Splat) {
+  WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
+  // Set up a global to hold output vector.
+  int64_t* g = r.builder().AddGlobal<int64_t>(kWasmS128);
+  byte param1 = 0;
+  BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(param1))),
+        WASM_ONE);
+
+  FOR_INT64_INPUTS(x) {
+    r.Call(x);
+    int64_t expected = x;
+    for (int i = 0; i < 2; i++) {
+      int64_t actual = ReadLittleEndianValue<int64_t>(&g[i]);
+      CHECK_EQ(actual, expected);
+    }
+  }
+}
+#endif  // V8_TARGET_ARCH_X64
+
 WASM_SIMD_TEST(I32x4Splat) {
   WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
   // Set up a global to hold output vector.
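The test above splats every FOR_INT64_INPUTS value into an s128 global and reads both 8-byte lanes back with ReadLittleEndianValue<int64_t>. A minimal model of that check loop, with memcpy standing in for ReadLittleEndianValue (equivalent only on a little-endian host):

    #include <cstdint>
    #include <cstring>

    // Checks that both 64-bit lanes of the 16-byte global equal `expected`,
    // mirroring the verification loop in the I64x2Splat test.
    bool CheckI64x2SplatResult(const uint8_t (&global)[16], int64_t expected) {
      for (int i = 0; i < 2; i++) {
        int64_t actual;
        std::memcpy(&actual, global + 8 * i, sizeof(actual));  // read lane i
        if (actual != expected) return false;
      }
      return true;
    }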