[wasm simd] Implement I64x2Splat on x64
Bug: v8:8460
Change-Id: Id159c81cd2d25924be96e49c64073e154ef32e6a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1667867
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Auto-Submit: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62475}
This commit is contained in:
parent
0c2cd56551
commit
df54d51148
@ -2883,6 +2883,18 @@ void Assembler::movd(Register dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
// Emits the non-AVX (SSE2) move of a general-purpose register into an XMM
// register. The byte sequence is: 0x66 prefix, a REX prefix produced by
// emit_rex_64(dst, src), the two opcode bytes 0x0F 0x6E, and finally the
// register operand bytes produced by emit_sse_operand(dst, src).
// NOTE(review): this matches the Intel encoding "66 REX.W 0F 6E /r"
// (MOVQ xmm, r/m64), assuming emit_rex_64 sets REX.W — confirm.
void Assembler::movq(XMMRegister dst, Register src) {
|
||||
// Mixing AVX and non-AVX is expensive, catch those cases
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0x66);  // Prefix byte, emitted before the REX prefix.
|
||||
emit_rex_64(dst, src);
|
||||
emit(0x0F);  // First opcode byte.
|
||||
emit(0x6E);  // Second opcode byte.
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
void Assembler::movq(XMMRegister dst, Operand src) {
|
||||
// Mixing AVX and non-AVX is expensive, catch those cases
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0x66);
|
||||
@ -2893,6 +2905,7 @@ void Assembler::movq(XMMRegister dst, Register src) {
|
||||
}
|
||||
|
||||
void Assembler::movq(Register dst, XMMRegister src) {
|
||||
// Mixing AVX and non-AVX is expensive, catch those cases
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0x66);
|
||||
@ -2903,6 +2916,7 @@ void Assembler::movq(Register dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
void Assembler::movq(XMMRegister dst, XMMRegister src) {
|
||||
// Mixing AVX and non-AVX is expensive, catch those cases
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
if (dst.low_bits() == 4) {
|
||||
|
@ -969,6 +969,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
void movd(XMMRegister dst, Operand src);
|
||||
void movd(Register dst, XMMRegister src);
|
||||
void movq(XMMRegister dst, Register src);
|
||||
void movq(XMMRegister dst, Operand src);
|
||||
void movq(Register dst, XMMRegister src);
|
||||
void movq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
|
@ -1849,6 +1849,8 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
return MarkAsSimd128(node), VisitF32x4Lt(node);
|
||||
case IrOpcode::kF32x4Le:
|
||||
return MarkAsSimd128(node), VisitF32x4Le(node);
|
||||
case IrOpcode::kI64x2Splat:
|
||||
return MarkAsSimd128(node), VisitI64x2Splat(node);
|
||||
case IrOpcode::kI32x4Splat:
|
||||
return MarkAsSimd128(node), VisitI32x4Splat(node);
|
||||
case IrOpcode::kI32x4ExtractLane:
|
||||
@ -2492,6 +2494,10 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC
|
||||
// !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
// Stub compiled only when the target is not x64 (see the enclosing
// #if !V8_TARGET_ARCH_X64): selecting an I64x2Splat node there hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64
|
||||
|
||||
// Handles a FinishRegion node by forwarding it unchanged to EmitIdentity().
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
void InstructionSelector::VisitParameter(Node* node) {
|
||||
|
@ -2400,6 +2400,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
// I64x2Splat: copy the 64-bit input into the low half of the output XMM
// register, then duplicate that half into the high half.
case kX64I64x2Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// The input may live in a general-purpose register or be given as an
// operand; pick the matching movq overload.
if (instr->InputAt(0)->IsRegister()) {
|
||||
__ movq(dst, i.InputRegister(0));
|
||||
} else {
|
||||
__ movq(dst, i.InputOperand(0));
|
||||
}
|
||||
// 0x44 == 0b01'00'01'00: result dwords 0..3 come from source dwords
// 0, 1, 0, 1, i.e. the low 64 bits are replicated into both 64-bit lanes.
__ pshufd(dst, dst, 0x44);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
if (instr->InputAt(0)->IsRegister()) {
|
||||
|
@ -177,6 +177,7 @@ namespace compiler {
|
||||
V(X64F32x4Ne) \
|
||||
V(X64F32x4Lt) \
|
||||
V(X64F32x4Le) \
|
||||
V(X64I64x2Splat) \
|
||||
V(X64I32x4Splat) \
|
||||
V(X64I32x4ExtractLane) \
|
||||
V(X64I32x4ReplaceLane) \
|
||||
|
@ -143,6 +143,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64F32x4Ne:
|
||||
case kX64F32x4Lt:
|
||||
case kX64F32x4Le:
|
||||
case kX64I64x2Splat:
|
||||
case kX64I32x4Splat:
|
||||
case kX64I32x4ExtractLane:
|
||||
case kX64I32x4ReplaceLane:
|
||||
|
@ -2668,6 +2668,7 @@ void InstructionSelector::VisitS128Zero(Node* node) {
|
||||
g.Use(node->InputAt(0))); \
|
||||
}
|
||||
SIMD_TYPES(VISIT_SIMD_SPLAT)
|
||||
VISIT_SIMD_SPLAT(I64x2)
|
||||
#undef VISIT_SIMD_SPLAT
|
||||
|
||||
#define VISIT_SIMD_EXTRACT_LANE(Type) \
|
||||
|
@ -262,6 +262,7 @@ MachineType AtomicOpType(Operator const* op) {
|
||||
V(F32x4Ne, Operator::kCommutative, 2, 0, 1) \
|
||||
V(F32x4Lt, Operator::kNoProperties, 2, 0, 1) \
|
||||
V(F32x4Le, Operator::kNoProperties, 2, 0, 1) \
|
||||
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
|
||||
|
@ -489,6 +489,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* F32x4Lt();
|
||||
const Operator* F32x4Le();
|
||||
|
||||
const Operator* I64x2Splat();
|
||||
const Operator* I32x4Splat();
|
||||
const Operator* I32x4ExtractLane(int32_t);
|
||||
const Operator* I32x4ReplaceLane(int32_t);
|
||||
|
@ -745,6 +745,7 @@
|
||||
V(F32x4Le) \
|
||||
V(F32x4Gt) \
|
||||
V(F32x4Ge) \
|
||||
V(I64x2Splat) \
|
||||
V(I32x4Splat) \
|
||||
V(I32x4ExtractLane) \
|
||||
V(I32x4ReplaceLane) \
|
||||
|
@ -16,6 +16,7 @@ namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
namespace {
|
||||
// Lane counts keyed by lane width in bits (the numeric suffix). In every case
// width * count == 128.
static const int kNumLanes64 = 2;
|
||||
static const int kNumLanes32 = 4;
|
||||
static const int kNumLanes16 = 8;
|
||||
static const int kNumLanes8 = 16;
|
||||
@ -76,6 +77,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
}
|
||||
}
|
||||
|
||||
// Opcodes that SetLoweredType() tags as SimdType::kInt64x2. Currently the
// list contains only I64x2Splat.
#define FOREACH_INT64X2_OPCODE(V) V(I64x2Splat)
|
||||
|
||||
#define FOREACH_INT32X4_OPCODE(V) \
|
||||
V(I32x4Splat) \
|
||||
V(I32x4ExtractLane) \
|
||||
@ -208,6 +211,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
|
||||
MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
|
||||
switch (simdType) {
|
||||
case SimdType::kInt64x2:
|
||||
return MachineType::Int64();
|
||||
case SimdType::kFloat32x4:
|
||||
return MachineType::Float32();
|
||||
case SimdType::kInt32x4:
|
||||
@ -223,6 +228,10 @@ MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
|
||||
void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
|
||||
switch (node->opcode()) {
|
||||
#define CASE_STMT(name) case IrOpcode::k##name:
|
||||
FOREACH_INT64X2_OPCODE(CASE_STMT) {
|
||||
replacements_[node->id()].type = SimdType::kInt64x2;
|
||||
break;
|
||||
}
|
||||
FOREACH_INT32X4_OPCODE(CASE_STMT)
|
||||
case IrOpcode::kReturn:
|
||||
case IrOpcode::kParameter:
|
||||
@ -326,7 +335,9 @@ static int GetReturnCountAfterLoweringSimd128(
|
||||
|
||||
int SimdScalarLowering::NumLanes(SimdType type) {
|
||||
int num_lanes = 0;
|
||||
if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
|
||||
if (type == SimdType::kInt64x2) {
|
||||
num_lanes = kNumLanes64;
|
||||
} else if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
|
||||
num_lanes = kNumLanes32;
|
||||
} else if (type == SimdType::kInt16x8) {
|
||||
num_lanes = kNumLanes16;
|
||||
@ -1223,6 +1234,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
|
||||
LowerUnaryOp(node, SimdType::kInt32x4, machine()->RoundUint32ToFloat32());
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI64x2Splat:
|
||||
case IrOpcode::kI32x4Splat:
|
||||
case IrOpcode::kF32x4Splat:
|
||||
case IrOpcode::kI16x8Splat:
|
||||
|
@ -32,7 +32,13 @@ class SimdScalarLowering {
|
||||
private:
|
||||
// Per-node visitation state.
// NOTE(review): presumably tracks the graph walk done by LowerGraph()
// (not yet seen / currently on the work stack / done) — confirm against the
// implementation.
enum class State : uint8_t { kUnvisited, kOnStack, kVisited };
|
||||
|
||||
enum class SimdType : uint8_t { kFloat32x4, kInt32x4, kInt16x8, kInt8x16 };
|
||||
// Lane layouts used when lowering SIMD nodes to scalars. kInt64x2 is the
// 64-bit integer layout: NumLanes() maps it to kNumLanes64 and
// MachineTypeFrom() maps it to MachineType::Int64().
enum class SimdType : uint8_t {
|
||||
kInt64x2,
|
||||
kFloat32x4,
|
||||
kInt32x4,
|
||||
kInt16x8,
|
||||
kInt8x16
|
||||
};
|
||||
|
||||
#if defined(V8_TARGET_BIG_ENDIAN)
|
||||
static constexpr int kLaneOffsets[16] = {15, 14, 13, 12, 11, 10, 9, 8,
|
||||
|
@ -4055,6 +4055,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
|
||||
case wasm::kExprF32x4Ge:
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4Le(), inputs[1],
|
||||
inputs[0]);
|
||||
case wasm::kExprI64x2Splat:
|
||||
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
|
||||
case wasm::kExprI32x4Splat:
|
||||
return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
|
||||
case wasm::kExprI32x4SConvertF32x4:
|
||||
|
@ -2134,6 +2134,7 @@ class ThreadImpl {
|
||||
Push(WasmValue(Simd128(s))); \
|
||||
return true; \
|
||||
}
|
||||
SPLAT_CASE(I64x2, int2, int64_t, 2)
|
||||
SPLAT_CASE(I32x4, int4, int32_t, 4)
|
||||
SPLAT_CASE(F32x4, float4, float, 4)
|
||||
SPLAT_CASE(I16x8, int8, int32_t, 8)
|
||||
|
@ -25,6 +25,7 @@ namespace wasm {
|
||||
// Per-prefix wrappers around CASE_OP: each pastes its opcode prefix onto
// `name` (e.g. I64x2##name) and prepends the matching textual prefix to `str`
// (e.g. "i64x2." str).
#define CASE_REF_OP(name, str) CASE_OP(Ref##name, "ref." str)
|
||||
#define CASE_F32x4_OP(name, str) CASE_OP(F32x4##name, "f32x4." str)
|
||||
#define CASE_I32x4_OP(name, str) CASE_OP(I32x4##name, "i32x4." str)
|
||||
#define CASE_I64x2_OP(name, str) CASE_OP(I64x2##name, "i64x2." str)
|
||||
#define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
|
||||
#define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
|
||||
#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
|
||||
@ -252,6 +253,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
||||
CASE_SIGN_OP(SIMDI, Ge, "ge")
|
||||
CASE_SIGN_OP(SIMDI, Shr, "shr")
|
||||
CASE_SIMDI_OP(Shl, "shl")
|
||||
CASE_I64x2_OP(Splat, "splat")
|
||||
CASE_I32x4_OP(AddHoriz, "add_horizontal")
|
||||
CASE_I16x8_OP(AddHoriz, "add_horizontal")
|
||||
CASE_SIGN_OP(I16x8, AddSaturate, "add_saturate")
|
||||
|
@ -272,6 +272,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
|
||||
V(I8x16Splat, 0xfd04, s_i) \
|
||||
V(I16x8Splat, 0xfd08, s_i) \
|
||||
V(I32x4Splat, 0xfd0c, s_i) \
|
||||
V(I64x2Splat, 0xfd0f, s_l) \
|
||||
V(F32x4Splat, 0xfd12, s_f) \
|
||||
V(I8x16Eq, 0xfd18, s_ss) \
|
||||
V(I8x16Ne, 0xfd19, s_ss) \
|
||||
@ -560,6 +561,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
|
||||
V(s_f, kWasmS128, kWasmF32) \
|
||||
V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
|
||||
V(s_i, kWasmS128, kWasmI32) \
|
||||
V(s_l, kWasmS128, kWasmI64) \
|
||||
V(s_si, kWasmS128, kWasmS128, kWasmI32) \
|
||||
V(i_s, kWasmI32, kWasmS128) \
|
||||
V(v_is, kWasmStmt, kWasmI32, kWasmS128) \
|
||||
|
@ -17,6 +17,7 @@ namespace wasm {
|
||||
|
||||
// X-macro listing the SIMD lane layouts. Each row passes four arguments to V:
// a scalar C type, two identifiers, and a lane count.
// NOTE(review): the second argument looks like the vector struct name and the
// third like a short lane-layout tag — confirm at the expansion sites.
#define FOREACH_SIMD_TYPE(V) \
|
||||
V(float, float4, f32x4, 4) \
|
||||
V(int64_t, int2, i64x2, 2) \
|
||||
V(int32_t, int4, i32x4, 4) \
|
||||
V(int16_t, int8, i16x8, 8) \
|
||||
V(int8_t, int16, i8x16, 16)
|
||||
|
@ -289,6 +289,7 @@ T Sqrt(T a) {
|
||||
#define WASM_SIMD_F32x4_REPLACE_LANE(lane, x, y) \
|
||||
x, y, WASM_SIMD_OP(kExprF32x4ReplaceLane), TO_BYTE(lane)
|
||||
|
||||
// Splat helpers: each forwards its operand expression `x` to WASM_SIMD_SPLAT
// together with the lane-layout prefix (I64x2 or I32x4).
#define WASM_SIMD_I64x2_SPLAT(x) WASM_SIMD_SPLAT(I64x2, x)
|
||||
#define WASM_SIMD_I32x4_SPLAT(x) WASM_SIMD_SPLAT(I32x4, x)
|
||||
#define WASM_SIMD_I32x4_EXTRACT_LANE(lane, x) \
|
||||
x, WASM_SIMD_OP(kExprI32x4ExtractLane), TO_BYTE(lane)
|
||||
@ -680,6 +681,26 @@ WASM_SIMD_TEST(F32x4Le) {
|
||||
RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_X64
|
||||
// Checks i64x2.splat: the generated function splats its int64 parameter into
// an s128 global, and the test then verifies that both 64-bit lanes of that
// global equal the input, for every value produced by FOR_INT64_INPUTS.
WASM_SIMD_TEST(I64x2Splat) {
|
||||
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
|
||||
// Set up a global to hold output vector.
|
||||
int64_t* g = r.builder().AddGlobal<int64_t>(kWasmS128);
|
||||
byte param1 = 0;
|
||||
// Function body: global[0] = i64x2.splat(local[param1]); WASM_ONE supplies
// the function's result.
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(param1))),
|
||||
WASM_ONE);
|
||||
|
||||
FOR_INT64_INPUTS(x) {
|
||||
// The call's return value is ignored; only the global is inspected.
r.Call(x);
|
||||
int64_t expected = x;
|
||||
// Inspect both 64-bit lanes of the global.
for (int i = 0; i < 2; i++) {
|
||||
int64_t actual = ReadLittleEndianValue<int64_t>(&g[i]);
|
||||
CHECK_EQ(actual, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
|
||||
WASM_SIMD_TEST(I32x4Splat) {
|
||||
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
|
||||
// Set up a global to hold output vector.
|
||||
|
Loading…
Reference in New Issue
Block a user