[wasm-simd] Canonicalize shuffles when creating TurboFan graph
We currently canonicalize shuffles in the architecture specific instruction selector. This has the drawback that if we want to pattern match on nodes that have a shuffle as input, they need to individually canonicalize the shuffle. There can also be a subtle bug if we canonicalize the same shuffle node twice (see bug for details). This moves the canonicalization to "construction time", in wasm-compiler, when building the graph. As such, any pattern matches in instruction-selector will only need to deal with canonicalized shuffles. We introduce a new kind of parameter for shuffle nodes, ShuffleParameter, to store the 16 bytes plus a bool indicating if this is a swizzle. A swizzle essentially: inputs to the shuffle are the same or all indices only touch 1 input. We calculate this when canonicalizing, so store this bit of information inside of the node's parameter. We update the tests in x64 to handle special cases where, even though the node's inputs are not swapped (due to canonicalization), they need to be swapped for the specific instruction selected (e.g. palignr). The test data also contains canonicalized shuffles, so we have to manually canonicalize them. Bug: v8:11542 Change-Id: I4e78082267bd03d6caedf43d68d81ef3f5f364a8 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2762420 Reviewed-by: Bill Budge <bbudge@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#73495}
This commit is contained in:
parent
fdae1b6583
commit
d16eefe0f2
@ -2922,8 +2922,9 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t shuffle32x4[4];
|
||||
|
@ -3855,8 +3855,9 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1,
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
uint8_t shuffle32x4[4];
|
||||
Arm64OperandGenerator g(this);
|
||||
ArchOpcode opcode;
|
||||
|
@ -2877,8 +2877,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
|
||||
int imm_count = 0;
|
||||
static const int kMaxImms = 6;
|
||||
|
@ -3360,26 +3360,6 @@ FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor(
|
||||
}
|
||||
|
||||
#if V8_ENABLE_WEBASSEMBLY
|
||||
void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
|
||||
bool* is_swizzle) {
|
||||
// Get raw shuffle indices.
|
||||
base::Memcpy(shuffle, S128ImmediateParameterOf(node->op()).data(),
|
||||
kSimd128Size);
|
||||
bool needs_swap;
|
||||
bool inputs_equal = GetVirtualRegister(node->InputAt(0)) ==
|
||||
GetVirtualRegister(node->InputAt(1));
|
||||
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, shuffle, &needs_swap,
|
||||
is_swizzle);
|
||||
if (needs_swap) {
|
||||
SwapShuffleInputs(node);
|
||||
}
|
||||
// Duplicate the first input; for some shuffles on some architectures, it's
|
||||
// easiest to implement a swizzle as a shuffle so it might be used.
|
||||
if (*is_swizzle) {
|
||||
node->ReplaceInput(1, node->InputAt(0));
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
void InstructionSelector::SwapShuffleInputs(Node* node) {
|
||||
Node* input0 = node->InputAt(0);
|
||||
|
@ -656,10 +656,6 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
|
||||
// ===========================================================================
|
||||
|
||||
#if V8_ENABLE_WEBASSEMBLY
|
||||
// Canonicalize shuffles to make pattern matching simpler. Returns the shuffle
|
||||
// indices, and a boolean indicating if the shuffle is a swizzle (one input).
|
||||
void CanonicalizeShuffle(Node* node, uint8_t* shuffle, bool* is_swizzle);
|
||||
|
||||
// Swaps the two first input operands of the node, to help match shuffles
|
||||
// to specific architectural instructions.
|
||||
void SwapShuffleInputs(Node* node);
|
||||
|
@ -2429,8 +2429,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
uint8_t shuffle32x4[4];
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
|
@ -3170,8 +3170,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
uint8_t shuffle32x4[4];
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
|
@ -2426,8 +2426,9 @@ SIMD_VISIT_PMIN_MAX(F32x4Pmax)
|
||||
#if V8_ENABLE_WEBASSEMBLY
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
PPCOperandGenerator g(this);
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
|
@ -3400,8 +3400,9 @@ bool TryMatchShufps(const uint8_t* shuffle32x4) {
|
||||
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
auto param = ShuffleParameterOf(node->op());
|
||||
bool is_swizzle = param.is_swizzle();
|
||||
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
|
||||
|
||||
int imm_count = 0;
|
||||
static const int kMaxImms = 6;
|
||||
|
@ -1871,8 +1871,7 @@ std::ostream& operator<<(std::ostream& os, S128ImmediateParameter const& p) {
|
||||
}
|
||||
|
||||
S128ImmediateParameter const& S128ImmediateParameterOf(Operator const* op) {
|
||||
DCHECK(IrOpcode::kI8x16Shuffle == op->opcode() ||
|
||||
IrOpcode::kS128Const == op->opcode());
|
||||
DCHECK(IrOpcode::kS128Const == op->opcode());
|
||||
return OpParameter<S128ImmediateParameter>(op);
|
||||
}
|
||||
|
||||
@ -1882,11 +1881,33 @@ const Operator* MachineOperatorBuilder::S128Const(const uint8_t value[16]) {
|
||||
S128ImmediateParameter(value));
|
||||
}
|
||||
|
||||
const Operator* MachineOperatorBuilder::I8x16Shuffle(
|
||||
const uint8_t shuffle[16]) {
|
||||
return zone_->New<Operator1<S128ImmediateParameter>>(
|
||||
IrOpcode::kI8x16Shuffle, Operator::kPure, "Shuffle", 2, 0, 0, 1, 0, 0,
|
||||
S128ImmediateParameter(shuffle));
|
||||
ShuffleParameter const& ShuffleParameterOf(Operator const* op) {
|
||||
DCHECK(IrOpcode::kI8x16Shuffle == op->opcode());
|
||||
return OpParameter<ShuffleParameter>(op);
|
||||
}
|
||||
|
||||
bool operator==(ShuffleParameter const& lhs, ShuffleParameter const& rhs) {
|
||||
return (lhs.imm() == rhs.imm()) && (lhs.is_swizzle() == rhs.is_swizzle());
|
||||
}
|
||||
|
||||
bool operator!=(ShuffleParameter const& lhs, ShuffleParameter const& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
size_t hash_value(ShuffleParameter const& p) {
|
||||
return base::hash_combine(p.imm(), p.is_swizzle());
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, ShuffleParameter const& p) {
|
||||
os << p.imm() << " (is_swizzle: " << p.is_swizzle() << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
const Operator* MachineOperatorBuilder::I8x16Shuffle(const uint8_t shuffle[16],
|
||||
bool is_swizzle) {
|
||||
return zone_->New<Operator1<ShuffleParameter>>(
|
||||
IrOpcode::kI8x16Shuffle, Operator::kPure, "I8x16Shuffle", 2, 0, 0, 1, 0,
|
||||
0, ShuffleParameter(shuffle, is_swizzle));
|
||||
}
|
||||
|
||||
StackCheckKind StackCheckKindOf(Operator const* op) {
|
||||
|
@ -204,6 +204,24 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
|
||||
V8_EXPORT_PRIVATE S128ImmediateParameter const& S128ImmediateParameterOf(
|
||||
Operator const* op) V8_WARN_UNUSED_RESULT;
|
||||
|
||||
struct ShuffleParameter {
|
||||
public:
|
||||
ShuffleParameter(const uint8_t immediate[16], bool is_swizzle)
|
||||
: s128_imm_(immediate), is_swizzle_(is_swizzle) {}
|
||||
const S128ImmediateParameter imm() const { return s128_imm_; }
|
||||
bool is_swizzle() const { return is_swizzle_; }
|
||||
|
||||
private:
|
||||
S128ImmediateParameter s128_imm_;
|
||||
bool is_swizzle_;
|
||||
};
|
||||
|
||||
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
|
||||
ShuffleParameter const&);
|
||||
|
||||
V8_EXPORT_PRIVATE ShuffleParameter const& ShuffleParameterOf(Operator const* op)
|
||||
V8_WARN_UNUSED_RESULT;
|
||||
|
||||
StackCheckKind StackCheckKindOf(Operator const* op) V8_WARN_UNUSED_RESULT;
|
||||
|
||||
// ShiftKind::kShiftOutZeros means that it is guaranteed that the bits shifted
|
||||
@ -813,7 +831,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* S128AndNot();
|
||||
|
||||
const Operator* I8x16Swizzle();
|
||||
const Operator* I8x16Shuffle(const uint8_t shuffle[16]);
|
||||
const Operator* I8x16Shuffle(const uint8_t shuffle[16], bool is_swizzle);
|
||||
|
||||
const Operator* V128AnyTrue();
|
||||
const Operator* I64x2AllTrue();
|
||||
|
@ -2234,7 +2234,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
|
||||
}
|
||||
case IrOpcode::kI8x16Shuffle: {
|
||||
DCHECK_EQ(2, node->InputCount());
|
||||
S128ImmediateParameter shuffle = S128ImmediateParameterOf(node->op());
|
||||
S128ImmediateParameter shuffle = ShuffleParameterOf(node->op()).imm();
|
||||
Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
|
||||
Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
|
||||
Node** rep_node = zone()->NewArray<Node*>(16);
|
||||
|
@ -5180,8 +5180,21 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
|
||||
Node* WasmGraphBuilder::Simd8x16ShuffleOp(const uint8_t shuffle[16],
|
||||
Node* const* inputs) {
|
||||
has_simd_ = true;
|
||||
return graph()->NewNode(mcgraph()->machine()->I8x16Shuffle(shuffle),
|
||||
inputs[0], inputs[1]);
|
||||
uint8_t canonicalized_shuffle[16];
|
||||
base::Memcpy(canonicalized_shuffle, shuffle, kSimd128Size);
|
||||
bool needs_swap;
|
||||
bool inputs_equal = inputs[0]->id() == inputs[1]->id();
|
||||
bool is_swizzle;
|
||||
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, canonicalized_shuffle,
|
||||
&needs_swap, &is_swizzle);
|
||||
Node* lhs = needs_swap ? inputs[1] : inputs[0];
|
||||
Node* rhs = needs_swap ? inputs[0] : inputs[1];
|
||||
if (is_swizzle) {
|
||||
rhs = lhs;
|
||||
}
|
||||
return graph()->NewNode(
|
||||
mcgraph()->machine()->I8x16Shuffle(canonicalized_shuffle, is_swizzle),
|
||||
lhs, rhs);
|
||||
}
|
||||
|
||||
Node* WasmGraphBuilder::AtomicOp(wasm::WasmOpcode opcode, Node* const* inputs,
|
||||
|
@ -2275,7 +2275,7 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
|
||||
const MachineType type = MachineType::Simd128();
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F32x4Mul(), m.Parameter(2), shuffle));
|
||||
Stream s = m.Build();
|
||||
@ -2291,7 +2291,7 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
|
||||
// Multiplication operator should be commutative, so test shuffle op as lhs.
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F32x4Mul(), shuffle, m.Parameter(2)));
|
||||
Stream s = m.Build();
|
||||
@ -2315,8 +2315,8 @@ TEST_F(InstructionSelectorTest, SimdF32x4MulWithDupNegativeTest) {
|
||||
const uint8_t mask[kSimd128Size] = {0};
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask)), m.Parameter(0),
|
||||
m.Parameter(1));
|
||||
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask, true)),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F32x4Mul(), m.Parameter(2), shuffle));
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(2U, s.size());
|
||||
@ -2361,7 +2361,7 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
|
||||
const MachineType type = MachineType::Simd128();
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F64x2Mul(), m.Parameter(2), shuffle));
|
||||
Stream s = m.Build();
|
||||
@ -2377,7 +2377,7 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
|
||||
// Multiplication operator should be commutative, so test shuffle op as lhs.
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
|
||||
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F64x2Mul(), shuffle, m.Parameter(2)));
|
||||
Stream s = m.Build();
|
||||
@ -2401,8 +2401,8 @@ TEST_F(InstructionSelectorTest, SimdF64x2MulWithDupNegativeTest) {
|
||||
const uint8_t mask[kSimd128Size] = {0};
|
||||
{
|
||||
StreamBuilder m(this, type, type, type, type);
|
||||
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask)), m.Parameter(0),
|
||||
m.Parameter(1));
|
||||
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask, true)),
|
||||
m.Parameter(0), m.Parameter(1));
|
||||
m.Return(m.AddNode(m.machine()->F64x2Mul(), m.Parameter(2), shuffle));
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(2U, s.size());
|
||||
|
@ -1943,6 +1943,7 @@ struct ArchShuffle {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
ArchOpcode arch_opcode;
|
||||
size_t input_count;
|
||||
bool inputs_are_swapped = false;
|
||||
};
|
||||
|
||||
static constexpr ArchShuffle kArchShuffles[] = {
|
||||
@ -2054,16 +2055,19 @@ static constexpr ArchShuffle kArchShuffles[] = {
|
||||
{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2},
|
||||
kX64S8x16Alignr,
|
||||
3,
|
||||
true,
|
||||
},
|
||||
{
|
||||
{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1},
|
||||
kX64S8x16Alignr,
|
||||
3,
|
||||
true,
|
||||
},
|
||||
{
|
||||
{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17},
|
||||
kX64S8x16Alignr,
|
||||
3,
|
||||
true,
|
||||
},
|
||||
// These are matched by TryMatch32x4Shuffle && is_swizzle.
|
||||
{
|
||||
@ -2158,14 +2162,35 @@ TEST_P(InstructionSelectorSIMDArchShuffleTest, SIMDArchShuffle) {
|
||||
StreamBuilder m(this, type, type, type);
|
||||
auto param = GetParam();
|
||||
auto shuffle = param.shuffle;
|
||||
const Operator* op = m.machine()->I8x16Shuffle(shuffle);
|
||||
Node* n = m.AddNode(op, m.Parameter(0), m.Parameter(1));
|
||||
|
||||
// The shuffle constants defined in the test cases are not canonicalized.
|
||||
bool needs_swap;
|
||||
bool inputs_equal = false;
|
||||
bool is_swizzle;
|
||||
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, shuffle, &needs_swap,
|
||||
&is_swizzle);
|
||||
|
||||
const Operator* op = m.machine()->I8x16Shuffle(shuffle, is_swizzle);
|
||||
Node* const p0 = m.Parameter(0);
|
||||
Node* const p1 = m.Parameter(1);
|
||||
Node* n = m.AddNode(op, p0, p1);
|
||||
m.Return(n);
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(1U, s.size());
|
||||
EXPECT_EQ(param.arch_opcode, s[0]->arch_opcode());
|
||||
ASSERT_EQ(param.input_count, s[0]->InputCount());
|
||||
EXPECT_EQ(1U, s[0]->OutputCount());
|
||||
if (param.inputs_are_swapped) {
|
||||
ASSERT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(1)));
|
||||
if (!is_swizzle) {
|
||||
ASSERT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(0)));
|
||||
}
|
||||
} else {
|
||||
ASSERT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
||||
if (!is_swizzle) {
|
||||
ASSERT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user