[wasm-simd] Canonicalize shuffles when creating TurboFan graph

We currently canonicalize shuffles in the architecture specific
instruction selector. This has the drawback that if we want to pattern
match on nodes that have a shuffle as input, they need to individually
canonicalize the shuffle. There can also be a subtle bug if we
canonicalize the same shuffle node twice (see bug for details).

This moves the canonicalization to "construction time", in
wasm-compiler, when building the graph. As such, any pattern matches in
instruction-selector will only need to deal with canonicalized shuffles.

We introduce a new kind of parameter for shuffle nodes,
ShuffleParameter, to store the 16 bytes plus a bool indicating if this
is a swizzle. A swizzle essentially: inputs to the shuffle are the same
or all indices only touch 1 input. We calculate this when
canonicalizing, so store this bit of information inside of the node's
parameter.

We update the tests in x64 to handle special cases where, even though
the node's inputs are not swapped (due to canonicalization), they need
to be swapped for the specific instruction selected (e.g. palignr). The
test data also contains canonicalized shuffles, so we have to manually
canonicalize them.

Bug: v8:11542
Change-Id: I4e78082267bd03d6caedf43d68d81ef3f5f364a8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2762420
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73495}
This commit is contained in:
Ng Zhi An 2021-03-16 17:24:52 -07:00 committed by Commit Bot
parent fdae1b6583
commit d16eefe0f2
15 changed files with 119 additions and 59 deletions

View File

@ -2922,8 +2922,9 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t shuffle32x4[4];

View File

@ -3855,8 +3855,9 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1,
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
uint8_t shuffle32x4[4];
Arm64OperandGenerator g(this);
ArchOpcode opcode;

View File

@ -2877,8 +2877,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
int imm_count = 0;
static const int kMaxImms = 6;

View File

@ -3360,26 +3360,6 @@ FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor(
}
#if V8_ENABLE_WEBASSEMBLY
void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
bool* is_swizzle) {
// Get raw shuffle indices.
base::Memcpy(shuffle, S128ImmediateParameterOf(node->op()).data(),
kSimd128Size);
bool needs_swap;
bool inputs_equal = GetVirtualRegister(node->InputAt(0)) ==
GetVirtualRegister(node->InputAt(1));
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, shuffle, &needs_swap,
is_swizzle);
if (needs_swap) {
SwapShuffleInputs(node);
}
// Duplicate the first input; for some shuffles on some architectures, it's
// easiest to implement a swizzle as a shuffle so it might be used.
if (*is_swizzle) {
node->ReplaceInput(1, node->InputAt(0));
}
}
// static
void InstructionSelector::SwapShuffleInputs(Node* node) {
Node* input0 = node->InputAt(0);

View File

@ -656,10 +656,6 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
// ===========================================================================
#if V8_ENABLE_WEBASSEMBLY
// Canonicalize shuffles to make pattern matching simpler. Returns the shuffle
// indices, and a boolean indicating if the shuffle is a swizzle (one input).
void CanonicalizeShuffle(Node* node, uint8_t* shuffle, bool* is_swizzle);
// Swaps the two first input operands of the node, to help match shuffles
// to specific architectural instructions.
void SwapShuffleInputs(Node* node);

View File

@ -2429,8 +2429,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
uint8_t shuffle32x4[4];
ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),

View File

@ -3170,8 +3170,9 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
uint8_t shuffle32x4[4];
ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),

View File

@ -2426,8 +2426,9 @@ SIMD_VISIT_PMIN_MAX(F32x4Pmax)
#if V8_ENABLE_WEBASSEMBLY
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
PPCOperandGenerator g(this);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);

View File

@ -3400,8 +3400,9 @@ bool TryMatchShufps(const uint8_t* shuffle32x4) {
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
auto param = ShuffleParameterOf(node->op());
bool is_swizzle = param.is_swizzle();
base::Memcpy(shuffle, param.imm().data(), kSimd128Size);
int imm_count = 0;
static const int kMaxImms = 6;

View File

@ -1871,8 +1871,7 @@ std::ostream& operator<<(std::ostream& os, S128ImmediateParameter const& p) {
}
S128ImmediateParameter const& S128ImmediateParameterOf(Operator const* op) {
DCHECK(IrOpcode::kI8x16Shuffle == op->opcode() ||
IrOpcode::kS128Const == op->opcode());
DCHECK(IrOpcode::kS128Const == op->opcode());
return OpParameter<S128ImmediateParameter>(op);
}
@ -1882,11 +1881,33 @@ const Operator* MachineOperatorBuilder::S128Const(const uint8_t value[16]) {
S128ImmediateParameter(value));
}
const Operator* MachineOperatorBuilder::I8x16Shuffle(
const uint8_t shuffle[16]) {
return zone_->New<Operator1<S128ImmediateParameter>>(
IrOpcode::kI8x16Shuffle, Operator::kPure, "Shuffle", 2, 0, 0, 1, 0, 0,
S128ImmediateParameter(shuffle));
ShuffleParameter const& ShuffleParameterOf(Operator const* op) {
DCHECK(IrOpcode::kI8x16Shuffle == op->opcode());
return OpParameter<ShuffleParameter>(op);
}
bool operator==(ShuffleParameter const& lhs, ShuffleParameter const& rhs) {
return (lhs.imm() == rhs.imm()) && (lhs.is_swizzle() == rhs.is_swizzle());
}
bool operator!=(ShuffleParameter const& lhs, ShuffleParameter const& rhs) {
return !(lhs == rhs);
}
size_t hash_value(ShuffleParameter const& p) {
return base::hash_combine(p.imm(), p.is_swizzle());
}
std::ostream& operator<<(std::ostream& os, ShuffleParameter const& p) {
os << p.imm() << " (is_swizzle: " << p.is_swizzle() << ")";
return os;
}
const Operator* MachineOperatorBuilder::I8x16Shuffle(const uint8_t shuffle[16],
bool is_swizzle) {
return zone_->New<Operator1<ShuffleParameter>>(
IrOpcode::kI8x16Shuffle, Operator::kPure, "I8x16Shuffle", 2, 0, 0, 1, 0,
0, ShuffleParameter(shuffle, is_swizzle));
}
StackCheckKind StackCheckKindOf(Operator const* op) {

View File

@ -204,6 +204,24 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
V8_EXPORT_PRIVATE S128ImmediateParameter const& S128ImmediateParameterOf(
Operator const* op) V8_WARN_UNUSED_RESULT;
struct ShuffleParameter {
public:
ShuffleParameter(const uint8_t immediate[16], bool is_swizzle)
: s128_imm_(immediate), is_swizzle_(is_swizzle) {}
const S128ImmediateParameter imm() const { return s128_imm_; }
bool is_swizzle() const { return is_swizzle_; }
private:
S128ImmediateParameter s128_imm_;
bool is_swizzle_;
};
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
ShuffleParameter const&);
V8_EXPORT_PRIVATE ShuffleParameter const& ShuffleParameterOf(Operator const* op)
V8_WARN_UNUSED_RESULT;
StackCheckKind StackCheckKindOf(Operator const* op) V8_WARN_UNUSED_RESULT;
// ShiftKind::kShiftOutZeros means that it is guaranteed that the bits shifted
@ -813,7 +831,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128AndNot();
const Operator* I8x16Swizzle();
const Operator* I8x16Shuffle(const uint8_t shuffle[16]);
const Operator* I8x16Shuffle(const uint8_t shuffle[16], bool is_swizzle);
const Operator* V128AnyTrue();
const Operator* I64x2AllTrue();

View File

@ -2234,7 +2234,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
}
case IrOpcode::kI8x16Shuffle: {
DCHECK_EQ(2, node->InputCount());
S128ImmediateParameter shuffle = S128ImmediateParameterOf(node->op());
S128ImmediateParameter shuffle = ShuffleParameterOf(node->op()).imm();
Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
Node** rep_node = zone()->NewArray<Node*>(16);

View File

@ -5180,8 +5180,21 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
Node* WasmGraphBuilder::Simd8x16ShuffleOp(const uint8_t shuffle[16],
Node* const* inputs) {
has_simd_ = true;
return graph()->NewNode(mcgraph()->machine()->I8x16Shuffle(shuffle),
inputs[0], inputs[1]);
uint8_t canonicalized_shuffle[16];
base::Memcpy(canonicalized_shuffle, shuffle, kSimd128Size);
bool needs_swap;
bool inputs_equal = inputs[0]->id() == inputs[1]->id();
bool is_swizzle;
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, canonicalized_shuffle,
&needs_swap, &is_swizzle);
Node* lhs = needs_swap ? inputs[1] : inputs[0];
Node* rhs = needs_swap ? inputs[0] : inputs[1];
if (is_swizzle) {
rhs = lhs;
}
return graph()->NewNode(
mcgraph()->machine()->I8x16Shuffle(canonicalized_shuffle, is_swizzle),
lhs, rhs);
}
Node* WasmGraphBuilder::AtomicOp(wasm::WasmOpcode opcode, Node* const* inputs,

View File

@ -2275,7 +2275,7 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
const MachineType type = MachineType::Simd128();
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F32x4Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
@ -2291,7 +2291,7 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
// Multiplication operator should be commutative, so test shuffle op as lhs.
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F32x4Mul(), shuffle, m.Parameter(2)));
Stream s = m.Build();
@ -2315,8 +2315,8 @@ TEST_F(InstructionSelectorTest, SimdF32x4MulWithDupNegativeTest) {
const uint8_t mask[kSimd128Size] = {0};
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask)), m.Parameter(0),
m.Parameter(1));
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask, true)),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F32x4Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
@ -2361,7 +2361,7 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
const MachineType type = MachineType::Simd128();
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F64x2Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
@ -2377,7 +2377,7 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
// Multiplication operator should be commutative, so test shuffle op as lhs.
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle),
Node* shuffle = m.AddNode(m.machine()->I8x16Shuffle(param.shuffle, true),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F64x2Mul(), shuffle, m.Parameter(2)));
Stream s = m.Build();
@ -2401,8 +2401,8 @@ TEST_F(InstructionSelectorTest, SimdF64x2MulWithDupNegativeTest) {
const uint8_t mask[kSimd128Size] = {0};
{
StreamBuilder m(this, type, type, type, type);
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask)), m.Parameter(0),
m.Parameter(1));
Node* shuffle = m.AddNode((m.machine()->I8x16Shuffle(mask, true)),
m.Parameter(0), m.Parameter(1));
m.Return(m.AddNode(m.machine()->F64x2Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());

View File

@ -1943,6 +1943,7 @@ struct ArchShuffle {
uint8_t shuffle[kSimd128Size];
ArchOpcode arch_opcode;
size_t input_count;
bool inputs_are_swapped = false;
};
static constexpr ArchShuffle kArchShuffles[] = {
@ -2054,16 +2055,19 @@ static constexpr ArchShuffle kArchShuffles[] = {
{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2},
kX64S8x16Alignr,
3,
true,
},
{
{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1},
kX64S8x16Alignr,
3,
true,
},
{
{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17},
kX64S8x16Alignr,
3,
true,
},
// These are matched by TryMatch32x4Shuffle && is_swizzle.
{
@ -2158,14 +2162,35 @@ TEST_P(InstructionSelectorSIMDArchShuffleTest, SIMDArchShuffle) {
StreamBuilder m(this, type, type, type);
auto param = GetParam();
auto shuffle = param.shuffle;
const Operator* op = m.machine()->I8x16Shuffle(shuffle);
Node* n = m.AddNode(op, m.Parameter(0), m.Parameter(1));
// The shuffle constants defined in the test cases are not canonicalized.
bool needs_swap;
bool inputs_equal = false;
bool is_swizzle;
wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, shuffle, &needs_swap,
&is_swizzle);
const Operator* op = m.machine()->I8x16Shuffle(shuffle, is_swizzle);
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
Node* n = m.AddNode(op, p0, p1);
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(param.arch_opcode, s[0]->arch_opcode());
ASSERT_EQ(param.input_count, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
if (param.inputs_are_swapped) {
ASSERT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(1)));
if (!is_swizzle) {
ASSERT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(0)));
}
} else {
ASSERT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
if (!is_swizzle) {
ASSERT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
}
}
}
}