[wasm-simd][x64][ia32] Optimize swizzle with constant indices
When swizzle is called with a v128.const node as its indices, we can check during instruction selection that every index is either in bounds or, if it is out of bounds, has the top bit of its byte set. Such indices already match pshufb's behavior exactly, so we can omit the paddusb (and the fetching of its external reference). Bug: v8:10992 Change-Id: I5479a9eb92ebcfc12bedff5efd3e72bb4a43ff40 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2766222 Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Reviewed-by: Georg Neis <neis@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#73583}
This commit is contained in:
parent
656f35ab6c
commit
8c9213a191
@ -1272,7 +1272,12 @@ void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
|
||||
|
||||
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister mask, XMMRegister scratch,
|
||||
Register tmp) {
|
||||
Register tmp, bool omit_add) {
|
||||
if (omit_add) {
  // The caller asked us to skip the saturating add, i.e. every index byte in
  // |mask| is either in range or already has its top bit set, so a plain
  // pshufb zeroes exactly the out-of-range lanes.
  // Shuffle with |mask| itself: |scratch| has not been written on this path
  // and therefore does not hold the indices.
  Pshufb(dst, src, mask);
  return;
}
|
||||
|
||||
// Out-of-range indices should return 0, add 112 so that any value > 15
|
||||
// saturates to 128 (top bit set), so pshufb will zero that lane.
|
||||
Operand op = ExternalReferenceAsOperand(
|
||||
|
@ -758,7 +758,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister tmp);
|
||||
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
|
||||
XMMRegister scratch, Register tmp);
|
||||
XMMRegister scratch, Register tmp, bool omit_add = false);
|
||||
|
||||
void Push(Register src) { push(src); }
|
||||
void Push(Operand src) { push(src); }
|
||||
|
@ -2701,7 +2701,14 @@ void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
|
||||
}
|
||||
|
||||
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister mask) {
|
||||
XMMRegister mask, bool omit_add) {
|
||||
if (omit_add) {
  // We have determined that the indices are immediates, and they are either
  // within bounds, or the top bit is set, so we can omit the add.
  // Shuffle with |mask| directly: kScratchDoubleReg has not been loaded with
  // the indices on this path, so it must not be used as the pshufb control.
  Pshufb(dst, src, mask);
  return;
}
|
||||
|
||||
// Out-of-range indices should return 0, add 112 so that any value > 15
|
||||
// saturates to 128 (top bit set), so pshufb will zero that lane.
|
||||
Operand op = ExternalReferenceAsOperand(
|
||||
|
@ -645,7 +645,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask);
|
||||
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
|
||||
bool omit_add = false);
|
||||
|
||||
void Abspd(XMMRegister dst);
|
||||
void Negpd(XMMRegister dst);
|
||||
|
@ -3624,7 +3624,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kIA32I8x16Swizzle: {
|
||||
__ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), kScratchDoubleReg,
|
||||
i.TempRegister(0));
|
||||
i.TempRegister(0), MiscField::decode(instr->opcode()));
|
||||
break;
|
||||
}
|
||||
case kIA32I8x16Shuffle: {
|
||||
|
@ -3002,9 +3002,20 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { UNREACHABLE(); }
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
void InstructionSelector::VisitI8x16Swizzle(Node* node) {
|
||||
InstructionCode op = kIA32I8x16Swizzle;
|
||||
|
||||
auto m = V128ConstMatcher(node->InputAt(1));
|
||||
if (m.HasResolvedValue()) {
|
||||
// If the indices vector is a const, check if they are in range, or if the
|
||||
// top bit is set, then we can avoid the paddusb in the codegen and simply
|
||||
// emit a pshufb.
|
||||
auto imms = m.ResolvedValue().immediate();
|
||||
op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
|
||||
}
|
||||
|
||||
IA32OperandGenerator g(this);
|
||||
InstructionOperand temps[] = {g.TempRegister()};
|
||||
Emit(kIA32I8x16Swizzle,
|
||||
Emit(op,
|
||||
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
arraysize(temps), temps);
|
||||
|
@ -860,6 +860,7 @@ class V8_EXPORT_PRIVATE Instruction final {
|
||||
FlagsCondition flags_condition() const {
|
||||
return FlagsConditionField::decode(opcode());
|
||||
}
|
||||
// Returns the MiscField bits decoded from this instruction's opcode.
int misc() const {
  return MiscField::decode(opcode());
}
|
||||
|
||||
static Instruction* New(Zone* zone, InstructionCode opcode) {
|
||||
return New(zone, opcode, 0, nullptr, 0, nullptr, 0, nullptr);
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "src/compiler/backend/code-generator-impl.h"
|
||||
#include "src/compiler/backend/code-generator.h"
|
||||
#include "src/compiler/backend/gap-resolver.h"
|
||||
#include "src/compiler/backend/instruction-codes.h"
|
||||
#include "src/compiler/node-matchers.h"
|
||||
#include "src/compiler/osr.h"
|
||||
#include "src/heap/memory-chunk.h"
|
||||
@ -3691,8 +3692,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kX64I8x16Swizzle: {
|
||||
bool omit_add = MiscField::decode(instr->opcode());
|
||||
__ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1));
|
||||
i.InputSimd128Register(1), omit_add);
|
||||
break;
|
||||
}
|
||||
case kX64I8x16Shuffle: {
|
||||
|
@ -3544,12 +3544,27 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
|
||||
void InstructionSelector::VisitI8x16Shuffle(Node* node) { UNREACHABLE(); }
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
#if V8_ENABLE_WEBASSEMBLY
|
||||
// Selects the x64 swizzle instruction for |node|. Input 0 is the vector to
// shuffle, input 1 holds the per-byte indices.
void InstructionSelector::VisitI8x16Swizzle(Node* node) {
  InstructionCode op = kX64I8x16Swizzle;

  // If the indices vector is a const, check if they are in range, or if the
  // top bit is set; the result is stored in MiscField so the code generator
  // can avoid the paddusb and simply emit a pshufb.
  auto m = V128ConstMatcher(node->InputAt(1));
  if (m.HasResolvedValue()) {
    auto imms = m.ResolvedValue().immediate();
    op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
  }

  X64OperandGenerator g(this);
  // Emit |op| rather than the bare kX64I8x16Swizzle so that the MiscField bit
  // computed above is not dropped.
  Emit(op,
       IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
|
||||
#else
|
||||
void InstructionSelector::VisitI8x16Swizzle(Node* node) { UNREACHABLE(); }
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
namespace {
|
||||
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "src/codegen/external-reference.h"
|
||||
#include "src/common/globals.h"
|
||||
#include "src/compiler/common-operator.h"
|
||||
#include "src/compiler/machine-operator.h"
|
||||
#include "src/compiler/node.h"
|
||||
#include "src/compiler/operator.h"
|
||||
#include "src/numbers/double.h"
|
||||
@ -169,6 +170,8 @@ using Int32Matcher = IntMatcher<int32_t, IrOpcode::kInt32Constant>;
|
||||
using Uint32Matcher = IntMatcher<uint32_t, IrOpcode::kInt32Constant>;
|
||||
using Int64Matcher = IntMatcher<int64_t, IrOpcode::kInt64Constant>;
|
||||
using Uint64Matcher = IntMatcher<uint64_t, IrOpcode::kInt64Constant>;
|
||||
using V128ConstMatcher =
|
||||
ValueMatcher<S128ImmediateParameter, IrOpcode::kS128Const>;
|
||||
#if V8_HOST_ARCH_32_BIT
|
||||
using IntPtrMatcher = Int32Matcher;
|
||||
using UintPtrMatcher = Uint32Matcher;
|
||||
|
@ -864,6 +864,9 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
|
||||
}
|
||||
|
||||
// SIMD operations.
|
||||
// Adds an S128Const node built from the 16 bytes in |value|.
Node* S128Const(const uint8_t value[16]) {
  const auto* const_op = machine()->S128Const(value);
  return AddNode(const_op);
}
|
||||
Node* I64x2Splat(Node* a) { return AddNode(machine()->I64x2Splat(), a); }
|
||||
Node* I64x2SplatI32Pair(Node* a, Node* b) {
|
||||
return AddNode(machine()->I64x2SplatI32Pair(), a, b);
|
||||
|
@ -161,6 +161,12 @@ void SimdShuffle::Pack16Lanes(uint32_t* dst, const uint8_t* shuffle) {
|
||||
}
|
||||
}
|
||||
|
||||
bool SimdSwizzle::AllInRangeOrTopBitSet(
|
||||
std::array<uint8_t, kSimd128Size> shuffle) {
|
||||
return std::all_of(shuffle.begin(), shuffle.end(),
|
||||
[](auto i) { return (i < kSimd128Size) || (i & 0x80); });
|
||||
}
|
||||
|
||||
} // namespace wasm
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -95,6 +95,14 @@ class V8_EXPORT_PRIVATE SimdShuffle {
|
||||
// Packs 16 bytes of shuffle into an array of 4 uint32_t.
|
||||
static void Pack16Lanes(uint32_t* dst, const uint8_t* shuffle);
|
||||
};
|
||||
|
||||
class V8_EXPORT_PRIVATE SimdSwizzle {
 public:
  // Returns true when each byte of |shuffle| is either a valid lane index
  // (< kSimd128Size) or, failing that, has its top bit set.
  static bool AllInRangeOrTopBitSet(std::array<uint8_t, kSimd128Size> shuffle);
};
|
||||
|
||||
} // namespace wasm
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -886,6 +886,50 @@ TEST_F(InstructionSelectorTest, SIMDSplatZero) {
|
||||
}
|
||||
}
|
||||
|
||||
struct SwizzleConstants {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool omit_add;
|
||||
};
|
||||
|
||||
static constexpr SwizzleConstants kSwizzleConstants[] = {
|
||||
{
|
||||
// all lanes < kSimd128Size
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// lanes that are >= kSimd128Size have top bit set
|
||||
{12, 13, 14, 15, 0x90, 0x91, 0x92, 0x93, 0xA0, 0xA1, 0xA2, 0xA3, 0xFC,
|
||||
0xFD, 0xFE, 0xFF},
|
||||
true,
|
||||
},
|
||||
{
|
||||
{12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
|
||||
false,
|
||||
},
|
||||
};
|
||||
|
||||
using InstructionSelectorSIMDSwizzleConstantTest =
|
||||
InstructionSelectorTestWithParam<SwizzleConstants>;
|
||||
|
||||
TEST_P(InstructionSelectorSIMDSwizzleConstantTest, SimdSwizzleConstant) {
  // Builds a swizzle whose index operand is an S128Const and checks that the
  // selected instruction's misc() value matches the expected omit_add flag.
  const SwizzleConstants test_case = GetParam();
  StreamBuilder m(this, MachineType::Simd128(), MachineType::Simd128());
  Node* const indices = m.S128Const(test_case.shuffle);
  Node* const result =
      m.AddNode(m.machine()->I8x16Swizzle(), m.Parameter(0), indices);
  m.Return(result);
  Stream s = m.Build();
  ASSERT_EQ(2U, s.size());
  ASSERT_EQ(kIA32I8x16Swizzle, s[1]->arch_opcode());
  ASSERT_EQ(test_case.omit_add, s[1]->misc());
  ASSERT_EQ(1U, s[0]->OutputCount());
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
InstructionSelectorSIMDSwizzleConstantTest,
|
||||
::testing::ValuesIn(kSwizzleConstants));
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -2204,6 +2204,50 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
::testing::ValuesIn(kArchShuffles));
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
struct SwizzleConstants {
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool omit_add;
|
||||
};
|
||||
|
||||
static constexpr SwizzleConstants kSwizzleConstants[] = {
|
||||
{
|
||||
// all lanes < kSimd128Size
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// lanes that are >= kSimd128Size have top bit set
|
||||
{12, 13, 14, 15, 0x90, 0x91, 0x92, 0x93, 0xA0, 0xA1, 0xA2, 0xA3, 0xFC,
|
||||
0xFD, 0xFE, 0xFF},
|
||||
true,
|
||||
},
|
||||
{
|
||||
{12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
|
||||
false,
|
||||
},
|
||||
};
|
||||
|
||||
using InstructionSelectorSIMDSwizzleConstantTest =
|
||||
InstructionSelectorTestWithParam<SwizzleConstants>;
|
||||
|
||||
TEST_P(InstructionSelectorSIMDSwizzleConstantTest, SimdSwizzleConstant) {
  // Builds a swizzle whose index operand is an S128Const and checks that the
  // selected instruction's misc() value matches the expected omit_add flag.
  const SwizzleConstants test_case = GetParam();
  StreamBuilder m(this, MachineType::Simd128(), MachineType::Simd128());
  Node* const indices = m.S128Const(test_case.shuffle);
  Node* const result =
      m.AddNode(m.machine()->I8x16Swizzle(), m.Parameter(0), indices);
  m.Return(result);
  Stream s = m.Build();
  ASSERT_EQ(2U, s.size());
  ASSERT_EQ(kX64I8x16Swizzle, s[1]->arch_opcode());
  ASSERT_EQ(test_case.omit_add, s[1]->misc());
  ASSERT_EQ(1U, s[0]->OutputCount());
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
InstructionSelectorSIMDSwizzleConstantTest,
|
||||
::testing::ValuesIn(kSwizzleConstants));
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
Loading…
Reference in New Issue
Block a user