[wasm-relaxed-simd][x64] Prototype relaxed lane selects
Adds four instructions: i8x16, i16x8, i32x4 and i64x2 relaxed lane select. These instructions only guarantee results when every bit of a mask lane is either set or unset, so vpblendvb gives correct results for all of them. Bug: v8:12284 Change-Id: I76959a23f2d97de8ecc3bef43d138184484e3c4d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3207006 Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/main@{#77401}
This commit is contained in:
parent
33634d76ec
commit
ee3b4eadab
@ -99,6 +99,19 @@ void SharedTurboAssembler::Movlps(XMMRegister dst, XMMRegister src1,
|
||||
}
|
||||
}
|
||||
|
||||
// Variable byte blend. Emits the three-operand vpblendvb when AVX is
// available. Otherwise emits the SSE4.1 pblendvb, for which the caller must
// pass the mask in xmm0 and use the same register for dst and src1 (both
// conditions are DCHECKed).
void SharedTurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
                                    XMMRegister src2, XMMRegister mask) {
  if (!CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    DCHECK_EQ(mask, xmm0);
    DCHECK_EQ(dst, src1);
    pblendvb(dst, src2);
    return;
  }

  CpuFeatureScope avx_scope(this, AVX);
  vpblendvb(dst, src1, src2, mask);
}
|
||||
|
||||
void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2, uint8_t imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
|
@ -50,6 +50,9 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
void Movhps(XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
void Movlps(XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
|
||||
// Variable byte blend of src1 and src2 into dst, controlled by mask. Without
// AVX the SSE4.1 form is emitted, which requires mask == xmm0 and
// dst == src1.
void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
              XMMRegister mask);
|
||||
|
||||
template <typename Op>
|
||||
void Pinsrb(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
|
||||
uint32_t* load_pc_offset = nullptr) {
|
||||
|
@ -2352,6 +2352,14 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
return MarkAsWord32(node), VisitI16x8AllTrue(node);
|
||||
case IrOpcode::kI8x16AllTrue:
|
||||
return MarkAsWord32(node), VisitI8x16AllTrue(node);
|
||||
case IrOpcode::kI8x16RelaxedLaneSelect:
|
||||
return MarkAsSimd128(node), VisitI8x16RelaxedLaneSelect(node);
|
||||
case IrOpcode::kI16x8RelaxedLaneSelect:
|
||||
return MarkAsSimd128(node), VisitI16x8RelaxedLaneSelect(node);
|
||||
case IrOpcode::kI32x4RelaxedLaneSelect:
|
||||
return MarkAsSimd128(node), VisitI32x4RelaxedLaneSelect(node);
|
||||
case IrOpcode::kI64x2RelaxedLaneSelect:
|
||||
return MarkAsSimd128(node), VisitI64x2RelaxedLaneSelect(node);
|
||||
default:
|
||||
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
|
||||
node->op()->mnemonic(), node->id());
|
||||
@ -2765,6 +2773,21 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM64
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_X64
|
||||
|
||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
void InstructionSelector::VisitParameter(Node* node) {
|
||||
|
@ -4069,6 +4069,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
|
||||
break;
|
||||
}
|
||||
case kX64Pblendvb: {
|
||||
__ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), i.InputSimd128Register(2));
|
||||
break;
|
||||
}
|
||||
case kAtomicStoreWord8: {
|
||||
ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
|
||||
break;
|
||||
|
@ -392,6 +392,7 @@ namespace compiler {
|
||||
V(X64I32x4AllTrue) \
|
||||
V(X64I16x8AllTrue) \
|
||||
V(X64I8x16AllTrue) \
|
||||
V(X64Pblendvb) \
|
||||
V(X64Word64AtomicAddUint64) \
|
||||
V(X64Word64AtomicSubUint64) \
|
||||
V(X64Word64AtomicAndUint64) \
|
||||
|
@ -345,6 +345,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64S8x2Reverse:
|
||||
case kX64V128AnyTrue:
|
||||
case kX64I8x16AllTrue:
|
||||
case kX64Pblendvb:
|
||||
return (instr->addressing_mode() == kMode_None)
|
||||
? kNoOpcodeFlags
|
||||
: kIsLoadOperation | kHasSideEffect;
|
||||
|
@ -3706,8 +3706,53 @@ void InstructionSelector::VisitI8x16Swizzle(Node* node) {
|
||||
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
|
||||
}
|
||||
|
||||
namespace {
|
||||
// pblendvb is a correct implementation for all the various relaxed lane select,
|
||||
// see https://github.com/WebAssembly/relaxed-simd/issues/17.
|
||||
void VisitRelaxedLaneSelect(InstructionSelector* selector, Node* node) {
|
||||
X64OperandGenerator g(selector);
|
||||
// pblendvb copies src2 when mask is set, opposite from Wasm semantics.
|
||||
if (selector->IsSupported(AVX)) {
|
||||
selector->Emit(
|
||||
kX64Pblendvb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(1)),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(2)));
|
||||
} else {
|
||||
// SSE4.1 pblendvb requires xmm0 to hold the mask as an implicit operand.
|
||||
selector->Emit(kX64Pblendvb, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(1)),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseFixed(node->InputAt(2), xmm0));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
#else
|
||||
void InstructionSelector::VisitI8x16Swizzle(Node* node) { UNREACHABLE(); }
|
||||
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
namespace {
|
||||
|
@ -597,7 +597,11 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
|
||||
V(I64x2AllTrue, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I32x4AllTrue, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I16x8AllTrue, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I8x16AllTrue, Operator::kNoProperties, 1, 0, 1)
|
||||
V(I8x16AllTrue, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(I8x16RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
|
||||
V(I16x8RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
|
||||
V(I32x4RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
|
||||
V(I64x2RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1)
|
||||
|
||||
// The format is:
|
||||
// V(Name, properties, value_input_count, control_input_count, output_count)
|
||||
|
@ -914,6 +914,12 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* I16x8AllTrue();
|
||||
const Operator* I8x16AllTrue();
|
||||
|
||||
// Relaxed SIMD operators.
|
||||
const Operator* I8x16RelaxedLaneSelect();
|
||||
const Operator* I16x8RelaxedLaneSelect();
|
||||
const Operator* I32x4RelaxedLaneSelect();
|
||||
const Operator* I64x2RelaxedLaneSelect();
|
||||
|
||||
// load [base + index]
|
||||
const Operator* Load(LoadRepresentation rep);
|
||||
const Operator* LoadImmutable(LoadRepresentation rep);
|
||||
|
@ -981,6 +981,10 @@
|
||||
V(S128Select) \
|
||||
V(S128AndNot) \
|
||||
V(I8x16Swizzle) \
|
||||
V(I8x16RelaxedLaneSelect) \
|
||||
V(I16x8RelaxedLaneSelect) \
|
||||
V(I32x4RelaxedLaneSelect) \
|
||||
V(I64x2RelaxedLaneSelect) \
|
||||
V(I8x16Shuffle) \
|
||||
V(V128AnyTrue) \
|
||||
V(I64x2AllTrue) \
|
||||
|
@ -5116,6 +5116,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
|
||||
case wasm::kExprI8x16RelaxedSwizzle:
|
||||
return graph()->NewNode(mcgraph()->machine()->I8x16Swizzle(true),
|
||||
inputs[0], inputs[1]);
|
||||
case wasm::kExprI8x16RelaxedLaneSelect:
|
||||
return graph()->NewNode(mcgraph()->machine()->I8x16RelaxedLaneSelect(),
|
||||
inputs[0], inputs[1], inputs[2]);
|
||||
case wasm::kExprI16x8RelaxedLaneSelect:
|
||||
return graph()->NewNode(mcgraph()->machine()->I16x8RelaxedLaneSelect(),
|
||||
inputs[0], inputs[1], inputs[2]);
|
||||
case wasm::kExprI32x4RelaxedLaneSelect:
|
||||
return graph()->NewNode(mcgraph()->machine()->I32x4RelaxedLaneSelect(),
|
||||
inputs[0], inputs[1], inputs[2]);
|
||||
case wasm::kExprI64x2RelaxedLaneSelect:
|
||||
return graph()->NewNode(mcgraph()->machine()->I64x2RelaxedLaneSelect(),
|
||||
inputs[0], inputs[1], inputs[2]);
|
||||
default:
|
||||
FATAL_UNSUPPORTED_OPCODE(opcode);
|
||||
}
|
||||
|
@ -368,6 +368,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
||||
CASE_SIMDF_OP(Qfma, "qfma")
|
||||
CASE_SIMDF_OP(Qfms, "qfms")
|
||||
CASE_I8x16_OP(RelaxedSwizzle, "relaxed_swizzle");
|
||||
CASE_I8x16_OP(RelaxedLaneSelect, "relaxed_laneselect");
|
||||
CASE_I16x8_OP(RelaxedLaneSelect, "relaxed_laneselect");
|
||||
CASE_I32x4_OP(RelaxedLaneSelect, "relaxed_laneselect");
|
||||
CASE_I64x2_OP(RelaxedLaneSelect, "relaxed_laneselect");
|
||||
|
||||
// Atomic operations.
|
||||
CASE_OP(AtomicNotify, "atomic.notify")
|
||||
|
@ -514,14 +514,18 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
|
||||
V(F64x2ConvertLowI32x4S, 0xfdfe, s_s) \
|
||||
V(F64x2ConvertLowI32x4U, 0xfdff, s_s)
|
||||
|
||||
#define FOREACH_RELAXED_SIMD_OPCODE(V) \
|
||||
V(I8x16RelaxedSwizzle, 0xfda2, s_ss) \
|
||||
V(F32x4Qfma, 0xfdaf, s_sss) \
|
||||
V(F32x4Qfms, 0xfdb0, s_sss) \
|
||||
V(F64x2Qfma, 0xfdcf, s_sss) \
|
||||
V(F64x2Qfms, 0xfdd0, s_sss) \
|
||||
V(F32x4RecipApprox, 0xfdd2, s_s) \
|
||||
V(F32x4RecipSqrtApprox, 0xfdd3, s_s)
|
||||
#define FOREACH_RELAXED_SIMD_OPCODE(V) \
|
||||
V(I8x16RelaxedSwizzle, 0xfda2, s_ss) \
|
||||
V(I8x16RelaxedLaneSelect, 0xfdb2, s_sss) \
|
||||
V(I16x8RelaxedLaneSelect, 0xfdb3, s_sss) \
|
||||
V(I32x4RelaxedLaneSelect, 0xfdd2, s_sss) \
|
||||
V(I64x2RelaxedLaneSelect, 0xfdd3, s_sss) \
|
||||
V(F32x4Qfma, 0xfdaf, s_sss) \
|
||||
V(F32x4Qfms, 0xfdb0, s_sss) \
|
||||
V(F64x2Qfma, 0xfdcf, s_sss) \
|
||||
V(F64x2Qfms, 0xfdd0, s_sss) \
|
||||
V(F32x4RecipApprox, 0xfda5, s_s) \
|
||||
V(F32x4RecipSqrtApprox, 0xfda6, s_s)
|
||||
|
||||
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
|
||||
V(I8x16ExtractLaneS, 0xfd15, _) \
|
||||
|
@ -3,6 +3,7 @@
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/base/overflowing-math.h"
|
||||
#include "src/common/globals.h"
|
||||
#include "src/wasm/compilation-environment.h"
|
||||
#include "test/cctest/cctest.h"
|
||||
#include "test/cctest/wasm/wasm-run-utils.h"
|
||||
@ -252,6 +253,85 @@ WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
|
||||
CHECK_EQ(LANE(dst, i), i);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Helper to convert an array of T into an array of uint8_t to be used a v128
|
||||
// constants.
|
||||
template <typename T, size_t N = kSimd128Size / sizeof(T)>
|
||||
std::array<uint8_t, kSimd128Size> as_uint8(const T* src) {
|
||||
std::array<uint8_t, kSimd128Size> arr;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
WriteLittleEndianValue<T>(bit_cast<T*>(&arr[0]) + i, src[i]);
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
template <typename T, int kElems>
|
||||
void RelaxedLaneSelectTest(TestExecutionTier execution_tier, const T v1[kElems],
|
||||
const T v2[kElems], const T s[kElems],
|
||||
const T expected[kElems], WasmOpcode laneselect) {
|
||||
auto lhs = as_uint8<T>(v1);
|
||||
auto rhs = as_uint8<T>(v2);
|
||||
auto mask = as_uint8<T>(s);
|
||||
WasmRunner<int32_t> r(execution_tier);
|
||||
T* dst = r.builder().AddGlobal<T>(kWasmS128);
|
||||
BUILD(r,
|
||||
WASM_GLOBAL_SET(0, WASM_SIMD_OPN(laneselect, WASM_SIMD_CONSTANT(lhs),
|
||||
WASM_SIMD_CONSTANT(rhs),
|
||||
WASM_SIMD_CONSTANT(mask))),
|
||||
WASM_ONE);
|
||||
|
||||
CHECK_EQ(1, r.Call());
|
||||
for (int i = 0; i < kElems; i++) {
|
||||
CHECK_EQ(expected[i], LANE(dst, i));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
WASM_RELAXED_SIMD_TEST(I8x16RelaxedLaneSelect) {
  constexpr int kElems = 16;
  // Lanes with an all-ones mask expect the value from the first operand,
  // lanes with an all-zero mask the value from the second.
  constexpr uint8_t first[kElems] = {0, 1, 2,  3,  4,  5,  6,  7,
                                     8, 9, 10, 11, 12, 13, 14, 15};
  constexpr uint8_t second[kElems] = {16, 17, 18, 19, 20, 21, 22, 23,
                                      24, 25, 26, 27, 28, 29, 30, 31};
  constexpr uint8_t mask[kElems] = {0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF,
                                    0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF};
  constexpr uint8_t expected[kElems] = {16, 1, 18, 3,  20, 5,  22, 7,
                                        24, 9, 26, 11, 28, 13, 30, 15};
  RelaxedLaneSelectTest<uint8_t, kElems>(execution_tier, first, second, mask,
                                         expected,
                                         kExprI8x16RelaxedLaneSelect);
}
|
||||
|
||||
WASM_RELAXED_SIMD_TEST(I16x8RelaxedLaneSelect) {
  constexpr int kElems = 8;
  // Inputs are compile-time constants, matching the I8x16 variant of this
  // test. Lanes with an all-ones mask expect the value from v1, lanes with an
  // all-zero mask the value from v2.
  constexpr uint16_t v1[kElems] = {0, 1, 2, 3, 4, 5, 6, 7};
  constexpr uint16_t v2[kElems] = {8, 9, 10, 11, 12, 13, 14, 15};
  constexpr uint16_t s[kElems] = {0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF};
  constexpr uint16_t expected[kElems] = {8, 1, 10, 3, 12, 5, 14, 7};
  RelaxedLaneSelectTest<uint16_t, kElems>(execution_tier, v1, v2, s, expected,
                                          kExprI16x8RelaxedLaneSelect);
}
|
||||
|
||||
WASM_RELAXED_SIMD_TEST(I32x4RelaxedLaneSelect) {
  constexpr int kElems = 4;
  // Inputs are compile-time constants, matching the I8x16 variant of this
  // test. Lanes with an all-ones mask expect the value from v1, lanes with an
  // all-zero mask the value from v2.
  constexpr uint32_t v1[kElems] = {0, 1, 2, 3};
  constexpr uint32_t v2[kElems] = {4, 5, 6, 7};
  constexpr uint32_t s[kElems] = {0, 0xFFFF'FFFF, 0, 0xFFFF'FFFF};
  constexpr uint32_t expected[kElems] = {4, 1, 6, 3};
  RelaxedLaneSelectTest<uint32_t, kElems>(execution_tier, v1, v2, s, expected,
                                          kExprI32x4RelaxedLaneSelect);
}
|
||||
|
||||
WASM_RELAXED_SIMD_TEST(I64x2RelaxedLaneSelect) {
  constexpr int kElems = 2;
  // Inputs are compile-time constants, matching the I8x16 variant of this
  // test. Lanes with an all-ones mask expect the value from v1, lanes with an
  // all-zero mask the value from v2.
  constexpr uint64_t v1[kElems] = {0, 1};
  constexpr uint64_t v2[kElems] = {2, 3};
  constexpr uint64_t s[kElems] = {0, 0xFFFF'FFFF'FFFF'FFFF};
  constexpr uint64_t expected[kElems] = {2, 1};
  RelaxedLaneSelectTest<uint64_t, kElems>(execution_tier, v1, v2, s, expected,
                                          kExprI64x2RelaxedLaneSelect);
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
|
||||
#undef WASM_RELAXED_SIMD_TEST
|
||||
|
@ -2724,6 +2724,10 @@ class WasmInterpreterInternals {
|
||||
PACK_CASE(I8x16SConvertI16x8, int8, i16x8, int16, 16, int8_t)
|
||||
PACK_CASE(I8x16UConvertI16x8, int8, i16x8, int16, 16, uint8_t)
|
||||
#undef PACK_CASE
|
||||
case kExprI8x16RelaxedLaneSelect:
|
||||
case kExprI16x8RelaxedLaneSelect:
|
||||
case kExprI32x4RelaxedLaneSelect:
|
||||
case kExprI64x2RelaxedLaneSelect:
|
||||
case kExprS128Select: {
|
||||
int4 bool_val = Pop().to_s128().to_i32x4();
|
||||
int4 v2 = Pop().to_s128().to_i32x4();
|
||||
|
Loading…
Reference in New Issue
Block a user