[wasm-relaxed-simd][ia32] Prototype relaxed lane selects

Adds four instructions: i8x16, i16x8, i32x4, and i64x2 relaxed lane select.
These instructions only guarantee results when every lane of the mask is
entirely set or entirely unset, so vpblendvb will give correct results for all
of them.

Same instruction selector code as x64.

Bug: v8:12284
Change-Id: Icd0bc8c5125cd2780141117830d6cb6e6babbb74
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3251701
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77779}
This commit is contained in:
Ng Zhi An 2021-10-28 10:58:35 -07:00 committed by V8 LUCI CQ
parent f7cb5a2182
commit 614ed93bbb
6 changed files with 81 additions and 35 deletions

View File

@ -3435,6 +3435,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
break;
}
case kIA32Pblendvb: {
__ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kIA32Word32AtomicPairLoad: {
__ movq(kScratchDoubleReg, i.MemoryOperand());
__ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);

View File

@ -102,6 +102,7 @@ namespace compiler {
V(IA32BitcastFI) \
V(IA32BitcastIF) \
V(IA32Lea) \
V(IA32Pblendvb) \
V(IA32Push) \
V(IA32Poke) \
V(IA32Peek) \

View File

@ -89,6 +89,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
case kIA32Pblendvb:
case kIA32F64x2Splat:
case kF64x2ExtractLane:
case kF64x2ReplaceLane:

View File

@ -3189,6 +3189,40 @@ void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
VisitRR(this, node, code);
}
namespace {

// Shared lowering for the i8x16, i16x8, i32x4 and i64x2 relaxed lane selects:
// each one becomes a single kIA32Pblendvb. A byte-wise blend is a correct
// implementation for all of them, see
// https://github.com/WebAssembly/relaxed-simd/issues/17.
void VisitRelaxedLaneSelect(InstructionSelector* selector, Node* node) {
  IA32OperandGenerator g(selector);
  Node* const input0 = node->InputAt(0);
  Node* const input1 = node->InputAt(1);
  Node* const mask = node->InputAt(2);

  // pblendvb copies src2 when mask is set, opposite from Wasm semantics, so
  // both encodings below pass the two value inputs in swapped order.
  if (!selector->IsSupported(AVX)) {
    // SSE4.1 pblendvb requires xmm0 to hold the mask as an implicit operand;
    // the output is constrained to be the same as the first input.
    selector->Emit(kIA32Pblendvb, g.DefineSameAsFirst(node),
                   g.UseRegister(input1), g.UseRegister(input0),
                   g.UseFixed(mask, xmm0));
    return;
  }

  // With AVX the mask is an ordinary register operand and the output gets a
  // register of its own.
  selector->Emit(kIA32Pblendvb, g.DefineAsRegister(node),
                 g.UseRegister(input1), g.UseRegister(input0),
                 g.UseRegister(mask));
}

}  // namespace
// i8x16 relaxed lane select: forwards to the file-local VisitRelaxedLaneSelect
// helper, which emits kIA32Pblendvb for every lane width.
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
VisitRelaxedLaneSelect(this, node);
}
// i16x8 relaxed lane select: forwards to the file-local VisitRelaxedLaneSelect
// helper, which emits kIA32Pblendvb for every lane width.
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
VisitRelaxedLaneSelect(this, node);
}
// i32x4 relaxed lane select: forwards to the file-local VisitRelaxedLaneSelect
// helper, which emits kIA32Pblendvb for every lane width.
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
VisitRelaxedLaneSelect(this, node);
}
// i64x2 relaxed lane select: forwards to the file-local VisitRelaxedLaneSelect
// helper, which emits kIA32Pblendvb for every lane width.
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
VisitRelaxedLaneSelect(this, node);
}
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
int first_input_index,
Node* node) {

View File

@ -2790,18 +2790,6 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
#if !V8_TARGET_ARCH_X64
// Relaxed lane select visitors for targets other than x64 (see the enclosing
// #if): no lowering is provided here, each visitor just hits UNIMPLEMENTED().
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
// Relaxed floating-point min/max visitors: likewise UNIMPLEMENTED() on targets
// other than x64 (see the enclosing #if).
void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); }
@ -2820,6 +2808,21 @@ void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
}
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// Relaxed lane select visitors for targets that are neither x64 nor ia32 (see
// the enclosing #if): no lowering is provided here, each visitor just hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// FinishRegion needs no instruction of its own; the node is handed to
// EmitIdentity().
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {

View File

@ -234,29 +234,7 @@ WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
false /* !exact */);
}
#if V8_TARGET_ARCH_X64
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
  // Output is only defined for indices in the range [0,15].
  WasmRunner<int32_t> r(execution_tier);
  static const int kElems = kSimd128Size / sizeof(uint8_t);

  // The Wasm body below refers to the globals by index (0, 1, 2); presumably
  // those indices follow the AddGlobal order, so keep these three calls in
  // this order.
  uint8_t* result = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* input = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* selectors = r.builder().AddGlobal<uint8_t>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
                               WASM_GLOBAL_GET(2))),
        WASM_ONE);

  // Input and selectors both hold kElems-1, ..., 1, 0. Swizzling a reversed
  // vector with reversed indices must produce 0, 1, ..., kElems-1.
  for (int lane = 0; lane < kElems; ++lane) {
    const int mirrored = kElems - lane - 1;
    LANE(input, lane) = mirrored;
    LANE(selectors, lane) = mirrored;
  }

  CHECK_EQ(1, r.Call());

  for (int lane = 0; lane < kElems; ++lane) {
    CHECK_EQ(LANE(result, lane), lane);
  }
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
namespace {
// Helper to convert an array of T into an array of uint8_t to be used as v128
// constants.
@ -335,6 +313,30 @@ WASM_RELAXED_SIMD_TEST(I64x2RelaxedLaneSelect) {
RelaxedLaneSelectTest<uint64_t, kElems>(execution_tier, v1, v2, s, expected,
kExprI64x2RelaxedLaneSelect);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
  // Output is only defined for indices in the range [0,15].
  WasmRunner<int32_t> r(execution_tier);
  static const int kElems = kSimd128Size / sizeof(uint8_t);

  // The Wasm body below refers to the globals by index (0, 1, 2); presumably
  // those indices follow the AddGlobal order, so keep these three calls in
  // this order.
  uint8_t* result = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* input = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* selectors = r.builder().AddGlobal<uint8_t>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
                               WASM_GLOBAL_GET(2))),
        WASM_ONE);

  // Input and selectors both hold kElems-1, ..., 1, 0. Swizzling a reversed
  // vector with reversed indices must produce 0, 1, ..., kElems-1.
  for (int lane = 0; lane < kElems; ++lane) {
    const int mirrored = kElems - lane - 1;
    LANE(input, lane) = mirrored;
    LANE(selectors, lane) = mirrored;
  }

  CHECK_EQ(1, r.Call());

  for (int lane = 0; lane < kElems; ++lane) {
    CHECK_EQ(LANE(result, lane), lane);
  }
}
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMin) {
RunF32x4BinOpTest(execution_tier, kExprF32x4RelaxedMin, Minimum);