[wasm-relaxed-simd][ia32] Prototype relaxed lane selects
Add four instructions: i8x16, i16x8, i32x4, and i64x2 relaxed lane select. These instructions only guarantee results when each mask lane is entirely set or entirely unset, so vpblendvb gives correct results for all of them. The instruction selector code is the same as on x64. Bug: v8:12284 Change-Id: Icd0bc8c5125cd2780141117830d6cb6e6babbb74 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3251701 Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/main@{#77779}
This commit is contained in:
parent
f7cb5a2182
commit
614ed93bbb
@ -3435,6 +3435,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
|
||||
break;
|
||||
}
|
||||
case kIA32Pblendvb: {
|
||||
__ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), i.InputSimd128Register(2));
|
||||
break;
|
||||
}
|
||||
case kIA32Word32AtomicPairLoad: {
|
||||
__ movq(kScratchDoubleReg, i.MemoryOperand());
|
||||
__ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
|
||||
|
@ -102,6 +102,7 @@ namespace compiler {
|
||||
V(IA32BitcastFI) \
|
||||
V(IA32BitcastIF) \
|
||||
V(IA32Lea) \
|
||||
V(IA32Pblendvb) \
|
||||
V(IA32Push) \
|
||||
V(IA32Poke) \
|
||||
V(IA32Peek) \
|
||||
|
@ -89,6 +89,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kFloat32Neg:
|
||||
case kIA32BitcastFI:
|
||||
case kIA32BitcastIF:
|
||||
case kIA32Pblendvb:
|
||||
case kIA32F64x2Splat:
|
||||
case kF64x2ExtractLane:
|
||||
case kF64x2ReplaceLane:
|
||||
|
@ -3189,6 +3189,40 @@ void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
|
||||
VisitRR(this, node, code);
|
||||
}
|
||||
|
||||
namespace {
|
||||
// pblendvb is a correct implementation for all the various relaxed lane select,
|
||||
// see https://github.com/WebAssembly/relaxed-simd/issues/17.
|
||||
void VisitRelaxedLaneSelect(InstructionSelector* selector, Node* node) {
|
||||
IA32OperandGenerator g(selector);
|
||||
// pblendvb copies src2 when mask is set, opposite from Wasm semantics.
|
||||
if (selector->IsSupported(AVX)) {
|
||||
selector->Emit(kIA32Pblendvb, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(1)),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseRegister(node->InputAt(2)));
|
||||
} else {
|
||||
// SSE4.1 pblendvb requires xmm0 to hold the mask as an implicit operand.
|
||||
selector->Emit(kIA32Pblendvb, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(1)),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseFixed(node->InputAt(2), xmm0));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Selects code for an i8x16 relaxed lane select by forwarding to the shared
// VisitRelaxedLaneSelect helper, which emits kIA32Pblendvb.
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
// Selects code for an i16x8 relaxed lane select by forwarding to the shared
// VisitRelaxedLaneSelect helper, which emits kIA32Pblendvb.
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
// Selects code for an i32x4 relaxed lane select by forwarding to the shared
// VisitRelaxedLaneSelect helper, which emits kIA32Pblendvb.
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
// Selects code for an i64x2 relaxed lane select by forwarding to the shared
// VisitRelaxedLaneSelect helper, which emits kIA32Pblendvb.
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
VisitRelaxedLaneSelect(this, node);
|
||||
}
|
||||
|
||||
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
|
||||
int first_input_index,
|
||||
Node* node) {
|
||||
|
@ -2790,18 +2790,6 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
// Stub for targets compiled under the enclosing !V8_TARGET_ARCH_X64 guard:
// i8x16 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets compiled under the enclosing !V8_TARGET_ARCH_X64 guard:
// i16x8 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets compiled under the enclosing !V8_TARGET_ARCH_X64 guard:
// i32x4 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets compiled under the enclosing !V8_TARGET_ARCH_X64 guard:
// i64x2 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// F32x4 relaxed min has no implementation in this configuration; the visitor
// hits UNIMPLEMENTED().
void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); }
|
||||
// F32x4 relaxed max has no implementation in this configuration; the visitor
// hits UNIMPLEMENTED().
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); }
|
||||
// F64x2 relaxed min has no implementation in this configuration; the visitor
// hits UNIMPLEMENTED().
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); }
|
||||
@ -2820,6 +2808,21 @@ void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_X64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
// Stub for targets that are neither x64 nor ia32 (see the enclosing #if):
// i8x16 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets that are neither x64 nor ia32 (see the enclosing #if):
// i16x8 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets that are neither x64 nor ia32 (see the enclosing #if):
// i32x4 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
// Stub for targets that are neither x64 nor ia32 (see the enclosing #if):
// i64x2 relaxed lane select has no implementation here and hits
// UNIMPLEMENTED().
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
// Handles a FinishRegion node by forwarding it to EmitIdentity(node).
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
void InstructionSelector::VisitParameter(Node* node) {
|
||||
|
@ -234,29 +234,7 @@ WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
|
||||
false /* !exact */);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_X64
|
||||
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
  // The result is only defined for indices in [0,15], so every index used
  // below stays inside that range.
  WasmRunner<int32_t> r(execution_tier);
  constexpr int kLanes = kSimd128Size / sizeof(uint8_t);
  // Globals are registered in this order: 0 = dst, 1 = src, 2 = indices.
  uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
                               WASM_GLOBAL_GET(2))),
        WASM_ONE);

  // Fill src and indices with the same reversed sequence; swizzling a
  // reversed vector by reversed indices yields the identity sequence.
  for (int lane = 0; lane < kLanes; ++lane) {
    const int mirrored = kLanes - lane - 1;
    LANE(src, lane) = mirrored;
    LANE(indices, lane) = mirrored;
  }

  CHECK_EQ(1, r.Call());

  for (int lane = 0; lane < kLanes; ++lane) {
    CHECK_EQ(LANE(dst, lane), lane);
  }
}
|
||||
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
namespace {
|
||||
// Helper to convert an array of T into an array of uint8_t to be used as v128
|
||||
// constants.
|
||||
@ -335,6 +313,30 @@ WASM_RELAXED_SIMD_TEST(I64x2RelaxedLaneSelect) {
|
||||
RelaxedLaneSelectTest<uint64_t, kElems>(execution_tier, v1, v2, s, expected,
|
||||
kExprI64x2RelaxedLaneSelect);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
#if V8_TARGET_ARCH_X64
|
||||
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
  // The result is only defined for indices in [0,15], so every index used
  // below stays inside that range.
  WasmRunner<int32_t> r(execution_tier);
  constexpr int kLanes = kSimd128Size / sizeof(uint8_t);
  // Globals are registered in this order: 0 = dst, 1 = src, 2 = indices.
  uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
                               WASM_GLOBAL_GET(2))),
        WASM_ONE);

  // Fill src and indices with the same reversed sequence; swizzling a
  // reversed vector by reversed indices yields the identity sequence.
  for (int lane = 0; lane < kLanes; ++lane) {
    const int mirrored = kLanes - lane - 1;
    LANE(src, lane) = mirrored;
    LANE(indices, lane) = mirrored;
  }

  CHECK_EQ(1, r.Call());

  for (int lane = 0; lane < kLanes; ++lane) {
    CHECK_EQ(LANE(dst, lane), lane);
  }
}
|
||||
|
||||
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMin) {
|
||||
RunF32x4BinOpTest(execution_tier, kExprF32x4RelaxedMin, Minimum);
|
||||
|
Loading…
Reference in New Issue
Block a user