[wasm-simd][arm64] Bitmask instructions

Implement i8x16.bitmask, i16x8.bitmask, and i32x4.bitmask in the
interpreter and on arm64. These operations are behind the
wasm_simd_post_mvp flag, since we are only prototyping them to
evaluate performance. The codegen is based on the guidance at
https://github.com/WebAssembly/simd/pull/201.

Bug: v8:10308
Change-Id: I835aa8a23e677a00ee7897c1c31a028850e238a9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2099451
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66793}
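For reference, all three bitmask operations reduce to the same scalar loop: lane i contributes its sign bit to bit i of an i32 result. A minimal standalone sketch (hypothetical helper, not part of this CL):

    #include <cstddef>
    #include <cstdint>

    template <typename T, size_t kLanes>
    int32_t ScalarBitMask(const T (&lanes)[kLanes]) {
      int32_t res = 0;
      for (size_t i = 0; i < kLanes; ++i) {
        if (lanes[i] < 0) res |= 1 << i;  // sign bit of lane i -> bit i
      }
      return res;
    }

    // e.g. i32x4.bitmask of lanes {0, -1, 5, -7} yields 0b1010 (bits 1 and 3).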
commit 3406cba8fe (parent ca5ee9d636)
@@ -2128,6 +2128,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     SIMD_BINOP_CASE(kArm64I32x4GtU, Cmhi, 4S);
     SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
     SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
+    case kArm64I32x4BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      __ Sshr(tmp.V4S(), src.V4S(), 31);
+      // Set i-th bit of each lane i. When AND with tmp, the lanes that
+      // are signed will have i-th bit set, unsigned will be 0.
+      __ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
+      __ Addv(tmp.S(), tmp.V4S());
+      __ Mov(dst.W(), tmp.V4S(), 0);
+      break;
+    }
     case kArm64I16x8Splat: {
       __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
       break;
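To unpack the sequence above: Sshr by 31 smears each lane's sign bit across the whole lane (all-ones for negative, zero otherwise); Movi loads a mask whose lane i holds 1 << i; And keeps bit i only in the negative lanes; Addv horizontally adds the four lanes into lane 0; and Mov copies that lane to the scalar destination. A scalar model of the same dataflow (illustrative sketch, not V8 code):

    #include <cstdint>

    int32_t I32x4BitMaskModel(const int32_t lanes[4]) {
      const uint32_t kMask[4] = {0x1, 0x2, 0x4, 0x8};  // the Movi constant: lane i = 1 << i
      uint32_t acc = 0;
      for (int i = 0; i < 4; ++i) {
        // Sshr #31 (arithmetic shift assumed, as on arm64): all-ones if negative.
        uint32_t sign = static_cast<uint32_t>(lanes[i] >> 31);
        acc += sign & kMask[i];  // And, then Addv (horizontal add)
      }
      return static_cast<int32_t>(acc);  // Mov to dst.W()
    }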
@@ -2229,6 +2244,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
     SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
     SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
+    case kArm64I16x8BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      __ Sshr(tmp.V8H(), src.V8H(), 15);
+      // Set i-th bit of each lane i. When AND with tmp, the lanes that
+      // are signed will have i-th bit set, unsigned will be 0.
+      __ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
+      __ Addv(tmp.H(), tmp.V8H());
+      __ Mov(dst.W(), tmp.V8H(), 0);
+      break;
+    }
     case kArm64I8x16Splat: {
       __ Dup(i.OutputSimd128Register().V16B(), i.InputRegister32(0));
       break;
@@ -2318,6 +2348,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     SIMD_BINOP_CASE(kArm64I8x16GeU, Cmhs, 16B);
     SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
     SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
+    case kArm64I8x16BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      // Set i-th bit of each lane i. When AND with tmp, the lanes that
+      // are signed will have i-th bit set, unsigned will be 0.
+      __ Sshr(tmp.V16B(), src.V16B(), 7);
+      __ Movi(mask.V2D(), 0x8040'2010'0804'0201);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
+      __ Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
+      __ Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
+      __ Addv(tmp.H(), tmp.V8H());
+      __ Mov(dst.W(), tmp.V8H(), 0);
+      break;
+    }
     case kArm64S128Zero: {
       __ Movi(i.OutputSimd128Register().V16B(), 0);
       break;
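The i8x16 case needs the extra Ext/Zip1 pair because the byte mask 0x8040'2010'0804'0201 repeats in both 8-byte halves, so lanes 8-15 would otherwise alias bits 0-7. Ext rotates the upper 8 bytes down, and Zip1 interleaves the two halves so each pair of corresponding bytes forms one 16-bit lane, with the upper-half byte landing in bits 8-15; Addv over 8 halfword lanes then yields the full 16-bit mask. A scalar model (illustrative sketch, not V8 code):

    #include <cstdint>

    int32_t I8x16BitMaskModel(const int8_t lanes[16]) {
      uint32_t acc = 0;
      for (int i = 0; i < 16; ++i) {
        uint32_t sign = lanes[i] < 0 ? 1u : 0u;  // Sshr #7 + And, per byte lane
        uint32_t bit = 1u << (i & 7);            // mask byte repeats every 8 lanes
        // Zip1 pairs lane i with lane i + 8, so the high-half byte is shifted
        // into bits 8..15 of the combined halfword instead of aliasing 0..7.
        acc += (sign * bit) << (i < 8 ? 0 : 8);
      }
      return static_cast<int32_t>(acc);          // Addv.H over 8 lanes + Mov
    }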
@@ -253,6 +253,7 @@ namespace compiler {
   V(Arm64I32x4GtU)              \
   V(Arm64I32x4GeU)              \
   V(Arm64I32x4Abs)              \
+  V(Arm64I32x4BitMask)          \
   V(Arm64I16x8Splat)            \
   V(Arm64I16x8ExtractLaneU)     \
   V(Arm64I16x8ExtractLaneS)     \
@@ -287,6 +288,7 @@ namespace compiler {
   V(Arm64I16x8GeU)              \
   V(Arm64I16x8RoundingAverageU) \
   V(Arm64I16x8Abs)              \
+  V(Arm64I16x8BitMask)          \
   V(Arm64I8x16Splat)            \
   V(Arm64I8x16ExtractLaneU)     \
   V(Arm64I8x16ExtractLaneS)     \
@@ -316,6 +318,7 @@ namespace compiler {
   V(Arm64I8x16GeU)              \
   V(Arm64I8x16RoundingAverageU) \
   V(Arm64I8x16Abs)              \
+  V(Arm64I8x16BitMask)          \
   V(Arm64S128Zero)              \
   V(Arm64S128Dup)               \
   V(Arm64S128And)               \
@@ -223,6 +223,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64I32x4GtU:
     case kArm64I32x4GeU:
     case kArm64I32x4Abs:
+    case kArm64I32x4BitMask:
    case kArm64I16x8Splat:
     case kArm64I16x8ExtractLaneU:
     case kArm64I16x8ExtractLaneS:
@@ -257,6 +258,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64I16x8GeU:
     case kArm64I16x8RoundingAverageU:
     case kArm64I16x8Abs:
+    case kArm64I16x8BitMask:
     case kArm64I8x16Splat:
     case kArm64I8x16ExtractLaneU:
     case kArm64I8x16ExtractLaneS:
@@ -286,6 +288,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64I8x16GeU:
     case kArm64I8x16RoundingAverageU:
     case kArm64I8x16Abs:
+    case kArm64I8x16BitMask:
     case kArm64S128Zero:
     case kArm64S128Dup:
     case kArm64S128And:
@@ -3367,6 +3367,29 @@ VISIT_SIMD_QFMOP(F32x4Qfma)
 VISIT_SIMD_QFMOP(F32x4Qfms)
 #undef VISIT_SIMD_QFMOP

+namespace {
+template <ArchOpcode opcode>
+void VisitBitMask(InstructionSelector* selector, Node* node) {
+  Arm64OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitI8x16BitMask(Node* node) {
+  VisitBitMask<kArm64I8x16BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI16x8BitMask(Node* node) {
+  VisitBitMask<kArm64I16x8BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI32x4BitMask(Node* node) {
+  VisitBitMask<kArm64I32x4BitMask>(this, node);
+}
+
 namespace {

 struct ShuffleEntry {
@@ -2025,6 +2025,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitI32x4GeU(node);
     case IrOpcode::kI32x4Abs:
       return MarkAsSimd128(node), VisitI32x4Abs(node);
+    case IrOpcode::kI32x4BitMask:
+      return MarkAsWord32(node), VisitI32x4BitMask(node);
     case IrOpcode::kI16x8Splat:
       return MarkAsSimd128(node), VisitI16x8Splat(node);
     case IrOpcode::kI16x8ExtractLaneU:
@@ -2093,6 +2095,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitI16x8RoundingAverageU(node);
     case IrOpcode::kI16x8Abs:
       return MarkAsSimd128(node), VisitI16x8Abs(node);
+    case IrOpcode::kI16x8BitMask:
+      return MarkAsWord32(node), VisitI16x8BitMask(node);
     case IrOpcode::kI8x16Splat:
       return MarkAsSimd128(node), VisitI8x16Splat(node);
     case IrOpcode::kI8x16ExtractLaneU:
@@ -2151,6 +2155,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node);
     case IrOpcode::kI8x16Abs:
       return MarkAsSimd128(node), VisitI8x16Abs(node);
+    case IrOpcode::kI8x16BitMask:
+      return MarkAsWord32(node), VisitI8x16BitMask(node);
     case IrOpcode::kS128Zero:
       return MarkAsSimd128(node), VisitS128Zero(node);
     case IrOpcode::kS128And:
@@ -2628,6 +2634,12 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X

+#if !V8_TARGET_ARCH_ARM64
+void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_ARM64
+
 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

 void InstructionSelector::VisitParameter(Node* node) {
@@ -390,6 +390,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(I32x4GtU, Operator::kNoProperties, 2, 0, 1)                \
   V(I32x4GeU, Operator::kNoProperties, 2, 0, 1)                \
   V(I32x4Abs, Operator::kNoProperties, 1, 0, 1)                \
+  V(I32x4BitMask, Operator::kNoProperties, 1, 0, 1)            \
   V(I16x8Splat, Operator::kNoProperties, 1, 0, 1)              \
   V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1)   \
   V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1)  \
@@ -421,6 +422,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(I16x8GeU, Operator::kNoProperties, 2, 0, 1)                \
   V(I16x8RoundingAverageU, Operator::kCommutative, 2, 0, 1)    \
   V(I16x8Abs, Operator::kNoProperties, 1, 0, 1)                \
+  V(I16x8BitMask, Operator::kNoProperties, 1, 0, 1)            \
   V(I8x16Splat, Operator::kNoProperties, 1, 0, 1)              \
   V(I8x16Neg, Operator::kNoProperties, 1, 0, 1)                \
   V(I8x16Shl, Operator::kNoProperties, 2, 0, 1)                \
@@ -447,6 +449,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(I8x16GeU, Operator::kNoProperties, 2, 0, 1)                \
   V(I8x16RoundingAverageU, Operator::kCommutative, 2, 0, 1)    \
   V(I8x16Abs, Operator::kNoProperties, 1, 0, 1)                \
+  V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1)            \
   V(S128Load, Operator::kNoProperties, 2, 0, 1)                \
   V(S128Store, Operator::kNoProperties, 3, 0, 1)               \
   V(S128Zero, Operator::kNoProperties, 0, 0, 1)                \
@@ -630,6 +630,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* I32x4GtU();
   const Operator* I32x4GeU();
   const Operator* I32x4Abs();
+  const Operator* I32x4BitMask();

   const Operator* I16x8Splat();
   const Operator* I16x8ExtractLaneU(int32_t);
@@ -666,6 +667,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* I16x8GeU();
   const Operator* I16x8RoundingAverageU();
   const Operator* I16x8Abs();
+  const Operator* I16x8BitMask();

   const Operator* I8x16Splat();
   const Operator* I8x16ExtractLaneU(int32_t);
@@ -697,6 +699,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* I8x16GeU();
   const Operator* I8x16RoundingAverageU();
   const Operator* I8x16Abs();
+  const Operator* I8x16BitMask();

   const Operator* S128Load();
   const Operator* S128Store();
@@ -840,6 +840,7 @@
   V(I32x4GtU)                \
   V(I32x4GeU)                \
   V(I32x4Abs)                \
+  V(I32x4BitMask)            \
   V(I16x8Splat)              \
   V(I16x8ExtractLaneU)       \
   V(I16x8ExtractLaneS)       \
@@ -878,6 +879,7 @@
   V(I16x8GeU)                \
   V(I16x8RoundingAverageU)   \
   V(I16x8Abs)                \
+  V(I16x8BitMask)            \
   V(I8x16Splat)              \
   V(I8x16ExtractLaneU)       \
   V(I8x16ExtractLaneS)       \
@@ -911,6 +913,7 @@
   V(I8x16GeU)                \
   V(I8x16RoundingAverageU)   \
   V(I8x16Abs)                \
+  V(I8x16BitMask)            \
   V(S128Load)                \
   V(S128Store)               \
   V(S128Zero)                \
@@ -4368,6 +4368,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
                               inputs[1]);
     case wasm::kExprI32x4Abs:
       return graph()->NewNode(mcgraph()->machine()->I32x4Abs(), inputs[0]);
+    case wasm::kExprI32x4BitMask:
+      return graph()->NewNode(mcgraph()->machine()->I32x4BitMask(), inputs[0]);
     case wasm::kExprI16x8Splat:
       return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
     case wasm::kExprI16x8SConvertI8x16Low:
@@ -4470,6 +4472,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
                               inputs[0], inputs[1]);
     case wasm::kExprI16x8Abs:
       return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
+    case wasm::kExprI16x8BitMask:
+      return graph()->NewNode(mcgraph()->machine()->I16x8BitMask(), inputs[0]);
     case wasm::kExprI8x16Splat:
       return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
     case wasm::kExprI8x16Neg:
@@ -4557,6 +4561,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
                               inputs[0], inputs[1]);
     case wasm::kExprI8x16Abs:
       return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]);
+    case wasm::kExprI8x16BitMask:
+      return graph()->NewNode(mcgraph()->machine()->I8x16BitMask(), inputs[0]);
     case wasm::kExprS128And:
       return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0],
                               inputs[1]);
@@ -26,6 +26,7 @@
 #include "src/wasm/wasm-limits.h"
 #include "src/wasm/wasm-module.h"
 #include "src/wasm/wasm-objects-inl.h"
+#include "src/wasm/wasm-opcodes.h"
 #include "src/zone/accounting-allocator.h"
 #include "src/zone/zone-containers.h"
@@ -2379,6 +2380,26 @@ class ThreadImpl {
       UNOP_CASE(I8x16Neg, i8x16, int16, 16, base::NegateWithWraparound(a))
       UNOP_CASE(I8x16Abs, i8x16, int16, 16, std::abs(a))
 #undef UNOP_CASE
+
+// Cast to double in call to signbit is due to MSVC issue, see
+// https://github.com/microsoft/STL/issues/519.
+#define BITMASK_CASE(op, name, stype, count)                   \
+  case kExpr##op: {                                            \
+    WasmValue v = Pop();                                       \
+    stype s = v.to_s128().to_##name();                         \
+    int32_t res = 0;                                           \
+    for (size_t i = 0; i < count; ++i) {                       \
+      bool sign = std::signbit(static_cast<double>(s.val[i])); \
+      res |= (sign << i);                                      \
+    }                                                          \
+    Push(WasmValue(res));                                      \
+    return true;                                               \
+  }
+      BITMASK_CASE(I8x16BitMask, i8x16, int16, 16)
+      BITMASK_CASE(I16x8BitMask, i16x8, int8, 8)
+      BITMASK_CASE(I32x4BitMask, i32x4, int4, 4)
+#undef BITMASK_CASE
+
 #define CMPOP_CASE(op, name, stype, out_stype, count, expr) \
   case kExpr##op: {                                         \
     WasmValue v2 = Pop();                                   \
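Expanded by hand for the i32x4 case, the macro above generates roughly this (a sketch assuming the interpreter's Pop/Push helpers and the int4 lane struct from the surrounding file):

    case kExprI32x4BitMask: {
      WasmValue v = Pop();
      int4 s = v.to_s128().to_i32x4();  // 4 lanes of int32_t
      int32_t res = 0;
      for (size_t i = 0; i < 4; ++i) {
        // The double cast preserves the sign test: every int32 value is
        // exactly representable as a double.
        bool sign = std::signbit(static_cast<double>(s.val[i]));
        res |= (sign << i);
      }
      Push(WasmValue(res));
      return true;
    }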
@@ -317,6 +317,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
     CASE_I16x8_OP(Abs, "abs")
     CASE_I32x4_OP(Abs, "abs")

+    CASE_I8x16_OP(BitMask, "bitmask")
+    CASE_I16x8_OP(BitMask, "bitmask")
+    CASE_I32x4_OP(BitMask, "bitmask")
+
     // Atomic operations.
     CASE_OP(AtomicNotify, "atomic.notify")
     CASE_INT_OP(AtomicWait, "atomic.wait")
@@ -453,6 +453,9 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
   V(I16x8AddHoriz, 0xfdbd, s_ss)   \
   V(I32x4AddHoriz, 0xfdbe, s_ss)   \
   V(F32x4AddHoriz, 0xfdbf, s_ss)   \
+  V(I8x16BitMask, 0xfde4, i_s)     \
+  V(I16x8BitMask, 0xfde5, i_s)     \
+  V(I32x4BitMask, 0xfde6, i_s)     \
   V(F32x4RecipApprox, 0xfdee, s_s) \
   V(F32x4RecipSqrtApprox, 0xfdef, s_s)
@@ -1658,6 +1658,68 @@ WASM_SIMD_TEST(I16x8ReplaceLane) {
   }
 }

+#if V8_TARGET_ARCH_ARM64
+WASM_SIMD_TEST_NO_LOWERING(I8x16BitMask) {
+  FLAG_SCOPE(wasm_simd_post_mvp);
+  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
+  byte value1 = r.AllocateLocal(kWasmS128);
+
+  BUILD(r, WASM_SET_LOCAL(value1, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I8x16_REPLACE_LANE(
+                                   0, WASM_GET_LOCAL(value1), WASM_I32V(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I8x16_REPLACE_LANE(
+                                   1, WASM_GET_LOCAL(value1), WASM_I32V(-1))),
+        WASM_SIMD_UNOP(kExprI8x16BitMask, WASM_GET_LOCAL(value1)));
+
+  FOR_INT8_INPUTS(x) {
+    int32_t actual = r.Call(x);
+    // Lane 0 is always 0 (positive), lane 1 is always -1.
+    int32_t expected = std::signbit(x) ? 0xFFFE : 0x0002;
+    CHECK_EQ(actual, expected);
+  }
+}
+
+WASM_SIMD_TEST_NO_LOWERING(I16x8BitMask) {
+  FLAG_SCOPE(wasm_simd_post_mvp);
+  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
+  byte value1 = r.AllocateLocal(kWasmS128);
+
+  BUILD(r, WASM_SET_LOCAL(value1, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I16x8_REPLACE_LANE(
+                                   0, WASM_GET_LOCAL(value1), WASM_I32V(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I16x8_REPLACE_LANE(
+                                   1, WASM_GET_LOCAL(value1), WASM_I32V(-1))),
+        WASM_SIMD_UNOP(kExprI16x8BitMask, WASM_GET_LOCAL(value1)));
+
+  FOR_INT16_INPUTS(x) {
+    int32_t actual = r.Call(x);
+    // Lane 0 is always 0 (positive), lane 1 is always -1.
+    int32_t expected = std::signbit(x) ? 0xFE : 2;
+    CHECK_EQ(actual, expected);
+  }
+}
+
+WASM_SIMD_TEST_NO_LOWERING(I32x4BitMask) {
+  FLAG_SCOPE(wasm_simd_post_mvp);
+  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
+  byte value1 = r.AllocateLocal(kWasmS128);
+
+  BUILD(r, WASM_SET_LOCAL(value1, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I32x4_REPLACE_LANE(
+                                   0, WASM_GET_LOCAL(value1), WASM_I32V(0))),
+        WASM_SET_LOCAL(value1, WASM_SIMD_I32x4_REPLACE_LANE(
+                                   1, WASM_GET_LOCAL(value1), WASM_I32V(-1))),
+        WASM_SIMD_UNOP(kExprI32x4BitMask, WASM_GET_LOCAL(value1)));
+
+  FOR_INT32_INPUTS(x) {
+    int32_t actual = r.Call(x);
+    // Lane 0 is always 0 (positive), lane 1 is always -1.
+    int32_t expected = std::signbit(x) ? 0xE : 2;
+    CHECK_EQ(actual, expected);
+  }
+}
+#endif  // V8_TARGET_ARCH_ARM64
+
 WASM_SIMD_TEST(I8x16Splat) {
   WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
   // Set up a global to hold output vector.
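Worked check of the expected values: each test forces lane 0 to 0 and lane 1 to -1, with the remaining lanes splatted from x. For i8x16, a negative x sets bits 1 through 15 (0xFFFE), while a non-negative x leaves only bit 1 (0x0002); the 0xFE/2 and 0xE/2 cases follow the same logic for 8 and 4 lanes. A compile-time sketch of the same arithmetic (illustrative, not part of the CL):

    #include <cstdint>

    constexpr int32_t ExpectedI8x16BitMask(int8_t x) {
      int32_t m = 0;
      for (int i = 0; i < 16; ++i) {
        int8_t lane = (i == 0) ? int8_t{0} : (i == 1) ? int8_t{-1} : x;
        if (lane < 0) m |= 1 << i;
      }
      return m;
    }
    static_assert(ExpectedI8x16BitMask(-5) == 0xFFFE, "negative splat");
    static_assert(ExpectedI8x16BitMask(7) == 0x0002, "non-negative splat");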