[wasm-simd][x64] Implement integer absolute on x64 and interpreter

Implements i8x16.abs, i16x8.abs, and i32x4.abs.

Bug: v8:10233
Change-Id: Iefe3c70bdc229c6da6a0ef07273ca654ca1e937e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2063200
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66440}
This commit is contained in:
Ng Zhi An 2020-02-21 04:14:53 +00:00 committed by Commit Bot
parent a67a16aae4
commit fa350b5bf4
14 changed files with 83 additions and 3 deletions

View File

@ -229,6 +229,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSSE3(Palignr, palignr)
AVX_OP_SSSE3(Pabsb, pabsb)
AVX_OP_SSSE3(Pabsw, pabsw)
AVX_OP_SSSE3(Pabsd, pabsd)
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
AVX_OP_SSE4_1(Pmulld, pmulld)
AVX_OP_SSE4_1(Pminsd, pminsd)

View File

@ -2026,6 +2026,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4GtU(node);
case IrOpcode::kI32x4GeU:
return MarkAsSimd128(node), VisitI32x4GeU(node);
case IrOpcode::kI32x4Abs:
return MarkAsSimd128(node), VisitI32x4Abs(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
@ -2092,6 +2094,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8GeU(node);
case IrOpcode::kI16x8RoundingAverageU:
return MarkAsSimd128(node), VisitI16x8RoundingAverageU(node);
case IrOpcode::kI16x8Abs:
return MarkAsSimd128(node), VisitI16x8Abs(node);
case IrOpcode::kI8x16Splat:
return MarkAsSimd128(node), VisitI8x16Splat(node);
case IrOpcode::kI8x16ExtractLaneU:
@ -2148,6 +2152,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16GeU(node);
case IrOpcode::kI8x16RoundingAverageU:
return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node);
case IrOpcode::kI8x16Abs:
return MarkAsSimd128(node), VisitI8x16Abs(node);
case IrOpcode::kS128Zero:
return MarkAsSimd128(node), VisitS128Zero(node);
case IrOpcode::kS128And:
@ -2621,6 +2627,9 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
#endif // !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
// Integer lane-wise abs fallbacks. These sit under the enclosing
// !V8_TARGET_ARCH_X64 guard, so every non-x64 target gets a stub that
// reports the operation as unimplemented when instruction selection
// reaches it.
void InstructionSelector::VisitI8x16Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Abs(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
@ -3235,7 +3244,6 @@ bool InstructionSelector::NeedsPoisoning(IsSafetyCheck safety_check) const {
}
UNREACHABLE();
}
} // namespace compiler
} // namespace internal
} // namespace v8

View File

@ -3087,6 +3087,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pcmpeqd(dst, src);
break;
}
case kX64I32x4Abs: {
__ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64S128Zero: {
XMMRegister dst = i.OutputSimd128Register();
__ xorps(dst, dst);
@ -3286,6 +3290,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I16x8Abs: {
__ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64I8x16Splat: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
XMMRegister dst = i.OutputSimd128Register();
@ -3529,6 +3537,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I8x16Abs: {
__ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64S128And: {
__ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;

View File

@ -240,6 +240,7 @@ namespace compiler {
V(X64I32x4MaxU) \
V(X64I32x4GtU) \
V(X64I32x4GeU) \
V(X64I32x4Abs) \
V(X64I16x8Splat) \
V(X64I16x8ExtractLaneU) \
V(X64I16x8ExtractLaneS) \
@ -273,6 +274,7 @@ namespace compiler {
V(X64I16x8GtU) \
V(X64I16x8GeU) \
V(X64I16x8RoundingAverageU) \
V(X64I16x8Abs) \
V(X64I8x16Splat) \
V(X64I8x16ExtractLaneU) \
V(X64I8x16ExtractLaneS) \
@ -301,6 +303,7 @@ namespace compiler {
V(X64I8x16GtU) \
V(X64I8x16GeU) \
V(X64I8x16RoundingAverageU) \
V(X64I8x16Abs) \
V(X64S128Zero) \
V(X64S128Not) \
V(X64S128And) \

View File

@ -212,6 +212,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4MaxU:
case kX64I32x4GtU:
case kX64I32x4GeU:
case kX64I32x4Abs:
case kX64I16x8Splat:
case kX64I16x8ExtractLaneU:
case kX64I16x8ExtractLaneS:
@ -245,6 +246,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8GtU:
case kX64I16x8GeU:
case kX64I16x8RoundingAverageU:
case kX64I16x8Abs:
case kX64I8x16Splat:
case kX64I8x16ExtractLaneU:
case kX64I8x16ExtractLaneS:
@ -273,6 +275,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I8x16GtU:
case kX64I8x16GeU:
case kX64I8x16RoundingAverageU:
case kX64I8x16Abs:
case kX64S128And:
case kX64S128Or:
case kX64S128Xor:

View File

@ -2729,12 +2729,15 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4Neg) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4Abs) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs) \
V(S128Not)
#define SIMD_SHIFT_OPCODES(V) \

View File

@ -389,6 +389,7 @@ MachineType AtomicOpType(Operator const* op) {
V(I32x4MaxU, Operator::kCommutative, 2, 0, 1) \
V(I32x4GtU, Operator::kNoProperties, 2, 0, 1) \
V(I32x4GeU, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Abs, Operator::kNoProperties, 1, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
@ -419,6 +420,7 @@ MachineType AtomicOpType(Operator const* op) {
V(I16x8GtU, Operator::kNoProperties, 2, 0, 1) \
V(I16x8GeU, Operator::kNoProperties, 2, 0, 1) \
V(I16x8RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
V(I16x8Abs, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \
@ -444,6 +446,7 @@ MachineType AtomicOpType(Operator const* op) {
V(I8x16GtU, Operator::kNoProperties, 2, 0, 1) \
V(I8x16GeU, Operator::kNoProperties, 2, 0, 1) \
V(I8x16RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \
V(S128Load, Operator::kNoProperties, 2, 0, 1) \
V(S128Store, Operator::kNoProperties, 3, 0, 1) \
V(S128Zero, Operator::kNoProperties, 0, 0, 1) \

View File

@ -629,6 +629,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4MaxU();
const Operator* I32x4GtU();
const Operator* I32x4GeU();
const Operator* I32x4Abs();
const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t);
@ -664,6 +665,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8GtU();
const Operator* I16x8GeU();
const Operator* I16x8RoundingAverageU();
const Operator* I16x8Abs();
const Operator* I8x16Splat();
const Operator* I8x16ExtractLaneU(int32_t);
@ -694,6 +696,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I8x16GtU();
const Operator* I8x16GeU();
const Operator* I8x16RoundingAverageU();
const Operator* I8x16Abs();
const Operator* S128Load();
const Operator* S128Store();

View File

@ -838,6 +838,7 @@
V(I32x4LeU) \
V(I32x4GtU) \
V(I32x4GeU) \
V(I32x4Abs) \
V(I16x8Splat) \
V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \
@ -875,6 +876,7 @@
V(I16x8GtU) \
V(I16x8GeU) \
V(I16x8RoundingAverageU) \
V(I16x8Abs) \
V(I8x16Splat) \
V(I8x16ExtractLaneU) \
V(I8x16ExtractLaneS) \
@ -907,6 +909,7 @@
V(I8x16GtU) \
V(I8x16GeU) \
V(I8x16RoundingAverageU) \
V(I8x16Abs) \
V(S128Load) \
V(S128Store) \
V(S128Zero) \

View File

@ -4357,6 +4357,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4GeU:
return graph()->NewNode(mcgraph()->machine()->I32x4GeU(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Abs:
return graph()->NewNode(mcgraph()->machine()->I32x4Abs(), inputs[0]);
case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low:
@ -4457,6 +4459,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8RoundingAverageU:
return graph()->NewNode(mcgraph()->machine()->I16x8RoundingAverageU(),
inputs[0], inputs[1]);
case wasm::kExprI16x8Abs:
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
case wasm::kExprI8x16Splat:
return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
case wasm::kExprI8x16Neg:
@ -4542,6 +4546,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI8x16RoundingAverageU:
return graph()->NewNode(mcgraph()->machine()->I8x16RoundingAverageU(),
inputs[0], inputs[1]);
case wasm::kExprI8x16Abs:
return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]);
case wasm::kExprS128And:
return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0],
inputs[1]);

View File

@ -2373,9 +2373,12 @@ class ThreadImpl {
UNOP_CASE(F32x4RecipSqrtApprox, f32x4, float4, 4, base::RecipSqrt(a))
UNOP_CASE(I64x2Neg, i64x2, int2, 2, base::NegateWithWraparound(a))
UNOP_CASE(I32x4Neg, i32x4, int4, 4, base::NegateWithWraparound(a))
UNOP_CASE(I32x4Abs, i32x4, int4, 4, std::abs(a))
UNOP_CASE(S128Not, i32x4, int4, 4, ~a)
UNOP_CASE(I16x8Neg, i16x8, int8, 8, base::NegateWithWraparound(a))
UNOP_CASE(I16x8Abs, i16x8, int8, 8, std::abs(a))
UNOP_CASE(I8x16Neg, i8x16, int16, 16, base::NegateWithWraparound(a))
UNOP_CASE(I8x16Abs, i8x16, int16, 16, std::abs(a))
#undef UNOP_CASE
#define CMPOP_CASE(op, name, stype, out_stype, count, expr) \
case kExpr##op: { \

View File

@ -337,6 +337,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I8x16_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(RoundingAverageU, "avgr_u")
CASE_I8x16_OP(Abs, "abs")
CASE_I16x8_OP(Abs, "abs")
CASE_I32x4_OP(Abs, "abs")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")

View File

@ -448,8 +448,11 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I16x8AddHoriz, 0xfdbd, s_ss) \
V(I32x4AddHoriz, 0xfdbe, s_ss) \
V(F32x4AddHoriz, 0xfdbf, s_ss) \
V(F32x4RecipApprox, 0xfde0, s_s) \
V(F32x4RecipSqrtApprox, 0xfde1, s_s)
V(I8x16Abs, 0xfde1, s_s) \
V(I16x8Abs, 0xfde2, s_s) \
V(I32x4Abs, 0xfde3, s_s) \
V(F32x4RecipApprox, 0xfdee, s_s) \
V(F32x4RecipSqrtApprox, 0xfdef, s_s)
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd05, _) \

View File

@ -288,6 +288,11 @@ T AndNot(T a, T b) {
return a & ~b;
}
// Scalar reference for the integer lane-wise "abs" operations.
//
// Intended for signed integer lane types (int8_t, int16_t, int32_t, ...).
// Unlike std::abs, the most negative value is handled without signed
// overflow: the negation is carried out in unsigned arithmetic, so the
// result wraps around and Abs(min) == min (e.g. Abs<int8_t>(-128) == -128).
template <typename T>
T Abs(T a) {
  if (a >= 0) return a;
  // Sign-extend into the widest unsigned type, negate modulo 2^64, then
  // truncate back to the lane width.
  return static_cast<T>(0ULL - static_cast<unsigned long long>(a));
}
// Comparison helper, only used for F64x2 tests below: returns -1 (every bit
// of the 64-bit result set) when the operands compare equal, and 0 otherwise.
int64_t Equal(double a, double b) {
  if (a == b) return -1;
  return 0;
}
@ -1819,6 +1824,12 @@ WASM_SIMD_TEST(I32x4Neg) {
base::NegateWithWraparound);
}
// i32x4.abs test. Compiled only for x64 builds (see the guard below). It
// hands the kExprI32x4Abs opcode and the scalar Abs helper to the shared
// I32x4 unary-op driver.
// NOTE(review): Abs is presumably used as the expected per-lane result, and
// the NO_LOWERING macro variant presumably skips the scalar-lowering
// configuration — confirm against the driver and macro definitions.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I32x4Abs) {
RunI32x4UnOpTest(execution_tier, lower_simd, kExprI32x4Abs, Abs);
}
#endif
WASM_SIMD_TEST(S128Not) {
RunI32x4UnOpTest(execution_tier, lower_simd, kExprS128Not, Not);
}
@ -2078,6 +2089,12 @@ WASM_SIMD_TEST(I16x8Neg) {
base::NegateWithWraparound);
}
// i16x8.abs test. Compiled only for x64 builds (see the guard below). It
// hands the kExprI16x8Abs opcode and the scalar Abs helper to the shared
// I16x8 unary-op driver.
// NOTE(review): Abs is presumably used as the expected per-lane result —
// confirm against the driver definition.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I16x8Abs) {
RunI16x8UnOpTest(execution_tier, lower_simd, kExprI16x8Abs, Abs);
}
#endif
template <typename T = int16_t, typename OpType = T (*)(T, T)>
void RunI16x8BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, OpType expected_op) {
@ -2279,6 +2296,12 @@ WASM_SIMD_TEST(I8x16Neg) {
base::NegateWithWraparound);
}
// i8x16.abs test. Compiled only for x64 builds (see the guard below). It
// hands the kExprI8x16Abs opcode and the scalar Abs helper to the shared
// I8x16 unary-op driver.
// NOTE(review): Abs is presumably used as the expected per-lane result —
// confirm against the driver definition.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I8x16Abs) {
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
}
#endif
// Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);