[wasm-simd][ia32] Implement integer absolute

Implements i8x16.abs, i16x8.abs, and i32x4.abs.

Bug: v8:10233
Change-Id: I573c9969ed0ccc9dfe7ff6bde4fcddb5ffdaa789
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2067844
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66492}
This commit is contained in:
Ng Zhi An 2020-02-27 08:40:41 +00:00 committed by Commit Bot
parent fa52a03e7e
commit 577fd8a736
11 changed files with 52 additions and 7 deletions

View File

@ -1619,6 +1619,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
@ -1656,6 +1657,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
}
SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION

View File

@ -382,6 +382,16 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_XO_SSE3(Movddup, movddup)
#undef AVX_OP2_XO_SSE3
// Helper for SSSE3 two-operand instructions: expands AVX_OP2_WITH_TYPE_SCOPE
// twice for (macro_name, name), once with an XMMRegister source and once with
// an Operand source, both tagged with the SSSE3 feature.
#define AVX_OP2_XO_SSSE3(macro_name, name) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSSE3) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSSE3)
AVX_OP2_XO_SSSE3(Pabsb, pabsb)
AVX_OP2_XO_SSSE3(Pabsw, pabsw)
AVX_OP2_XO_SSSE3(Pabsd, pabsd)
// Undefine the SSSE3 helper itself (three S's) so it stays local to this list.
#undef AVX_OP2_XO_SSSE3
#define AVX_OP2_XO_SSE4(macro_name, name) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE4_1) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)

View File

@ -67,6 +67,12 @@
V(psignw, 66, 0F, 38, 09) \
V(psignd, 66, 0F, 38, 0A)
// SSSE3 instructions whose AVX version has two operands.
// X-macro list: V is applied to every entry, so adding an instruction here
// adds it to every place the list is expanded.
// Each entry has five columns: the mnemonic followed by four hex byte tokens.
// NOTE(review): the byte columns look like (prefix, escape1, escape2, opcode),
// matching the parameter names of the DECLARE_* consumers — confirm there.
#define SSSE3_UNOP_INSTRUCTION_LIST(V) \
V(pabsb, 66, 0F, 38, 1C) \
V(pabsw, 66, 0F, 38, 1D) \
V(pabsd, 66, 0F, 38, 1E)
#define SSE4_INSTRUCTION_LIST(V) \
V(packusdw, 66, 0F, 38, 2B) \
V(pminsb, 66, 0F, 38, 38) \

View File

@ -2758,6 +2758,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
// i32x4.abs: emit Pabsd from SIMD input 0 into the output SIMD register.
case kIA32I32x4Abs: {
__ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
@ -3088,6 +3092,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
// i16x8.abs: emit Pabsw from SIMD input 0 into the output SIMD register.
case kIA32I16x8Abs: {
__ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32I8x16Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
@ -3521,6 +3529,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
// i8x16.abs: emit Pabsb from SIMD input 0 into the output SIMD register.
case kIA32I8x16Abs: {
__ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32S128Zero: {
XMMRegister dst = i.OutputSimd128Register();
__ Pxor(dst, dst);

View File

@ -225,6 +225,7 @@ namespace compiler {
V(AVXI32x4GtU) \
V(SSEI32x4GeU) \
V(AVXI32x4GeU) \
V(IA32I32x4Abs) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \
@ -279,6 +280,7 @@ namespace compiler {
V(SSEI16x8GeU) \
V(AVXI16x8GeU) \
V(IA32I16x8RoundingAverageU) \
V(IA32I16x8Abs) \
V(IA32I8x16Splat) \
V(IA32I8x16ExtractLaneU) \
V(IA32I8x16ExtractLaneS) \
@ -328,6 +330,7 @@ namespace compiler {
V(SSEI8x16GeU) \
V(AVXI8x16GeU) \
V(IA32I8x16RoundingAverageU) \
V(IA32I8x16Abs) \
V(IA32S128Zero) \
V(SSES128Not) \
V(AVXS128Not) \

View File

@ -206,6 +206,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GtU:
case kSSEI32x4GeU:
case kAVXI32x4GeU:
case kIA32I32x4Abs:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS:
@ -260,6 +261,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEI16x8GeU:
case kAVXI16x8GeU:
case kIA32I16x8RoundingAverageU:
case kIA32I16x8Abs:
case kIA32I8x16Splat:
case kIA32I8x16ExtractLaneU:
case kIA32I8x16ExtractLaneS:
@ -309,6 +311,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEI8x16GeU:
case kAVXI8x16GeU:
case kIA32I8x16RoundingAverageU:
case kIA32I8x16Abs:
case kIA32S128Zero:
case kSSES128Not:
case kAVXS128Not:

View File

@ -2109,12 +2109,15 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I32x4Neg) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4Abs) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I8x16Neg)
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs)
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \

View File

@ -2643,9 +2643,11 @@ void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
// Fallback stubs for the integer Abs visitors: each one just hits
// UNIMPLEMENTED(). They are compiled out when targeting IA32; this group also
// sits inside the enclosing !V8_TARGET_ARCH_ARM64 region closed just below.
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Abs(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }

View File

@ -781,6 +781,7 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
break; \
}
SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE_AVX_RM_DIS_CASE)
SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE_AVX_RM_DIS_CASE)
#undef DECLARE_SSE_AVX_RM_DIS_CASE
default:
@ -2100,6 +2101,7 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
}
SSSE3_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSSE3_UNOP_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSE4_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSE4_RM_INSTRUCTION_LIST(SSE34_DIS_CASE)
#undef SSE34_DIS_CASE

View File

@ -592,6 +592,7 @@ TEST(DisasmIa320) {
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope scope(&assm, SSSE3);
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
SSSE3_UNOP_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
__ palignr(xmm5, xmm1, 5);
__ palignr(xmm5, Operand(edx, 4), 5);
}
@ -802,6 +803,7 @@ TEST(DisasmIa320) {
__ v##instruction(xmm5, xmm1); \
__ v##instruction(xmm5, Operand(edx, 4));
SSSE3_UNOP_INSTRUCTION_LIST(EMIT_SSE4_RM_AVXINSTR)
SSE4_RM_INSTRUCTION_LIST(EMIT_SSE4_RM_AVXINSTR)
#undef EMIT_SSE4_RM_AVXINSTR
}

View File

@ -1824,11 +1824,11 @@ WASM_SIMD_TEST(I32x4Neg) {
base::NegateWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
// Runs kExprI32x4Abs through the shared I32x4 unary-op runner, passing Abs as
// the final argument (presumably the scalar reference the runner compares
// against — confirm in RunI32x4UnOpTest).
WASM_SIMD_TEST_NO_LOWERING(I32x4Abs) {
RunI32x4UnOpTest(execution_tier, lower_simd, kExprI32x4Abs, Abs);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(S128Not) {
RunI32x4UnOpTest(execution_tier, lower_simd, kExprS128Not, Not);
@ -2089,11 +2089,11 @@ WASM_SIMD_TEST(I16x8Neg) {
base::NegateWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
// Runs kExprI16x8Abs through the shared I16x8 unary-op runner, passing Abs as
// the final argument (presumably the scalar reference the runner compares
// against — confirm in RunI16x8UnOpTest).
WASM_SIMD_TEST_NO_LOWERING(I16x8Abs) {
RunI16x8UnOpTest(execution_tier, lower_simd, kExprI16x8Abs, Abs);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
template <typename T = int16_t, typename OpType = T (*)(T, T)>
void RunI16x8BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
@ -2296,11 +2296,11 @@ WASM_SIMD_TEST(I8x16Neg) {
base::NegateWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
// Runs kExprI8x16Abs through the shared I8x16 unary-op runner, passing Abs as
// the final argument (presumably the scalar reference the runner compares
// against — confirm in RunI8x16UnOpTest).
WASM_SIMD_TEST_NO_LOWERING(I8x16Abs) {
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) {