From cb4ff11d83933fece6e462376e3cb4c1aec1daee Mon Sep 17 00:00:00 2001
From: Ng Zhi An
Date: Tue, 7 Jan 2020 11:03:05 -0800
Subject: [PATCH] [wasm-simd] Implement rounding average for ia32

Bug: v8:10039
Change-Id: I3568bd3d01508e8bca81959341c75369c5bdf700
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1958051
Reviewed-by: Deepti Gandluri
Commit-Queue: Zhi An Ng
Cr-Commit-Position: refs/heads/master@{#65618}
---
 src/codegen/ia32/macro-assembler-ia32.h                |  2 ++
 src/codegen/ia32/sse-instr.h                           |  2 ++
 src/compiler/backend/ia32/code-generator-ia32.cc       | 10 ++++++++++
 src/compiler/backend/ia32/instruction-codes-ia32.h     |  2 ++
 .../backend/ia32/instruction-scheduler-ia32.cc         |  2 ++
 src/compiler/backend/ia32/instruction-selector-ia32.cc |  4 +++-
 src/compiler/backend/instruction-selector.cc           |  6 ------
 test/cctest/wasm/test-run-wasm-simd.cc                 |  4 ----
 8 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/codegen/ia32/macro-assembler-ia32.h b/src/codegen/ia32/macro-assembler-ia32.h
index 4602ab217e..13dce8c992 100644
--- a/src/codegen/ia32/macro-assembler-ia32.h
+++ b/src/codegen/ia32/macro-assembler-ia32.h
@@ -338,6 +338,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_PACKED_OP3(Paddq, paddq)
   AVX_PACKED_OP3(Psubq, psubq)
   AVX_PACKED_OP3(Pmuludq, pmuludq)
+  AVX_PACKED_OP3(Pavgb, pavgb)
+  AVX_PACKED_OP3(Pavgw, pavgw)
 #undef AVX_PACKED_OP3

   AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
diff --git a/src/codegen/ia32/sse-instr.h b/src/codegen/ia32/sse-instr.h
index bf48767506..4361c773ef 100644
--- a/src/codegen/ia32/sse-instr.h
+++ b/src/codegen/ia32/sse-instr.h
@@ -34,8 +34,10 @@
   V(pslld, 66, 0F, F2)     \
   V(psllq, 66, 0F, F3)     \
   V(pmuludq, 66, 0F, F4)   \
+  V(pavgb, 66, 0F, E0)     \
   V(psraw, 66, 0F, E1)     \
   V(psrad, 66, 0F, E2)     \
+  V(pavgw, 66, 0F, E3)     \
   V(psrlw, 66, 0F, D1)     \
   V(psrld, 66, 0F, D2)     \
   V(psrlq, 66, 0F, D3)     \
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
index b9b584554d..bcfb80959b 100644
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -3164,6 +3164,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
       break;
     }
+    case kIA32I16x8RoundingAverageU: {
+      __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
     case kIA32I8x16Splat: {
       XMMRegister dst = i.OutputSimd128Register();
       __ Movd(dst, i.InputOperand(0));
@@ -3592,6 +3597,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
       break;
     }
+    case kIA32I8x16RoundingAverageU: {
+      __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
     case kIA32S128Zero: {
       XMMRegister dst = i.OutputSimd128Register();
       __ Pxor(dst, dst);
diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h
index 86c119c377..91261c4a58 100644
--- a/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -284,6 +284,7 @@ namespace compiler {
   V(AVXI16x8GtU)               \
   V(SSEI16x8GeU)               \
   V(AVXI16x8GeU)               \
+  V(IA32I16x8RoundingAverageU) \
   V(IA32I8x16Splat)            \
   V(IA32I8x16ExtractLaneU)     \
   V(IA32I8x16ExtractLaneS)     \
@@ -332,6 +333,7 @@ namespace compiler {
   V(AVXI8x16GtU)               \
   V(SSEI8x16GeU)               \
   V(AVXI8x16GeU)               \
+  V(IA32I8x16RoundingAverageU) \
   V(IA32S128Zero)              \
   V(SSES128Not)                \
   V(AVXS128Not)                \
diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index b100d0a97a..14e4ff318f 100644
--- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -265,6 +265,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXI16x8GtU:
     case kSSEI16x8GeU:
     case kAVXI16x8GeU:
+    case kIA32I16x8RoundingAverageU:
     case kIA32I8x16Splat:
     case kIA32I8x16ExtractLaneU:
     case kIA32I8x16ExtractLaneS:
@@ -313,6 +314,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXI8x16GtU:
     case kSSEI8x16GeU:
    case kAVXI8x16GeU:
+    case kIA32I8x16RoundingAverageU:
     case kIA32S128Zero:
     case kSSES128Not:
     case kAVXS128Not:
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
index b22974f5d4..21d3e537fb 100644
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -2027,7 +2027,9 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {

 #define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
   V(I64x2Add)                              \
-  V(I64x2Sub)
+  V(I64x2Sub)                              \
+  V(I16x8RoundingAverageU)                 \
+  V(I8x16RoundingAverageU)

 #define SIMD_UNOP_LIST(V) \
   V(F32x4SConvertI32x4)   \
diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc
index 211a0b367a..1b9870fa7c 100644
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2636,12 +2636,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
 }
 #if !V8_TARGET_ARCH_ARM64
 #if !V8_TARGET_ARCH_ARM
-void InstructionSelector::VisitI16x8RoundingAverageU(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
-  UNIMPLEMENTED();
-}
 void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM
 #if !V8_TARGET_ARCH_IA32
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index c13e32a25a..377783fcb9 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -2181,13 +2181,11 @@ WASM_SIMD_TEST(I16x8LeU) {
                         UnsignedLessEqual);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
 WASM_SIMD_TEST_NO_LOWERING(I16x8RoundingAverageU) {
   RunI16x8BinOpTest(execution_tier, lower_simd, kExprI16x8RoundingAverageU,
                     base::RoundingAverageUnsigned);
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM

 void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                          WasmOpcode opcode, Int16ShiftOp expected_op) {
@@ -2407,13 +2405,11 @@ WASM_SIMD_TEST(I8x16Mul) {
                        base::MulWithWraparound);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
 WASM_SIMD_TEST_NO_LOWERING(I8x16RoundingAverageU) {
   RunI8x16BinOpTest(execution_tier, lower_simd, kExprI8x16RoundingAverageU,
                     base::RoundingAverageUnsigned);
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM

 void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                          WasmOpcode opcode, Int8ShiftOp expected_op) {
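
For reference, pavgb and pavgw compute a lane-wise unsigned rounding average,
(a + b + 1) >> 1, with the intermediate sum widened so it cannot overflow;
this is the same operation as base::RoundingAverageUnsigned, which the newly
unguarded cctests use as the scalar reference. A minimal stand-alone sketch
of the per-lane semantics (the helper name below is illustrative, not V8's
implementation):

#include <cstdint>
#include <type_traits>

// Hypothetical scalar model of one pavgb/pavgw lane: the unsigned rounding
// average (a + b + 1) >> 1. The sum is computed in uint32_t so that adding
// two maximal 8- or 16-bit lanes plus the rounding bias cannot wrap.
template <typename T>
T RoundingAverageUnsignedLane(T a, T b) {
  static_assert(std::is_unsigned<T>::value, "pavg lanes are unsigned");
  return static_cast<T>((static_cast<uint32_t>(a) + b + 1) >> 1);
}

// e.g. RoundingAverageUnsignedLane<uint8_t>(255, 254) == 255: the +1 bias
// rounds the average up, whereas a truncating (255 + 254) / 2 would give 254.

The sse-instr.h entries added above wire these operations to their SSE2
encodings (66 0F E0 for pavgb, 66 0F E3 for pavgw), so the code generator
emits a single hardware instruction per 128-bit operation rather than a
scalar loop like the model above.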