[ia32] Unify F32x4UConvertI32x4 SSE and AVX opcodes

Drive-by cleanup: IWYU for macro-assembler-ia32.h and
instruction-selector-ia32.cc

Ran using `iwyu_tool.py -p out/ia32.debug <filename>`, with a local
build of llvm and iwyu.

Bug: v8:11217,v8:7490
Change-Id: I4f8e95fa9be2f51f6764c994bb4da9ae86854c4d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2583671
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71728}
This commit is contained in:
Zhi An Ng 2020-12-11 05:53:03 +00:00 committed by Commit Bot
parent f22a6474c8
commit d628e5e1dd
5 changed files with 71 additions and 31 deletions

View File

@ -9,14 +9,36 @@
#ifndef V8_CODEGEN_IA32_MACRO_ASSEMBLER_IA32_H_
#define V8_CODEGEN_IA32_MACRO_ASSEMBLER_IA32_H_
#include <stdint.h>
#include "include/v8-internal.h"
#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/builtins/builtins.h"
#include "src/codegen/assembler.h"
#include "src/codegen/bailout-reason.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/label.h"
#include "src/codegen/reglist.h"
#include "src/codegen/reloc-info.h"
#include "src/codegen/turbo-assembler.h"
#include "src/common/globals.h"
#include "src/execution/frames.h"
#include "src/handles/handles.h"
#include "src/objects/heap-object.h"
#include "src/objects/smi.h"
#include "src/roots/roots.h"
#include "src/runtime/runtime.h"
namespace v8 {
namespace internal {
class Code;
class ExternalReference;
class StatsCounter;
// Convenience for platform-independent signatures. We do not normally
// distinguish memory operands from other operands on ia32.
using MemOperand = Operand;
@ -408,6 +430,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubd, psubd)
AVX_PACKED_OP3(Psubq, psubq)
AVX_PACKED_OP3(Pmuludq, pmuludq)
AVX_PACKED_OP3(Pavgb, pavgb)

View File

@ -2383,34 +2383,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kSSEF32x4UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ psrld(dst, 1); // divide by 2 to get in unsigned range
__ cvtdq2ps(dst, dst); // convert hi exactly
__ addps(dst, dst); // double hi, exactly
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kAVXF32x4UConvertI32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32F32x4UConvertI32x4: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
__ vpxor(kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg); // zeros
__ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
0x55); // get lo 16 bits
__ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range
__ vcvtdq2ps(dst, dst); // convert hi exactly
__ vaddps(dst, dst, dst); // double hi, exactly
__ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
__ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ Pblendw(kScratchDoubleReg, src, 0x55); // get lo 16 bits
__ Psubd(dst, src, kScratchDoubleReg); // get hi 16 bits
__ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ Psrld(dst, dst, 1); // divide by 2 to get in unsigned range
__ Cvtdq2ps(dst, dst); // convert hi exactly
__ Addps(dst, dst, dst); // double hi, exactly
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kSSEF32x4Abs: {

View File

@ -159,8 +159,7 @@ namespace compiler {
V(AVXF32x4ExtractLane) \
V(IA32Insertps) \
V(IA32F32x4SConvertI32x4) \
V(SSEF32x4UConvertI32x4) \
V(AVXF32x4UConvertI32x4) \
V(IA32F32x4UConvertI32x4) \
V(SSEF32x4Abs) \
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \

View File

@ -138,8 +138,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4ExtractLane:
case kIA32Insertps:
case kIA32F32x4SConvertI32x4:
case kSSEF32x4UConvertI32x4:
case kAVXF32x4UConvertI32x4:
case kIA32F32x4UConvertI32x4:
case kSSEF32x4Abs:
case kAVXF32x4Abs:
case kSSEF32x4Neg:

View File

@ -2,11 +2,43 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <limits>
#include <type_traits>
#include <vector>
#include "src/base/flags.h"
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/base/platform/wrappers.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/turbo-assembler.h"
#include "src/common/globals.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/backend/instruction-selector.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/frame.h"
#include "src/compiler/globals.h"
#include "src/compiler/linkage.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/opcodes.h"
#include "src/compiler/operator.h"
#include "src/compiler/write-barrier-kind.h"
#include "src/flags/flags.h"
#include "src/utils/utils.h"
#include "src/wasm/simd-shuffle.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
@ -279,6 +311,10 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
}
}
// Convenience overload for SIMD operations that use one unified opcode:
// forwards to the two-opcode VisitRRSimd, supplying the same opcode in both
// opcode positions.
void VisitRRSimd(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
  const ArchOpcode first_opcode = opcode;
  const ArchOpcode second_opcode = opcode;
  VisitRRSimd(selector, node, first_opcode, second_opcode);
}
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
// a register as we don't have memory alignment yet. For AVX, memory operands
// are fine, but can have performance issues if not aligned to 16/32 bytes
@ -2399,7 +2435,7 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
}
// Handles the F32x4UConvertI32x4 node by delegating to VisitRRSimd.
// NOTE(review): this span comes from a diff hunk and shows two calls: one
// passing separate kAVX/kSSE opcodes and one passing the single kIA32 opcode.
// Presumably only the kIA32 call is meant to remain after the change --
// confirm against the resulting file.
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
VisitRRSimd(this, node, kAVXF32x4UConvertI32x4, kSSEF32x4UConvertI32x4);
VisitRRSimd(this, node, kIA32F32x4UConvertI32x4);
}
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {