Revert of Add several SIMD opcodes to IA32 (patchset #9 id:160001 of https://codereview.chromium.org/2695613004/ )
Reason for revert:
Fails with nosse4:
https://build.chromium.org/p/client.v8/builders/V8%20Linux%20-%20debug/builds/13853
Original issue's description:
> Add several SIMD opcodes to IA32
>
> CreateInt32x4, Int32x4ExtractLane, Int32x4ReplaceLane
> Int32x4Add, Int32x4Sub
>
> Also add paddd and psubd to ia32-assembler
>
> BUG=
>
> Review-Url: https://codereview.chromium.org/2695613004
> Cr-Commit-Position: refs/heads/master@{#43483}
> Committed: 4deb9ffdec
TBR=bbudge@chromium.org,gdeepti@chromium.org,bmeurer@chromium.org,jing.bao@intel.com
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=
Review-Url: https://codereview.chromium.org/2717423003
Cr-Commit-Position: refs/heads/master@{#43489}
This commit is contained in:
parent
a48d7ce226
commit
a6b20adbf9
1
BUILD.gn
1
BUILD.gn
@ -1875,7 +1875,6 @@ v8_source_set("v8_base") {
|
||||
"src/ia32/macro-assembler-ia32.h",
|
||||
"src/ia32/simulator-ia32.cc",
|
||||
"src/ia32/simulator-ia32.h",
|
||||
"src/ia32/sse-instr.h",
|
||||
"src/ic/ia32/access-compiler-ia32.cc",
|
||||
"src/ic/ia32/handler-compiler-ia32.cc",
|
||||
"src/ic/ia32/ic-ia32.cc",
|
||||
|
@ -1906,41 +1906,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ xchg(i.InputRegister(index), operand);
|
||||
break;
|
||||
}
|
||||
case kIA32Int32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ movd(dst, i.InputOperand(0));
|
||||
__ pshufd(dst, dst, 0x0);
|
||||
break;
|
||||
}
|
||||
case kIA32Int32x4ExtractLane: {
|
||||
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32Int32x4ReplaceLane: {
|
||||
CpuFeatureScope sse_scope(masm(), SSE4_1);
|
||||
__ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kSSEInt32x4Add: {
|
||||
__ paddd(i.OutputSimd128Register(), i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kSSEInt32x4Sub: {
|
||||
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXInt32x4Add: {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXInt32x4Sub: {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kCheckedLoadInt8:
|
||||
ASSEMBLE_CHECKED_LOAD_INTEGER(movsx_b);
|
||||
break;
|
||||
|
@ -113,14 +113,7 @@ namespace compiler {
|
||||
V(IA32StackCheck) \
|
||||
V(IA32Xchgb) \
|
||||
V(IA32Xchgw) \
|
||||
V(IA32Xchgl) \
|
||||
V(IA32Int32x4Splat) \
|
||||
V(IA32Int32x4ExtractLane) \
|
||||
V(IA32Int32x4ReplaceLane) \
|
||||
V(SSEInt32x4Add) \
|
||||
V(SSEInt32x4Sub) \
|
||||
V(AVXInt32x4Add) \
|
||||
V(AVXInt32x4Sub)
|
||||
V(IA32Xchgl)
|
||||
|
||||
// Addressing modes represent the "shape" of inputs to an instruction.
|
||||
// Many instructions support multiple addressing modes. Addressing modes
|
||||
|
@ -97,13 +97,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXFloat32Neg:
|
||||
case kIA32BitcastFI:
|
||||
case kIA32BitcastIF:
|
||||
case kIA32Int32x4Splat:
|
||||
case kIA32Int32x4ExtractLane:
|
||||
case kIA32Int32x4ReplaceLane:
|
||||
case kSSEInt32x4Add:
|
||||
case kSSEInt32x4Sub:
|
||||
case kAVXInt32x4Add:
|
||||
case kAVXInt32x4Sub:
|
||||
return (instr->addressing_mode() == kMode_None)
|
||||
? kNoOpcodeFlags
|
||||
: kIsLoadOperation | kHasSideEffect;
|
||||
|
@ -873,9 +873,7 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
|
||||
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
|
||||
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
|
||||
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
|
||||
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
|
||||
V(Int32x4Add, kAVXInt32x4Add, kSSEInt32x4Add) \
|
||||
V(Int32x4Sub, kAVXInt32x4Sub, kSSEInt32x4Sub)
|
||||
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
|
||||
|
||||
#define FLOAT_UNOP_LIST(V) \
|
||||
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
|
||||
@ -1718,25 +1716,6 @@ void InstructionSelector::VisitAtomicStore(Node* node) {
|
||||
Emit(code, 0, nullptr, input_count, inputs);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32x4Splat(Node* node) {
|
||||
VisitRO(this, node, kIA32Int32x4Splat);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
int32_t lane = OpParameter<int32_t>(node);
|
||||
Emit(kIA32Int32x4ExtractLane, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
int32_t lane = OpParameter<int32_t>(node);
|
||||
Emit(kIA32Int32x4ReplaceLane, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
|
||||
g.Use(node->InputAt(1)));
|
||||
}
|
||||
|
||||
// static
|
||||
MachineOperatorBuilder::Flags
|
||||
InstructionSelector::SupportedMachineOperatorFlags() {
|
||||
|
@ -1966,7 +1966,7 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // V8_TARGET_ARCH_64_BIT
|
||||
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
|
||||
void InstructionSelector::VisitInt32x4Splat(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
|
||||
@ -1980,9 +1980,7 @@ void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
|
||||
void InstructionSelector::VisitInt32x4Add(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitInt32x4Sub(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
|
||||
void InstructionSelector::VisitSimd128Zero(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitSimd1x4Zero(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -48,7 +48,7 @@ namespace internal {
|
||||
|
||||
bool CpuFeatures::SupportsCrankshaft() { return true; }
|
||||
|
||||
bool CpuFeatures::SupportsSimd128() { return true; }
|
||||
bool CpuFeatures::SupportsSimd128() { return false; }
|
||||
|
||||
static const byte kCallOpcode = 0xE8;
|
||||
static const int kNoCodeAgeSequenceLength = 5;
|
||||
|
@ -2864,24 +2864,6 @@ void Assembler::rorx(Register dst, const Operand& src, byte imm8) {
|
||||
EMIT(imm8);
|
||||
}
|
||||
|
||||
void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
|
||||
byte escape, byte opcode) {
|
||||
EnsureSpace ensure_space(this);
|
||||
EMIT(prefix);
|
||||
EMIT(escape);
|
||||
EMIT(opcode);
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
|
||||
const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
|
||||
VexW w) {
|
||||
DCHECK(IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
emit_vex_prefix(src1, kL128, pp, m, w);
|
||||
EMIT(op);
|
||||
emit_sse_operand(dst, src2);
|
||||
}
|
||||
|
||||
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
|
||||
Register ireg = { reg.code() };
|
||||
|
@ -40,7 +40,6 @@
|
||||
#include <deque>
|
||||
|
||||
#include "src/assembler.h"
|
||||
#include "src/ia32/sse-instr.h"
|
||||
#include "src/isolate.h"
|
||||
#include "src/utils.h"
|
||||
|
||||
@ -1419,30 +1418,6 @@ class Assembler : public AssemblerBase {
|
||||
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
|
||||
|
||||
// Other SSE and AVX instructions
|
||||
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
|
||||
void instruction(XMMRegister dst, XMMRegister src) { \
|
||||
instruction(dst, Operand(src)); \
|
||||
} \
|
||||
void instruction(XMMRegister dst, const Operand& src) { \
|
||||
sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
|
||||
}
|
||||
|
||||
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
|
||||
#undef DECLARE_SSE2_INSTRUCTION
|
||||
|
||||
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
|
||||
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
|
||||
v##instruction(dst, src1, Operand(src2)); \
|
||||
} \
|
||||
void v##instruction(XMMRegister dst, XMMRegister src1, \
|
||||
const Operand& src2) { \
|
||||
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
|
||||
}
|
||||
|
||||
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
|
||||
#undef DECLARE_SSE2_AVX_INSTRUCTION
|
||||
|
||||
// Prefetch src position into cache level.
|
||||
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
|
||||
// non-temporal
|
||||
@ -1573,10 +1548,6 @@ class Assembler : public AssemblerBase {
|
||||
inline void emit_disp(Label* L, Displacement::Type type);
|
||||
inline void emit_near_disp(Label* L);
|
||||
|
||||
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
|
||||
byte opcode);
|
||||
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
|
||||
SIMDPrefix pp, LeadingOpcode m, VexW w);
|
||||
// Most BMI instructions are similiar.
|
||||
void bmi1(byte op, Register reg, Register vreg, const Operand& rm);
|
||||
void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg,
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "src/base/compiler-specific.h"
|
||||
#include "src/disasm.h"
|
||||
#include "src/ia32/sse-instr.h"
|
||||
|
||||
namespace disasm {
|
||||
|
||||
@ -1003,16 +1002,6 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
|
||||
case 0x##opcode: { \
|
||||
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
|
||||
NameOfXMMRegister(vvvv)); \
|
||||
current += PrintRightXMMOperand(current); \
|
||||
break; \
|
||||
}
|
||||
|
||||
SSE2_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
|
||||
#undef DECLARE_SSE_AVX_DIS_CASE
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
@ -1940,18 +1929,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(rm));
|
||||
data++;
|
||||
} else if (*data == 0xFA) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, ®op, &rm);
|
||||
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightXMMOperand(data);
|
||||
} else if (*data == 0xFE) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, ®op, &rm);
|
||||
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightXMMOperand(data);
|
||||
} else if (*data == 0xB1) {
|
||||
data++;
|
||||
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
|
||||
|
@ -2270,18 +2270,19 @@ void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
|
||||
movd(dst, src);
|
||||
return;
|
||||
}
|
||||
DCHECK_EQ(1, imm8);
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope sse_scope(this, SSE4_1);
|
||||
pextrd(dst, src, imm8);
|
||||
return;
|
||||
}
|
||||
DCHECK_LT(imm8, 4);
|
||||
pshufd(xmm0, src, imm8);
|
||||
pshufd(xmm0, src, 1);
|
||||
movd(dst, xmm0);
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
|
||||
DCHECK(imm8 == 0 || imm8 == 1);
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope sse_scope(this, SSE4_1);
|
||||
pinsrd(dst, src, imm8);
|
||||
|
@ -1,12 +0,0 @@
|
||||
// Copyright 2012 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_SSE_INSTR_H_
|
||||
#define V8_SSE_INSTR_H_
|
||||
|
||||
#define SSE2_INSTRUCTION_LIST(V) \
|
||||
V(paddd, 66, 0F, FE) \
|
||||
V(psubd, 66, 0F, FA)
|
||||
|
||||
#endif // V8_SSE_INSTR_H_
|
@ -1483,7 +1483,6 @@
|
||||
'ia32/macro-assembler-ia32.h',
|
||||
'ia32/simulator-ia32.cc',
|
||||
'ia32/simulator-ia32.h',
|
||||
'ia32/sse-instr.h',
|
||||
'builtins/ia32/builtins-ia32.cc',
|
||||
'compiler/ia32/code-generator-ia32.cc',
|
||||
'compiler/ia32/instruction-codes-ia32.h',
|
||||
|
@ -468,13 +468,6 @@ TEST(DisasmIa320) {
|
||||
|
||||
__ punpckldq(xmm1, xmm6);
|
||||
__ punpckhdq(xmm7, xmm5);
|
||||
|
||||
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
|
||||
__ instruction(xmm5, xmm1); \
|
||||
__ instruction(xmm5, Operand(edx, 4));
|
||||
|
||||
SSE2_INSTRUCTION_LIST(EMIT_SSE2_INSTR)
|
||||
#undef EMIT_SSE2_INSTR
|
||||
}
|
||||
|
||||
// cmov.
|
||||
@ -545,13 +538,6 @@ TEST(DisasmIa320) {
|
||||
__ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
__ vxorpd(xmm0, xmm1, xmm2);
|
||||
__ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
|
||||
|
||||
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
|
||||
__ v##instruction(xmm7, xmm5, xmm1); \
|
||||
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
|
||||
|
||||
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
|
||||
#undef EMIT_SSE2_AVXINSTR
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -214,11 +214,11 @@ T Not(T a) {
|
||||
|
||||
} // namespace
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
|
||||
#define SIMD_LOWERING_TARGET 1
|
||||
#else
|
||||
#define SIMD_LOWERING_TARGET 0
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
|
||||
|
||||
// TODO(gdeepti): These are tests using sample values to verify functional
|
||||
// correctness of opcodes, add more tests for a range of values and macroize
|
||||
|
Loading…
Reference in New Issue
Block a user