Add several SIMD opcodes to IA32

Int32x4Splat (CreateInt32x4), Int32x4ExtractLane, Int32x4ReplaceLane
Int32x4Add, Int32x4Sub

Also add paddd, psubd, vpaddd, vpsubd, pinsrw to ia32-assembler

BUG=

Review-Url: https://codereview.chromium.org/2695613004
Cr-Original-Commit-Position: refs/heads/master@{#43483}
Committed: 4deb9ffdec
Review-Url: https://codereview.chromium.org/2695613004
Cr-Commit-Position: refs/heads/master@{#43708}
This commit is contained in:
jing.bao 2017-03-09 18:40:06 -08:00 committed by Commit bot
parent fd5b3e755d
commit b9614d4bd1
16 changed files with 227 additions and 23 deletions

View File

@ -1889,6 +1889,7 @@ v8_source_set("v8_base") {
"src/ia32/macro-assembler-ia32.h",
"src/ia32/simulator-ia32.cc",
"src/ia32/simulator-ia32.h",
"src/ia32/sse-instr.h",
"src/ic/ia32/access-compiler-ia32.cc",
"src/ic/ia32/handler-compiler-ia32.cc",
"src/ic/ia32/ic-ia32.cc",

View File

@ -1614,10 +1614,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kSSEFloat64InsertLowWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true);
break;
case kSSEFloat64InsertHighWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true);
break;
case kSSEFloat64LoadLowWord32:
__ movd(i.OutputDoubleRegister(), i.InputOperand(0));
@ -1888,6 +1888,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kIA32Int32x4Splat: {
// Broadcast the 32-bit scalar in input 0 to all four lanes of the output.
XMMRegister dst = i.OutputSimd128Register();
// movd loads the scalar into the lowest 32-bit lane of dst.
__ movd(dst, i.InputOperand(0));
// A shuffle selector of 0x0 makes every destination lane read source lane 0.
__ pshufd(dst, dst, 0x0);
break;
}
case kIA32Int32x4ExtractLane: {
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kIA32Int32x4ReplaceLane: {
// Input 1 supplies the lane index and input 2 the 32-bit value to insert.
// Input 0 (the source vector) is not referenced here; presumably it already
// lives in the output register -- TODO confirm against the instruction
// selector's operand constraints.
__ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
break;
}
case kSSEInt32x4Add: {
__ paddd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kSSEInt32x4Sub: {
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXInt32x4Add: {
CpuFeatureScope avx_scope(masm(), AVX);
__ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXInt32x4Sub: {
CpuFeatureScope avx_scope(masm(), AVX);
__ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kCheckedLoadInt8:
ASSEMBLE_CHECKED_LOAD_INTEGER(movsx_b);
break;

View File

@ -110,7 +110,14 @@ namespace compiler {
V(IA32PushFloat32) \
V(IA32PushFloat64) \
V(IA32Poke) \
V(IA32StackCheck)
V(IA32StackCheck) \
V(IA32Int32x4Splat) \
V(IA32Int32x4ExtractLane) \
V(IA32Int32x4ReplaceLane) \
V(SSEInt32x4Add) \
V(SSEInt32x4Sub) \
V(AVXInt32x4Add) \
V(AVXInt32x4Sub)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes

View File

@ -97,6 +97,13 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
case kIA32Int32x4Splat:
case kIA32Int32x4ExtractLane:
case kIA32Int32x4ReplaceLane:
case kSSEInt32x4Add:
case kSSEInt32x4Sub:
case kAVXInt32x4Add:
case kAVXInt32x4Sub:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;

View File

@ -873,7 +873,9 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
V(Int32x4Add, kAVXInt32x4Add, kSSEInt32x4Add) \
V(Int32x4Sub, kAVXInt32x4Sub, kSSEInt32x4Sub)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
@ -1756,6 +1758,25 @@ void InstructionSelector::VisitAtomicExchange(Node* node) {
Emit(code, 1, outputs, input_count, inputs);
}
// Selects kIA32Int32x4Splat for an Int32x4Splat node by delegating to the
// VisitRO helper.
void InstructionSelector::VisitInt32x4Splat(Node* node) {
VisitRO(this, node, kIA32Int32x4Splat);
}
// Lowers an Int32x4ExtractLane node to kIA32Int32x4ExtractLane. The vector
// input is requested in a register, the lane index (taken from the node's
// operator parameter) is encoded as an immediate, and the result is defined
// in a register.
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
  IA32OperandGenerator g(this);
  const int32_t lane_index = OpParameter<int32_t>(node);
  auto result = g.DefineAsRegister(node);
  auto vector = g.UseRegister(node->InputAt(0));
  auto lane = g.UseImmediate(lane_index);
  Emit(kIA32Int32x4ExtractLane, result, vector, lane);
}
// Lowers an Int32x4ReplaceLane node to kIA32Int32x4ReplaceLane. The result is
// constrained to share the first input's location; the operands are, in order,
// the source vector (in a register), the lane index as an immediate (taken
// from the node's operator parameter), and the replacement value.
void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
  IA32OperandGenerator g(this);
  const int32_t lane_index = OpParameter<int32_t>(node);
  auto result = g.DefineSameAsFirst(node);
  auto vector = g.UseRegister(node->InputAt(0));
  auto lane = g.UseImmediate(lane_index);
  auto replacement = g.Use(node->InputAt(1));
  Emit(kIA32Int32x4ReplaceLane, result, vector, lane, replacement);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {

View File

@ -2031,7 +2031,7 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitInt32x4Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
@ -2045,7 +2045,9 @@ void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
void InstructionSelector::VisitInt32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4Sub(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitSimd128Zero(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Zero(Node* node) { UNIMPLEMENTED(); }

View File

@ -48,7 +48,7 @@ namespace internal {
bool CpuFeatures::SupportsCrankshaft() { return true; }
bool CpuFeatures::SupportsSimd128() { return false; }
bool CpuFeatures::SupportsSimd128() { return true; }
static const byte kCallOpcode = 0xE8;
static const int kNoCodeAgeSequenceLength = 5;

View File

@ -2665,6 +2665,15 @@ void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
EMIT(offset);
}
// Emits the pinsrw encoding: prefix 0x66, escape 0x0F, opcode 0xC4, the
// register/operand bytes produced by emit_sse_operand(dst, src), and finally
// |offset| as the trailing immediate byte.
void Assembler::pinsrw(XMMRegister dst, const Operand& src, int8_t offset) {
// offset is a signed 8-bit value; presumably this check rejects negative
// values -- TODO confirm is_uint8 semantics.
DCHECK(is_uint8(offset));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0xC4);
emit_sse_operand(dst, src);
EMIT(offset);
}
void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
@ -2870,6 +2879,24 @@ void Assembler::rorx(Register dst, const Operand& src, byte imm8) {
EMIT(imm8);
}
// Shared emitter for two-operand SSE2 instructions of the form
// <prefix> <escape> <opcode> <operand bytes>. The three leading bytes are
// emitted verbatim in that order, followed by the encoding of (dst, src)
// produced by emit_sse_operand.
void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape, byte opcode) {
EnsureSpace ensure_space(this);
EMIT(prefix);
EMIT(escape);
EMIT(opcode);
emit_sse_operand(dst, src);
}
// Shared emitter for three-operand AVX instructions. Emits a VEX prefix built
// from src1, the fixed vector length kL128 and the caller-supplied pp/m/w
// fields, then the opcode byte |op|, then the encoding of (dst, src2) produced
// by emit_sse_operand. Requires AVX to be enabled on this assembler.
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
VexW w) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(src1, kL128, pp, m, w);
EMIT(op);
emit_sse_operand(dst, src2);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };

View File

@ -40,6 +40,7 @@
#include <deque>
#include "src/assembler.h"
#include "src/ia32/sse-instr.h"
#include "src/isolate.h"
#include "src/utils.h"
@ -1078,6 +1079,10 @@ class Assembler : public AssemblerBase {
pextrd(Operand(dst), src, offset);
}
void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
// Register-source convenience overload: wraps |src| in an Operand and forwards
// to the Operand overload declared below.
void pinsrw(XMMRegister dst, Register src, int8_t offset) {
pinsrw(dst, Operand(src), offset);
}
// Emits pinsrw with |offset| as the immediate byte.
void pinsrw(XMMRegister dst, const Operand& src, int8_t offset);
void pinsrd(XMMRegister dst, Register src, int8_t offset) {
pinsrd(dst, Operand(src), offset);
}
@ -1416,6 +1421,30 @@ class Assembler : public AssemblerBase {
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Other SSE and AVX instructions
// For every SSE2_INSTRUCTION_LIST entry, declares two overloads named after
// the instruction: an (XMMRegister, XMMRegister) form that forwards to the
// (XMMRegister, const Operand&) form, which in turn calls sse2_instr with the
// entry's prefix, escape and opcode. The list stores those bytes as bare hex
// digits, so "0x" is token-pasted in front of each of them here.
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION
// For every SSE2_INSTRUCTION_LIST entry, declares the "v"-prefixed
// three-operand counterpart. The register-source overload forwards to the
// Operand overload, which calls vinstr with the entry's opcode (prefixed with
// "0x"), the prefix and escape columns token-pasted onto "k" (e.g. k66, k0F)
// to form the SIMDPrefix / LeadingOpcode arguments, and kW0.
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \
} \
void v##instruction(XMMRegister dst, XMMRegister src1, \
const Operand& src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal
@ -1546,6 +1575,10 @@ class Assembler : public AssemblerBase {
inline void emit_disp(Label* L, Displacement::Type type);
inline void emit_near_disp(Label* L);
// Emits a two-operand SSE2 instruction from its raw prefix, escape and opcode
// bytes followed by the (dst, src) operand encoding.
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode);
// Emits a VEX-encoded three-operand instruction with opcode |op|; pp, m and w
// are passed through to the VEX prefix.
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similar.
void bmi1(byte op, Register reg, Register vreg, const Operand& rm);
void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg,

View File

@ -10,6 +10,7 @@
#include "src/base/compiler-specific.h"
#include "src/disasm.h"
#include "src/ia32/sse-instr.h"
namespace disasm {
@ -1002,6 +1003,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
// Expands to one switch case per SSE2_INSTRUCTION_LIST entry, keyed on the
// entry's opcode byte. Each case prints "v<instruction> <regop>,<vvvv>," and
// then the right-hand XMM operand, advancing |current| past it. The prefix and
// escape columns of the list are not used here.
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
NameOfXMMRegister(vvvv)); \
current += PrintRightXMMOperand(current); \
break; \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default:
UnimplementedInstruction();
}
@ -1895,6 +1906,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
AppendToBuffer("movd ");
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0xC4) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pinsrw %s,", NameOfXMMRegister(regop));
data += PrintRightOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
data++;
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
@ -1929,6 +1948,18 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xFA) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xB1) {
data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);

View File

@ -2270,32 +2270,41 @@ void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
movd(dst, src);
return;
}
DCHECK_EQ(1, imm8);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pextrd(dst, src, imm8);
return;
}
pshufd(xmm0, src, 1);
DCHECK_LT(imm8, 4);
pshufd(xmm0, src, imm8);
movd(dst, xmm0);
}
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
DCHECK(imm8 == 0 || imm8 == 1);
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8,
bool is_64_bits) {
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8);
return;
}
movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
if (is_64_bits) {
movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
}
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
DCHECK_LT(imm8, 4);
push(eax);
mov(eax, src);
pinsrw(dst, eax, imm8 * 2);
shr(eax, 16);
pinsrw(dst, eax, imm8 * 2 + 1);
pop(eax);
}
}

View File

@ -752,10 +752,12 @@ class MacroAssembler: public Assembler {
// Non-SSE2 instructions.
void Pextrd(Register dst, XMMRegister src, int8_t imm8);
void Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
Pinsrd(dst, Operand(src), imm8);
void Pinsrd(XMMRegister dst, Register src, int8_t imm8,
bool is_64_bits = false) {
Pinsrd(dst, Operand(src), imm8, is_64_bits);
}
void Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
void Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8,
bool is_64_bits = false);
void Lzcnt(Register dst, Register src) { Lzcnt(dst, Operand(src)); }
void Lzcnt(Register dst, const Operand& src);

12
src/ia32/sse-instr.h Normal file
View File

@ -0,0 +1,12 @@
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_SSE_INSTR_H_
#define V8_SSE_INSTR_H_
// X-macro table of SSE2 instructions. Each row is
// V(instruction, prefix, escape, opcode); the three byte columns are written
// as bare hex digits (no "0x") so that users of the list can token-paste them.
#define SSE2_INSTRUCTION_LIST(V) \
V(paddd, 66, 0F, FE) \
V(psubd, 66, 0F, FA)
#endif // V8_SSE_INSTR_H_

View File

@ -1486,6 +1486,7 @@
'ia32/macro-assembler-ia32.h',
'ia32/simulator-ia32.cc',
'ia32/simulator-ia32.h',
'ia32/sse-instr.h',
'builtins/ia32/builtins-ia32.cc',
'compiler/ia32/code-generator-ia32.cc',
'compiler/ia32/instruction-codes-ia32.h',

View File

@ -468,6 +468,16 @@ TEST(DisasmIa320) {
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
__ pinsrw(xmm5, edx, 5);
__ pinsrw(xmm5, Operand(edx, 4), 5);
// Assembles every SSE2_INSTRUCTION_LIST instruction twice: once with a
// register source and once with a memory-operand source. Only the instruction
// name column of the list is used.
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_INSTR)
#undef EMIT_SSE2_INSTR
}
// cmov.
@ -538,6 +548,13 @@ TEST(DisasmIa320) {
__ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorpd(xmm0, xmm1, xmm2);
__ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
// Assembles the "v"-prefixed form of every SSE2_INSTRUCTION_LIST instruction
// twice: once with a register second source and once with a memory-operand
// second source. Only the instruction name column of the list is used.
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
}
}

View File

@ -31,11 +31,11 @@ typedef int8_t (*Int8BinOp)(int8_t, int8_t);
typedef int (*Int8CompareOp)(int8_t, int8_t);
typedef int8_t (*Int8ShiftOp)(int8_t, int);
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#define SIMD_LOWERING_TARGET 1
#else
#define SIMD_LOWERING_TARGET 0
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// Generic expected value functions.
template <typename T>