[wasm-simd][ia32] Merge extract/replace lane opcodes

Some extract lane and replace lane opcodes overlap with the ones used
for load lane, with a different addressing mode. Merge those cases
together and delete unused opcodes.

Drive-by clean-up to rename kF32x4ReplaceLane to kIA32Insertps to follow
the naming convention (kIA32 prefix) and also make it more general in
case we use insertps for other purposes in the future.

Bug: v8:10975
Change-Id: Id143670f63e69cb45cf7c1ce358297a928383035
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2568924
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71605}
This commit is contained in:
Zhi An Ng 2020-12-03 12:20:57 +00:00 committed by Commit Bot
parent f80dfe98fa
commit ced43bcd4b
4 changed files with 77 additions and 119 deletions

View File

@ -524,7 +524,14 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
__ OPCODE(dst, i.MemoryOperand(2), laneidx); \
} \
} else { \
UNIMPLEMENTED(); \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope avx_scope(tasm(), AVX); \
__ v##OPCODE(dst, src, i.InputOperand(2), laneidx); \
} else { \
DCHECK_EQ(dst, src); \
CpuFeatureScope sse_scope(tasm(), CPU_FEATURE); \
__ OPCODE(dst, i.InputOperand(2), laneidx); \
} \
} \
} while (false)
@ -2186,17 +2193,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEF32x4ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ insertps(i.OutputSimd128Register(), i.InputOperand(2),
i.InputInt8(1) << 4);
break;
}
case kAVXF32x4ReplaceLane: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(2), i.InputInt8(1) << 4);
case kIA32Insertps: {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(2), i.InputInt8(1) << 4);
} else {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ insertps(i.OutputSimd128Register(), i.InputOperand(2),
i.InputInt8(1) << 4);
}
break;
}
case kIA32F32x4SConvertI32x4: {
@ -2482,18 +2489,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kSSEI32x4ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
break;
}
case kAVXI32x4ReplaceLane: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(2), i.InputInt8(1));
break;
}
case kSSEI32x4SConvertF32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister dst = i.OutputSimd128Register();
@ -2837,28 +2832,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufd(dst, dst, 0x0);
break;
}
case kIA32I16x8ExtractLaneU: {
Register dst = i.OutputRegister();
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kIA32I16x8ExtractLaneS: {
Register dst = i.OutputRegister();
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsx_w(dst, dst);
break;
}
case kSSEI16x8ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
break;
}
case kAVXI16x8ReplaceLane: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(2), i.InputInt8(1));
break;
}
case kIA32I16x8SConvertI8x16Low: {
__ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
break;
@ -3179,41 +3158,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufb(dst, kScratchDoubleReg);
break;
}
case kIA32I8x16ExtractLaneU: {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kIA32I8x16ExtractLaneS: {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsx_b(dst, dst);
break;
}
case kSSEI8x16ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
break;
}
case kAVXI8x16ReplaceLane: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(2), i.InputInt8(1));
break;
}
case kIA32Pinsrb: {
// TODO(zhin): Move i8x16 replace lane into this opcode.
ASSEMBLE_SIMD_PINSR(pinsrb, SSE4_1);
break;
}
case kIA32Pinsrw: {
// TODO(zhin): Move i16x8 replace lane into this opcode.
ASSEMBLE_SIMD_PINSR(pinsrw, SSE4_1);
break;
}
case kIA32Pinsrd: {
// TODO(zhin): Move i32x4 replace lane into this opcode.
ASSEMBLE_SIMD_PINSR(pinsrd, SSE4_1);
break;
}
@ -3240,21 +3199,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32Pextrb: {
// TODO(zhin): Move i8x16 extract lane u into this opcode.
DCHECK(HasAddressingMode(instr));
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrb(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
if (HasAddressingMode(instr)) {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrb(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
} else {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
}
break;
}
case kIA32Pextrw: {
// TODO(zhin): Move i16x8 extract lane u into this opcode.
DCHECK(HasAddressingMode(instr));
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrw(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
if (HasAddressingMode(instr)) {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrw(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
} else {
Register dst = i.OutputRegister();
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
}
break;
}
case kIA32S128Store32Lane: {

View File

@ -154,8 +154,7 @@ namespace compiler {
V(AVXF32x4Splat) \
V(SSEF32x4ExtractLane) \
V(AVXF32x4ExtractLane) \
V(SSEF32x4ReplaceLane) \
V(AVXF32x4ReplaceLane) \
V(IA32Insertps) \
V(IA32F32x4SConvertI32x4) \
V(SSEF32x4UConvertI32x4) \
V(AVXF32x4UConvertI32x4) \
@ -194,8 +193,6 @@ namespace compiler {
V(IA32F32x4Round) \
V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
V(AVXI32x4ReplaceLane) \
V(SSEI32x4SConvertF32x4) \
V(AVXI32x4SConvertF32x4) \
V(IA32I32x4SConvertI16x8Low) \
@ -240,10 +237,7 @@ namespace compiler {
V(IA32I32x4BitMask) \
V(IA32I32x4DotI16x8S) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \
V(SSEI16x8ReplaceLane) \
V(AVXI16x8ReplaceLane) \
V(IA32I16x8SConvertI8x16Low) \
V(IA32I16x8SConvertI8x16High) \
V(IA32I16x8Neg) \
@ -296,10 +290,7 @@ namespace compiler {
V(IA32I16x8Abs) \
V(IA32I16x8BitMask) \
V(IA32I8x16Splat) \
V(IA32I8x16ExtractLaneU) \
V(IA32I8x16ExtractLaneS) \
V(SSEI8x16ReplaceLane) \
V(AVXI8x16ReplaceLane) \
V(IA32Pinsrb) \
V(IA32Pinsrw) \
V(IA32Pinsrd) \

View File

@ -133,8 +133,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Splat:
case kSSEF32x4ExtractLane:
case kAVXF32x4ExtractLane:
case kSSEF32x4ReplaceLane:
case kAVXF32x4ReplaceLane:
case kIA32Insertps:
case kIA32F32x4SConvertI32x4:
case kSSEF32x4UConvertI32x4:
case kAVXF32x4UConvertI32x4:
@ -173,8 +172,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F32x4Round:
case kIA32I32x4Splat:
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
case kAVXI32x4ReplaceLane:
case kSSEI32x4SConvertF32x4:
case kAVXI32x4SConvertF32x4:
case kIA32I32x4SConvertI16x8Low:
@ -219,10 +216,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4BitMask:
case kIA32I32x4DotI16x8S:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS:
case kSSEI16x8ReplaceLane:
case kAVXI16x8ReplaceLane:
case kIA32I16x8SConvertI8x16Low:
case kIA32I16x8SConvertI8x16High:
case kIA32I16x8Neg:
@ -275,10 +269,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8Abs:
case kIA32I16x8BitMask:
case kIA32I8x16Splat:
case kIA32I8x16ExtractLaneU:
case kIA32I8x16ExtractLaneS:
case kSSEI8x16ReplaceLane:
case kAVXI8x16ReplaceLane:
case kIA32Pinsrb:
case kIA32Pinsrw:
case kIA32Pinsrd:

View File

@ -2454,38 +2454,49 @@ void InstructionSelector::VisitS128AndNot(Node* node) {
} \
}
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
#undef SIMD_INT_TYPES
#undef VISIT_SIMD_SPLAT
#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
VisitRRISimd(this, node, kIA32##Type##ExtractLane##Sign); \
}
SIMD_VISIT_EXTRACT_LANE(I32x4, )
SIMD_VISIT_EXTRACT_LANE(I16x8, U)
SIMD_VISIT_EXTRACT_LANE(I16x8, S)
SIMD_VISIT_EXTRACT_LANE(I8x16, U)
SIMD_VISIT_EXTRACT_LANE(I8x16, S)
#undef SIMD_VISIT_EXTRACT_LANE
void InstructionSelector::VisitI8x16ExtractLaneU(Node* node) {
VisitRRISimd(this, node, kIA32Pextrb);
}
#define VISIT_SIMD_REPLACE_LANE(Type) \
void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.Use(node->InputAt(1)); \
if (IsSupported(AVX)) { \
Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
operand1, operand2); \
} else { \
Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
operand1, operand2); \
} \
void InstructionSelector::VisitI8x16ExtractLaneS(Node* node) {
VisitRRISimd(this, node, kIA32I8x16ExtractLaneS);
}
void InstructionSelector::VisitI16x8ExtractLaneU(Node* node) {
VisitRRISimd(this, node, kIA32Pextrw);
}
void InstructionSelector::VisitI16x8ExtractLaneS(Node* node) {
VisitRRISimd(this, node, kIA32I16x8ExtractLaneS);
}
void InstructionSelector::VisitI32x4ExtractLane(Node* node) {
VisitRRISimd(this, node, kIA32I32x4ExtractLane);
}
#define SIMD_REPLACE_LANE_TYPE_OP(V) \
V(I32x4, kIA32Pinsrd) \
V(I16x8, kIA32Pinsrw) \
V(I8x16, kIA32Pinsrb) \
V(F32x4, kIA32Insertps)
#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE) \
void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.Use(node->InputAt(1)); \
InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
: g.DefineSameAsFirst(node); \
Emit(OPCODE, dst, operand0, operand1, operand2); \
}
SIMD_INT_TYPES(VISIT_SIMD_REPLACE_LANE)
VISIT_SIMD_REPLACE_LANE(F32x4)
SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
#undef VISIT_SIMD_REPLACE_LANE
#undef SIMD_INT_TYPES
#undef SIMD_REPLACE_LANE_TYPE_OP
// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
// operand2 to be UseRegister, because the codegen relies on insertps using