[wasm-simd][ia32] Merge extract/replace lane opcodes
Some extract lane and replace lane opcodes overlap with the ones used for load lane, with a different addressing mode. Merge those cases together and delete unused opcodes. Drive-by clean-up to rename kF32x4ReplaceLane to kIA32Insertps to follow the naming convention (kIA32 prefix) and also make it more general if in the future we use insertps for other purposes. Bug: v8:10975 Change-Id: Id143670f63e69cb45cf7c1ce358297a928383035 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2568924 Reviewed-by: Bill Budge <bbudge@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#71605}
This commit is contained in:
parent
f80dfe98fa
commit
ced43bcd4b
@ -524,7 +524,14 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
|
||||
__ OPCODE(dst, i.MemoryOperand(2), laneidx); \
|
||||
} \
|
||||
} else { \
|
||||
UNIMPLEMENTED(); \
|
||||
if (CpuFeatures::IsSupported(AVX)) { \
|
||||
CpuFeatureScope avx_scope(tasm(), AVX); \
|
||||
__ v##OPCODE(dst, src, i.InputOperand(2), laneidx); \
|
||||
} else { \
|
||||
DCHECK_EQ(dst, src); \
|
||||
CpuFeatureScope sse_scope(tasm(), CPU_FEATURE); \
|
||||
__ OPCODE(dst, i.InputOperand(2), laneidx); \
|
||||
} \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
@ -2186,17 +2193,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kSSEF32x4ReplaceLane: {
|
||||
case kIA32Insertps: {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1) << 4);
|
||||
} else {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ insertps(i.OutputSimd128Register(), i.InputOperand(2),
|
||||
i.InputInt8(1) << 4);
|
||||
break;
|
||||
}
|
||||
case kAVXF32x4ReplaceLane: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1) << 4);
|
||||
break;
|
||||
}
|
||||
case kIA32F32x4SConvertI32x4: {
|
||||
@ -2482,18 +2489,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kSSEI32x4ReplaceLane: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kAVXI32x4ReplaceLane: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kSSEI32x4SConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
@ -2837,28 +2832,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pshufd(dst, dst, 0x0);
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8ExtractLaneU: {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8ExtractLaneS: {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
__ movsx_w(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8ReplaceLane: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kAVXI16x8ReplaceLane: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8SConvertI8x16Low: {
|
||||
__ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
|
||||
break;
|
||||
@ -3179,41 +3158,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pshufb(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kIA32I8x16ExtractLaneU: {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I8x16ExtractLaneS: {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
__ movsx_b(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kSSEI8x16ReplaceLane: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
__ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kAVXI8x16ReplaceLane: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32Pinsrb: {
|
||||
// TODO(zhin): Move i8x16 replace lane into this opcode.
|
||||
ASSEMBLE_SIMD_PINSR(pinsrb, SSE4_1);
|
||||
break;
|
||||
}
|
||||
case kIA32Pinsrw: {
|
||||
// TODO(zhin): Move i16x8 replace lane into this opcode.
|
||||
ASSEMBLE_SIMD_PINSR(pinsrw, SSE4_1);
|
||||
break;
|
||||
}
|
||||
case kIA32Pinsrd: {
|
||||
// TODO(zhin): Move i32x4 replace lane into this opcode.
|
||||
ASSEMBLE_SIMD_PINSR(pinsrd, SSE4_1);
|
||||
break;
|
||||
}
|
||||
@ -3240,21 +3199,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kIA32Pextrb: {
|
||||
// TODO(zhin): Move i8x16 extract lane u into this opcode.
|
||||
DCHECK(HasAddressingMode(instr));
|
||||
if (HasAddressingMode(instr)) {
|
||||
size_t index = 0;
|
||||
Operand operand = i.MemoryOperand(&index);
|
||||
__ Pextrb(operand, i.InputSimd128Register(index),
|
||||
i.InputUint8(index + 1));
|
||||
} else {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kIA32Pextrw: {
|
||||
// TODO(zhin): Move i16x8 extract lane u into this opcode.
|
||||
DCHECK(HasAddressingMode(instr));
|
||||
if (HasAddressingMode(instr)) {
|
||||
size_t index = 0;
|
||||
Operand operand = i.MemoryOperand(&index);
|
||||
__ Pextrw(operand, i.InputSimd128Register(index),
|
||||
i.InputUint8(index + 1));
|
||||
} else {
|
||||
Register dst = i.OutputRegister();
|
||||
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kIA32S128Store32Lane: {
|
||||
|
@ -154,8 +154,7 @@ namespace compiler {
|
||||
V(AVXF32x4Splat) \
|
||||
V(SSEF32x4ExtractLane) \
|
||||
V(AVXF32x4ExtractLane) \
|
||||
V(SSEF32x4ReplaceLane) \
|
||||
V(AVXF32x4ReplaceLane) \
|
||||
V(IA32Insertps) \
|
||||
V(IA32F32x4SConvertI32x4) \
|
||||
V(SSEF32x4UConvertI32x4) \
|
||||
V(AVXF32x4UConvertI32x4) \
|
||||
@ -194,8 +193,6 @@ namespace compiler {
|
||||
V(IA32F32x4Round) \
|
||||
V(IA32I32x4Splat) \
|
||||
V(IA32I32x4ExtractLane) \
|
||||
V(SSEI32x4ReplaceLane) \
|
||||
V(AVXI32x4ReplaceLane) \
|
||||
V(SSEI32x4SConvertF32x4) \
|
||||
V(AVXI32x4SConvertF32x4) \
|
||||
V(IA32I32x4SConvertI16x8Low) \
|
||||
@ -240,10 +237,7 @@ namespace compiler {
|
||||
V(IA32I32x4BitMask) \
|
||||
V(IA32I32x4DotI16x8S) \
|
||||
V(IA32I16x8Splat) \
|
||||
V(IA32I16x8ExtractLaneU) \
|
||||
V(IA32I16x8ExtractLaneS) \
|
||||
V(SSEI16x8ReplaceLane) \
|
||||
V(AVXI16x8ReplaceLane) \
|
||||
V(IA32I16x8SConvertI8x16Low) \
|
||||
V(IA32I16x8SConvertI8x16High) \
|
||||
V(IA32I16x8Neg) \
|
||||
@ -296,10 +290,7 @@ namespace compiler {
|
||||
V(IA32I16x8Abs) \
|
||||
V(IA32I16x8BitMask) \
|
||||
V(IA32I8x16Splat) \
|
||||
V(IA32I8x16ExtractLaneU) \
|
||||
V(IA32I8x16ExtractLaneS) \
|
||||
V(SSEI8x16ReplaceLane) \
|
||||
V(AVXI8x16ReplaceLane) \
|
||||
V(IA32Pinsrb) \
|
||||
V(IA32Pinsrw) \
|
||||
V(IA32Pinsrd) \
|
||||
|
@ -133,8 +133,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXF32x4Splat:
|
||||
case kSSEF32x4ExtractLane:
|
||||
case kAVXF32x4ExtractLane:
|
||||
case kSSEF32x4ReplaceLane:
|
||||
case kAVXF32x4ReplaceLane:
|
||||
case kIA32Insertps:
|
||||
case kIA32F32x4SConvertI32x4:
|
||||
case kSSEF32x4UConvertI32x4:
|
||||
case kAVXF32x4UConvertI32x4:
|
||||
@ -173,8 +172,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32F32x4Round:
|
||||
case kIA32I32x4Splat:
|
||||
case kIA32I32x4ExtractLane:
|
||||
case kSSEI32x4ReplaceLane:
|
||||
case kAVXI32x4ReplaceLane:
|
||||
case kSSEI32x4SConvertF32x4:
|
||||
case kAVXI32x4SConvertF32x4:
|
||||
case kIA32I32x4SConvertI16x8Low:
|
||||
@ -219,10 +216,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32I32x4BitMask:
|
||||
case kIA32I32x4DotI16x8S:
|
||||
case kIA32I16x8Splat:
|
||||
case kIA32I16x8ExtractLaneU:
|
||||
case kIA32I16x8ExtractLaneS:
|
||||
case kSSEI16x8ReplaceLane:
|
||||
case kAVXI16x8ReplaceLane:
|
||||
case kIA32I16x8SConvertI8x16Low:
|
||||
case kIA32I16x8SConvertI8x16High:
|
||||
case kIA32I16x8Neg:
|
||||
@ -275,10 +269,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32I16x8Abs:
|
||||
case kIA32I16x8BitMask:
|
||||
case kIA32I8x16Splat:
|
||||
case kIA32I8x16ExtractLaneU:
|
||||
case kIA32I8x16ExtractLaneS:
|
||||
case kSSEI8x16ReplaceLane:
|
||||
case kAVXI8x16ReplaceLane:
|
||||
case kIA32Pinsrb:
|
||||
case kIA32Pinsrw:
|
||||
case kIA32Pinsrd:
|
||||
|
@ -2454,38 +2454,49 @@ void InstructionSelector::VisitS128AndNot(Node* node) {
|
||||
} \
|
||||
}
|
||||
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
|
||||
#undef SIMD_INT_TYPES
|
||||
#undef VISIT_SIMD_SPLAT
|
||||
|
||||
#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \
|
||||
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
|
||||
VisitRRISimd(this, node, kIA32##Type##ExtractLane##Sign); \
|
||||
void InstructionSelector::VisitI8x16ExtractLaneU(Node* node) {
|
||||
VisitRRISimd(this, node, kIA32Pextrb);
|
||||
}
|
||||
SIMD_VISIT_EXTRACT_LANE(I32x4, )
|
||||
SIMD_VISIT_EXTRACT_LANE(I16x8, U)
|
||||
SIMD_VISIT_EXTRACT_LANE(I16x8, S)
|
||||
SIMD_VISIT_EXTRACT_LANE(I8x16, U)
|
||||
SIMD_VISIT_EXTRACT_LANE(I8x16, S)
|
||||
#undef SIMD_VISIT_EXTRACT_LANE
|
||||
|
||||
#define VISIT_SIMD_REPLACE_LANE(Type) \
|
||||
void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
|
||||
void InstructionSelector::VisitI8x16ExtractLaneS(Node* node) {
|
||||
VisitRRISimd(this, node, kIA32I8x16ExtractLaneS);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8ExtractLaneU(Node* node) {
|
||||
VisitRRISimd(this, node, kIA32Pextrw);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI16x8ExtractLaneS(Node* node) {
|
||||
VisitRRISimd(this, node, kIA32I16x8ExtractLaneS);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4ExtractLane(Node* node) {
|
||||
VisitRRISimd(this, node, kIA32I32x4ExtractLane);
|
||||
}
|
||||
|
||||
#define SIMD_REPLACE_LANE_TYPE_OP(V) \
|
||||
V(I32x4, kIA32Pinsrd) \
|
||||
V(I16x8, kIA32Pinsrw) \
|
||||
V(I8x16, kIA32Pinsrb) \
|
||||
V(F32x4, kIA32Insertps)
|
||||
|
||||
#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE) \
|
||||
void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) { \
|
||||
IA32OperandGenerator g(this); \
|
||||
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
|
||||
InstructionOperand operand1 = \
|
||||
g.UseImmediate(OpParameter<int32_t>(node->op())); \
|
||||
InstructionOperand operand2 = g.Use(node->InputAt(1)); \
|
||||
if (IsSupported(AVX)) { \
|
||||
Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
|
||||
operand1, operand2); \
|
||||
} else { \
|
||||
Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
|
||||
operand1, operand2); \
|
||||
} \
|
||||
InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
|
||||
: g.DefineSameAsFirst(node); \
|
||||
Emit(OPCODE, dst, operand0, operand1, operand2); \
|
||||
}
|
||||
SIMD_INT_TYPES(VISIT_SIMD_REPLACE_LANE)
|
||||
VISIT_SIMD_REPLACE_LANE(F32x4)
|
||||
SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
|
||||
#undef VISIT_SIMD_REPLACE_LANE
|
||||
#undef SIMD_INT_TYPES
|
||||
#undef SIMD_REPLACE_LANE_TYPE_OP
|
||||
|
||||
// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
|
||||
// operand2 to be UseRegister, because the codegen relies on insertps using
|
||||
|
Loading…
Reference in New Issue
Block a user