[wasm-simd] Merge replace lane ops into pinsr{b,w,d,q}
The only one that doesn't use a pinsr* is f32x4, which uses insertps,
so that is kept as it is.

Bug: v8:10933
Change-Id: I7442668812c674d4242949e13ef595978290bc8d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2458787
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70493}
parent d2ab873de9
commit 99e252bae9
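For context: pinsr{b,w,d,q} insert a byte, word, dword, or qword from a general-purpose register or memory operand into a chosen lane of an XMM register, which is exactly the shape of a Wasm replace_lane. That lets every integer lane width share one code-generation path; f64x2 joins in via a movq round trip through a scratch register, and only f32x4 stays on insertps. A minimal standalone illustration of the i32x4 case, using SSE4.1 intrinsics rather than V8's macro-assembler (the helper name is invented for this sketch):

#include <smmintrin.h>  // SSE4.1; _mm_insert_epi32 compiles to pinsrd
#include <cstdio>

// Replace lane `kLane` of `src` with `val`, leaving the other lanes intact,
// i.e. the semantics kX64Pinsrd implements for i32x4.replace_lane.
template <int kLane>
__m128i I32x4ReplaceLane(__m128i src, int val) {
  return _mm_insert_epi32(src, val, kLane);  // pinsrd xmm, r/m32, imm8
}

int main() {
  __m128i v = _mm_setr_epi32(10, 20, 30, 40);
  int out[4];
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), I32x4ReplaceLane<2>(v, 99));
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 10 20 99 40
}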
src/compiler/backend/x64/code-generator-x64.cc

@@ -647,6 +647,26 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
   }                                                                    \
   } while (false)
 
+#define ASSEMBLE_PINSR(ASM_INSTR)                                     \
+  do {                                                                \
+    EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
+    XMMRegister dst = i.OutputSimd128Register();                      \
+    XMMRegister src = i.InputSimd128Register(0);                      \
+    uint8_t laneidx = i.InputUint8(1);                                \
+    if (HasAddressingMode(instr)) {                                   \
+      __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx);            \
+      break;                                                          \
+    }                                                                 \
+    if (instr->InputAt(2)->IsFPRegister()) {                          \
+      __ Movq(kScratchRegister, i.InputDoubleRegister(2));            \
+      __ ASM_INSTR(dst, src, kScratchRegister, laneidx);              \
+    } else if (instr->InputAt(2)->IsRegister()) {                     \
+      __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx);            \
+    } else {                                                          \
+      __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx);             \
+    }                                                                 \
+  } while (false)
+
 void CodeGenerator::AssembleDeconstructFrame() {
   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
   __ movq(rsp, rbp);
@@ -2354,16 +2374,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
-    case kX64F64x2ReplaceLane: {
-      if (instr->InputAt(2)->IsFPRegister()) {
-        __ Movq(kScratchRegister, i.InputDoubleRegister(2));
-        __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputUint8(1));
-      } else {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2),
-                  i.InputUint8(1));
-      }
-      break;
-    }
     case kX64F64x2ExtractLane: {
       __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
       __ Movq(i.OutputDoubleRegister(), kScratchRegister);
@@ -2718,16 +2728,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
-    case kX64I64x2ReplaceLane: {
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
-                  i.InputUint8(1));
-      } else {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2),
-                  i.InputUint8(1));
-      }
-      break;
-    }
     case kX64I64x2Neg: {
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(0);
@@ -2826,16 +2826,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
-    case kX64I32x4ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrd(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrd(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64I32x4SConvertF32x4: {
       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
       XMMRegister dst = i.OutputSimd128Register();
@@ -3056,16 +3046,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ movsxwl(dst, dst);
       break;
     }
-    case kX64I16x8ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrw(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrw(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64I16x8SConvertI8x16Low: {
       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
       break;
@@ -3248,52 +3228,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ movsxbl(dst, dst);
       break;
     }
-    case kX64I8x16ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrb(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrb(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64Pinsrb: {
-      // TODO(zhin): consolidate this opcode with the other usages, like
-      // ReplaceLane, by implementing support when this has no addressing mode.
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrb(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrb);
       break;
     }
     case kX64Pinsrw: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrw(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrw);
      break;
     }
     case kX64Pinsrd: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrd(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrd);
       break;
     }
     case kX64Pinsrq: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrq(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrq);
       break;
     }
     case kX64I8x16SConvertI16x8: {
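A note on the f64x2 path in ASSEMBLE_PINSR above: pinsrq only accepts a general-purpose register or memory operand, so when the replacement value arrives in an FP register it is first moved to kScratchRegister with Movq and then inserted. A rough standalone equivalent with intrinsics (sketch only; the helper name is invented, not from this CL):

#include <smmintrin.h>  // SSE4.1; _mm_insert_epi64 compiles to pinsrq
#include <cstdint>
#include <cstring>

// Replace lane 0 of an f64x2 vector: double -> int64 bit pattern (the
// Movq into kScratchRegister in the macro), then a qword insert (the Pinsrq).
static __m128d F64x2ReplaceLane0(__m128d src, double val) {
  int64_t bits;
  std::memcpy(&bits, &val, sizeof bits);       // movq r64, xmm
  __m128i as_int = _mm_castpd_si128(src);
  as_int = _mm_insert_epi64(as_int, bits, 0);  // pinsrq xmm, r64, imm8
  return _mm_castsi128_pd(as_int);
}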
src/compiler/backend/x64/instruction-codes-x64.h

@@ -156,7 +156,6 @@ namespace compiler {
   V(X64Peek)                   \
   V(X64F64x2Splat)             \
   V(X64F64x2ExtractLane)       \
-  V(X64F64x2ReplaceLane)       \
   V(X64F64x2Abs)               \
   V(X64F64x2Neg)               \
   V(X64F64x2Sqrt)              \
@@ -203,7 +202,6 @@ namespace compiler {
   V(X64F32x4Round)             \
   V(X64I64x2Splat)             \
   V(X64I64x2ExtractLane)       \
-  V(X64I64x2ReplaceLane)       \
   V(X64I64x2Neg)               \
   V(X64I64x2BitMask)           \
   V(X64I64x2Shl)               \
@@ -215,7 +213,6 @@ namespace compiler {
   V(X64I64x2ShrU)              \
   V(X64I32x4Splat)             \
   V(X64I32x4ExtractLane)       \
-  V(X64I32x4ReplaceLane)       \
   V(X64I32x4SConvertF32x4)     \
   V(X64I32x4SConvertI16x8Low)  \
   V(X64I32x4SConvertI16x8High) \
@@ -246,7 +243,6 @@ namespace compiler {
   V(X64I16x8Splat)             \
   V(X64I16x8ExtractLaneU)      \
   V(X64I16x8ExtractLaneS)      \
-  V(X64I16x8ReplaceLane)       \
   V(X64I16x8SConvertI8x16Low)  \
   V(X64I16x8SConvertI8x16High) \
   V(X64I16x8Neg)               \
@@ -281,7 +277,6 @@ namespace compiler {
   V(X64I8x16Splat)             \
   V(X64I8x16ExtractLaneU)      \
   V(X64I8x16ExtractLaneS)      \
-  V(X64I8x16ReplaceLane)       \
   V(X64Pinsrb)                 \
   V(X64Pinsrw)                 \
   V(X64Pinsrd)                 \
src/compiler/backend/x64/instruction-scheduler-x64.cc

@@ -126,9 +126,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64Lea:
     case kX64Dec32:
     case kX64Inc32:
+    case kX64Pinsrb:
+    case kX64Pinsrw:
+    case kX64Pinsrd:
+    case kX64Pinsrq:
     case kX64F64x2Splat:
     case kX64F64x2ExtractLane:
-    case kX64F64x2ReplaceLane:
     case kX64F64x2Abs:
     case kX64F64x2Neg:
     case kX64F64x2Sqrt:
@@ -175,7 +178,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4Round:
     case kX64I64x2Splat:
     case kX64I64x2ExtractLane:
-    case kX64I64x2ReplaceLane:
     case kX64I64x2Neg:
     case kX64I64x2BitMask:
     case kX64I64x2Shl:
@@ -187,7 +189,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I64x2ShrU:
     case kX64I32x4Splat:
     case kX64I32x4ExtractLane:
-    case kX64I32x4ReplaceLane:
     case kX64I32x4SConvertF32x4:
     case kX64I32x4SConvertI16x8Low:
     case kX64I32x4SConvertI16x8High:
@@ -218,7 +219,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I16x8Splat:
     case kX64I16x8ExtractLaneU:
     case kX64I16x8ExtractLaneS:
-    case kX64I16x8ReplaceLane:
     case kX64I16x8SConvertI8x16Low:
     case kX64I16x8SConvertI8x16High:
     case kX64I16x8Neg:
@@ -253,11 +253,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16Splat:
     case kX64I8x16ExtractLaneU:
     case kX64I8x16ExtractLaneS:
-    case kX64I8x16ReplaceLane:
-    case kX64Pinsrb:
-    case kX64Pinsrw:
-    case kX64Pinsrd:
-    case kX64Pinsrq:
     case kX64I8x16SConvertI16x8:
     case kX64I8x16Neg:
     case kX64I8x16Shl:
src/compiler/backend/x64/instruction-selector-x64.cc

@@ -361,17 +361,19 @@ void InstructionSelector::VisitLoadLane(Node* node) {
 
   X64OperandGenerator g(this);
   InstructionOperand outputs[] = {g.DefineAsRegister(node)};
-  // GetEffectiveAddressMemoryOperand uses up to 3 inputs, 4th is laneidx, 5th
-  // is the value node.
+  // Input 0 is value node, 1 is lane idx, and GetEffectiveAddressMemoryOperand
+  // uses up to 3 inputs. This ordering is consistent with other operations that
+  // use the same opcode.
   InstructionOperand inputs[5];
   size_t input_count = 0;
 
+  inputs[input_count++] = g.UseRegister(node->InputAt(2));
+  inputs[input_count++] = g.UseImmediate(params.laneidx);
+
   AddressingMode mode =
       g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
   opcode |= AddressingModeField::encode(mode);
 
-  inputs[input_count++] = g.UseImmediate(params.laneidx);
-  inputs[input_count++] = g.UseRegister(node->InputAt(2));
   DCHECK_GE(5, input_count);
 
   // x64 supports unaligned loads.
@@ -2963,15 +2965,31 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, U)
 SIMD_VISIT_EXTRACT_LANE(I8x16, S)
 #undef SIMD_VISIT_EXTRACT_LANE
 
-#define VISIT_SIMD_REPLACE_LANE(Type)                              \
-  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
-    X64OperandGenerator g(this);                                   \
-    int32_t lane = OpParameter<int32_t>(node->op());               \
-    Emit(kX64##Type##ReplaceLane, g.DefineSameAsFirst(node),       \
-         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),    \
-         g.Use(node->InputAt(1)));                                 \
-  }
-SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
+void InstructionSelector::VisitF32x4ReplaceLane(Node* node) {
+  X64OperandGenerator g(this);
+  int32_t lane = OpParameter<int32_t>(node->op());
+  Emit(kX64F32x4ReplaceLane, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
+       g.Use(node->InputAt(1)));
+}
+
+#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE)                               \
+  void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) {          \
+    X64OperandGenerator g(this);                                            \
+    int32_t lane = OpParameter<int32_t>(node->op());                        \
+    Emit(OPCODE, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), \
+         g.UseImmediate(lane), g.Use(node->InputAt(1)));                    \
+  }
+
+#define SIMD_TYPES_FOR_REPLACE_LANE(V) \
+  V(F64x2, kX64Pinsrq)                 \
+  V(I64x2, kX64Pinsrq)                 \
+  V(I32x4, kX64Pinsrd)                 \
+  V(I16x8, kX64Pinsrw)                 \
+  V(I8x16, kX64Pinsrb)
+
+SIMD_TYPES_FOR_REPLACE_LANE(VISIT_SIMD_REPLACE_LANE)
+#undef SIMD_TYPES_FOR_REPLACE_LANE
 #undef VISIT_SIMD_REPLACE_LANE
 
 #define VISIT_SIMD_SHIFT(Opcode)                             \
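The instruction-selector change above is a standard X-macro: SIMD_TYPES_FOR_REPLACE_LANE pairs each SIMD type with the pinsr opcode it lowers to, and applying VISIT_SIMD_REPLACE_LANE to the list stamps out one visitor per pair, while f32x4 keeps its hand-written insertps visitor. A self-contained sketch of the same pattern (names invented for illustration, not V8 code):

#include <cstdio>

// X-macro list: each entry pairs a type tag with the mnemonic it lowers to,
// mirroring SIMD_TYPES_FOR_REPLACE_LANE in the CL.
#define REPLACE_LANE_TYPES(V) \
  V(F64x2, "pinsrq")          \
  V(I32x4, "pinsrd")          \
  V(I8x16, "pinsrb")

// Applying the list generates one function per entry, the same way
// VISIT_SIMD_REPLACE_LANE generates one Visit##TYPE##ReplaceLane each.
#define DEFINE_VISITOR(TYPE, MNEMONIC)                    \
  void Visit##TYPE##ReplaceLane() {                       \
    std::printf(#TYPE " replace_lane -> %s\n", MNEMONIC); \
  }
REPLACE_LANE_TYPES(DEFINE_VISITOR)
#undef DEFINE_VISITOR
#undef REPLACE_LANE_TYPES

int main() {
  VisitF64x2ReplaceLane();  // prints: F64x2 replace_lane -> pinsrq
  VisitI32x4ReplaceLane();
  VisitI8x16ReplaceLane();
}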