[wasm-simd] Improve i8x16 shift ins-sel and temps usage
We no longer require dst == src (output == input[0]) in all cases; it is now required only when AVX is not supported. This can eliminate an extra move when AVX is supported. Also, in many cases (when input[1], the shift amount, is an immediate), we require fewer temporary registers.

Bug: v8:11589
Change-Id: I0d272df12de54f55b4c7a0a330c38ccaca82e927
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3092553
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#76286}
This commit is contained in:
parent
c4e4868e03
commit
ebdc98824f
@ -3111,30 +3111,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kIA32I8x16Shl: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
Register tmp = i.TempRegister(0);
|
||||
XMMRegister tmp_simd = i.TempSimd128Register(1);
|
||||
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
|
||||
kScratchDoubleReg);
|
||||
__ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
|
||||
} else {
|
||||
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
|
||||
kScratchDoubleReg, tmp_simd);
|
||||
XMMRegister tmp_simd = i.TempSimd128Register(1);
|
||||
__ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
|
||||
tmp_simd);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kIA32I8x16ShrS: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1),
|
||||
kScratchDoubleReg);
|
||||
__ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
|
||||
} else {
|
||||
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1),
|
||||
i.TempRegister(0), kScratchDoubleReg,
|
||||
i.TempSimd128Register(1));
|
||||
__ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
|
||||
kScratchDoubleReg, i.TempSimd128Register(1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -3237,16 +3236,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kIA32I8x16ShrU: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
Register tmp = i.ToRegister(instr->TempAt(0));
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
Register tmp = i.TempRegister(0);
|
||||
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
|
||||
kScratchDoubleReg);
|
||||
__ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
|
||||
} else {
|
||||
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
|
||||
kScratchDoubleReg, i.TempSimd128Register(1));
|
||||
__ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
|
||||
i.TempSimd128Register(1));
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -389,14 +389,28 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node,
|
||||
}
|
||||
}
|
||||
|
||||
void VisitRROI8x16SimdShift(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode opcode) {
|
||||
void VisitI8x16Shift(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode opcode) {
|
||||
IA32OperandGenerator g(selector);
|
||||
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
|
||||
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
|
||||
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
|
||||
arraysize(temps), temps);
|
||||
InstructionOperand output = CpuFeatures::IsSupported(AVX)
|
||||
? g.UseRegister(node)
|
||||
: g.DefineSameAsFirst(node);
|
||||
|
||||
if (g.CanBeImmediate(node->InputAt(1))) {
|
||||
if (opcode == kIA32I8x16ShrS) {
|
||||
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(node->InputAt(1)));
|
||||
} else {
|
||||
InstructionOperand temps[] = {g.TempRegister()};
|
||||
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
|
||||
}
|
||||
} else {
|
||||
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
|
||||
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
|
||||
selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@ -2651,38 +2665,15 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16Shl(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
if (g.CanBeImmediate(node->InputAt(1))) {
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
|
||||
this->Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
|
||||
} else {
|
||||
VisitRROI8x16SimdShift(this, node, kIA32I8x16Shl);
|
||||
}
|
||||
VisitI8x16Shift(this, node, kIA32I8x16Shl);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16ShrS(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
if (g.CanBeImmediate(node->InputAt(1))) {
|
||||
this->Emit(kIA32I8x16ShrS, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(node->InputAt(1)));
|
||||
} else {
|
||||
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrS);
|
||||
}
|
||||
VisitI8x16Shift(this, node, kIA32I8x16ShrS);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16ShrU(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
if (g.CanBeImmediate(node->InputAt(1))) {
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
|
||||
this->Emit(kIA32I8x16ShrU, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
|
||||
} else {
|
||||
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrU);
|
||||
}
|
||||
VisitI8x16Shift(this, node, kIA32I8x16ShrU);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
|
||||
|
@ -3492,32 +3492,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kX64I8x16Shl: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
// Temp registers for shift mask and additional moves to XMM registers.
|
||||
Register tmp = i.TempRegister(0);
|
||||
XMMRegister tmp_simd = i.TempSimd128Register(1);
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
|
||||
__ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
|
||||
kScratchDoubleReg);
|
||||
} else {
|
||||
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
|
||||
kScratchDoubleReg, tmp_simd);
|
||||
__ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
|
||||
kScratchDoubleReg, i.TempSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64I8x16ShrS: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1),
|
||||
kScratchDoubleReg);
|
||||
__ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
|
||||
} else {
|
||||
// TODO(zhin): use kScratchRegister instead of TempRegister.
|
||||
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1),
|
||||
i.TempRegister(0), kScratchDoubleReg,
|
||||
i.TempSimd128Register(1));
|
||||
__ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
|
||||
kScratchDoubleReg, i.TempSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -3573,16 +3567,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kX64I8x16ShrU: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// TODO(zhin): remove this restriction from instruction-selector.
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
// TODO(zhin): use kScratchRegister instead of tmp.
|
||||
Register tmp = i.TempRegister(0);
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
|
||||
if (HasImmediateInput(instr, 1)) {
|
||||
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
|
||||
__ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
|
||||
kScratchDoubleReg);
|
||||
} else {
|
||||
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
|
||||
kScratchDoubleReg, i.TempSimd128Register(1));
|
||||
__ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
|
||||
kScratchDoubleReg, i.TempSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -3047,6 +3047,7 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
|
||||
#define SIMD_NARROW_SHIFT_OPCODES(V) \
|
||||
V(I8x16Shl) \
|
||||
V(I8x16ShrS) \
|
||||
V(I8x16ShrU)
|
||||
|
||||
void InstructionSelector::VisitS128Const(Node* node) {
|
||||
@ -3176,19 +3177,19 @@ SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
|
||||
#undef VISIT_SIMD_SHIFT
|
||||
#undef SIMD_SHIFT_OPCODES
|
||||
|
||||
#define VISIT_SIMD_NARROW_SHIFT(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
X64OperandGenerator g(this); \
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
|
||||
if (g.CanBeImmediate(node->InputAt(1))) { \
|
||||
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
|
||||
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), \
|
||||
arraysize(temps), temps); \
|
||||
} else { \
|
||||
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
|
||||
g.UseUniqueRegister(node->InputAt(0)), \
|
||||
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
|
||||
} \
|
||||
#define VISIT_SIMD_NARROW_SHIFT(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
X64OperandGenerator g(this); \
|
||||
InstructionOperand output = \
|
||||
IsSupported(AVX) ? g.UseRegister(node) : g.DefineSameAsFirst(node); \
|
||||
if (g.CanBeImmediate(node->InputAt(1))) { \
|
||||
Emit(kX64##Opcode, output, g.UseRegister(node->InputAt(0)), \
|
||||
g.UseImmediate(node->InputAt(1))); \
|
||||
} else { \
|
||||
InstructionOperand temps[] = {g.TempSimd128Register()}; \
|
||||
Emit(kX64##Opcode, output, g.UseUniqueRegister(node->InputAt(0)), \
|
||||
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
|
||||
} \
|
||||
}
|
||||
SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT)
|
||||
#undef VISIT_SIMD_NARROW_SHIFT
|
||||
@ -3328,19 +3329,6 @@ void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
|
||||
g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI8x16ShrS(Node* node) {
|
||||
X64OperandGenerator g(this);
|
||||
if (g.CanBeImmediate(node->InputAt(1))) {
|
||||
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)));
|
||||
} else {
|
||||
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
|
||||
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
|
||||
g.UseUniqueRegister(node->InputAt(0)),
|
||||
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
|
||||
}
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user