[wasm-simd] Improve i8x16 shift ins-sel and temps usage

We no longer require dst == src (output = input[0]) in all cases, only
when AVX is not supported. This can help remove an extra move when AVX
is supported. Also, in many cases (when input[1], the shift amount, is
an immediate), we require fewer temporary registers.

Bug: v8:11589
Change-Id: I0d272df12de54f55b4c7a0a330c38ccaca82e927
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3092553
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#76286}
This commit is contained in:
Ng Zhi An 2021-08-12 15:21:05 -07:00 committed by V8 LUCI CQ
parent c4e4868e03
commit ebdc98824f
4 changed files with 72 additions and 103 deletions

View File

@ -3111,30 +3111,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kIA32I8x16Shl: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
Register tmp = i.TempRegister(0);
XMMRegister tmp_simd = i.TempSimd128Register(1);
if (HasImmediateInput(instr, 1)) {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
kScratchDoubleReg);
__ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
} else {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
kScratchDoubleReg, tmp_simd);
XMMRegister tmp_simd = i.TempSimd128Register(1);
__ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
tmp_simd);
}
break;
}
case kIA32I8x16ShrS: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1),
kScratchDoubleReg);
__ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
} else {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1),
i.TempRegister(0), kScratchDoubleReg,
i.TempSimd128Register(1));
__ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
kScratchDoubleReg, i.TempSimd128Register(1));
}
break;
}
@ -3237,16 +3236,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kIA32I8x16ShrU: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
Register tmp = i.TempRegister(0);
if (HasImmediateInput(instr, 1)) {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
kScratchDoubleReg);
__ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
} else {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
kScratchDoubleReg, i.TempSimd128Register(1));
__ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
i.TempSimd128Register(1));
}
break;

View File

@ -389,14 +389,28 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node,
}
}
void VisitRROI8x16SimdShift(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
void VisitI8x16Shift(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
InstructionOperand output = CpuFeatures::IsSupported(AVX)
? g.UseRegister(node)
: g.DefineSameAsFirst(node);
if (g.CanBeImmediate(node->InputAt(1))) {
if (opcode == kIA32I8x16ShrS) {
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)));
} else {
InstructionOperand temps[] = {g.TempRegister()};
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
}
} else {
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
}
}
} // namespace
@ -2651,38 +2665,15 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
}
void InstructionSelector::VisitI8x16Shl(Node* node) {
IA32OperandGenerator g(this);
if (g.CanBeImmediate(node->InputAt(1))) {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
this->Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16Shl);
}
VisitI8x16Shift(this, node, kIA32I8x16Shl);
}
void InstructionSelector::VisitI8x16ShrS(Node* node) {
IA32OperandGenerator g(this);
if (g.CanBeImmediate(node->InputAt(1))) {
this->Emit(kIA32I8x16ShrS, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)));
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrS);
}
VisitI8x16Shift(this, node, kIA32I8x16ShrS);
}
void InstructionSelector::VisitI8x16ShrU(Node* node) {
IA32OperandGenerator g(this);
if (g.CanBeImmediate(node->InputAt(1))) {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
this->Emit(kIA32I8x16ShrU, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrU);
}
VisitI8x16Shift(this, node, kIA32I8x16ShrU);
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {

View File

@ -3492,32 +3492,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I8x16Shl: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
// Temp registers for shift mask and additional moves to XMM registers.
Register tmp = i.TempRegister(0);
XMMRegister tmp_simd = i.TempSimd128Register(1);
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
__ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
kScratchDoubleReg);
} else {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
kScratchDoubleReg, tmp_simd);
__ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
kScratchDoubleReg, i.TempSimd128Register(0));
}
break;
}
case kX64I8x16ShrS: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1),
kScratchDoubleReg);
__ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
} else {
// TODO(zhin): use kScratchRegister instead of TempRegister.
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1),
i.TempRegister(0), kScratchDoubleReg,
i.TempSimd128Register(1));
__ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
kScratchDoubleReg, i.TempSimd128Register(0));
}
break;
}
@ -3573,16 +3567,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I8x16ShrU: {
XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector.
DCHECK_EQ(dst, i.InputSimd128Register(0));
// TODO(zhin): use kScratchRegister instead of tmp.
Register tmp = i.TempRegister(0);
XMMRegister src = i.InputSimd128Register(0);
DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp,
__ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
kScratchDoubleReg);
} else {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp,
kScratchDoubleReg, i.TempSimd128Register(1));
__ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
kScratchDoubleReg, i.TempSimd128Register(0));
}
break;
}

View File

@ -3047,6 +3047,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_NARROW_SHIFT_OPCODES(V) \
V(I8x16Shl) \
V(I8x16ShrS) \
V(I8x16ShrU)
void InstructionSelector::VisitS128Const(Node* node) {
@ -3176,19 +3177,19 @@ SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
#undef VISIT_SIMD_SHIFT
#undef SIMD_SHIFT_OPCODES
#define VISIT_SIMD_NARROW_SHIFT(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
if (g.CanBeImmediate(node->InputAt(1))) { \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), \
arraysize(temps), temps); \
} else { \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
} \
#define VISIT_SIMD_NARROW_SHIFT(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \
InstructionOperand output = \
IsSupported(AVX) ? g.UseRegister(node) : g.DefineSameAsFirst(node); \
if (g.CanBeImmediate(node->InputAt(1))) { \
Emit(kX64##Opcode, output, g.UseRegister(node->InputAt(0)), \
g.UseImmediate(node->InputAt(1))); \
} else { \
InstructionOperand temps[] = {g.TempSimd128Register()}; \
Emit(kX64##Opcode, output, g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
} \
}
SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT)
#undef VISIT_SIMD_NARROW_SHIFT
@ -3328,19 +3329,6 @@ void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
}
void InstructionSelector::VisitI8x16ShrS(Node* node) {
X64OperandGenerator g(this);
if (g.CanBeImmediate(node->InputAt(1))) {
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)));
} else {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
UNREACHABLE();
}