Add bit left-rotation machine operator

ROL is an optional operator because arm and arm64 only have ROR.

The reason for this CL is inefficient Wasm codegen for 64-bit
left-rotation.

Bug: v8:10216
Change-Id: I0cd13e4b6de5276a0d0b80eac5ed9c2e52ba1f96
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2157648
Commit-Queue: Z Nguyen-Huu <duongn@microsoft.com>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67518}
This commit is contained in:
Z Nguyen-Huu 2020-05-01 09:44:41 -07:00 committed by Commit Bot
parent 41fbbd12a3
commit c0eee179b3
20 changed files with 144 additions and 40 deletions

View File

@ -1091,6 +1091,25 @@ void Assembler::rcr(Register dst, uint8_t imm8) {
}
}
// Emits a rotate-left of |dst| by the immediate count |imm8|, which must fit
// in five bits. A count of exactly 1 uses the 0xD1 opcode byte, which carries
// no trailing immediate; every other count uses 0xC1 followed by the count.
void Assembler::rol(Operand dst, uint8_t imm8) {
  EnsureSpace ensure_space(this);
  DCHECK(is_uint5(imm8));  // illegal shift count
  if (imm8 != 1) {
    // General form: opcode, operand encoding, then the count byte.
    EMIT(0xC1);
    emit_operand(eax, dst);
    EMIT(imm8);
    return;
  }
  // Rotate-by-one form: no immediate byte is emitted.
  EMIT(0xD1);
  emit_operand(eax, dst);
}
// Emits the rotate-left form that takes no immediate: opcode byte 0xD3
// followed by the operand encoding for |dst|.
// NOTE(review): per the _cl suffix the count presumably comes from the CL
// register, and eax presumably supplies the /0 opcode extension in the ModRM
// reg field -- confirm against the x86 ROL encoding.
void Assembler::rol_cl(Operand dst) {
EnsureSpace ensure_space(this);
EMIT(0xD3);
emit_operand(eax, dst);
}
void Assembler::ror(Operand dst, uint8_t imm8) {
EnsureSpace ensure_space(this);
DCHECK(is_uint5(imm8)); // illegal shift count

View File

@ -635,6 +635,11 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// Rotate |dst| by the immediate count |imm8|.
// NOTE(review): presumably the x86 rotate-through-carry left/right
// instructions (RCL/RCR); the definitions are outside this view -- confirm.
void rcl(Register dst, uint8_t imm8);
void rcr(Register dst, uint8_t imm8);
// Rotate left by an immediate count. The Register overload wraps |dst| in an
// Operand and forwards to the Operand overload.
void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); }
void rol(Operand dst, uint8_t imm8);
// Rotate left without an immediate count. The Register overload forwards to
// the Operand overload.
// NOTE(review): the count presumably comes from the CL register -- confirm.
void rol_cl(Register dst) { rol_cl(Operand(dst)); }
void rol_cl(Operand dst);
// Rotate right by an immediate count. The Register overload wraps |dst| in an
// Operand and forwards to the Operand overload.
void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
void ror(Operand dst, uint8_t imm8);
// Rotate right without an immediate count; forwards to the Operand overload.
// NOTE(review): the count presumably comes from the CL register -- confirm.
void ror_cl(Register dst) { ror_cl(Operand(dst)); }

View File

@ -1233,6 +1233,8 @@ void InstructionSelector::VisitWord32PairSar(Node* node) {
VisitWord32PairShift(this, kArmAsrPair, node);
}
// Word32Rol nodes must never reach instruction selection in this backend;
// getting here is treated as a bug (UNREACHABLE).
// NOTE(review): presumably because this backend does not advertise
// kWord32Rol in its supported operator flags -- confirm.
void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
// Selects code for a 32-bit rotate-right node by delegating to the shared
// VisitShift helper with the TryMatchROR matcher.
void InstructionSelector::VisitWord32Ror(Node* node) {
VisitShift(this, node, TryMatchROR);
}

View File

@ -1301,6 +1301,10 @@ void InstructionSelector::VisitWord64Sar(Node* node) {
VisitRRO(this, kArm64Asr, node, kShift64Imm);
}
// Word32Rol nodes must never reach instruction selection in this backend;
// getting here is treated as a bug (UNREACHABLE).
// NOTE(review): presumably because this backend does not advertise
// kWord32Rol in its supported operator flags -- confirm.
void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
// Word64Rol nodes must never reach instruction selection in this backend;
// getting here is treated as a bug (UNREACHABLE).
// NOTE(review): presumably because this backend does not advertise
// kWord64Rol in its supported operator flags -- confirm.
void InstructionSelector::VisitWord64Rol(Node* node) { UNREACHABLE(); }
// Selects code for a 32-bit rotate-right node: delegates to VisitRRO with the
// kArm64Ror32 opcode and the kShift32Imm immediate mode.
void InstructionSelector::VisitWord32Ror(Node* node) {
VisitRRO(this, kArm64Ror32, node, kShift32Imm);
}

View File

@ -1235,6 +1235,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
}
break;
case kIA32Rol:
if (HasImmediateInput(instr, 1)) {
__ rol(i.OutputOperand(), i.InputInt5(1));
} else {
__ rol_cl(i.OutputOperand());
}
break;
case kIA32Ror:
if (HasImmediateInput(instr, 1)) {
__ ror(i.OutputOperand(), i.InputInt5(1));

View File

@ -39,6 +39,7 @@ namespace compiler {
V(IA32ShlPair) \
V(IA32ShrPair) \
V(IA32SarPair) \
V(IA32Rol) \
V(IA32Ror) \
V(IA32Lzcnt) \
V(IA32Tzcnt) \

View File

@ -38,6 +38,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32ShlPair:
case kIA32ShrPair:
case kIA32SarPair:
case kIA32Rol:
case kIA32Ror:
case kIA32Lzcnt:
case kIA32Tzcnt:

View File

@ -895,6 +895,10 @@ void InstructionSelector::VisitWord32PairSar(Node* node) {
VisitWord32PairShift(this, kIA32SarPair, node);
}
// Selects code for a 32-bit rotate-left node by delegating to the shared
// VisitShift helper with the kIA32Rol opcode.
void InstructionSelector::VisitWord32Rol(Node* node) {
VisitShift(this, node, kIA32Rol);
}
// Selects code for a 32-bit rotate-right node by delegating to the shared
// VisitShift helper with the kIA32Ror opcode.
void InstructionSelector::VisitWord32Ror(Node* node) {
VisitShift(this, node, kIA32Ror);
}
@ -2811,7 +2815,7 @@ MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::Flags flags =
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz;
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord32Rol;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt;
}

View File

@ -1417,6 +1417,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord32(node), VisitWord32Shr(node);
case IrOpcode::kWord32Sar:
return MarkAsWord32(node), VisitWord32Sar(node);
case IrOpcode::kWord32Rol:
return MarkAsWord32(node), VisitWord32Rol(node);
case IrOpcode::kWord32Ror:
return MarkAsWord32(node), VisitWord32Ror(node);
case IrOpcode::kWord32Equal:
@ -1447,6 +1449,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64Shr(node);
case IrOpcode::kWord64Sar:
return MarkAsWord64(node), VisitWord64Sar(node);
case IrOpcode::kWord64Rol:
return MarkAsWord64(node), VisitWord64Rol(node);
case IrOpcode::kWord64Ror:
return MarkAsWord64(node), VisitWord64Ror(node);
case IrOpcode::kWord64Clz:
@ -2387,6 +2391,8 @@ void InstructionSelector::VisitWord64Shr(Node* node) { UNIMPLEMENTED(); }
// Stub visitors for 64-bit word operations: none of them selects any
// instruction, and reaching one triggers UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets without native 64-bit
// word instructions -- confirm against the enclosing preprocessor guard.
void InstructionSelector::VisitWord64Sar(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Rol(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Ror(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Clz(Node* node) { UNIMPLEMENTED(); }

View File

@ -1292,6 +1292,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64Sar:
ASSEMBLE_SHIFT(sarq, 6);
break;
case kX64Rol32:
ASSEMBLE_SHIFT(roll, 5);
break;
case kX64Rol:
ASSEMBLE_SHIFT(rolq, 6);
break;
case kX64Ror32:
ASSEMBLE_SHIFT(rorl, 5);
break;

View File

@ -48,6 +48,8 @@ namespace compiler {
V(X64Shr32) \
V(X64Sar) \
V(X64Sar32) \
V(X64Rol) \
V(X64Rol32) \
V(X64Ror) \
V(X64Ror32) \
V(X64Lzcnt) \

View File

@ -45,6 +45,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Shr32:
case kX64Sar:
case kX64Sar32:
case kX64Rol:
case kX64Rol32:
case kX64Ror:
case kX64Ror32:
case kX64Lzcnt:

View File

@ -911,6 +911,14 @@ void InstructionSelector::VisitWord64Sar(Node* node) {
VisitWord64Shift(this, node, kX64Sar);
}
// Selects code for a 32-bit rotate-left node by delegating to
// VisitWord32Shift with the kX64Rol32 opcode.
void InstructionSelector::VisitWord32Rol(Node* node) {
VisitWord32Shift(this, node, kX64Rol32);
}
// Selects code for a 64-bit rotate-left node by delegating to
// VisitWord64Shift with the kX64Rol opcode.
void InstructionSelector::VisitWord64Rol(Node* node) {
VisitWord64Shift(this, node, kX64Rol);
}
// Selects code for a 32-bit rotate-right node by delegating to
// VisitWord32Shift with the kX64Ror32 opcode.
void InstructionSelector::VisitWord32Ror(Node* node) {
VisitWord32Shift(this, node, kX64Ror32);
}
@ -1290,6 +1298,7 @@ bool ZeroExtendsWord32ToWord64(Node* node) {
case IrOpcode::kWord32Shl:
case IrOpcode::kWord32Shr:
case IrOpcode::kWord32Sar:
case IrOpcode::kWord32Rol:
case IrOpcode::kWord32Ror:
case IrOpcode::kWord32Equal:
case IrOpcode::kInt32Add:
@ -3376,7 +3385,8 @@ MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::Flags flags =
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz;
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz |
MachineOperatorBuilder::kWord32Rol | MachineOperatorBuilder::kWord64Rol;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt |
MachineOperatorBuilder::kWord64Popcnt;

View File

@ -670,6 +670,9 @@ void Int64Lowering::LowerNode(Node* node) {
ReplaceNode(node, low_node, high_node);
break;
}
case IrOpcode::kWord64Rol:
DCHECK(machine()->Word32Rol().IsSupported());
V8_FALLTHROUGH;
case IrOpcode::kWord64Ror: {
DCHECK_EQ(2, node->InputCount());
Node* input = node->InputAt(0);
@ -702,14 +705,19 @@ void Int64Lowering::LowerNode(Node* node) {
Node* inv_shift = graph()->NewNode(
common()->Int32Constant(32 - masked_shift_value));
Node* low_node = graph()->NewNode(
machine()->Word32Or(),
graph()->NewNode(machine()->Word32Shr(), low_input, masked_shift),
graph()->NewNode(machine()->Word32Shl(), high_input, inv_shift));
Node* high_node = graph()->NewNode(
machine()->Word32Or(), graph()->NewNode(machine()->Word32Shr(),
high_input, masked_shift),
graph()->NewNode(machine()->Word32Shl(), low_input, inv_shift));
auto* op1 = machine()->Word32Shr();
auto* op2 = machine()->Word32Shl();
bool is_ror = node->opcode() == IrOpcode::kWord64Ror;
if (!is_ror) std::swap(op1, op2);
Node* low_node =
graph()->NewNode(machine()->Word32Or(),
graph()->NewNode(op1, low_input, masked_shift),
graph()->NewNode(op2, high_input, inv_shift));
Node* high_node =
graph()->NewNode(machine()->Word32Or(),
graph()->NewNode(op1, high_input, masked_shift),
graph()->NewNode(op2, low_input, inv_shift));
ReplaceNode(node, low_node, high_node);
}
} else {
@ -720,15 +728,19 @@ void Int64Lowering::LowerNode(Node* node) {
graph()->NewNode(common()->Int32Constant(0x1F)));
}
// By creating this bit-mask with SAR and SHL we do not have to deal
// with shift == 0 as a special case.
Node* inv_mask = graph()->NewNode(
machine()->Word32Shl(),
graph()->NewNode(machine()->Word32Sar(),
graph()->NewNode(common()->Int32Constant(
std::numeric_limits<int32_t>::min())),
bool is_ror = node->opcode() == IrOpcode::kWord64Ror;
Node* inv_mask =
is_ror ? graph()->NewNode(
machine()->Word32Xor(),
graph()->NewNode(
machine()->Word32Shr(),
graph()->NewNode(common()->Int32Constant(-1)),
safe_shift),
graph()->NewNode(common()->Int32Constant(1)));
graph()->NewNode(common()->Int32Constant(-1)))
: graph()->NewNode(
machine()->Word32Shl(),
graph()->NewNode(common()->Int32Constant(-1)),
safe_shift);
Node* bit_mask =
graph()->NewNode(machine()->Word32Xor(), inv_mask,
@ -759,21 +771,24 @@ void Int64Lowering::LowerNode(Node* node) {
lt32.Phi(MachineRepresentation::kWord32, GetReplacementHigh(input),
GetReplacementLow(input));
Node* rotate_low =
graph()->NewNode(machine()->Word32Ror(), input_low, safe_shift);
Node* rotate_high =
graph()->NewNode(machine()->Word32Ror(), input_high, safe_shift);
const Operator* oper =
is_ror ? machine()->Word32Ror() : machine()->Word32Rol().op();
Node* rotate_low = graph()->NewNode(oper, input_low, safe_shift);
Node* rotate_high = graph()->NewNode(oper, input_high, safe_shift);
auto* mask1 = bit_mask;
auto* mask2 = inv_mask;
if (!is_ror) std::swap(mask1, mask2);
Node* low_node = graph()->NewNode(
machine()->Word32Or(),
graph()->NewNode(machine()->Word32And(), rotate_low, bit_mask),
graph()->NewNode(machine()->Word32And(), rotate_high, inv_mask));
graph()->NewNode(machine()->Word32And(), rotate_low, mask1),
graph()->NewNode(machine()->Word32And(), rotate_high, mask2));
Node* high_node = graph()->NewNode(
machine()->Word32Or(),
graph()->NewNode(machine()->Word32And(), rotate_high, bit_mask),
graph()->NewNode(machine()->Word32And(), rotate_low, inv_mask));
graph()->NewNode(machine()->Word32And(), rotate_high, mask1),
graph()->NewNode(machine()->Word32And(), rotate_low, mask2));
ReplaceNode(node, low_node, high_node);
}
break;

View File

@ -487,6 +487,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
#define PURE_OPTIONAL_OP_LIST(V) \
V(Word32Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word64Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word32Rol, Operator::kNoProperties, 2, 0, 1) \
V(Word64Rol, Operator::kNoProperties, 2, 0, 1) \
V(Word32ReverseBits, Operator::kNoProperties, 1, 0, 1) \
V(Word64ReverseBits, Operator::kNoProperties, 1, 0, 1) \
V(Int32AbsWithOverflow, Operator::kNoProperties, 1, 0, 2) \

View File

@ -220,13 +220,15 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
kWord64ReverseBits = 1u << 17,
kInt32AbsWithOverflow = 1u << 20,
kInt64AbsWithOverflow = 1u << 21,
kAllOptionalOps = kFloat32RoundDown | kFloat64RoundDown | kFloat32RoundUp |
kFloat64RoundUp | kFloat32RoundTruncate |
kFloat64RoundTruncate | kFloat64RoundTiesAway |
kFloat32RoundTiesEven | kFloat64RoundTiesEven |
kWord32Ctz | kWord64Ctz | kWord32Popcnt | kWord64Popcnt |
kWord32ReverseBits | kWord64ReverseBits |
kInt32AbsWithOverflow | kInt64AbsWithOverflow
kWord32Rol = 1u << 22,
kWord64Rol = 1u << 23,
kAllOptionalOps =
kFloat32RoundDown | kFloat64RoundDown | kFloat32RoundUp |
kFloat64RoundUp | kFloat32RoundTruncate | kFloat64RoundTruncate |
kFloat64RoundTiesAway | kFloat32RoundTiesEven | kFloat64RoundTiesEven |
kWord32Ctz | kWord64Ctz | kWord32Popcnt | kWord64Popcnt |
kWord32ReverseBits | kWord64ReverseBits | kInt32AbsWithOverflow |
kInt64AbsWithOverflow | kWord32Rol | kWord64Rol
};
using Flags = base::Flags<Flag, unsigned>;
@ -308,6 +310,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Word32SarShiftOutZeros() {
return Word32Sar(ShiftKind::kShiftOutZeros);
}
// 32-bit rotate-left. Returned as an OptionalOperator rather than a plain
// Operator*, since not every backend provides it.
// NOTE(review): callers presumably must check IsSupported() before calling
// op() on the result -- confirm.
const OptionalOperator Word32Rol();
const Operator* Word32Ror();
const Operator* Word32Equal();
const Operator* Word32Clz();
@ -337,6 +340,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Word64SarShiftOutZeros() {
return Word64Sar(ShiftKind::kShiftOutZeros);
}
// 64-bit rotate-left. Returned as an OptionalOperator rather than a plain
// Operator*, since not every backend provides it.
// NOTE(review): callers presumably must check IsSupported() before calling
// op() on the result -- confirm.
const OptionalOperator Word64Rol();
const Operator* Word64Ror();
const Operator* Word64Clz();
const OptionalOperator Word64Ctz();

View File

@ -532,6 +532,7 @@
V(Word32Shl) \
V(Word32Shr) \
V(Word32Sar) \
V(Word32Rol) \
V(Word32Ror) \
V(Int32Add) \
V(Int32AddWithOverflow) \
@ -553,6 +554,7 @@
V(Word64Shl) \
V(Word64Shr) \
V(Word64Sar) \
V(Word64Rol) \
V(Word64Ror) \
V(Int64Add) \
V(Int64AddWithOverflow) \

View File

@ -1623,6 +1623,7 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
case IrOpcode::kWord32Shl:
case IrOpcode::kWord32Shr:
case IrOpcode::kWord32Sar:
case IrOpcode::kWord32Rol:
case IrOpcode::kWord32Ror:
case IrOpcode::kWord32Equal:
case IrOpcode::kWord32Clz:
@ -1637,6 +1638,7 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
case IrOpcode::kWord64Shl:
case IrOpcode::kWord64Shr:
case IrOpcode::kWord64Sar:
case IrOpcode::kWord64Rol:
case IrOpcode::kWord64Ror:
case IrOpcode::kWord64Clz:
case IrOpcode::kWord64Popcnt:

View File

@ -432,7 +432,11 @@ Node* WasmGraphBuilder::Binop(wasm::WasmOpcode opcode, Node* left, Node* right,
right = MaskShiftCount32(right);
break;
case wasm::kExprI32Rol:
right = MaskShiftCount32(right);
if (m->Word32Rol().IsSupported()) {
op = m->Word32Rol().op();
right = MaskShiftCount32(right);
break;
}
return BuildI32Rol(left, right);
case wasm::kExprI32Eq:
op = m->Word32Equal();
@ -543,6 +547,14 @@ Node* WasmGraphBuilder::Binop(wasm::WasmOpcode opcode, Node* left, Node* right,
right = MaskShiftCount64(right);
break;
case wasm::kExprI64Rol:
if (m->Word64Rol().IsSupported()) {
op = m->Word64Rol().op();
right = MaskShiftCount64(right);
break;
} else if (m->Word32Rol().IsSupported()) {
op = m->Word64Rol().placeholder();
break;
}
return BuildI64Rol(left, right);
case wasm::kExprF32CopySign:
return BuildF32CopySign(left, right);

View File

@ -816,10 +816,8 @@ TEST_F(Int64LoweringTest, I64Ror) {
Matcher<Node*> shift_matcher =
IsWord32And(IsParameter(0), IsInt32Constant(0x1F));
Matcher<Node*> bit_mask_matcher = IsWord32Shl(
IsWord32Sar(IsInt32Constant(std::numeric_limits<int32_t>::min()),
shift_matcher),
IsInt32Constant(1));
Matcher<Node*> bit_mask_matcher = IsWord32Xor(
IsWord32Shr(IsInt32Constant(-1), shift_matcher), IsInt32Constant(-1));
Matcher<Node*> inv_mask_matcher =
IsWord32Xor(bit_mask_matcher, IsInt32Constant(-1));