Implemented the ctz TurboFan operator for x64.

Ctz is currently implemented as an optional operator, and x64 is the only
architecture that implements it so far.

R=titzer@chromium.org

Review URL: https://codereview.chromium.org/1421163005

Cr-Commit-Position: refs/heads/master@{#31912}
This commit is contained in:
ahaas 2015-11-10 00:42:17 -08:00 committed by Commit bot
parent 9a569ec2c8
commit a594ff73a9
16 changed files with 165 additions and 2 deletions

View File

@ -936,6 +936,9 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
// Visitors for the Ctz and Popcnt word operators in this backend. None of
// them selects an instruction; each one only hits UNREACHABLE().
// NOTE(review): presumably this is safe because these are optional operators
// that this backend does not report as supported - confirm against its
// SupportedMachineOperatorFlags().
void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }

View File

@ -738,6 +738,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64Ror(node);
case IrOpcode::kWord64Clz:
return MarkAsWord64(node), VisitWord64Clz(node);
case IrOpcode::kWord64Ctz:
return MarkAsWord64(node), VisitWord64Ctz(node);
case IrOpcode::kWord64Equal:
return VisitWord64Equal(node);
case IrOpcode::kInt32Add:
@ -976,6 +978,9 @@ void InstructionSelector::VisitWord64Ror(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for 64-bit word operators: each one calls
// UNIMPLEMENTED() instead of selecting an instruction.
// NOTE(review): presumably these are only compiled for targets without a
// 64-bit instruction selector - confirm against the enclosing preprocessor
// guard, which is not visible here.
void InstructionSelector::VisitWord64Clz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Equal(Node* node) { UNIMPLEMENTED(); }

View File

@ -170,6 +170,7 @@ CheckedStoreRepresentation CheckedStoreRepresentationOf(Operator const* op) {
#define PURE_OPTIONAL_OP_LIST(V) \
V(Word32Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word64Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word32Popcnt, Operator::kNoProperties, 1, 0, 1) \
V(Float32Max, Operator::kNoProperties, 2, 0, 1) \
V(Float32Min, Operator::kNoProperties, 2, 0, 1) \

View File

@ -119,9 +119,11 @@ class MachineOperatorBuilder final : public ZoneObject {
kWord32ShiftIsSafe = 1u << 9,
kWord32Ctz = 1u << 10,
kWord32Popcnt = 1u << 11,
kWord64Ctz = 1u << 12,
kAllOptionalOps = kFloat32Max | kFloat32Min | kFloat64Max | kFloat64Min |
kFloat64RoundDown | kFloat64RoundTruncate |
kFloat64RoundTiesAway | kWord32Ctz | kWord32Popcnt
kFloat64RoundTiesAway | kWord32Ctz | kWord32Popcnt |
kWord64Ctz
};
typedef base::Flags<Flag, unsigned> Flags;
@ -149,6 +151,7 @@ class MachineOperatorBuilder final : public ZoneObject {
const Operator* Word64Sar();
const Operator* Word64Ror();
const Operator* Word64Clz();
const OptionalOperator Word64Ctz();
const Operator* Word64Equal();
const Operator* Int32Add();

View File

@ -324,6 +324,9 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
// Visitors for the Ctz and Popcnt word operators in this backend. None of
// them selects an instruction; each one only hits UNREACHABLE().
// NOTE(review): presumably this is safe because these are optional operators
// that this backend does not report as supported - confirm against its
// SupportedMachineOperatorFlags().
void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }

View File

@ -243,6 +243,7 @@
V(Word64Sar) \
V(Word64Ror) \
V(Word64Clz) \
V(Word64Ctz) \
V(Int32Add) \
V(Int32AddWithOverflow) \
V(Int32Sub) \

View File

@ -1958,6 +1958,9 @@ Type* Typer::Visitor::TypeWord64Ror(Node* node) { return Type::Internal(); }
// Typing rules for 64-bit word operators: the two bit-counting operators
// (Clz, Ctz) are typed as Type::Internal(), while the equality comparison is
// typed as Type::Boolean().
Type* Typer::Visitor::TypeWord64Clz(Node* node) { return Type::Internal(); }
Type* Typer::Visitor::TypeWord64Ctz(Node* node) { return Type::Internal(); }
Type* Typer::Visitor::TypeWord64Equal(Node* node) { return Type::Boolean(); }

View File

@ -829,6 +829,7 @@ void Verifier::Visitor::Check(Node* node) {
case IrOpcode::kWord64Sar:
case IrOpcode::kWord64Ror:
case IrOpcode::kWord64Clz:
case IrOpcode::kWord64Ctz:
case IrOpcode::kWord64Equal:
case IrOpcode::kInt32Add:
case IrOpcode::kInt32AddWithOverflow:

View File

@ -782,6 +782,13 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Lzcntl(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Tzcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntl(i.OutputRegister(), i.InputRegister(0));

View File

@ -48,6 +48,7 @@ namespace compiler {
V(X64Ror32) \
V(X64Lzcnt) \
V(X64Lzcnt32) \
V(X64Tzcnt) \
V(X64Tzcnt32) \
V(X64Popcnt32) \
V(SSEFloat32Cmp) \

View File

@ -584,6 +584,12 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
}
// Selects the x64 instruction for a Word64Ctz node: emits kX64Tzcnt with the
// node's result defined in a register and its single input passed through
// X64OperandGenerator::Use().
void InstructionSelector::VisitWord64Ctz(Node* node) {
  X64OperandGenerator operand_gen(this);
  Node* const value = node->InputAt(0);
  Emit(kX64Tzcnt, operand_gen.DefineAsRegister(node), operand_gen.Use(value));
}
void InstructionSelector::VisitWord32Ctz(Node* node) {
X64OperandGenerator g(this);
Emit(kX64Tzcnt32, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
@ -1606,7 +1612,7 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64Max |
MachineOperatorBuilder::kFloat64Min |
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz;
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt;
}

View File

@ -781,6 +781,24 @@ void Assembler::bsfl(Register dst, const Operand& src) {
}
// Emits the register-to-register form of the 64-bit bsf instruction:
// a 64-bit REX prefix for (dst, src), the two opcode bytes 0x0F 0xBC, and
// then the ModR/M byte encoding dst and src.
void Assembler::bsfq(Register dst, Register src) {
// NOTE(review): EnsureSpace presumably guarantees room in the code buffer
// before the bytes below are written - confirm against its definition.
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
// Two-byte opcode 0F BC.
emit(0x0F);
emit(0xBC);
emit_modrm(dst, src);
}
// Emits the 64-bit bsf instruction with an Operand source: a 64-bit REX
// prefix for (dst, src), the two opcode bytes 0x0F 0xBC, and then the
// operand encoding produced by emit_operand(dst, src).
void Assembler::bsfq(Register dst, const Operand& src) {
// NOTE(review): EnsureSpace presumably guarantees room in the code buffer
// before the bytes below are written - confirm against its definition.
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
// Two-byte opcode 0F BC.
emit(0x0F);
emit(0xBC);
emit_operand(dst, src);
}
void Assembler::call(Label* L) {
positions_recorder()->WriteRecordedPositions();
EnsureSpace ensure_space(this);

View File

@ -851,6 +851,8 @@ class Assembler : public AssemblerBase {
void bsrq(Register dst, const Operand& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Operand& src);
void bsfq(Register dst, Register src);
void bsfq(Register dst, const Operand& src);
void bsfl(Register dst, Register src);
void bsfl(Register dst, const Operand& src);

View File

@ -3189,6 +3189,36 @@ void MacroAssembler::Lzcntq(Register dst, const Operand& src) {
}
// Writes the number of trailing zero bits of the 64-bit register |src| into
// |dst|. Uses tzcntq when BMI1 is available and a bsfq-based sequence
// otherwise; a zero source yields 64 on the fallback path.
void MacroAssembler::Tzcntq(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(BMI1)) {
    // Fallback without BMI1. bsf(0) is undefined, so the result for a zero
    // source is written explicitly; the jump skips that fix-up whenever the
    // not_zero condition holds after bsfq.
    Label done;
    bsfq(dst, src);
    j(not_zero, &done, Label::kNear);
    Set(dst, 64);
    bind(&done);
    return;
  }
  CpuFeatureScope bmi1_scope(this, BMI1);
  tzcntq(dst, src);
}
// Writes the number of trailing zero bits of the 64-bit operand |src| into
// |dst|. Uses tzcntq when BMI1 is available and a bsfq-based sequence
// otherwise; a zero source yields 64 on the fallback path.
void MacroAssembler::Tzcntq(Register dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(BMI1)) {
    // Fallback without BMI1. bsf(0) is undefined, so the result for a zero
    // source is written explicitly; the jump skips that fix-up whenever the
    // not_zero condition holds after bsfq.
    Label done;
    bsfq(dst, src);
    j(not_zero, &done, Label::kNear);
    Set(dst, 64);
    bind(&done);
    return;
  }
  CpuFeatureScope bmi1_scope(this, BMI1);
  tzcntq(dst, src);
}
void MacroAssembler::Tzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(BMI1)) {
CpuFeatureScope scope(this, BMI1);

View File

@ -1016,6 +1016,9 @@ class MacroAssembler: public Assembler {
void Lzcntl(Register dst, Register src);
void Lzcntl(Register dst, const Operand& src);
void Tzcntq(Register dst, Register src);
void Tzcntq(Register dst, const Operand& src);
void Tzcntl(Register dst, Register src);
void Tzcntl(Register dst, const Operand& src);

View File

@ -208,6 +208,82 @@ TEST(RunWord64Clz) {
CHECK_EQ(63, m.Call(uint64_t(0x0000000000000001)));
CHECK_EQ(64, m.Call(uint64_t(0x0000000000000000)));
}
// Runs the Word64Ctz machine operator on a single uint64 parameter and checks
// the returned count for every possible result, from 64 (zero input) down to
// 0 (lowest bit set). The test is a no-op when the operator is not supported.
TEST(RunWord64Ctz) {
RawMachineAssemblerTester<int32_t> m(kMachUint64);
// Word64Ctz is an optional operator; bail out where it is unavailable.
if (!m.machine()->Word64Ctz().IsSupported()) {
return;
}
m.Return(m.AddNode(m.machine()->Word64Ctz().op(), m.Parameter(0)));
// Zero input: all 64 bits count as trailing zeros.
CHECK_EQ(64, m.Call(uint64_t(0x0000000000000000)));
// Lowest set bit in the upper 32 bits (results 63..32).
CHECK_EQ(63, m.Call(uint64_t(0x8000000000000000)));
CHECK_EQ(62, m.Call(uint64_t(0x4000000000000000)));
CHECK_EQ(61, m.Call(uint64_t(0x2000000000000000)));
CHECK_EQ(60, m.Call(uint64_t(0x1000000000000000)));
CHECK_EQ(59, m.Call(uint64_t(0xa800000000000000)));
CHECK_EQ(58, m.Call(uint64_t(0xf400000000000000)));
CHECK_EQ(57, m.Call(uint64_t(0x6200000000000000)));
CHECK_EQ(56, m.Call(uint64_t(0x9100000000000000)));
CHECK_EQ(55, m.Call(uint64_t(0xcd80000000000000)));
CHECK_EQ(54, m.Call(uint64_t(0x0940000000000000)));
CHECK_EQ(53, m.Call(uint64_t(0xaf20000000000000)));
CHECK_EQ(52, m.Call(uint64_t(0xac10000000000000)));
CHECK_EQ(51, m.Call(uint64_t(0xe0b8000000000000)));
CHECK_EQ(50, m.Call(uint64_t(0x9ce4000000000000)));
CHECK_EQ(49, m.Call(uint64_t(0xc792000000000000)));
CHECK_EQ(48, m.Call(uint64_t(0xb8f1000000000000)));
CHECK_EQ(47, m.Call(uint64_t(0x3b9f800000000000)));
CHECK_EQ(46, m.Call(uint64_t(0xdb4c400000000000)));
CHECK_EQ(45, m.Call(uint64_t(0xe9a3200000000000)));
CHECK_EQ(44, m.Call(uint64_t(0xfca6100000000000)));
CHECK_EQ(43, m.Call(uint64_t(0x6c8a780000000000)));
CHECK_EQ(42, m.Call(uint64_t(0x8ce5a40000000000)));
CHECK_EQ(41, m.Call(uint64_t(0xcb7d020000000000)));
CHECK_EQ(40, m.Call(uint64_t(0xcb4dc10000000000)));
CHECK_EQ(39, m.Call(uint64_t(0xdfbec58000000000)));
CHECK_EQ(38, m.Call(uint64_t(0x27a9db4000000000)));
CHECK_EQ(37, m.Call(uint64_t(0xde3bcb2000000000)));
CHECK_EQ(36, m.Call(uint64_t(0xd7e8a61000000000)));
CHECK_EQ(35, m.Call(uint64_t(0x9afdbc8800000000)));
CHECK_EQ(34, m.Call(uint64_t(0x9afdbc8400000000)));
CHECK_EQ(33, m.Call(uint64_t(0x9afdbc8200000000)));
CHECK_EQ(32, m.Call(uint64_t(0x9afdbc8100000000)));
// Lowest set bit in the lower 32 bits (results 31..0).
CHECK_EQ(31, m.Call(uint64_t(0x0000000080000000)));
CHECK_EQ(30, m.Call(uint64_t(0x0000000040000000)));
CHECK_EQ(29, m.Call(uint64_t(0x0000000020000000)));
CHECK_EQ(28, m.Call(uint64_t(0x0000000010000000)));
CHECK_EQ(27, m.Call(uint64_t(0x00000000a8000000)));
CHECK_EQ(26, m.Call(uint64_t(0x00000000f4000000)));
CHECK_EQ(25, m.Call(uint64_t(0x0000000062000000)));
CHECK_EQ(24, m.Call(uint64_t(0x0000000091000000)));
CHECK_EQ(23, m.Call(uint64_t(0x00000000cd800000)));
CHECK_EQ(22, m.Call(uint64_t(0x0000000009400000)));
CHECK_EQ(21, m.Call(uint64_t(0x00000000af200000)));
CHECK_EQ(20, m.Call(uint64_t(0x00000000ac100000)));
CHECK_EQ(19, m.Call(uint64_t(0x00000000e0b80000)));
CHECK_EQ(18, m.Call(uint64_t(0x000000009ce40000)));
CHECK_EQ(17, m.Call(uint64_t(0x00000000c7920000)));
CHECK_EQ(16, m.Call(uint64_t(0x00000000b8f10000)));
CHECK_EQ(15, m.Call(uint64_t(0x000000003b9f8000)));
CHECK_EQ(14, m.Call(uint64_t(0x00000000db4c4000)));
CHECK_EQ(13, m.Call(uint64_t(0x00000000e9a32000)));
CHECK_EQ(12, m.Call(uint64_t(0x00000000fca61000)));
CHECK_EQ(11, m.Call(uint64_t(0x000000006c8a7800)));
CHECK_EQ(10, m.Call(uint64_t(0x000000008ce5a400)));
CHECK_EQ(9, m.Call(uint64_t(0x00000000cb7d0200)));
CHECK_EQ(8, m.Call(uint64_t(0x00000000cb4dc100)));
CHECK_EQ(7, m.Call(uint64_t(0x00000000dfbec580)));
CHECK_EQ(6, m.Call(uint64_t(0x0000000027a9db40)));
CHECK_EQ(5, m.Call(uint64_t(0x00000000de3bcb20)));
CHECK_EQ(4, m.Call(uint64_t(0x00000000d7e8a610)));
CHECK_EQ(3, m.Call(uint64_t(0x000000009afdbc88)));
CHECK_EQ(2, m.Call(uint64_t(0x000000009afdbc84)));
CHECK_EQ(1, m.Call(uint64_t(0x000000009afdbc82)));
CHECK_EQ(0, m.Call(uint64_t(0x000000009afdbc81)));
}
#endif // V8_TARGET_ARCH_64_BIT