Make profile-guided optimization of builtins more configurable

Introduce get_hints.py and combine_hints.py in order to make
the interpretation of basic block counts into hints more
configurable and explicit, as well as allowing more accurate
and consistent methods of combining multiple profiles.

get_hints.py allows for the minimum count and threshold ratio
values to be easily altered for different profiles, while
combine_hints.py allows the hints produced from different
benchmarks and threshold values to be easily and sensibly
combined.

Simply summing together basic block counts from different
benchmarks could previously lead to a longer running benchmark
overshadowing multiple shorter benchmarks with conflicting
hints.

Allowing alteration of the current threshold values gives a
doubling of performance, while the new method of combining
distinct profiles can double the performance improvement of the
secondary benchmark while losing as little as 4% of the
improvement gained in the primary benchmark.

Design doc: https://docs.google.com/document/d/1OhwZnIZom47IX0lyceyt-S9i8AApDB0UqJdvQD6NuKQ/edit?usp=sharing

Bug: v8:10470
Change-Id: I1c09d1eabfdda5ed6794592e2c13ff8b461be361
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3545181
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: George Wort <george.wort@arm.com>
Cr-Commit-Position: refs/heads/main@{#80282}
This commit is contained in:
George Wort 2022-04-29 15:08:41 +01:00 committed by V8 LUCI CQ
parent 3cb4634153
commit 0e127bcef7
16 changed files with 398 additions and 67 deletions

View File

@ -180,10 +180,13 @@ declare_args() {
# chrome --no-sandbox --disable-extensions
# --js-flags="--turbo-profiling-log-builtins --logfile=path/to/v8.log"
# "http://localhost/test-suite"
# 3. Optionally repeat step 2 for additional workloads, and concatenate all of
# the resulting log files into a single file.
# 4. Build again with v8_builtins_profiling_log_file set to the file created
# in steps 2-3.
# 3. Run tools/get_hints.py to produce the branch hints, selecting min_count
# and threshold_ratio as you wish.
# 4. Optionally repeat steps 2-3 for additional workloads, and use
# tools/combine_hints.py to combine the hints produced in step 3 into a
# single file.
# 5. Build again with v8_builtins_profiling_log_file set to the file created
# in step 3 or 4.
v8_builtins_profiling_log_file = ""
# Enables various testing features.

View File

@ -26,12 +26,11 @@ class ProfileDataFromFileInternal : public ProfileDataFromFile {
hash_has_value_ = true;
}
void AddCountToBlock(size_t block_id, double count) {
if (block_counts_by_id_.size() <= block_id) {
// std::vector initializes new data to zero when resizing.
block_counts_by_id_.resize(block_id + 1);
}
block_counts_by_id_[block_id] += count;
// Records the hint for the branch whose true and false destinations are the
// blocks with the given IDs. |hint| must be 0 (condition expected false) or 1
// (condition expected true). An entry that already exists for the same pair
// of blocks is left untouched.
void AddHintToBlock(size_t true_block_id, size_t false_block_id,
                    uint64_t hint) {
  CHECK_LT(hint, 2);
  const bool expected_result = hint != 0;
  block_hints_by_id.emplace(std::make_pair(true_block_id, false_block_id),
                            expected_result);
}
private:
@ -55,25 +54,28 @@ EnsureInitProfileData() {
std::string token;
std::istringstream line_stream(line);
if (!std::getline(line_stream, token, ',')) continue;
if (token == ProfileDataFromFileConstants::kBlockCounterMarker) {
// Any line starting with kBlockCounterMarker is a block usage count.
// As defined by V8FileLogger::BasicBlockCounterEvent, the format is:
// literal kBlockCounterMarker , builtin_name , block_id , usage_count
if (token == ProfileDataFromFileConstants::kBlockHintMarker) {
// Any line starting with kBlockHintMarker is a basic block branch hint.
// The format is:
// literal kBlockHintMarker , builtin_name , true_id , false_id , hint
std::string builtin_name;
CHECK(std::getline(line_stream, builtin_name, ','));
CHECK(std::getline(line_stream, token, ','));
char* end = nullptr;
uint32_t id = static_cast<uint32_t>(strtoul(token.c_str(), &end, 0));
uint32_t true_id = static_cast<uint32_t>(strtoul(token.c_str(), &end, 0));
CHECK(errno == 0 && end != token.c_str());
CHECK(std::getline(line_stream, token, ','));
uint32_t false_id =
static_cast<uint32_t>(strtoul(token.c_str(), &end, 0));
CHECK(errno == 0 && end != token.c_str());
std::getline(line_stream, token, ',');
CHECK(line_stream.eof());
double count = strtod(token.c_str(), &end);
uint64_t hint = strtoul(token.c_str(), &end, 10);
CHECK(errno == 0 && end != token.c_str());
ProfileDataFromFileInternal& counters_and_hash =
(*data.get())[builtin_name];
// We allow concatenating data from several Isolates, so we might see the
// same block multiple times. Just sum them all.
counters_and_hash.AddCountToBlock(id, count);
ProfileDataFromFileInternal& hints_and_hash = (*data.get())[builtin_name];
// Only the first hint for each branch will be used.
hints_and_hash.AddHintToBlock(true_id, false_id, hint);
CHECK(line_stream.eof());
} else if (token == ProfileDataFromFileConstants::kBuiltinHashMarker) {
// Any line starting with kBuiltinHashMarker is a function hash record.
// As defined by V8FileLogger::BuiltinHashEvent, the format is:
@ -85,14 +87,13 @@ EnsureInitProfileData() {
char* end = nullptr;
int hash = static_cast<int>(strtol(token.c_str(), &end, 0));
CHECK(errno == 0 && end != token.c_str());
ProfileDataFromFileInternal& counters_and_hash =
(*data.get())[builtin_name];
ProfileDataFromFileInternal& hints_and_hash = (*data.get())[builtin_name];
// We allow concatenating data from several Isolates, but expect them all
// to be running the same build. Any file with mismatched hashes for a
// function is considered ill-formed.
CHECK_IMPLIES(counters_and_hash.hash_has_value(),
counters_and_hash.hash() == hash);
counters_and_hash.set_hash(hash);
CHECK_IMPLIES(hints_and_hash.hash_has_value(),
hints_and_hash.hash() == hash);
hints_and_hash.set_hash(hash);
}
}
for (const auto& pair : *data.get()) {
@ -101,9 +102,10 @@ EnsureInitProfileData() {
}
if (data.get()->size() == 0) {
PrintF(
"No basic block counters were found in log file.\n"
"Did you build with v8_enable_builtins_profiling=true\n"
"and run with --turbo-profiling-log-builtins?\n");
"No branch hints were found in log file.\n"
"See the description of v8_builtins_profiling_log_file in BUILD.gn\n"
"for instructions on how to produce hints from a profiled v8.log "
"file\n");
}
return *data.get();

View File

@ -7,8 +7,11 @@
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>
#include "src/common/globals.h"
namespace v8 {
namespace internal {
@ -18,13 +21,14 @@ class ProfileDataFromFile {
// profiling data if the function has been changed.
int hash() const { return hash_; }
// Returns how many times the block with the given ID was executed during
// profiling.
double GetCounter(size_t block_id) const {
// The profile data is allowed to omit blocks which were never hit, so be
// careful to avoid out-of-bounds access.
return block_id < block_counts_by_id_.size() ? block_counts_by_id_[block_id]
: 0;
// Looks up the hint recorded for the branch whose true and false destinations
// are the blocks with the given IDs. Yields BranchHint::kNone when no hint
// was recorded for that pair of blocks.
BranchHint GetHint(size_t true_block_id, size_t false_block_id) const {
  const auto entry =
      block_hints_by_id.find(std::make_pair(true_block_id, false_block_id));
  if (entry == block_hints_by_id.end()) return BranchHint::kNone;
  return entry->second ? BranchHint::kTrue : BranchHint::kFalse;
}
// Load basic block profiling data for the builtin with the given name, if
@ -35,10 +39,10 @@ class ProfileDataFromFile {
protected:
int hash_ = 0;
// How many times each block was executed, indexed by block ID. This vector
// may be shorter than the total number of blocks; any omitted block should be
// treated as a zero.
std::vector<double> block_counts_by_id_;
// Branch hints, indicated by true or false to reflect the hinted result of
// the branch condition. The map is keyed by the basic block ids of the two
// destinations of the branch (true destination first).
std::map<std::pair<size_t, size_t>, bool> block_hints_by_id;
};
// The following strings can't be static members of ProfileDataFromFile until
@ -50,6 +54,10 @@ namespace ProfileDataFromFileConstants {
// counter.
static constexpr char kBlockCounterMarker[] = "block";
// Any line in the profile beginning with this string represents a basic block
// branch hint.
static constexpr char kBlockHintMarker[] = "block_hint";
// Any line in a v8.log beginning with this string represents the hash of the
// function Graph for a builtin.
static constexpr char kBuiltinHashMarker[] = "builtin_hash";

View File

@ -1143,6 +1143,9 @@ constexpr int kIeeeDoubleExponentWordOffset = 0;
#define DOUBLE_POINTER_ALIGN(value) \
(((value) + ::i::kDoubleAlignmentMask) & ~::i::kDoubleAlignmentMask)
// Prediction hint for branches.
enum class BranchHint : uint8_t { kNone, kTrue, kFalse };
// Defines hints about receiver values based on structural knowledge.
enum class ConvertReceiverMode : unsigned {
kNullOrUndefined, // Guaranteed to be null or undefined.

View File

@ -143,6 +143,16 @@ BasicBlockProfilerData* BasicBlockInstrumentor::Instrument(
for (int i = insertion_start; i < kArraySize; ++i) {
schedule->SetBlockForNode(block, to_insert[i]);
}
// The exit block is not instrumented and so we must ignore that block
// count.
if (block->control() == BasicBlock::kBranch &&
block->successors()[0]->rpo_number() !=
static_cast<int32_t>(n_blocks) &&
block->successors()[1]->rpo_number() !=
static_cast<int32_t>(n_blocks)) {
data->AddBranch(block->successors()[0]->id().ToInt(),
block->successors()[1]->id().ToInt());
}
}
return data;
}

View File

@ -14,7 +14,6 @@
namespace v8 {
namespace internal {
namespace compiler {
std::ostream& operator<<(std::ostream& os, BranchHint hint) {
switch (hint) {
@ -28,6 +27,8 @@ std::ostream& operator<<(std::ostream& os, BranchHint hint) {
UNREACHABLE();
}
namespace compiler {
std::ostream& operator<<(std::ostream& os, TrapId trap_id) {
switch (trap_id) {
#define TRAP_CASE(Name) \

View File

@ -23,6 +23,8 @@ namespace internal {
class StringConstantBase;
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, BranchHint);
namespace compiler {
// Forward declarations.
@ -39,9 +41,6 @@ class Node;
// should be treated.
enum class BranchSemantics { kJS, kMachine };
// Prediction hint for branches.
enum class BranchHint : uint8_t { kNone, kTrue, kFalse };
inline BranchHint NegateBranchHint(BranchHint hint) {
switch (hint) {
case BranchHint::kNone:
@ -54,10 +53,6 @@ inline BranchHint NegateBranchHint(BranchHint hint) {
UNREACHABLE();
}
inline size_t hash_value(BranchHint hint) { return static_cast<size_t>(hint); }
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, BranchHint);
enum class TrapId : uint32_t {
#define DEF_ENUM(Name, ...) k##Name,
FOREACH_WASM_TRAPREASON(DEF_ENUM)

View File

@ -468,21 +468,9 @@ class CFGBuilder : public ZoneObject {
BranchHint hint_from_profile = BranchHint::kNone;
if (const ProfileDataFromFile* profile_data = scheduler_->profile_data()) {
double block_zero_count =
profile_data->GetCounter(successor_blocks[0]->id().ToSize());
double block_one_count =
profile_data->GetCounter(successor_blocks[1]->id().ToSize());
// If a branch is visited a non-trivial number of times and substantially
// more often than its alternative, then mark it as likely.
constexpr double kMinimumCount = 100000;
constexpr double kThresholdRatio = 4000;
if (block_zero_count > kMinimumCount &&
block_zero_count / kThresholdRatio > block_one_count) {
hint_from_profile = BranchHint::kTrue;
} else if (block_one_count > kMinimumCount &&
block_one_count / kThresholdRatio > block_zero_count) {
hint_from_profile = BranchHint::kFalse;
}
hint_from_profile =
profile_data->GetHint(successor_blocks[0]->id().ToSize(),
successor_blocks[1]->id().ToSize());
}
// Consider branch hints.

View File

@ -28,6 +28,7 @@ namespace v8 {
namespace internal {
struct AssemblerOptions;
class TurbofanCompilationJob;
enum class BranchHint : uint8_t;
namespace compiler {
// Forward declarations for some compiler data structures.
@ -44,7 +45,6 @@ enum class TrapId : uint32_t;
struct Int64LoweringSpecialCase;
template <size_t VarCount>
class GraphAssemblerLabel;
enum class BranchHint : uint8_t;
} // namespace compiler
namespace wasm {

View File

@ -45,6 +45,11 @@ void BasicBlockProfilerData::ResetCounts() {
}
}
// Appends a branch, identified by the ids of its true and false destination
// blocks, to the list of branches recorded for this function.
void BasicBlockProfilerData::AddBranch(int32_t true_block_id,
                                       int32_t false_block_id) {
  branches_.push_back(std::make_pair(true_block_id, false_block_id));
}
BasicBlockProfilerData* BasicBlockProfiler::NewData(size_t n_blocks) {
base::MutexGuard lock(&data_list_mutex_);
auto data = std::make_unique<BasicBlockProfilerData>(n_blocks);
@ -87,6 +92,10 @@ void BasicBlockProfilerData::CopyFromJSHeap(
for (int i = 0; i < block_ids.length() / kBlockIdSlotSize; ++i) {
block_ids_.push_back(block_ids.get_int(i));
}
PodArray<std::pair<int32_t, int32_t>> branches = js_heap_data.branches();
for (int i = 0; i < branches.length(); ++i) {
branches_.push_back(branches.get(i));
}
CHECK_EQ(block_ids_.size(), counts_.size());
hash_ = js_heap_data.hash();
}
@ -113,12 +122,20 @@ Handle<OnHeapBasicBlockProfilerData> BasicBlockProfilerData::CopyToJSHeap(
for (int i = 0; i < static_cast<int>(n_blocks()); ++i) {
counts->set_uint32(i, counts_[i]);
}
Handle<PodArray<std::pair<int32_t, int32_t>>> branches =
PodArray<std::pair<int32_t, int32_t>>::New(
isolate, static_cast<int>(branches_.size()), AllocationType::kOld);
for (int i = 0; i < static_cast<int>(branches_.size()); ++i) {
branches->set(i, branches_[i]);
}
Handle<String> name = CopyStringToJSHeap(function_name_, isolate);
Handle<String> schedule = CopyStringToJSHeap(schedule_, isolate);
Handle<String> code = CopyStringToJSHeap(code_, isolate);
return isolate->factory()->NewOnHeapBasicBlockProfilerData(
block_ids, counts, name, schedule, code, hash_, AllocationType::kOld);
block_ids, counts, branches, name, schedule, code, hash_,
AllocationType::kOld);
}
void BasicBlockProfiler::ResetCounts(Isolate* isolate) {
@ -191,6 +208,10 @@ void BasicBlockProfilerData::Log(Isolate* isolate) {
}
}
if (any_nonzero_counter) {
for (size_t i = 0; i < branches_.size(); ++i) {
isolate->logger()->BasicBlockBranchEvent(
function_name_.c_str(), branches_[i].first, branches_[i].second);
}
isolate->logger()->BuiltinHashEvent(function_name_.c_str(), hash_);
}
}

View File

@ -43,6 +43,7 @@ class BasicBlockProfilerData {
void SetSchedule(const std::ostringstream& os);
void SetBlockId(size_t offset, int32_t id);
void SetHash(int hash);
void AddBranch(int32_t true_block_id, int32_t false_block_id);
// Copy the data from this object into an equivalent object stored on the JS
// heap, so that it can survive snapshotting and relocation. This must
@ -63,6 +64,7 @@ class BasicBlockProfilerData {
// These vectors are indexed by reverse post-order block number.
std::vector<int32_t> block_ids_;
std::vector<uint32_t> counts_;
std::vector<std::pair<int32_t, int32_t>> branches_;
std::string function_name_;
std::string schedule_;
std::string code_;

View File

@ -1168,6 +1168,15 @@ void V8FileLogger::BasicBlockCounterEvent(const char* name, int block_id,
msg.WriteToLogFile();
}
// Logs one branch of the builtin |name| as a record made of
// kBlockHintMarker, name, true_block_id and false_block_id, in that order.
// No hint value is written here: tools/get_hints.py reads these records as
// the list of branches and derives the hints from the logged block counts.
void V8FileLogger::BasicBlockBranchEvent(const char* name, int true_block_id,
int false_block_id) {
// Nothing is logged unless --turbo-profiling-log-builtins is set.
if (!FLAG_turbo_profiling_log_builtins) return;
// NOTE(review): MSG_BUILDER is defined outside this hunk; it presumably
// declares |msg| and may bail out when logging is unavailable - confirm.
MSG_BUILDER();
// kNext is presumably the field separator of the log format - confirm.
msg << ProfileDataFromFileConstants::kBlockHintMarker << kNext << name
<< kNext << true_block_id << kNext << false_block_id;
msg.WriteToLogFile();
}
void V8FileLogger::BuiltinHashEvent(const char* name, int hash) {
if (!FLAG_turbo_profiling_log_builtins) return;
MSG_BUILDER();

View File

@ -236,6 +236,9 @@ class V8FileLogger : public LogEventListener {
void BasicBlockCounterEvent(const char* name, int block_id, uint32_t count);
void BasicBlockBranchEvent(const char* name, int true_block_id,
int false_block_id);
void BuiltinHashEvent(const char* name, int hash);
static void EnterExternal(Isolate* isolate);

View File

@ -147,10 +147,15 @@ extern class UncompiledDataWithPreparseDataAndJob extends
job: RawPtr;
}
@useParentTypeChecker
type PodArrayOfIntegerPairs extends ByteArray
constexpr 'PodArray<std::pair<int32_t, int32_t>>';
@export
class OnHeapBasicBlockProfilerData extends HeapObject {
block_ids: ByteArray; // Stored as 4-byte ints
counts: ByteArray; // Stored as 4-byte unsigned ints
block_ids: ByteArray; // Stored as 4-byte ints
counts: ByteArray; // Stored as 4-byte unsigned ints
branches: PodArrayOfIntegerPairs; // Stored as pairs of 4-byte ints
name: String;
schedule: String;
code: String;

136
tools/combine_hints.py Normal file
View File

@ -0,0 +1,136 @@
#!/usr/bin/env python
# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can
# be found in the LICENSE file.
"""
This script combines the branch hints for profile-guided optimization
produced by get_hints.py. The hints can simply be concatenated in priority order
instead of using this script if the earliest seen hint is to be used.
Usage: combine_hints.py combine-option N output_file hints_file_1 weight_1 hints_file_2 weight_2 ...
where weight_n is the integer weight applied to the hints in hints_file_n
and combine-option N is one of the below:
diff N: Only use the hint when the weighted sum of the hints in one
direction exceeds the weighted sum of hints in the opposite
direction by at least N.
agreed N: Only use the hint if every file containing this branch agrees
and the weighted sum of these hints is at least N.
Using diff num_input_files and using a weight of 1 for every hints_file will
give the strict intersection of all files.
"""
import argparse
import sys
# Command line definition. The trailing arguments are consumed as alternating
# "hints_file weight" pairs.
PARSER = argparse.ArgumentParser(
    description="A script that combines the hints produced by get_hints.py",
    epilog="Example:\n\tcombine_hints.py combine-option N output_file "
    "hints_file_1 2 hints_file_2 1 ...")
PARSER.add_argument(
    'combine_option',
    choices=['diff', 'agreed'],
    help="The combine option dictates how the hints will be combined, diff "
    "only uses the hint if the positive/negative hints outweigh the "
    "negative/positive hints by N, while agreed only uses the hint if "
    "the weighted sum of hints in one direction matches or exceeds N and "
    "no conflicting hints are found.")
PARSER.add_argument(
    'weight_threshold',
    type=int,
    help="The threshold value which the hint's weight must match or exceed "
    "to be used.")
PARSER.add_argument(
    'output_file',
    help="The file which the hints and builtin hashes are written to")
PARSER.add_argument(
    'hint_files_and_weights',
    nargs=argparse.REMAINDER,
    help="The hint files produced by get_hints.py along with their weights")

ARGS = vars(PARSER.parse_args())

# Markers identifying the kind of record held by a line of a hints file.
BRANCH_HINT_MARKER = "block_hint"
BUILTIN_HASH_MARKER = "builtin_hash"

must_agree = ARGS['combine_option'] == "agreed"
# A threshold below 1 would accept branches whose hints cancel out exactly.
weight_threshold = max(1, ARGS['weight_threshold'])

hint_args = ARGS['hint_files_and_weights']
# zip() silently drops an unpaired trailing element, so a hints file given
# without a weight would be ignored. Reject that instead.
if len(hint_args) % 2 != 0:
  PARSER.error("every hints file must be followed by its integer weight")
# Materialized as a list so that it can be iterated more than once.
hint_files_and_weights = list(zip(hint_args[0::2], hint_args[1::2]))
def add_branch_hints(hint_file, weight, branch_hints, builtin_hashes):
  """Folds the records of one hints file into the running totals.

  Args:
    hint_file: Path of a hints file as written by get_hints.py.
    weight: Integer weight of every hint found in hint_file.
    branch_hints: Dict updated in place, keyed by
      (builtin_name, true_block_id, false_block_id). In "agreed" mode each
      value is a (has_conflicts, weighted_sum) tuple, otherwise it is the
      signed weighted sum (positive for true hints, negative for false ones).
    builtin_hashes: Dict updated in place, mapping builtin name to its hash.

  Exits with status 1 if the file cannot be read or if a builtin hash differs
  from the one already recorded for the same builtin.
  """
  try:
    with open(hint_file, "r") as hints:
      for record in hints.readlines():
        columns = record.split(',')
        marker = columns[0]
        if marker == BRANCH_HINT_MARKER:
          name = columns[1]
          branch = (name, int(columns[2]), int(columns[3]))
          # A true hint adds the weight, a false hint subtracts it.
          delta = weight if (int(columns[4]) > 0) else -weight
          if not must_agree:
            branch_hints[branch] = branch_hints.get(branch, 0) + delta
            continue
          # The boolean records whether or not any conflicts have been found
          # for this branch; a conflicted branch is never updated again.
          has_conflicts, total = branch_hints.setdefault(branch, (False, 0))
          if has_conflicts:
            continue
          # The magnitudes only add up when both values point the same way
          # (or one of them is zero).
          same_direction = abs(delta) + abs(total) == abs(delta + total)
          if same_direction:
            branch_hints[branch] = (False, total + delta)
          else:
            branch_hints[branch] = (True, 0)
        elif marker == BUILTIN_HASH_MARKER:
          builtin_name = columns[1]
          builtin_hash = int(columns[2])
          known_hash = builtin_hashes.setdefault(builtin_name, builtin_hash)
          if known_hash != builtin_hash:
            print("Builtin hashes {} and {} for {} do not match.".format(
                known_hash, builtin_hash, builtin_name))
            sys.exit(1)
  except IOError as e:
    print("Cannot read from {}. {}.".format(hint_file, e.strerror))
    sys.exit(1)
def write_hints_to_output(output_file, branch_hints, builtin_hashes):
  """Writes the combined hints, then the builtin hashes, to output_file.

  A branch is only written when the magnitude of its weighted sum reaches
  weight_threshold; in "agreed" mode a branch with conflicting hints counts
  as zero. The hint is 1 for a positive sum and 0 otherwise.

  Exits with status 1 if the file cannot be written.
  """
  try:
    with open(output_file, "w") as out:
      for branch, entry in branch_hints.items():
        if must_agree:
          has_conflicts, count = entry
          if has_conflicts:
            count = 0
        else:
          count = entry
        if abs(count) < abs(weight_threshold):
          continue
        hint = 1 if count > 0 else 0
        out.write("{},{},{},{},{}\n".format(BRANCH_HINT_MARKER, branch[0],
                                            branch[1], branch[2], hint))
      for builtin_name, builtin_hash in builtin_hashes.items():
        out.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name,
                                      builtin_hash))
  except IOError as e:
    print("Cannot write to {}. {}.".format(output_file, e.strerror))
    sys.exit(1)
# Accumulate the hints of every input file, then emit the combined result.
branch_hints = {}
builtin_hashes = {}
for (hint_file, weight) in hint_files_and_weights:
  # Weights arrive as raw command line strings; report a malformed one the
  # same way as the other errors of this script rather than with a traceback.
  try:
    weight_value = int(weight)
  except ValueError:
    print("Weight {} for {} is not an integer.".format(weight, hint_file))
    sys.exit(1)
  add_branch_hints(hint_file, weight_value, branch_hints, builtin_hashes)
write_hints_to_output(ARGS['output_file'], branch_hints, builtin_hashes)

145
tools/get_hints.py Normal file
View File

@ -0,0 +1,145 @@
#!/usr/bin/env python
# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can
# be found in the LICENSE file.
"""
This script generates the branch hints for profile-guided optimization of
the builtins in the following format:
block_hint,<builtin_name>,<basic_block_id_for_true_destination>,<basic_block_id_for_false_destination>,<hint>
where hint is an integer representation of the expected boolean result of the
branch condition. The expected boolean result is generated for a specific given
branch by comparing the counts of the two destination basic blocks. V8's
control flow graph is always in edge-split form, guaranteeing that each
destination block only has a single predecessor, and thus guaranteeing that the
execution counts of these basic blocks are equal to how many times the branch
condition is true or false.
Usage: get_hints.py [--min MIN] [--ratio RATIO] log_file output_file
where:
1. log_file is the v8.log file produced after running v8 with the
--turbo-profiling-log-builtins flag after building with
v8_enable_builtins_profiling = true.
2. output_file is the file which the hints and builtin hashes are written
to.
3. --min MIN provides the minimum count at which a basic block will be taken
as a valid destination of a hinted branch decision.
4. --ratio RATIO provides the ratio at which, when compared to the
alternative destination's count, a branch destination's count is
considered sufficient to require a branch hint to be produced.
"""
import argparse
import sys
# Command line definition; see the module docstring for the meaning of each
# argument.
PARSER = argparse.ArgumentParser(
    description="A script that generates the branch hints for profile-guided "
    "optimization",
    # The example lists exactly the arguments this script accepts.
    epilog="Example:\n\tget_hints.py --min n1 --ratio n2 log_file output_file")
PARSER.add_argument(
    '--min',
    type=int,
    default=1000,
    help="The minimum count at which a basic block will be taken as a valid "
    "destination of a hinted branch decision")
PARSER.add_argument(
    '--ratio',
    type=int,
    default=40,
    help="The ratio at which, when compared to the alternative destination's "
    "count, a branch destination's count is considered sufficient to "
    "require a branch hint to be produced")
PARSER.add_argument(
    'log_file',
    help="The v8.log file produced after running v8 with the "
    "--turbo-profiling-log-builtins flag after building with "
    "v8_enable_builtins_profiling = true")
PARSER.add_argument(
    'output_file',
    help="The file which the hints and builtin hashes are written to")

ARGS = vars(PARSER.parse_args())

# Markers identifying the kind of record held by a line of the log file.
BLOCK_COUNT_MARKER = "block"
BRANCH_HINT_MARKER = "block_hint"
BUILTIN_HASH_MARKER = "builtin_hash"
def parse_log_file(log_file):
  """Extracts block counts, branches and builtin hashes from a v8.log file.

  Counts logged several times for the same block are summed. Every occurrence
  of a builtin's hash has to agree: counts gathered for differing hashes
  cannot be meaningfully summed, so a mismatch is reported as an error.

  Args:
    log_file: Path of the log file to read.

  Returns:
    A list [block_counts, branches, builtin_hashes] where block_counts maps a
    builtin name to a list of counts indexed by block id, branches is a list
    of (builtin_name, true_block_id, false_block_id) tuples and
    builtin_hashes maps a builtin name to its hash.

  Exits with status 1 if the file cannot be read or if two hashes logged for
  the same builtin differ.
  """
  block_counts = {}
  branches = []
  builtin_hashes = {}
  try:
    with open(log_file, "r") as f:
      # Iterate lazily; the log can be large.
      for line in f:
        fields = line.split(',')
        if fields[0] == BLOCK_COUNT_MARKER:
          builtin_name = fields[1]
          block_id = int(fields[2])
          count = float(fields[3])
          counts = block_counts.setdefault(builtin_name, [])
          # Blocks missing from the log so far count as zero.
          if len(counts) <= block_id:
            counts.extend([0] * (block_id + 1 - len(counts)))
          counts[block_id] += count
        elif fields[0] == BUILTIN_HASH_MARKER:
          builtin_name = fields[1]
          builtin_hash = int(fields[2])
          known_hash = builtin_hashes.setdefault(builtin_name, builtin_hash)
          if known_hash != builtin_hash:
            print("Builtin hashes {} and {} for {} do not match.".format(
                known_hash, builtin_hash, builtin_name))
            sys.exit(1)
        elif fields[0] == BRANCH_HINT_MARKER:
          builtin_name = fields[1]
          true_block_id = int(fields[2])
          false_block_id = int(fields[3])
          branches.append((builtin_name, true_block_id, false_block_id))
  except IOError as e:
    print("Cannot read from {}. {}.".format(log_file, e.strerror))
    sys.exit(1)
  return [block_counts, branches, builtin_hashes]
def get_branch_hints(block_counts, branches, min_count, threshold_ratio):
  """Derives a hint for every branch with a sufficiently dominant destination.

  Args:
    block_counts: Maps a builtin name to a list of execution counts indexed
      by block id. Blocks beyond the end of the list count as zero.
    branches: Iterable of (builtin_name, true_block_id, false_block_id).
    min_count: Minimum count of the dominant destination.
    threshold_ratio: Factor by which the count of the dominant destination
      must match or exceed the count of the other destination.

  Returns:
    A dict mapping (builtin_name, true_block_id, false_block_id) to 1 when
    the condition is expected to be true and 0 when it is expected to be
    false. Branches without a dominant destination, and branches of builtins
    missing from block_counts, are omitted.
  """
  branch_hints = {}
  for (builtin_name, true_block_id, false_block_id) in branches:
    if builtin_name not in block_counts:
      continue
    counts = block_counts[builtin_name]
    true_block_count = 0
    false_block_count = 0
    if true_block_id < len(counts):
      true_block_count = counts[true_block_id]
    if false_block_id < len(counts):
      false_block_count = counts[false_block_id]
    # Compare with a multiplication rather than a division: the result is the
    # same for any positive ratio, a ratio of zero no longer raises
    # ZeroDivisionError, and the quotient cannot round across the threshold.
    if (true_block_count >= min_count and
        true_block_count >= threshold_ratio * false_block_count):
      hint = 1
    elif (false_block_count >= min_count and
          false_block_count >= threshold_ratio * true_block_count):
      hint = 0
    else:
      continue
    branch_hints[(builtin_name, true_block_id, false_block_id)] = hint
  return branch_hints
def write_hints_to_output(output_file, branch_hints, builtin_hashes):
  """Writes one line per branch hint, then one line per builtin hash.

  Args:
    output_file: Path of the file to (over)write.
    branch_hints: Maps (builtin_name, true_block_id, false_block_id) to the
      hint value of that branch.
    builtin_hashes: Maps a builtin name to its hash.

  Exits with status 1 if the file cannot be written.
  """
  try:
    with open(output_file, "w") as out:
      for branch, hint in branch_hints.items():
        out.write("{},{},{},{},{}\n".format(BRANCH_HINT_MARKER, branch[0],
                                            branch[1], branch[2], hint))
      for builtin_name, builtin_hash in builtin_hashes.items():
        out.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name,
                                      builtin_hash))
  except IOError as e:
    print("Cannot write to {}. {}.".format(output_file, e.strerror))
    sys.exit(1)
# Parse the log, derive the hints from the block counts and write them out
# together with the builtin hashes.
parsed_log = parse_log_file(ARGS['log_file'])
block_counts, branches, builtin_hashes = parsed_log
branch_hints = get_branch_hints(
    block_counts, branches, ARGS['min'], ARGS['ratio'])
write_hints_to_output(ARGS['output_file'], branch_hints, builtin_hashes)