v8/tools/get_hints.py
George Wort 0e127bcef7 Make profile-guided optimization of builtins more configurable
Introduce get_hints.py and combine_hints.py in order to make
the interpretation of basic block counts into hints more
configurable and explicit, as well as allowing more accurate
and consistent methods of combining multiple profiles.

get_hints.py allows for the minimum count and threshold ratio
values to be easily altered for different profiles, while
combine_hints.py allows the hints produced from different
benchmarks and threshold values to be easily and sensibly
combined.

Simply summing together basic block counts from different
benchmarks could previously lead to a longer running benchmark
overshadowing multiple shorter benchmarks with conflicting
hints.

Allowing alteration of the current threshold values gives a
doubling of performance, while the new method of combining
distinct profiles can double the performance improvement of the
secondary benchmark while losing as little as 4% of the
improvement gained in the primary benchmark.

Design doc: https://docs.google.com/document/d/1OhwZnIZom47IX0lyceyt-S9i8AApDB0UqJdvQD6NuKQ/edit?usp=sharing

Bug: v8:10470
Change-Id: I1c09d1eabfdda5ed6794592e2c13ff8b461be361
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3545181
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: George Wort <george.wort@arm.com>
Cr-Commit-Position: refs/heads/main@{#80282}
2022-04-29 15:21:31 +00:00

146 lines
5.6 KiB
Python

#!/usr/bin/env python
# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can
# be found in the LICENSE file.
"""
This script generates the branch hints for profile-guided optimization of
the builtins in the following format:
block_hint,<builtin_name>,<basic_block_id_for_true_destination>,<basic_block_id_for_false_destination>,<hint>
where hint is an integer representation of the expected boolean result of the
branch condition. The expected boolean result is generated for a specific given
branch by comparing the counts of the two destination basic blocks. V8's
control flow graph is always in edge-split form, guaranteeing that each
destination block only has a single predecessor, and thus guaranteeing that the
execution counts of these basic blocks are equal to how many times the branch
condition is true or false.
Usage: get_hints.py [--min MIN] [--ratio RATIO] log_file output_file
where:
1. log_file is the v8.log file produced after running v8 with the
--turbo-profiling-log-builtins flag after building with
v8_enable_builtins_profiling = true.
2. output_file is the file which the hints and builtin hashes are written
to.
3. --min MIN provides the minimum count at which a basic block will be taken
as a valid destination of a hinted branch decision.
4. --ratio RATIO provides the ratio at which, when compared to the
alternative destination's count, a branch destination's count is
considered sufficient to require a branch hint to be produced.
"""
import argparse
import sys
PARSER = argparse.ArgumentParser(
description="A script that generates the branch hints for profile-guided \
optimization",
epilog="Example:\n\tget_hints.py --min n1 --ratio n2 branches_file log_file output_file\""
)
PARSER.add_argument(
'--min',
type=int,
default=1000,
help="The minimum count at which a basic block will be taken as a valid \
destination of a hinted branch decision")
PARSER.add_argument(
'--ratio',
type=int,
default=40,
help="The ratio at which, when compared to the alternative destination's \
count,a branch destination's count is considered sufficient to \
require a branch hint to be produced")
PARSER.add_argument(
'log_file',
help="The v8.log file produced after running v8 with the \
--turbo-profiling-log-builtins flag after building with \
v8_enable_builtins_profiling = true")
PARSER.add_argument(
'output_file',
help="The file which the hints and builtin hashes are written to")
ARGS = vars(PARSER.parse_args())
BLOCK_COUNT_MARKER = "block"
BRANCH_HINT_MARKER = "block_hint"
BUILTIN_HASH_MARKER = "builtin_hash"
def parse_log_file(log_file):
block_counts = {}
branches = []
builtin_hashes = {}
try:
with open(log_file, "r") as f:
for line in f.readlines():
fields = line.split(',')
if fields[0] == BLOCK_COUNT_MARKER:
builtin_name = fields[1]
block_id = int(fields[2])
count = float(fields[3])
if builtin_name not in block_counts:
block_counts[builtin_name] = []
while len(block_counts[builtin_name]) <= block_id:
block_counts[builtin_name].append(0)
block_counts[builtin_name][block_id] += count
elif fields[0] == BUILTIN_HASH_MARKER:
builtin_name = fields[1]
builtin_hash = int(fields[2])
builtin_hashes[builtin_name] = builtin_hash
elif fields[0] == BRANCH_HINT_MARKER:
builtin_name = fields[1]
true_block_id = int(fields[2])
false_block_id = int(fields[3])
branches.append((builtin_name, true_block_id, false_block_id))
except IOError as e:
print("Cannot read from {}. {}.".format(log_file, e.strerror))
sys.exit(1)
return [block_counts, branches, builtin_hashes]
def get_branch_hints(block_counts, branches, min_count, threshold_ratio):
branch_hints = {}
for (builtin_name, true_block_id, false_block_id) in branches:
if builtin_name in block_counts:
true_block_count = 0
false_block_count = 0
if true_block_id < len(block_counts[builtin_name]):
true_block_count = block_counts[builtin_name][true_block_id]
if false_block_id < len(block_counts[builtin_name]):
false_block_count = block_counts[builtin_name][false_block_id]
hint = -1
if (true_block_count >= min_count) and (true_block_count / threshold_ratio
>= false_block_count):
hint = 1
elif (false_block_count >= min_count) and (
false_block_count / threshold_ratio >= true_block_count):
hint = 0
if hint >= 0:
branch_hints[(builtin_name, true_block_id, false_block_id)] = hint
return branch_hints
def write_hints_to_output(output_file, branch_hints, builtin_hashes):
try:
with open(output_file, "w") as f:
for key in branch_hints:
f.write("{},{},{},{},{}\n".format(BRANCH_HINT_MARKER, key[0], key[1],
key[2], branch_hints[key]))
for builtin_name in builtin_hashes:
f.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name,
builtin_hashes[builtin_name]))
except IOError as e:
print("Cannot write to {}. {}.".format(output_file, e.strerror))
sys.exit(1)
[block_counts, branches, builtin_hashes] = parse_log_file(ARGS['log_file'])
branch_hints = get_branch_hints(block_counts, branches, ARGS['min'],
ARGS['ratio'])
write_hints_to_output(ARGS['output_file'], branch_hints, builtin_hashes)