[runtime] Improve builtins PGO logging

- Add explicit --turbo-profiling-output and --turbo-profiling-input
- Rename --turbo-profiling-log-file to --turbo-profiling-input
- No longer log PGO data to v8.log
- Add runtime %GetAndResetTurboProfilingData helper function for
  more controlled logging within chrome
- Rewrite generate.py script to use more python3

Bug: v8:10470
Change-Id: Ib817b5c3793a0a7ae77103075ea2d6f6d0282150
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3820381
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82350}
This commit is contained in:
Camillo 2022-08-10 15:10:56 +02:00 committed by V8 LUCI CQ
parent 9d36b2dd0d
commit 012fa89948
13 changed files with 134 additions and 111 deletions

View File

@ -181,7 +181,7 @@ declare_args() {
# 2. Run your chosen workload with the --turbo-profiling-log-builtins flag.
# For Chrome, the invocation might look like this:
# chrome --no-sandbox --disable-extensions
# --js-flags="--turbo-profiling-log-builtins --logfile=path/to/v8.log"
# --js-flags="--turbo-profiler-output=v8.builtins.pgo"
# "http://localhost/test-suite"
# 3. Run tools/builtins-pgo/get_hints.py to produce the branch hints,
# selecting min_count and threshold_ratio as you wish.
@ -2209,7 +2209,7 @@ template("run_mksnapshot") {
if (v8_builtins_profiling_log_file != "") {
sources += [ v8_builtins_profiling_log_file ]
args += [
"--turbo-profiling-log-file",
"--turbo-profiling-input",
rebase_path(v8_builtins_profiling_log_file, root_build_dir),
]
}

View File

@ -46,7 +46,7 @@ EnsureInitProfileData() {
if (initialized) return *data.get();
initialized = true;
const char* filename = FLAG_turbo_profiling_log_file;
const char* filename = FLAG_turbo_profiling_input;
if (filename == nullptr) return *data.get();
std::ifstream file(filename);
CHECK_WITH_MSG(file.good(), "Can't read log file");

View File

@ -9,6 +9,7 @@
#include <sstream>
#include "src/base/lazy-instance.h"
#include "src/builtins/profile-data-reader.h"
#include "src/heap/heap-inl.h"
#include "src/objects/shared-function-info-inl.h"
@ -159,7 +160,7 @@ bool BasicBlockProfiler::HasData(Isolate* isolate) {
isolate->heap()->basic_block_profiling_data().Length() > 0;
}
void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) {
void BasicBlockProfiler::Print(Isolate* isolate, std::ostream& os) {
os << "---- Start Profiling Data ----" << std::endl;
for (const auto& data : data_list_) {
os << *data;
@ -172,10 +173,7 @@ void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) {
BasicBlockProfilerData data(
handle(OnHeapBasicBlockProfilerData::cast(list->Get(i)), isolate),
isolate);
// Print data for builtins to both stdout and the log file, if logging is
// enabled.
os << data;
data.Log(isolate);
// Ensure that all builtin names are unique; otherwise profile-guided
// optimization might get confused.
CHECK(builtin_names.insert(data.function_name_).second);
@ -183,6 +181,22 @@ void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) {
os << "---- End Profiling Data ----" << std::endl;
}
void BasicBlockProfiler::Log(Isolate* isolate, std::ostream& os) {
HandleScope scope(isolate);
Handle<ArrayList> list(isolate->heap()->basic_block_profiling_data(),
isolate);
std::unordered_set<std::string> builtin_names;
for (int i = 0; i < list->Length(); ++i) {
BasicBlockProfilerData data(
handle(OnHeapBasicBlockProfilerData::cast(list->Get(i)), isolate),
isolate);
data.Log(isolate, os);
// Ensure that all builtin names are unique; otherwise profile-guided
// optimization might get confused.
CHECK(builtin_names.insert(data.function_name_).second);
}
}
std::vector<bool> BasicBlockProfiler::GetCoverageBitmap(Isolate* isolate) {
DisallowGarbageCollection no_gc;
ArrayList list(isolate->heap()->basic_block_profiling_data());
@ -198,21 +212,25 @@ std::vector<bool> BasicBlockProfiler::GetCoverageBitmap(Isolate* isolate) {
return out;
}
void BasicBlockProfilerData::Log(Isolate* isolate) {
void BasicBlockProfilerData::Log(Isolate* isolate, std::ostream& os) {
bool any_nonzero_counter = false;
constexpr char kNext[] = "\t";
for (size_t i = 0; i < n_blocks(); ++i) {
if (counts_[i] > 0) {
any_nonzero_counter = true;
isolate->v8_file_logger()->BasicBlockCounterEvent(
function_name_.c_str(), block_ids_[i], counts_[i]);
os << ProfileDataFromFileConstants::kBlockCounterMarker << kNext
<< function_name_.c_str() << kNext << block_ids_[i] << kNext
<< counts_[i] << std::endl;
}
}
if (any_nonzero_counter) {
for (size_t i = 0; i < branches_.size(); ++i) {
isolate->v8_file_logger()->BasicBlockBranchEvent(
function_name_.c_str(), branches_[i].first, branches_[i].second);
os << ProfileDataFromFileConstants::kBlockHintMarker << kNext
<< function_name_.c_str() << kNext << branches_[i].first << kNext
<< branches_[i].second << std::endl;
}
isolate->v8_file_logger()->BuiltinHashEvent(function_name_.c_str(), hash_);
os << ProfileDataFromFileConstants::kBuiltinHashMarker << kNext
<< function_name_.c_str() << kNext << hash_ << std::endl;
}
}

View File

@ -50,7 +50,7 @@ class BasicBlockProfilerData {
// happen on the main thread during finalization of the compilation.
Handle<OnHeapBasicBlockProfilerData> CopyToJSHeap(Isolate* isolate);
void Log(Isolate* isolate);
void Log(Isolate* isolate, std::ostream& os);
private:
friend class BasicBlockProfiler;
@ -84,7 +84,8 @@ class BasicBlockProfiler {
BasicBlockProfilerData* NewData(size_t n_blocks);
V8_EXPORT_PRIVATE void ResetCounts(Isolate* isolate);
V8_EXPORT_PRIVATE bool HasData(Isolate* isolate);
V8_EXPORT_PRIVATE void Print(std::ostream& os, Isolate* isolate);
V8_EXPORT_PRIVATE void Print(Isolate* isolate, std::ostream& os);
V8_EXPORT_PRIVATE void Log(Isolate* isolate, std::ostream& os);
// Coverage bitmap in this context includes only on heap BasicBlockProfiler
// data. It is used to export coverage of builtins function loaded from

View File

@ -4458,9 +4458,19 @@ void Isolate::DumpAndResetStats() {
}
#endif // V8_RUNTIME_CALL_STATS
if (BasicBlockProfiler::Get()->HasData(this)) {
StdoutStream out;
BasicBlockProfiler::Get()->Print(out, this);
if (FLAG_turbo_profiling_output) {
auto f = std::fopen(FLAG_turbo_profiling_output, "a");
OFStream pgo_stream(f);
BasicBlockProfiler::Get()->Log(this, pgo_stream);
} else {
StdoutStream out;
BasicBlockProfiler::Get()->Print(this, out);
}
BasicBlockProfiler::Get()->ResetCounts(this);
} else {
// Only log builtins PGO data if v8 was built with
// v8_enable_builtins_profiling=true
CHECK_NULL(FLAG_turbo_profiling_output);
}
}

View File

@ -927,9 +927,11 @@ DEFINE_BOOL(turbo_profiling_verbose, false,
"enable basic block profiling in TurboFan, and include each "
"function's schedule and disassembly in the output")
DEFINE_IMPLICATION(turbo_profiling_verbose, turbo_profiling)
DEFINE_BOOL(turbo_profiling_log_builtins, false,
"emit data about basic block usage in builtins to v8.log (requires "
"that V8 was built with v8_enable_builtins_profiling=true)")
DEFINE_STRING(
turbo_profiling_output, nullptr,
"emit data about basic block usage in builtins to this file "
"(requires that V8 was built with v8_enable_builtins_profiling=true)")
DEFINE_BOOL(
warn_about_builtin_profile_data, false,
"flag for mksnapshot, emit warnings when applying builtin profile data")
@ -1946,7 +1948,7 @@ DEFINE_BOOL(target_is_simulator, false,
"Instruct mksnapshot that the target is meant to run in the "
"simulator and it can generate simulator-specific instructions. "
"(mksnapshot only)")
DEFINE_STRING(turbo_profiling_log_file, nullptr,
DEFINE_STRING(turbo_profiling_input, nullptr,
"Path of the input file containing basic block counters for "
"builtins. (mksnapshot only)")

View File

@ -125,8 +125,7 @@ void V8::Initialize() {
CHECK(platform_);
// Update logging information before enforcing flag implications.
FlagValue<bool>* log_all_flags[] = {&FLAG_turbo_profiling_log_builtins,
&FLAG_log_all,
FlagValue<bool>* log_all_flags[] = {&FLAG_log_all,
&FLAG_log_code,
&FLAG_log_code_disassemble,
&FLAG_log_source_code,

View File

@ -1207,32 +1207,6 @@ void V8FileLogger::TimerEvent(v8::LogEventStatus se, const char* name) {
msg.WriteToLogFile();
}
void V8FileLogger::BasicBlockCounterEvent(const char* name, int block_id,
uint32_t count) {
if (!FLAG_turbo_profiling_log_builtins) return;
MSG_BUILDER();
msg << ProfileDataFromFileConstants::kBlockCounterMarker << kNext << name
<< kNext << block_id << kNext << count;
msg.WriteToLogFile();
}
void V8FileLogger::BasicBlockBranchEvent(const char* name, int true_block_id,
int false_block_id) {
if (!FLAG_turbo_profiling_log_builtins) return;
MSG_BUILDER();
msg << ProfileDataFromFileConstants::kBlockHintMarker << kNext << name
<< kNext << true_block_id << kNext << false_block_id;
msg.WriteToLogFile();
}
void V8FileLogger::BuiltinHashEvent(const char* name, int hash) {
if (!FLAG_turbo_profiling_log_builtins) return;
MSG_BUILDER();
msg << ProfileDataFromFileConstants::kBuiltinHashMarker << kNext << name
<< kNext << hash;
msg.WriteToLogFile();
}
bool V8FileLogger::is_logging() {
// Disable logging while the CPU profiler is running.
if (isolate_->is_profiling()) return false;

View File

@ -235,13 +235,6 @@ class V8FileLogger : public LogEventListener {
V8_EXPORT_PRIVATE void TimerEvent(v8::LogEventStatus se, const char* name);
void BasicBlockCounterEvent(const char* name, int block_id, uint32_t count);
void BasicBlockBranchEvent(const char* name, int true_block_id,
int false_block_id);
void BuiltinHashEvent(const char* name, int hash);
static void EnterExternal(Isolate* isolate);
static void LeaveExternal(Isolate* isolate);

View File

@ -560,6 +560,27 @@ RUNTIME_FUNCTION(Runtime_IncrementUseCounter) {
return ReadOnlyRoots(isolate).undefined_value();
}
RUNTIME_FUNCTION(Runtime_GetAndResetTurboProfilingData) {
HandleScope scope(isolate);
DCHECK_LE(args.length(), 2);
if (!BasicBlockProfiler::Get()->HasData(isolate)) {
THROW_NEW_ERROR_RETURN_FAILURE(
isolate,
NewTypeError(
MessageTemplate::kInvalid,
isolate->factory()->NewStringFromAsciiChecked("Runtime Call"),
isolate->factory()->NewStringFromAsciiChecked(
"V8 was not built with v8_enable_builtins_profiling=true")));
}
std::stringstream stats_stream;
BasicBlockProfiler::Get()->Log(isolate, stats_stream);
Handle<String> result =
isolate->factory()->NewStringFromAsciiChecked(stats_stream.str().c_str());
BasicBlockProfiler::Get()->ResetCounts(isolate);
return *result;
}
RUNTIME_FUNCTION(Runtime_GetAndResetRuntimeCallStats) {
HandleScope scope(isolate);
DCHECK_LE(args.length(), 2);

View File

@ -221,6 +221,7 @@ namespace internal {
F(FatalProcessOutOfMemoryInAllocateRaw, 0, 1) \
F(FatalProcessOutOfMemoryInvalidArrayLength, 0, 1) \
F(GetAndResetRuntimeCallStats, -1 /* <= 2 */, 1) \
F(GetAndResetTurboProfilingData, 0, 1) \
F(GetTemplateObject, 3, 1) \
F(IncrementUseCounter, 1, 1) \
F(BytecodeBudgetInterrupt, 1, 1) \

View File

@ -6,86 +6,84 @@
import subprocess
import argparse
import os
from pathlib import Path
parser = argparse.ArgumentParser(
description='Generate builtin PGO profiles. ' +
'The script has to be run from the root of a V8 checkout and updates the profiles in `tools/builtins-pgo`.'
)
parser.add_argument(
'arch', help='target cpu to build the profile for: x64 or arm64')
'v8_target_cpu', help='target cpu to build the profile for: x64 or arm64')
parser.add_argument(
'--target-cpu',
default=None,
help='target cpu for V8 binary (for simulator builds), by default it\'s equal to `arch`'
help='target cpu for V8 binary (for simulator builds), by default it\'s equal to `v8_target_cpu`'
)
parser.add_argument(
'benchmark_path',
help='path to benchmark runner .js file, usually JetStream2\'s `cli.js`')
help='path to benchmark runner .js file, usually JetStream2\'s `cli.js`',
type=Path)
parser.add_argument(
'--out-path',
default="out",
help='directory to be used for building V8, by default `./out`')
default=Path("out"),
help='directory to be used for building V8, by default `./out`',
type=Path)
args = parser.parse_args()
if args.target_cpu == None:
args.target_cpu = args.arch
args.target_cpu = args.v8_target_cpu
def run(cmd, **kwargs):
print(f"# CMD: {cmd} {kwargs}")
return subprocess.run(cmd, **kwargs)
def try_start_goma():
res = subprocess.run(["goma_ctl", "ensure_start"])
res = run(["goma_ctl", "ensure_start"])
print(res.returncode)
has_goma = res.returncode == 0
print("Detected Goma:", has_goma)
return has_goma
def _Write(path, content):
with open(path, "w") as f:
f.write(content)
def build_d8(path, gn_args):
if not os.path.exists(path):
os.makedirs(path)
_Write(os.path.join(path, "args.gn"), gn_args)
subprocess.run(["gn", "gen", path])
subprocess.run(["autoninja", "-C", path, "d8"])
return os.path.abspath(os.path.join(path, "d8"))
if not path.exists():
path.mkdir(parents=True, exists_ok=True)
with (path / "args.gn").open("w") as f:
f.write(gn_args)
run(["gn", "gen", path])
run(["autoninja", "-C", path, "d8"])
return (path / "d8").absolute()
if not os.path.exists(os.path.join("tools", "builtins-pgo", "generate.py")):
print("Please run this script from the root of a V8 checkout.")
tools_pgo_dir = Path(__file__).parent
v8_path = tools_pgo_dir.parent.parent
if not args.benchmark_path.is_file() or args.benchmark_path.suffix != ".js":
print(f"Invalid benchmark argument: {args.benchmark_path}")
exit(1)
if not os.path.splitext(args.benchmark_path)[1] != ".js":
print("\"")
has_goma_str = "true" if try_start_goma() else "false"
v8_path = os.getcwd()
has_goma = try_start_goma()
ARGS_TEMPLATE = """\
GN_ARGS_TEMPLATE = f"""\
is_debug = false
target_cpu = "{target_cpu}"
v8_target_cpu = "{v8_target_cpu}"
use_goma = {has_goma}
target_cpu = "{args.target_cpu}"
v8_target_cpu = "{args.v8_target_cpu}"
use_goma = {has_goma_str}
v8_enable_builtins_profiling = true
""".format(
v8_target_cpu=args.arch,
target_cpu=args.target_cpu,
has_goma="true" if has_goma else "false")
"""
for arch, gn_args in [(args.arch, ARGS_TEMPLATE)]:
build_dir = os.path.join(args.out_path,
arch + ".release.generate_builtin_pgo_profile")
for arch, gn_args in [(args.v8_target_cpu, GN_ARGS_TEMPLATE)]:
build_dir = args.out_path / f"{arch}.release.generate_builtin_pgo_profile"
d8_path = build_d8(build_dir, gn_args)
benchmark_dir = os.path.dirname(args.benchmark_path)
benchmark_file = os.path.basename(args.benchmark_path)
subprocess.run([d8_path, "--turbo-profiling-log-builtins", benchmark_file],
cwd=benchmark_dir)
get_hints_path = os.path.join("tools", "builtins-pgo", "get_hints.py")
log_path = os.path.join(benchmark_dir, "v8.log")
profile_path = os.path.join("tools", "builtins-pgo", arch + ".profile")
subprocess.run([get_hints_path, log_path, profile_path])
benchmark_dir = args.benchmark_path.parent
benchmark_file = args.benchmark_path.name
log_path = build_dir / "v8.builtins.pgo"
run([d8_path, f"--turbo-profiling-output={log_path}", benchmark_file],
cwd=benchmark_dir)
get_hints_path = tools_pgo_dir / "get_hints.py"
profile_path = tools_pgo_dir / f"{arch}.profile"
run([get_hints_path, log_path, profile_path])

View File

@ -20,8 +20,8 @@ condition is true or false.
Usage: get_hints.py [--min MIN] [--ratio RATIO] log_file output_file
where:
1. log_file is the v8.log file produced after running v8 with the
--turbo-profiling-log-builtins flag after building with
1. log_file is the file produced after running v8 with the
--turbo-profiling-output=log_file flag after building with
v8_enable_builtins_profiling = true.
2. output_file is the file which the hints and builtin hashes are written
to.
@ -37,7 +37,7 @@ import sys
PARSER = argparse.ArgumentParser(
description="A script that generates the branch hints for profile-guided \
optimization",
optimization" ,
epilog="Example:\n\tget_hints.py --min n1 --ratio n2 branches_file log_file output_file\""
)
PARSER.add_argument(
@ -45,18 +45,18 @@ PARSER.add_argument(
type=int,
default=1000,
help="The minimum count at which a basic block will be taken as a valid \
destination of a hinted branch decision")
destination of a hinted branch decision" )
PARSER.add_argument(
'--ratio',
type=int,
default=40,
help="The ratio at which, when compared to the alternative destination's \
count,a branch destination's count is considered sufficient to \
require a branch hint to be produced")
require a branch hint to be produced" )
PARSER.add_argument(
'log_file',
help="The v8.log file produced after running v8 with the \
--turbo-profiling-log-builtins flag after building with \
--turbo-profiling-output=log_file flag after building with \
v8_enable_builtins_profiling = true")
PARSER.add_argument(
'output_file',
@ -76,7 +76,7 @@ def parse_log_file(log_file):
try:
with open(log_file, "r") as f:
for line in f.readlines():
fields = line.split(',')
fields = line.split('\t')
if fields[0] == BLOCK_COUNT_MARKER:
builtin_name = fields[1]
block_id = int(fields[2])
@ -89,14 +89,20 @@ def parse_log_file(log_file):
elif fields[0] == BUILTIN_HASH_MARKER:
builtin_name = fields[1]
builtin_hash = int(fields[2])
builtin_hashes[builtin_name] = builtin_hash
if builtin_name in builtin_hashes:
old_hash = builtin_hashes[builtin_name]
assert old_hash == builtin_hash, (
"Merged PGO file contains multiple incompatible builtin "
"versions: {old_hash} != {builtin_hash}")
else:
builtin_hashes[builtin_name] = builtin_hash
elif fields[0] == BRANCH_HINT_MARKER:
builtin_name = fields[1]
true_block_id = int(fields[2])
false_block_id = int(fields[3])
branches.append((builtin_name, true_block_id, false_block_id))
except IOError as e:
print("Cannot read from {}. {}.".format(log_file, e.strerror))
print(f"Cannot read from {log_file}. {e.strerror}.")
sys.exit(1)
return [block_counts, branches, builtin_hashes]
@ -134,7 +140,7 @@ def write_hints_to_output(output_file, branch_hints, builtin_hashes):
f.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name,
builtin_hashes[builtin_name]))
except IOError as e:
print("Cannot write to {}. {}.".format(output_file, e.strerror))
print(f"Cannot read from {output_file}. {e.strerror}.")
sys.exit(1)