From 012fa89948557ddc9d8bea1f5fee9e976a57a966 Mon Sep 17 00:00:00 2001 From: Camillo Date: Wed, 10 Aug 2022 15:10:56 +0200 Subject: [PATCH] [runtime] Improve builtins PGO logging - Add explicit --turbo-profiling-output and --turbo-profiling-input - Rename --turbo-profiling-log-file to --turbo-profiling-input - No longer log PGO data to v8.log - Add runtime %GetAndResetTurboProfilingData helper function for more controlled logging within chrome - Rewrite generate.py script to use more python3 Bug: v8:10470 Change-Id: Ib817b5c3793a0a7ae77103075ea2d6f6d0282150 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3820381 Reviewed-by: Igor Sheludko Commit-Queue: Camillo Bruni Cr-Commit-Position: refs/heads/main@{#82350} --- BUILD.gn | 4 +- src/builtins/profile-data-reader.cc | 2 +- src/diagnostics/basic-block-profiler.cc | 38 ++++++++--- src/diagnostics/basic-block-profiler.h | 5 +- src/execution/isolate.cc | 14 +++- src/flags/flag-definitions.h | 10 +-- src/init/v8.cc | 3 +- src/logging/log.cc | 26 -------- src/logging/log.h | 7 -- src/runtime/runtime-internal.cc | 21 ++++++ src/runtime/runtime.h | 1 + tools/builtins-pgo/generate.py | 88 ++++++++++++------------- tools/builtins-pgo/get_hints.py | 26 +++++--- 13 files changed, 134 insertions(+), 111 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index 6c4d7993aa..dd388b9a98 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -181,7 +181,7 @@ declare_args() { # 2. Run your chosen workload with the --turbo-profiling-log-builtins flag. # For Chrome, the invocation might look like this: # chrome --no-sandbox --disable-extensions - # --js-flags="--turbo-profiling-log-builtins --logfile=path/to/v8.log" + # --js-flags="--turbo-profiler-output=v8.builtins.pgo" # "http://localhost/test-suite" # 3. Run tools/builtins-pgo/get_hints.py to produce the branch hints, # selecting min_count and threshold_ratio as you wish. @@ -2209,7 +2209,7 @@ template("run_mksnapshot") { if (v8_builtins_profiling_log_file != "") { sources += [ v8_builtins_profiling_log_file ] args += [ - "--turbo-profiling-log-file", + "--turbo-profiling-input", rebase_path(v8_builtins_profiling_log_file, root_build_dir), ] } diff --git a/src/builtins/profile-data-reader.cc b/src/builtins/profile-data-reader.cc index f151907bf3..f35eb0396d 100644 --- a/src/builtins/profile-data-reader.cc +++ b/src/builtins/profile-data-reader.cc @@ -46,7 +46,7 @@ EnsureInitProfileData() { if (initialized) return *data.get(); initialized = true; - const char* filename = FLAG_turbo_profiling_log_file; + const char* filename = FLAG_turbo_profiling_input; if (filename == nullptr) return *data.get(); std::ifstream file(filename); CHECK_WITH_MSG(file.good(), "Can't read log file"); diff --git a/src/diagnostics/basic-block-profiler.cc b/src/diagnostics/basic-block-profiler.cc index 179d050ad0..7ecdd0c139 100644 --- a/src/diagnostics/basic-block-profiler.cc +++ b/src/diagnostics/basic-block-profiler.cc @@ -9,6 +9,7 @@ #include #include "src/base/lazy-instance.h" +#include "src/builtins/profile-data-reader.h" #include "src/heap/heap-inl.h" #include "src/objects/shared-function-info-inl.h" @@ -159,7 +160,7 @@ bool BasicBlockProfiler::HasData(Isolate* isolate) { isolate->heap()->basic_block_profiling_data().Length() > 0; } -void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) { +void BasicBlockProfiler::Print(Isolate* isolate, std::ostream& os) { os << "---- Start Profiling Data ----" << std::endl; for (const auto& data : data_list_) { os << *data; @@ -172,10 +173,7 @@ void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) { BasicBlockProfilerData data( handle(OnHeapBasicBlockProfilerData::cast(list->Get(i)), isolate), isolate); - // Print data for builtins to both stdout and the log file, if logging is - // enabled. os << data; - data.Log(isolate); // Ensure that all builtin names are unique; otherwise profile-guided // optimization might get confused. CHECK(builtin_names.insert(data.function_name_).second); @@ -183,6 +181,22 @@ void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) { os << "---- End Profiling Data ----" << std::endl; } +void BasicBlockProfiler::Log(Isolate* isolate, std::ostream& os) { + HandleScope scope(isolate); + Handle list(isolate->heap()->basic_block_profiling_data(), + isolate); + std::unordered_set builtin_names; + for (int i = 0; i < list->Length(); ++i) { + BasicBlockProfilerData data( + handle(OnHeapBasicBlockProfilerData::cast(list->Get(i)), isolate), + isolate); + data.Log(isolate, os); + // Ensure that all builtin names are unique; otherwise profile-guided + // optimization might get confused. + CHECK(builtin_names.insert(data.function_name_).second); + } +} + std::vector BasicBlockProfiler::GetCoverageBitmap(Isolate* isolate) { DisallowGarbageCollection no_gc; ArrayList list(isolate->heap()->basic_block_profiling_data()); @@ -198,21 +212,25 @@ std::vector BasicBlockProfiler::GetCoverageBitmap(Isolate* isolate) { return out; } -void BasicBlockProfilerData::Log(Isolate* isolate) { +void BasicBlockProfilerData::Log(Isolate* isolate, std::ostream& os) { bool any_nonzero_counter = false; + constexpr char kNext[] = "\t"; for (size_t i = 0; i < n_blocks(); ++i) { if (counts_[i] > 0) { any_nonzero_counter = true; - isolate->v8_file_logger()->BasicBlockCounterEvent( - function_name_.c_str(), block_ids_[i], counts_[i]); + os << ProfileDataFromFileConstants::kBlockCounterMarker << kNext + << function_name_.c_str() << kNext << block_ids_[i] << kNext + << counts_[i] << std::endl; } } if (any_nonzero_counter) { for (size_t i = 0; i < branches_.size(); ++i) { - isolate->v8_file_logger()->BasicBlockBranchEvent( - function_name_.c_str(), branches_[i].first, branches_[i].second); + os << ProfileDataFromFileConstants::kBlockHintMarker << kNext + << function_name_.c_str() << kNext << branches_[i].first << kNext + << branches_[i].second << std::endl; } - isolate->v8_file_logger()->BuiltinHashEvent(function_name_.c_str(), hash_); + os << ProfileDataFromFileConstants::kBuiltinHashMarker << kNext + << function_name_.c_str() << kNext << hash_ << std::endl; } } diff --git a/src/diagnostics/basic-block-profiler.h b/src/diagnostics/basic-block-profiler.h index ffb6c01a7a..ae4bee99b7 100644 --- a/src/diagnostics/basic-block-profiler.h +++ b/src/diagnostics/basic-block-profiler.h @@ -50,7 +50,7 @@ class BasicBlockProfilerData { // happen on the main thread during finalization of the compilation. Handle CopyToJSHeap(Isolate* isolate); - void Log(Isolate* isolate); + void Log(Isolate* isolate, std::ostream& os); private: friend class BasicBlockProfiler; @@ -84,7 +84,8 @@ class BasicBlockProfiler { BasicBlockProfilerData* NewData(size_t n_blocks); V8_EXPORT_PRIVATE void ResetCounts(Isolate* isolate); V8_EXPORT_PRIVATE bool HasData(Isolate* isolate); - V8_EXPORT_PRIVATE void Print(std::ostream& os, Isolate* isolate); + V8_EXPORT_PRIVATE void Print(Isolate* isolate, std::ostream& os); + V8_EXPORT_PRIVATE void Log(Isolate* isolate, std::ostream& os); // Coverage bitmap in this context includes only on heap BasicBlockProfiler // data. It is used to export coverage of builtins function loaded from diff --git a/src/execution/isolate.cc b/src/execution/isolate.cc index 4609f4eeb0..69493fff1b 100644 --- a/src/execution/isolate.cc +++ b/src/execution/isolate.cc @@ -4458,9 +4458,19 @@ void Isolate::DumpAndResetStats() { } #endif // V8_RUNTIME_CALL_STATS if (BasicBlockProfiler::Get()->HasData(this)) { - StdoutStream out; - BasicBlockProfiler::Get()->Print(out, this); + if (FLAG_turbo_profiling_output) { + auto f = std::fopen(FLAG_turbo_profiling_output, "a"); + OFStream pgo_stream(f); + BasicBlockProfiler::Get()->Log(this, pgo_stream); + } else { + StdoutStream out; + BasicBlockProfiler::Get()->Print(this, out); + } BasicBlockProfiler::Get()->ResetCounts(this); + } else { + // Only log builtins PGO data if v8 was built with + // v8_enable_builtins_profiling=true + CHECK_NULL(FLAG_turbo_profiling_output); } } diff --git a/src/flags/flag-definitions.h b/src/flags/flag-definitions.h index 7d7db31303..a9d63938b2 100644 --- a/src/flags/flag-definitions.h +++ b/src/flags/flag-definitions.h @@ -927,9 +927,11 @@ DEFINE_BOOL(turbo_profiling_verbose, false, "enable basic block profiling in TurboFan, and include each " "function's schedule and disassembly in the output") DEFINE_IMPLICATION(turbo_profiling_verbose, turbo_profiling) -DEFINE_BOOL(turbo_profiling_log_builtins, false, - "emit data about basic block usage in builtins to v8.log (requires " - "that V8 was built with v8_enable_builtins_profiling=true)") +DEFINE_STRING( + turbo_profiling_output, nullptr, + "emit data about basic block usage in builtins to this file " + "(requires that V8 was built with v8_enable_builtins_profiling=true)") + DEFINE_BOOL( warn_about_builtin_profile_data, false, "flag for mksnapshot, emit warnings when applying builtin profile data") @@ -1946,7 +1948,7 @@ DEFINE_BOOL(target_is_simulator, false, "Instruct mksnapshot that the target is meant to run in the " "simulator and it can generate simulator-specific instructions. " "(mksnapshot only)") -DEFINE_STRING(turbo_profiling_log_file, nullptr, +DEFINE_STRING(turbo_profiling_input, nullptr, "Path of the input file containing basic block counters for " "builtins. (mksnapshot only)") diff --git a/src/init/v8.cc b/src/init/v8.cc index 02c0e9cb49..18fab17246 100644 --- a/src/init/v8.cc +++ b/src/init/v8.cc @@ -125,8 +125,7 @@ void V8::Initialize() { CHECK(platform_); // Update logging information before enforcing flag implications. - FlagValue* log_all_flags[] = {&FLAG_turbo_profiling_log_builtins, - &FLAG_log_all, + FlagValue* log_all_flags[] = {&FLAG_log_all, &FLAG_log_code, &FLAG_log_code_disassemble, &FLAG_log_source_code, diff --git a/src/logging/log.cc b/src/logging/log.cc index 1f670c14f9..3340c8d23a 100644 --- a/src/logging/log.cc +++ b/src/logging/log.cc @@ -1207,32 +1207,6 @@ void V8FileLogger::TimerEvent(v8::LogEventStatus se, const char* name) { msg.WriteToLogFile(); } -void V8FileLogger::BasicBlockCounterEvent(const char* name, int block_id, - uint32_t count) { - if (!FLAG_turbo_profiling_log_builtins) return; - MSG_BUILDER(); - msg << ProfileDataFromFileConstants::kBlockCounterMarker << kNext << name - << kNext << block_id << kNext << count; - msg.WriteToLogFile(); -} - -void V8FileLogger::BasicBlockBranchEvent(const char* name, int true_block_id, - int false_block_id) { - if (!FLAG_turbo_profiling_log_builtins) return; - MSG_BUILDER(); - msg << ProfileDataFromFileConstants::kBlockHintMarker << kNext << name - << kNext << true_block_id << kNext << false_block_id; - msg.WriteToLogFile(); -} - -void V8FileLogger::BuiltinHashEvent(const char* name, int hash) { - if (!FLAG_turbo_profiling_log_builtins) return; - MSG_BUILDER(); - msg << ProfileDataFromFileConstants::kBuiltinHashMarker << kNext << name - << kNext << hash; - msg.WriteToLogFile(); -} - bool V8FileLogger::is_logging() { // Disable logging while the CPU profiler is running. if (isolate_->is_profiling()) return false; diff --git a/src/logging/log.h b/src/logging/log.h index a959aa7f75..40d61f5515 100644 --- a/src/logging/log.h +++ b/src/logging/log.h @@ -235,13 +235,6 @@ class V8FileLogger : public LogEventListener { V8_EXPORT_PRIVATE void TimerEvent(v8::LogEventStatus se, const char* name); - void BasicBlockCounterEvent(const char* name, int block_id, uint32_t count); - - void BasicBlockBranchEvent(const char* name, int true_block_id, - int false_block_id); - - void BuiltinHashEvent(const char* name, int hash); - static void EnterExternal(Isolate* isolate); static void LeaveExternal(Isolate* isolate); diff --git a/src/runtime/runtime-internal.cc b/src/runtime/runtime-internal.cc index 5c9ba18abf..f9b84621e3 100644 --- a/src/runtime/runtime-internal.cc +++ b/src/runtime/runtime-internal.cc @@ -560,6 +560,27 @@ RUNTIME_FUNCTION(Runtime_IncrementUseCounter) { return ReadOnlyRoots(isolate).undefined_value(); } +RUNTIME_FUNCTION(Runtime_GetAndResetTurboProfilingData) { + HandleScope scope(isolate); + DCHECK_LE(args.length(), 2); + if (!BasicBlockProfiler::Get()->HasData(isolate)) { + THROW_NEW_ERROR_RETURN_FAILURE( + isolate, + NewTypeError( + MessageTemplate::kInvalid, + isolate->factory()->NewStringFromAsciiChecked("Runtime Call"), + isolate->factory()->NewStringFromAsciiChecked( + "V8 was not built with v8_enable_builtins_profiling=true"))); + } + + std::stringstream stats_stream; + BasicBlockProfiler::Get()->Log(isolate, stats_stream); + Handle result = + isolate->factory()->NewStringFromAsciiChecked(stats_stream.str().c_str()); + BasicBlockProfiler::Get()->ResetCounts(isolate); + return *result; +} + RUNTIME_FUNCTION(Runtime_GetAndResetRuntimeCallStats) { HandleScope scope(isolate); DCHECK_LE(args.length(), 2); diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index c23b3f88fe..283a8dad10 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -221,6 +221,7 @@ namespace internal { F(FatalProcessOutOfMemoryInAllocateRaw, 0, 1) \ F(FatalProcessOutOfMemoryInvalidArrayLength, 0, 1) \ F(GetAndResetRuntimeCallStats, -1 /* <= 2 */, 1) \ + F(GetAndResetTurboProfilingData, 0, 1) \ F(GetTemplateObject, 3, 1) \ F(IncrementUseCounter, 1, 1) \ F(BytecodeBudgetInterrupt, 1, 1) \ diff --git a/tools/builtins-pgo/generate.py b/tools/builtins-pgo/generate.py index a77b7bb9a6..8743a920d5 100755 --- a/tools/builtins-pgo/generate.py +++ b/tools/builtins-pgo/generate.py @@ -6,86 +6,84 @@ import subprocess import argparse -import os +from pathlib import Path parser = argparse.ArgumentParser( description='Generate builtin PGO profiles. ' + 'The script has to be run from the root of a V8 checkout and updates the profiles in `tools/builtins-pgo`.' ) parser.add_argument( - 'arch', help='target cpu to build the profile for: x64 or arm64') + 'v8_target_cpu', help='target cpu to build the profile for: x64 or arm64') parser.add_argument( '--target-cpu', default=None, - help='target cpu for V8 binary (for simulator builds), by default it\'s equal to `arch`' + help='target cpu for V8 binary (for simulator builds), by default it\'s equal to `v8_target_cpu`' ) parser.add_argument( 'benchmark_path', - help='path to benchmark runner .js file, usually JetStream2\'s `cli.js`') + help='path to benchmark runner .js file, usually JetStream2\'s `cli.js`', + type=Path) parser.add_argument( '--out-path', - default="out", - help='directory to be used for building V8, by default `./out`') + default=Path("out"), + help='directory to be used for building V8, by default `./out`', + type=Path) args = parser.parse_args() if args.target_cpu == None: - args.target_cpu = args.arch + args.target_cpu = args.v8_target_cpu + + +def run(cmd, **kwargs): + print(f"# CMD: {cmd} {kwargs}") + return subprocess.run(cmd, **kwargs) def try_start_goma(): - res = subprocess.run(["goma_ctl", "ensure_start"]) + res = run(["goma_ctl", "ensure_start"]) print(res.returncode) has_goma = res.returncode == 0 print("Detected Goma:", has_goma) return has_goma -def _Write(path, content): - with open(path, "w") as f: - f.write(content) - def build_d8(path, gn_args): - if not os.path.exists(path): - os.makedirs(path) - _Write(os.path.join(path, "args.gn"), gn_args) - subprocess.run(["gn", "gen", path]) - subprocess.run(["autoninja", "-C", path, "d8"]) - return os.path.abspath(os.path.join(path, "d8")) + if not path.exists(): + path.mkdir(parents=True, exists_ok=True) + with (path / "args.gn").open("w") as f: + f.write(gn_args) + run(["gn", "gen", path]) + run(["autoninja", "-C", path, "d8"]) + return (path / "d8").absolute() -if not os.path.exists(os.path.join("tools", "builtins-pgo", "generate.py")): - print("Please run this script from the root of a V8 checkout.") +tools_pgo_dir = Path(__file__).parent +v8_path = tools_pgo_dir.parent.parent + +if not args.benchmark_path.is_file() or args.benchmark_path.suffix != ".js": + print(f"Invalid benchmark argument: {args.benchmark_path}") exit(1) -if not os.path.splitext(args.benchmark_path)[1] != ".js": - print("\"") +has_goma_str = "true" if try_start_goma() else "false" -v8_path = os.getcwd() - -has_goma = try_start_goma() - -ARGS_TEMPLATE = """\ +GN_ARGS_TEMPLATE = f"""\ is_debug = false -target_cpu = "{target_cpu}" -v8_target_cpu = "{v8_target_cpu}" -use_goma = {has_goma} +target_cpu = "{args.target_cpu}" +v8_target_cpu = "{args.v8_target_cpu}" +use_goma = {has_goma_str} v8_enable_builtins_profiling = true -""".format( - v8_target_cpu=args.arch, - target_cpu=args.target_cpu, - has_goma="true" if has_goma else "false") +""" -for arch, gn_args in [(args.arch, ARGS_TEMPLATE)]: - build_dir = os.path.join(args.out_path, - arch + ".release.generate_builtin_pgo_profile") +for arch, gn_args in [(args.v8_target_cpu, GN_ARGS_TEMPLATE)]: + build_dir = args.out_path / f"{arch}.release.generate_builtin_pgo_profile" d8_path = build_d8(build_dir, gn_args) - benchmark_dir = os.path.dirname(args.benchmark_path) - benchmark_file = os.path.basename(args.benchmark_path) - subprocess.run([d8_path, "--turbo-profiling-log-builtins", benchmark_file], - cwd=benchmark_dir) - get_hints_path = os.path.join("tools", "builtins-pgo", "get_hints.py") - log_path = os.path.join(benchmark_dir, "v8.log") - profile_path = os.path.join("tools", "builtins-pgo", arch + ".profile") - subprocess.run([get_hints_path, log_path, profile_path]) + benchmark_dir = args.benchmark_path.parent + benchmark_file = args.benchmark_path.name + log_path = build_dir / "v8.builtins.pgo" + run([d8_path, f"--turbo-profiling-output={log_path}", benchmark_file], + cwd=benchmark_dir) + get_hints_path = tools_pgo_dir / "get_hints.py" + profile_path = tools_pgo_dir / f"{arch}.profile" + run([get_hints_path, log_path, profile_path]) diff --git a/tools/builtins-pgo/get_hints.py b/tools/builtins-pgo/get_hints.py index b717dc05ca..a91a75db44 100755 --- a/tools/builtins-pgo/get_hints.py +++ b/tools/builtins-pgo/get_hints.py @@ -20,8 +20,8 @@ condition is true or false. Usage: get_hints.py [--min MIN] [--ratio RATIO] log_file output_file where: - 1. log_file is the v8.log file produced after running v8 with the - --turbo-profiling-log-builtins flag after building with + 1. log_file is the file produced after running v8 with the + --turbo-profiling-output=log_file flag after building with v8_enable_builtins_profiling = true. 2. output_file is the file which the hints and builtin hashes are written to. @@ -37,7 +37,7 @@ import sys PARSER = argparse.ArgumentParser( description="A script that generates the branch hints for profile-guided \ - optimization", + optimization" , epilog="Example:\n\tget_hints.py --min n1 --ratio n2 branches_file log_file output_file\"" ) PARSER.add_argument( @@ -45,18 +45,18 @@ PARSER.add_argument( type=int, default=1000, help="The minimum count at which a basic block will be taken as a valid \ - destination of a hinted branch decision") + destination of a hinted branch decision" ) PARSER.add_argument( '--ratio', type=int, default=40, help="The ratio at which, when compared to the alternative destination's \ count,a branch destination's count is considered sufficient to \ - require a branch hint to be produced") + require a branch hint to be produced" ) PARSER.add_argument( 'log_file', help="The v8.log file produced after running v8 with the \ - --turbo-profiling-log-builtins flag after building with \ + --turbo-profiling-output=log_file flag after building with \ v8_enable_builtins_profiling = true") PARSER.add_argument( 'output_file', @@ -76,7 +76,7 @@ def parse_log_file(log_file): try: with open(log_file, "r") as f: for line in f.readlines(): - fields = line.split(',') + fields = line.split('\t') if fields[0] == BLOCK_COUNT_MARKER: builtin_name = fields[1] block_id = int(fields[2]) @@ -89,14 +89,20 @@ def parse_log_file(log_file): elif fields[0] == BUILTIN_HASH_MARKER: builtin_name = fields[1] builtin_hash = int(fields[2]) - builtin_hashes[builtin_name] = builtin_hash + if builtin_name in builtin_hashes: + old_hash = builtin_hashes[builtin_name] + assert old_hash == builtin_hash, ( + "Merged PGO file contains multiple incompatible builtin " + "versions: {old_hash} != {builtin_hash}") + else: + builtin_hashes[builtin_name] = builtin_hash elif fields[0] == BRANCH_HINT_MARKER: builtin_name = fields[1] true_block_id = int(fields[2]) false_block_id = int(fields[3]) branches.append((builtin_name, true_block_id, false_block_id)) except IOError as e: - print("Cannot read from {}. {}.".format(log_file, e.strerror)) + print(f"Cannot read from {log_file}. {e.strerror}.") sys.exit(1) return [block_counts, branches, builtin_hashes] @@ -134,7 +140,7 @@ def write_hints_to_output(output_file, branch_hints, builtin_hashes): f.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name, builtin_hashes[builtin_name])) except IOError as e: - print("Cannot write to {}. {}.".format(output_file, e.strerror)) + print(f"Cannot read from {output_file}. {e.strerror}.") sys.exit(1)