[wasm] Tweak constants for estimating code space size

It turned out that on arm and arm64 we over-estimated the code size of a Wasm module quite a bit. This CL adds some more output for the --trace-wasm-compilation-times flag, and adds a script to compute the factors we use for code size estimates from that output. I ran the script on a few benchmarks (an older Epic module, the current Photoshop module, and the benchmark from the linked bug), and adjusted the constants accordingly. Also, simplify the API of {ReservationSize} to only return a single number, and fail internally if we need to allocate more than the engine supports (which would only fail for artificially large modules). R=jkummerow@chromium.org Bug: chromium:1302310 Change-Id: I5b2c27ff3e360fb6738cf5dd697bcee09e106b6d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3522067 Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Reviewed-by: Maya Lekova <mslekova@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/main@{#79482}
2022-03-15 14:27:58 +01:00 · 2022-03-15 14:27:58 +01:00 · 4e983705e5
commit 4e983705e5
parent 7ff9683243
5 changed files with 160 additions and 66 deletions
--- a/src/compiler/pipeline.cc
+++ b/src/compiler/pipeline.cc
@ -3217,8 +3217,10 @@ void Pipeline::GenerateCodeForWasmFunction(
                   << time.InMilliseconds() << " ms and "
                   << zone_stats.GetMaxAllocatedBytes() << " / "
                   << zone_stats.GetTotalAllocatedBytes()
-                   << " max/total bytes, codesize " << codesize << " name "
-                   << data.info()->GetDebugName().get() << std::endl;
+                   << " max/total bytes; bodysize "
+                   << function_body.end - function_body.start << " codesize "
+                   << codesize << " name " << data.info()->GetDebugName().get()
+                   << std::endl;
  }

  DCHECK(result->succeeded());
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@ -8064,6 +8064,11 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper(

  TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"),
               "wasm.CompileWasmImportCallWrapper");
+  base::TimeTicks start_time;
+  if (V8_UNLIKELY(FLAG_trace_wasm_compilation_times)) {
+    start_time = base::TimeTicks::Now();
+  }
+
  //----------------------------------------------------------------------------
  // Create the Graph
  //----------------------------------------------------------------------------
@ -8099,9 +8104,19 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper(
  if (machine->Is32()) {
    incoming = GetI32WasmCallDescriptor(&zone, incoming);
  }
-  return Pipeline::GenerateCodeForWasmNativeStub(
+  wasm::WasmCompilationResult result = Pipeline::GenerateCodeForWasmNativeStub(
      incoming, mcgraph, CodeKind::WASM_TO_JS_FUNCTION, func_name,
      WasmStubAssemblerOptions(), source_position_table);
+
+  if (V8_UNLIKELY(FLAG_trace_wasm_compilation_times)) {
+    base::TimeDelta time = base::TimeTicks::Now() - start_time;
+    int codesize = result.code_desc.body_size();
+    StdoutStream{} << "Compiled WasmToJS wrapper " << func_name << ", took "
+                   << time.InMilliseconds() << " ms; codesize " << codesize
+                   << std::endl;
+  }
+
+  return result;
 }

 wasm::WasmCode* CompileWasmCapiCallWrapper(wasm::NativeModule* native_module,
--- a/src/wasm/wasm-code-manager.cc
+++ b/src/wasm/wasm-code-manager.cc
@ -599,28 +599,39 @@ size_t OverheadPerCodeSpace(uint32_t num_declared_functions) {
  return overhead;
 }

-// Returns both the minimum size to reserve, and an estimate how much should be
-// reserved.
-std::pair<size_t, size_t> ReservationSize(size_t code_size_estimate,
-                                          int num_declared_functions,
-                                          size_t total_reserved) {
+// Returns an estimate of how much code space should be reserved, limited by
+// {WasmCodeAllocator::kMaxCodeSpaceSize}. Reports a fatal out-of-memory error
+// if {minimum_size} (2 * overhead) already exceeds that limit.
+size_t ReservationSize(size_t code_size_estimate, int num_declared_functions,
+                       size_t total_reserved) {
  size_t overhead = OverheadPerCodeSpace(num_declared_functions);

-  // Reserve a power of two at least as big as any of
+  // Reserve the maximum of
  //   a) needed size + overhead (this is the minimum needed)
  //   b) 2 * overhead (to not waste too much space by overhead)
  //   c) 1/4 of current total reservation size (to grow exponentially)
  size_t minimum_size = 2 * overhead;
-  size_t suggested_size = base::bits::RoundUpToPowerOfTwo(
+  size_t suggested_size =
      std::max(std::max(RoundUp<kCodeAlignment>(code_size_estimate) + overhead,
                        minimum_size),
-               total_reserved / 4));
+               total_reserved / 4);
+
+  if (V8_UNLIKELY(minimum_size > WasmCodeAllocator::kMaxCodeSpaceSize)) {
+    constexpr auto format = base::StaticCharVector(
+        "wasm code reservation: required minimum (%zu) is bigger than "
+        "supported maximum (%zu)");
+    // Each "%zu" (3 chars) expands to at most digits10 + 1 decimal digits:
+    // digits10 only counts digits that are representable for every value, so
+    // the largest size_t values need one more digit.
+    constexpr int kMaxMessageLength =
+        format.size() - 6 + 2 * (std::numeric_limits<size_t>::digits10 + 1);
+    base::EmbeddedVector<char, kMaxMessageLength + 1> message;
+    SNPrintF(message, format.begin(), minimum_size,
+             WasmCodeAllocator::kMaxCodeSpaceSize);
+    V8::FatalProcessOutOfMemory(nullptr, message.begin());
+    UNREACHABLE();
+  }

  // Limit by the maximum supported code space size.
  size_t reserve_size =
      std::min(WasmCodeAllocator::kMaxCodeSpaceSize, suggested_size);

-  return {minimum_size, reserve_size};
+  return reserve_size;
 }

 #ifdef DEBUG
@ -709,14 +720,18 @@ base::Vector<byte> WasmCodeAllocator::AllocateForCodeInRegion(

    size_t total_reserved = 0;
    for (auto& vmem : owned_code_space_) total_reserved += vmem.size();
-    size_t min_reservation;
-    size_t reserve_size;
-    std::tie(min_reservation, reserve_size) = ReservationSize(
+    size_t reserve_size = ReservationSize(
        size, native_module->module()->num_declared_functions, total_reserved);
    VirtualMemory new_mem =
        code_manager->TryAllocate(reserve_size, reinterpret_cast<void*>(hint));
-    if (!new_mem.IsReserved() || new_mem.size() < min_reservation) {
-      V8::FatalProcessOutOfMemory(nullptr, "wasm code reservation");
+    if (!new_mem.IsReserved()) {
+      constexpr auto format = base::StaticCharVector(
+          "Cannot allocate more code space (%zu bytes, currently %zu)");
+      // Each "%zu" (3 chars) expands to at most digits10 + 1 decimal digits.
+      constexpr int kMaxMessageLength =
+          format.size() - 6 + 2 * (std::numeric_limits<size_t>::digits10 + 1);
+      base::EmbeddedVector<char, kMaxMessageLength + 1> message;
+      // Argument order follows the message: first the size of the reservation
+      // that just failed, then the amount of code space already reserved.
+      SNPrintF(message, format.begin(), reserve_size, total_reserved);
+      V8::FatalProcessOutOfMemory(nullptr, message.begin());
      UNREACHABLE();
    }

@ -2000,47 +2015,43 @@ namespace {
 // separate code spaces being allocated (compile time and runtime overhead),
 // choosing them too large results in over-reservation (virtual address space
 // only).
-// The current numbers have been determined on 2019-11-11 by clemensb@, based
-// on one small and one large module compiled from C++ by Emscripten. If in
-// doubt, they where chosen slightly larger than required, as over-reservation
-// is not a big issue currently.
-// Numbers will change when Liftoff or TurboFan evolve, other toolchains are
-// used to produce the wasm code, or characteristics of wasm modules on the
-// web change. They might require occasional tuning.
-// This patch might help to find reasonable numbers for any future adaptation:
-// https://crrev.com/c/1910945
+// If in doubt, choose the numbers slightly too large, because over-reservation
+// is less critical than multiple separate code spaces (especially on 64-bit).
+// Numbers can be determined by running benchmarks with
+// --trace-wasm-compilation-times, and piping the output through
+// tools/wasm/code-size-factors.py.
 #if V8_TARGET_ARCH_X64
-constexpr size_t kTurbofanFunctionOverhead = 20;
+constexpr size_t kTurbofanFunctionOverhead = 24;
 constexpr size_t kTurbofanCodeSizeMultiplier = 3;
-constexpr size_t kLiftoffFunctionOverhead = 60;
+constexpr size_t kLiftoffFunctionOverhead = 56;
 constexpr size_t kLiftoffCodeSizeMultiplier = 4;
-constexpr size_t kImportSize = 350;
+constexpr size_t kImportSize = 640;
 #elif V8_TARGET_ARCH_IA32
 constexpr size_t kTurbofanFunctionOverhead = 20;
 constexpr size_t kTurbofanCodeSizeMultiplier = 4;
-constexpr size_t kLiftoffFunctionOverhead = 60;
+constexpr size_t kLiftoffFunctionOverhead = 48;
 constexpr size_t kLiftoffCodeSizeMultiplier = 5;
-constexpr size_t kImportSize = 480;
+constexpr size_t kImportSize = 320;
 #elif V8_TARGET_ARCH_ARM
-constexpr size_t kTurbofanFunctionOverhead = 40;
+constexpr size_t kTurbofanFunctionOverhead = 44;
 constexpr size_t kTurbofanCodeSizeMultiplier = 4;
-constexpr size_t kLiftoffFunctionOverhead = 108;
-constexpr size_t kLiftoffCodeSizeMultiplier = 7;
-constexpr size_t kImportSize = 750;
+constexpr size_t kLiftoffFunctionOverhead = 96;
+constexpr size_t kLiftoffCodeSizeMultiplier = 5;
+constexpr size_t kImportSize = 550;
 #elif V8_TARGET_ARCH_ARM64
-constexpr size_t kTurbofanFunctionOverhead = 60;
-constexpr size_t kTurbofanCodeSizeMultiplier = 4;
-constexpr size_t kLiftoffFunctionOverhead = 80;
-constexpr size_t kLiftoffCodeSizeMultiplier = 7;
+constexpr size_t kTurbofanFunctionOverhead = 40;
+constexpr size_t kTurbofanCodeSizeMultiplier = 3;
+constexpr size_t kLiftoffFunctionOverhead = 68;
+constexpr size_t kLiftoffCodeSizeMultiplier = 4;
 constexpr size_t kImportSize = 750;
 #else
-// Other platforms should add their own estimates if needed. Numbers below are
-// the minimum of other architectures.
-constexpr size_t kTurbofanFunctionOverhead = 20;
-constexpr size_t kTurbofanCodeSizeMultiplier = 3;
-constexpr size_t kLiftoffFunctionOverhead = 60;
-constexpr size_t kLiftoffCodeSizeMultiplier = 4;
-constexpr size_t kImportSize = 350;
+// Other platforms should add their own estimates for best performance. Numbers
+// below are the maximum of other architectures.
+constexpr size_t kTurbofanFunctionOverhead = 44;
+constexpr size_t kTurbofanCodeSizeMultiplier = 4;
+constexpr size_t kLiftoffFunctionOverhead = 96;
+constexpr size_t kLiftoffCodeSizeMultiplier = 5;
+constexpr size_t kImportSize = 750;
 #endif
 }  // namespace

@ -2179,9 +2190,7 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule(
        committed + (max_committed_code_space_ - committed) / 2);
  }

-  size_t min_code_size;
-  size_t code_vmem_size;
-  std::tie(min_code_size, code_vmem_size) =
+  size_t code_vmem_size =
      ReservationSize(code_size_estimate, module->num_declared_functions, 0);

  // The '--wasm-max-initial-code-space-reservation' testing flag can be used to
@ -2192,18 +2201,6 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule(
    if (flag_max_bytes < code_vmem_size) code_vmem_size = flag_max_bytes;
  }

-  // If we cannot allocate enough code space, fail with an OOM message.
-  if (code_vmem_size < min_code_size) {
-    constexpr auto format = base::StaticCharVector(
-        "NewNativeModule cannot allocate required minimum (%zu)");
-    constexpr int kMaxMessageLength =
-        format.size() - 3 + std::numeric_limits<size_t>::digits10;
-    base::EmbeddedVector<char, kMaxMessageLength + 1> message;
-    SNPrintF(message, format.begin(), min_code_size);
-    V8::FatalProcessOutOfMemory(isolate, message.begin());
-    UNREACHABLE();
-  }
-
  // Try up to two times; getting rid of dead JSArrayBuffer allocations might
  // require two GCs because the first GC maybe incremental and may have
  // floating garbage.
--- a/tools/process-wasm-compilation-times.py
+++ b/tools/process-wasm-compilation-times.py
@ -70,13 +70,13 @@ class Function:
    self.has_tf = True
    # 0        1        2  3     4         5    6 7  8   9     10 11
    # Compiled function #6 using TurboFan, took 0 ms and 14440 / 44656
-    # 12        13     14       15 16   17
-    # max/total bytes, codesize 24 name wasm-function#6
+    # 12        13     14       15 16       17 18   19
+    # max/total bytes; bodysize 12 codesize 24 name wasm-function#6
    self.time_tf = int(words[6])
    self.mem_tf_max = int(words[9])
    self.mem_tf_total = int(words[11])
-    self.size_tf = int(words[15])
-    self.name = words[17]
+    self.size_tf = int(words[17])
+    self.name = words[19]

  def AddLiftoffLine(self, words):
    assert self.index == words[2], "wrong function"
@ -109,7 +109,8 @@ if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help", "help"):
 with open(sys.argv[1], "r") as f:
  for line in f.readlines():
    words = line.strip().split(" ")
-    if words[0] != "Compiled": continue
+    if words[0] != "Compiled" or words[1] != "function":
+      continue
    name = words[2]
    RegisterName(name)
    if name in funcs_dict:
--- a/tools/wasm/code-size-factors.py
+++ b/tools/wasm/code-size-factors.py
@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# vim:fenc=utf-8:ts=2:sw=2:softtabstop=2:expandtab:
+# Copyright 2022 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import sys
+import re
+
+liftoff_regex = re.compile('^Compiled function .* using Liftoff, '
+                           '.*bodysize ([0-9]+) codesize ([0-9]+)$')
+turbofan_regex = re.compile('^Compiled function .* using TurboFan, '
+                            '.*bodysize ([0-9]+) codesize ([0-9]+) ')
+wasm2js_regex = re.compile('^Compiled WasmToJS wrapper .* '
+                           'codesize ([0-9]+)$')
+
+
+def main():
+  """Collects code size samples from stdin and prints the derived estimates."""
+  print('Reading --trace-wasm-compilation-times lines from stdin...')
+  # Per-tier samples are [bodysize, codesize] pairs; wrappers only have a size.
+  tier_regexes = (('Liftoff', liftoff_regex), ('TurboFan', turbofan_regex))
+  samples = {tier: [] for tier, _ in tier_regexes}
+  wrapper_sizes = []
+  for trace_line in sys.stdin:
+    for tier, regex in tier_regexes:
+      match(trace_line, regex, samples[tier])
+    match_wasm2js(trace_line, wrapper_sizes)
+
+  for tier, _ in tier_regexes:
+    evaluate(tier, samples[tier])
+  evaluate_wasm2js(wrapper_sizes)
+
+
+def match(line, regex, array):
+  """Appends [group 1, group 2] of `regex` as ints to `array` on a match."""
+  found = regex.match(line)
+  if found is None:
+    return
+  first, second = (int(found.group(index)) for index in (1, 2))
+  array.append([first, second])
+
+
+def match_wasm2js(line, array):
+  """Appends the code size of a WasmToJS wrapper trace line to `array`."""
+  found = wasm2js_regex.match(line)
+  if found is None:
+    return
+  array.append(int(found.group(1)))
+
+
+def evaluate(name, values):
+  """Prints linear code size estimates for one compiler tier.
+
+  Args:
+    name: Tier name used in the output (e.g. 'Liftoff').
+    values: List of [bodysize, codesize] pairs taken from the trace lines.
+
+  Two estimates are printed: a least-squares trend line
+  (codesize ~ base + factor * bodysize), and a simple analysis which takes
+  the smallest observed code size as base. An estimate whose denominator
+  would be zero (a single sample, all body sizes equal, or all body sizes
+  zero) is reported as not computable instead of raising ZeroDivisionError.
+  """
+  n = len(values)
+  if n == 0:
+    print(f'No values for {name}')
+    return
+
+  print(f'Computing base and factor for {name} based on {n} values')
+  sum_xy = sum(x * y for [x, y] in values)
+  sum_x = sum(x for [x, y] in values)
+  sum_y = sum(y for [x, y] in values)
+  sum_xx = sum(x * x for [x, y] in values)
+
+  # Ordinary least squares. The denominator is n^2 times the variance of the
+  # body sizes, so it is zero exactly when all body sizes are identical.
+  denominator = n * sum_xx - sum_x * sum_x
+  if denominator == 0:
+    print(f'--> [{name}] Trend line: not computable '
+          f'(all body sizes are identical)')
+  else:
+    factor = (n * sum_xy - sum_x * sum_y) / denominator
+    base = (sum_y - factor * sum_x) / n
+    print(f'--> [{name}] Trend line: base: {base:.2f}, factor {factor:.2f}')
+
+  min_y = min(y for [x, y] in values)
+
+  if sum_x == 0:
+    print(f'--> [{name}] Simple analysis: Min {min_y}, '
+          f'factor not computable (all body sizes are zero)')
+  else:
+    simple_factor = (sum_y - n * min_y) / sum_x
+    print(f'--> [{name}] Simple analysis: Min {min_y}, '
+          f'factor {simple_factor:.2f}')
+
+
+def evaluate_wasm2js(values):
+  """Prints count, min, max and average of the WasmToJS wrapper code sizes."""
+  count = len(values)
+  if not count:
+    print('No wasm2js wrappers')
+    return
+
+  smallest, largest = min(values), max(values)
+  average = sum(values) / count
+  print(f'--> [Wasm2js wrappers] {count} compiled, size min {smallest}, '
+        f'max {largest}, avg {average:.2f}')
+
+
+main()