[foozzie] Compare baseline/default in every run

Previously we ran the baseline (e.g. ignition) and one random secondary comparison configuration (e.g. turbofan) from the list of experiments. But Clusterfuzz imposes limitations on the total number of fuzz tests. Therefore this change enables more throughput by always running the default configuration (ignition_turbo, as V8 is shipped) in addition to the baseline and the secondary configuration. This doubles the number of comparisons we run, with less than 50% additional runtime, since the slow baseline configuration is only run once. The experiments table is updated accordingly. Explicit entries running ignition_turbo are removed (as it always runs now); instead, the relative percentages of some of the other configurations are increased. We also get a few new configurations that didn't run before (e.g. forcing the slow path on x86). No-Try: true Bug: chromium:1100114 Change-Id: I69b2a41d78c06e556b309743a2aace1053c22f91 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2270307 Reviewed-by: Liviu Rau <liviurau@chromium.org> Commit-Queue: Michael Achenbach <machenbach@chromium.org> Cr-Commit-Position: refs/heads/master@{#68607}
2020-06-29 20:33:42 +02:00 · 2020-06-29 20:33:42 +02:00 · cca290945d
commit cca290945d
parent c1a57bc2d2
3 changed files with 56 additions and 46 deletions
--- a/tools/clusterfuzz/v8_foozzie.py
+++ b/tools/clusterfuzz/v8_foozzie.py
@ -208,11 +208,11 @@ class ExecutionArgumentsConfig(object):
        'default: bundled in the directory of this script',
        default=DEFAULT_D8)

-  def make_options(self, options):
+  def make_options(self, options, default_config=None):
    def get(name):
      return getattr(options, '%s_%s' % (self.label, name))

-    config = get('config')
+    config = default_config or get('config')
    assert config in CONFIGS

    d8 = get('d8')
@ -270,6 +270,8 @@ def parse_args():

  options.first = first_config_arguments.make_options(options)
  options.second = second_config_arguments.make_options(options)
+  options.default = second_config_arguments.make_options(
+      options, DEFAULT_CONFIG)

  # Ensure we make a valid comparison.
  if (options.first.d8 == options.second.d8 and
@ -367,14 +369,14 @@ def cluster_failures(source, known_failures=None):
  return long_key[:ORIGINAL_SOURCE_HASH_LENGTH]


-def run_comparisons(suppress, first_config, second_config, test_case, timeout,
+def run_comparisons(suppress, execution_configs, test_case, timeout,
                    verbose=True, ignore_crashes=True, source_key=None):
-  """Runs two configurations and bails out on output difference.
+  """Runs different configurations and bails out on output difference.

  Args:
    suppress: The helper object for textual suppressions.
-    first_config: The baseline configuration to run and compare.
-    second_config: The secondary configuration to run and compare.
+    execution_configs: Two or more configurations to run. The first one will be
+        used as baseline to compare all others to.
    test_case: The test case to run.
    timeout: Timeout in seconds for one run.
    verbose: Prints the executed commands.
@ -384,27 +386,33 @@ def run_comparisons(suppress, first_config, second_config, test_case, timeout,
    source_key: A fixed source key. If not given, it will be inferred from the
        output.
  """
-  first_config_output = first_config.command.run(
+  run_test_case = lambda config: config.command.run(
      test_case, timeout=timeout, verbose=verbose)

-  second_config_output = second_config.command.run(
-      test_case, timeout=timeout, verbose=verbose)
+  # Run the baseline configuration.
+  baseline_config = execution_configs[0]
+  baseline_output = run_test_case(baseline_config)
+  has_crashed = baseline_output.HasCrashed()

-  difference, source = suppress.diff(first_config_output, second_config_output)
+  # Iterate over the remaining configurations, run and compare.
+  for comparison_config in execution_configs[1:]:
+    comparison_output = run_test_case(comparison_config)
+    has_crashed = has_crashed or comparison_output.HasCrashed()
+    difference, source = suppress.diff(baseline_output, comparison_output)

-  if difference:
-    # Only bail out due to suppressed output if there was a difference. If a
-    # suppression doesn't show up anymore in the statistics, we might want to
-    # remove it.
-    fail_bailout(first_config_output, suppress.ignore_by_output)
-    fail_bailout(second_config_output, suppress.ignore_by_output)
+    if difference:
+      # Only bail out due to suppressed output if there was a difference. If a
+      # suppression doesn't show up anymore in the statistics, we might want to
+      # remove it.
+      fail_bailout(baseline_output, suppress.ignore_by_output)
+      fail_bailout(comparison_output, suppress.ignore_by_output)

-    source_key = source_key or cluster_failures(source)
-    raise FailException(format_difference(
-        source_key, first_config, second_config,
-        first_config_output, second_config_output, difference, source))
+      source_key = source_key or cluster_failures(source)
+      raise FailException(format_difference(
+          source_key, baseline_config, comparison_config,
+          baseline_output, comparison_output, difference, source))

-  if first_config_output.HasCrashed() or second_config_output.HasCrashed():
+  if has_crashed:
    if ignore_crashes:
      # Show if a crash has happened in one of the runs and no difference was
      # detected. This is only for the statistics during experiments.
@ -429,14 +437,20 @@ def main():
  content_bailout(get_meta_data(content), suppress.ignore_by_metadata)
  content_bailout(content, suppress.ignore_by_content)

-  first_config = ExecutionConfig(options, 'first')
-  second_config = ExecutionConfig(options, 'second')
+  # Prepare the baseline, default and a secondary configuration to compare to.
+  # The default (turbofan) takes precedence as many of the secondary configs
+  # are based on the turbofan config with additional parameters.
+  execution_configs = [
+    ExecutionConfig(options, 'first'),
+    ExecutionConfig(options, 'default'),
+    ExecutionConfig(options, 'second'),
+  ]

  # First, run some fixed smoke tests in all configs to ensure nothing
  # is fundamentally wrong, in order to prevent bug flooding.
  if not options.skip_sanity_checks:
    run_comparisons(
-        suppress, first_config, second_config,
+        suppress, execution_configs,
        test_case=SANITY_CHECKS,
        timeout=SANITY_CHECK_TIMEOUT_SEC,
        verbose=False,
@ -450,7 +464,7 @@ def main():

  # Second, run all configs against the fuzz test case.
  run_comparisons(
-      suppress, first_config, second_config,
+      suppress, execution_configs,
      test_case=options.testcase,
      timeout=TEST_TIMEOUT_SEC,
  )
--- a/tools/clusterfuzz/v8_foozzie_test.py
+++ b/tools/clusterfuzz/v8_foozzie_test.py
@ -49,10 +49,6 @@ class ConfigTest(unittest.TestCase):
    assert all(map(lambda x: x[2] in CONFIGS, EXPERIMENTS))
    # The last config item points to a known build configuration.
    assert all(map(lambda x: x[3] in KNOWN_BUILDS, EXPERIMENTS))
-    # Ensure we compare different configs and same d8, or same config
-    # to different d8.
-    is_valid_comparison = lambda x: (x[1] == x[2]) == ('d8' != x[3])
-    assert all(map(is_valid_comparison, EXPERIMENTS))
    # All flags have a probability.
    first_is_float = lambda x: type(x[0]) == float
    assert all(map(first_is_float, FLAGS))
@ -238,9 +234,10 @@ other weird stuff
    check('123', '45', True, True, '12', '45')


-def cut_verbose_output(stdout):
-  # This removes first lines containing d8 commands.
-  return '\n'.join(stdout.split('\n')[4:])
+def cut_verbose_output(stdout, n_comp):
+  # This removes the first lines containing d8 commands of `n_comp` comparison
+  # runs.
+  return '\n'.join(stdout.split('\n')[n_comp * 2:])


 def run_foozzie(second_d8_dir, *extra_flags, **kwargs):
@ -274,7 +271,8 @@ class SystemTest(unittest.TestCase):
  """
  def testSyntaxErrorDiffPass(self):
    stdout = run_foozzie('build1', '--skip-sanity-checks')
-    self.assertEqual('# V8 correctness - pass\n', cut_verbose_output(stdout))
+    self.assertEqual('# V8 correctness - pass\n',
+                     cut_verbose_output(stdout, 3))
    # Default comparison includes suppressions.
    self.assertIn('v8_suppressions.js', stdout)
    # Default comparison doesn't include any specific mock files.
@ -291,7 +289,7 @@ class SystemTest(unittest.TestCase):
                  '--second-config-extra-flags=--flag3')
    e = ctx.exception
    self.assertEqual(v8_foozzie.RETURN_FAIL, e.returncode)
-    self.assertEqual(expected_output, cut_verbose_output(e.output))
+    self.assertEqual(expected_output, cut_verbose_output(e.output, 2))

  def testSanityCheck(self):
    with open(os.path.join(TEST_DATA, 'sanity_check_output.txt')) as f:
--- a/tools/clusterfuzz/v8_fuzz_experiments.json
+++ b/tools/clusterfuzz/v8_fuzz_experiments.json
@ -1,17 +1,15 @@
 [
-  [10, "ignition", "jitless", "d8"],
+  [15, "ignition", "jitless", "d8"],
  [10, "ignition", "slow_path", "d8"],
+  [10, "ignition_no_ic", "slow_path", "d8"],
  [5, "ignition", "slow_path_opt", "d8"],
-  [25, "ignition", "ignition_turbo", "d8"],
-  [2, "ignition_no_ic", "ignition_turbo", "d8"],
-  [2, "ignition", "ignition_turbo_no_ic", "d8"],
-  [15, "ignition", "ignition_turbo_opt", "d8"],
+  [5, "ignition", "ignition_turbo_no_ic", "d8"],
+  [20, "ignition", "ignition_turbo_opt", "d8"],
  [5, "ignition_no_ic", "ignition_turbo_opt", "d8"],
-  [3, "ignition_turbo_opt", "ignition_turbo_opt", "clang_x64_pointer_compression/d8"],
-  [3, "ignition_turbo", "ignition_turbo", "clang_x64_pointer_compression/d8"],
-  [4, "ignition", "ignition", "clang_x86/d8"],
-  [4, "ignition_turbo", "ignition_turbo", "clang_x86/d8"],
-  [4, "ignition_turbo_opt", "ignition_turbo_opt", "clang_x86/d8"],
-  [4, "ignition_turbo", "ignition_turbo", "clang_x64_v8_arm64/d8"],
-  [4, "ignition_turbo", "ignition_turbo", "clang_x86_v8_arm/d8"]
+  [5, "ignition_turbo", "ignition_turbo_opt", "clang_x64_pointer_compression/d8"],
+  [5, "ignition", "ignition", "clang_x86/d8"],
+  [5, "ignition", "ignition_turbo_opt", "clang_x86/d8"],
+  [5, "ignition", "slow_path", "clang_x86/d8"],
+  [5, "ignition", "ignition_turbo_opt", "clang_x64_v8_arm64/d8"],
+  [5, "ignition", "ignition_turbo_opt", "clang_x86_v8_arm/d8"]
 ]