f16da87de2
This adds the option to list disallowed flags for differential fuzzing directly in the harness. Flags that can crash in smoke tests shoule be added there. No-Try: true Bug: chromium:1240812 Change-Id: I57c772bedeac0ca6ba023c6b4929515b4b0e6cca Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3103314 Auto-Submit: Michael Achenbach <machenbach@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/main@{#76369}
565 lines
18 KiB
Python
Executable File
565 lines
18 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2016 the V8 project authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""
|
|
V8 correctness fuzzer launcher script.
|
|
"""
|
|
|
|
# for py2/py3 compatibility
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
import hashlib
|
|
import itertools
|
|
import json
|
|
import os
|
|
import random
|
|
import re
|
|
import sys
|
|
import traceback
|
|
|
|
from collections import namedtuple
|
|
|
|
from v8_commands import Command, FailException, PassException
|
|
import v8_suppressions
|
|
|
|
PYTHON3 = sys.version_info >= (3, 0)
|
|
|
|
CONFIGS = dict(
|
|
default=[],
|
|
ignition=[
|
|
'--turbo-filter=~',
|
|
'--noopt',
|
|
'--liftoff',
|
|
'--no-wasm-tier-up',
|
|
],
|
|
ignition_asm=[
|
|
'--turbo-filter=~',
|
|
'--noopt',
|
|
'--validate-asm',
|
|
'--stress-validate-asm',
|
|
],
|
|
ignition_eager=[
|
|
'--turbo-filter=~',
|
|
'--noopt',
|
|
'--no-lazy',
|
|
'--no-lazy-inner-functions',
|
|
],
|
|
ignition_no_ic=[
|
|
'--turbo-filter=~',
|
|
'--noopt',
|
|
'--liftoff',
|
|
'--no-wasm-tier-up',
|
|
'--no-use-ic',
|
|
'--no-lazy-feedback-allocation',
|
|
],
|
|
ignition_turbo=[],
|
|
ignition_turbo_no_ic=[
|
|
'--no-use-ic',
|
|
],
|
|
ignition_turbo_opt=[
|
|
'--always-opt',
|
|
'--no-liftoff',
|
|
],
|
|
ignition_turbo_opt_eager=[
|
|
'--always-opt',
|
|
'--no-lazy',
|
|
'--no-lazy-inner-functions',
|
|
],
|
|
jitless=[
|
|
'--jitless',
|
|
],
|
|
slow_path=[
|
|
'--force-slow-path',
|
|
],
|
|
slow_path_opt=[
|
|
'--always-opt',
|
|
'--force-slow-path',
|
|
],
|
|
)
|
|
|
|
BASELINE_CONFIG = 'ignition'
|
|
DEFAULT_CONFIG = 'ignition_turbo'
|
|
DEFAULT_D8 = 'd8'
|
|
|
|
# Return codes.
|
|
RETURN_PASS = 0
|
|
RETURN_FAIL = 2
|
|
|
|
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
|
|
SMOKE_TESTS = os.path.join(BASE_PATH, 'v8_smoke_tests.js')
|
|
|
|
# Timeout for one d8 run.
|
|
SMOKE_TEST_TIMEOUT_SEC = 1
|
|
TEST_TIMEOUT_SEC = 3
|
|
|
|
SUPPORTED_ARCHS = ['ia32', 'x64', 'arm', 'arm64']
|
|
|
|
# Output for suppressed failure case.
|
|
FAILURE_HEADER_TEMPLATE = """#
|
|
# V8 correctness failure
|
|
# V8 correctness configs: %(configs)s
|
|
# V8 correctness sources: %(source_key)s
|
|
# V8 correctness suppression: %(suppression)s
|
|
"""
|
|
|
|
# Extended output for failure case. The 'CHECK' is for the minimizer.
|
|
FAILURE_TEMPLATE = FAILURE_HEADER_TEMPLATE + """#
|
|
# CHECK
|
|
#
|
|
# Compared %(first_config_label)s with %(second_config_label)s
|
|
#
|
|
# Flags of %(first_config_label)s:
|
|
%(first_config_flags)s
|
|
# Flags of %(second_config_label)s:
|
|
%(second_config_flags)s
|
|
#
|
|
# Difference:
|
|
%(difference)s%(source_file_text)s
|
|
#
|
|
### Start of configuration %(first_config_label)s:
|
|
%(first_config_output)s
|
|
### End of configuration %(first_config_label)s
|
|
#
|
|
### Start of configuration %(second_config_label)s:
|
|
%(second_config_output)s
|
|
### End of configuration %(second_config_label)s
|
|
"""
|
|
|
|
SOURCE_FILE_TEMPLATE = """
|
|
#
|
|
# Source file:
|
|
%s"""
|
|
|
|
|
|
FUZZ_TEST_RE = re.compile(r'.*fuzz(-\d+\.js)')
|
|
SOURCE_RE = re.compile(r'print\("v8-foozzie source: (.*)"\);')
|
|
|
|
# The number of hex digits used from the hash of the original source file path.
|
|
# Keep the number small to avoid duplicate explosion.
|
|
ORIGINAL_SOURCE_HASH_LENGTH = 3
|
|
|
|
# Placeholder string if no original source file could be determined.
|
|
ORIGINAL_SOURCE_DEFAULT = 'none'
|
|
|
|
# Placeholder string for failures from crash tests. If a failure is found with
|
|
# this signature, the matching sources should be moved to the mapping below.
|
|
ORIGINAL_SOURCE_CRASHTESTS = 'placeholder for CrashTests'
|
|
|
|
# Mapping from relative original source path (e.g. CrashTests/path/to/file.js)
|
|
# to a string key. Map to the same key for duplicate issues. The key should
|
|
# have more than 3 characters to not collide with other existing hashes.
|
|
# If a symptom from a particular original source file is known to map to a
|
|
# known failure, it can be added to this mapping. This should be done for all
|
|
# failures from CrashTests, as those by default map to the placeholder above.
|
|
KNOWN_FAILURES = {
|
|
# Foo.caller with asm.js: https://crbug.com/1042556
|
|
'CrashTests/4782147262545920/494.js': '.caller',
|
|
'CrashTests/5637524389167104/01457.js': '.caller',
|
|
'CrashTests/5703451898085376/02176.js': '.caller',
|
|
'CrashTests/4846282433495040/04342.js': '.caller',
|
|
'CrashTests/5712410200899584/04483.js': '.caller',
|
|
'v8/test/mjsunit/regress/regress-105.js': '.caller',
|
|
# Flaky issue that almost never repros.
|
|
'CrashTests/5694376231632896/1033966.js': 'flaky',
|
|
}
|
|
|
|
# Flags that are already crashy during smoke tests should not be used.
|
|
DISALLOWED_FLAGS = [
|
|
'--gdbjit',
|
|
]
|
|
|
|
|
|
def filter_flags(flags):
|
|
return [flag for flag in flags if flag not in DISALLOWED_FLAGS]
|
|
|
|
|
|
def infer_arch(d8):
|
|
"""Infer the V8 architecture from the build configuration next to the
|
|
executable.
|
|
"""
|
|
with open(os.path.join(os.path.dirname(d8), 'v8_build_config.json')) as f:
|
|
arch = json.load(f)['v8_current_cpu']
|
|
arch = 'ia32' if arch == 'x86' else arch
|
|
assert arch in SUPPORTED_ARCHS
|
|
return arch
|
|
|
|
|
|
class ExecutionArgumentsConfig(object):
|
|
def __init__(self, label):
|
|
self.label = label
|
|
|
|
def add_arguments(self, parser, default_config):
|
|
def add_argument(flag_template, help_template, **kwargs):
|
|
parser.add_argument(
|
|
flag_template % self.label,
|
|
help=help_template % self.label,
|
|
**kwargs)
|
|
|
|
add_argument(
|
|
'--%s-config',
|
|
'%s configuration',
|
|
default=default_config)
|
|
add_argument(
|
|
'--%s-config-extra-flags',
|
|
'additional flags passed to the %s run',
|
|
action='append',
|
|
default=[])
|
|
add_argument(
|
|
'--%s-d8',
|
|
'optional path to %s d8 executable, '
|
|
'default: bundled in the directory of this script',
|
|
default=DEFAULT_D8)
|
|
|
|
def make_options(self, options, default_config=None, default_d8=None):
|
|
def get(name):
|
|
return getattr(options, '%s_%s' % (self.label, name))
|
|
|
|
config = default_config or get('config')
|
|
assert config in CONFIGS
|
|
|
|
d8 = default_d8 or get('d8')
|
|
if not os.path.isabs(d8):
|
|
d8 = os.path.join(BASE_PATH, d8)
|
|
assert os.path.exists(d8)
|
|
|
|
flags = CONFIGS[config] + filter_flags(get('config_extra_flags'))
|
|
|
|
RunOptions = namedtuple('RunOptions', ['arch', 'config', 'd8', 'flags'])
|
|
return RunOptions(infer_arch(d8), config, d8, flags)
|
|
|
|
|
|
class ExecutionConfig(object):
|
|
def __init__(self, options, label):
|
|
self.options = options
|
|
self.label = label
|
|
self.arch = getattr(options, label).arch
|
|
self.config = getattr(options, label).config
|
|
d8 = getattr(options, label).d8
|
|
flags = getattr(options, label).flags
|
|
self.command = Command(options, label, d8, flags)
|
|
|
|
# Options for a fallback configuration only exist when comparing
|
|
# different architectures.
|
|
fallback_label = label + '_fallback'
|
|
self.fallback = None
|
|
if getattr(options, fallback_label, None):
|
|
self.fallback = ExecutionConfig(options, fallback_label)
|
|
|
|
@property
|
|
def flags(self):
|
|
return self.command.flags
|
|
|
|
@property
|
|
def is_error_simulation(self):
|
|
return '--simulate-errors' in self.flags
|
|
|
|
|
|
def parse_args():
|
|
first_config_arguments = ExecutionArgumentsConfig('first')
|
|
second_config_arguments = ExecutionArgumentsConfig('second')
|
|
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
'--random-seed', type=int, required=True,
|
|
help='random seed passed to both runs')
|
|
parser.add_argument(
|
|
'--skip-smoke-tests', default=False, action='store_true',
|
|
help='skip smoke tests for testing purposes')
|
|
parser.add_argument(
|
|
'--skip-suppressions', default=False, action='store_true',
|
|
help='skip suppressions to reproduce known issues')
|
|
|
|
# Add arguments for each run configuration.
|
|
first_config_arguments.add_arguments(parser, BASELINE_CONFIG)
|
|
second_config_arguments.add_arguments(parser, DEFAULT_CONFIG)
|
|
|
|
parser.add_argument('testcase', help='path to test case')
|
|
options = parser.parse_args()
|
|
|
|
# Ensure we have a test case.
|
|
assert (os.path.exists(options.testcase) and
|
|
os.path.isfile(options.testcase)), (
|
|
'Test case %s doesn\'t exist' % options.testcase)
|
|
|
|
options.first = first_config_arguments.make_options(options)
|
|
options.second = second_config_arguments.make_options(options)
|
|
options.default = second_config_arguments.make_options(
|
|
options, default_config=DEFAULT_CONFIG)
|
|
|
|
# Use fallback configurations only on diffrent architectures. In this
|
|
# case we are going to re-test against the first architecture.
|
|
if options.first.arch != options.second.arch:
|
|
options.second_fallback = second_config_arguments.make_options(
|
|
options, default_d8=options.first.d8)
|
|
options.default_fallback = second_config_arguments.make_options(
|
|
options, default_config=DEFAULT_CONFIG, default_d8=options.first.d8)
|
|
|
|
# Ensure we make a valid comparison.
|
|
if (options.first.d8 == options.second.d8 and
|
|
options.first.config == options.second.config):
|
|
parser.error('Need either executable or config difference.')
|
|
|
|
return options
|
|
|
|
|
|
def get_meta_data(content):
|
|
"""Extracts original-source-file paths from test case content."""
|
|
sources = []
|
|
for line in content.splitlines():
|
|
match = SOURCE_RE.match(line)
|
|
if match:
|
|
sources.append(match.group(1))
|
|
return {'sources': sources}
|
|
|
|
|
|
def content_bailout(content, ignore_fun):
|
|
"""Print failure state and return if ignore_fun matches content."""
|
|
bug = (ignore_fun(content) or '').strip()
|
|
if bug:
|
|
raise FailException(FAILURE_HEADER_TEMPLATE % dict(
|
|
configs='', source_key='', suppression=bug))
|
|
|
|
|
|
def fail_bailout(output, ignore_by_output_fun):
|
|
"""Print failure state and return if ignore_by_output_fun matches output."""
|
|
bug = (ignore_by_output_fun(output.stdout) or '').strip()
|
|
if bug:
|
|
raise FailException(FAILURE_HEADER_TEMPLATE % dict(
|
|
configs='', source_key='', suppression=bug))
|
|
|
|
|
|
def format_difference(
|
|
first_config, second_config,
|
|
first_config_output, second_config_output,
|
|
difference, source_key=None, source=None):
|
|
# The first three entries will be parsed by clusterfuzz. Format changes
|
|
# will require changes on the clusterfuzz side.
|
|
source_key = source_key or cluster_failures(source)
|
|
first_config_label = '%s,%s' % (first_config.arch, first_config.config)
|
|
second_config_label = '%s,%s' % (second_config.arch, second_config.config)
|
|
source_file_text = SOURCE_FILE_TEMPLATE % source if source else ''
|
|
|
|
if PYTHON3:
|
|
first_stdout = first_config_output.stdout
|
|
second_stdout = second_config_output.stdout
|
|
else:
|
|
first_stdout = first_config_output.stdout.decode('utf-8', 'replace')
|
|
second_stdout = second_config_output.stdout.decode('utf-8', 'replace')
|
|
difference = difference.decode('utf-8', 'replace')
|
|
|
|
text = (FAILURE_TEMPLATE % dict(
|
|
configs='%s:%s' % (first_config_label, second_config_label),
|
|
source_file_text=source_file_text,
|
|
source_key=source_key,
|
|
suppression='', # We can't tie bugs to differences.
|
|
first_config_label=first_config_label,
|
|
second_config_label=second_config_label,
|
|
first_config_flags=' '.join(first_config.flags),
|
|
second_config_flags=' '.join(second_config.flags),
|
|
first_config_output=first_stdout,
|
|
second_config_output=second_stdout,
|
|
source=source,
|
|
difference=difference,
|
|
))
|
|
if PYTHON3:
|
|
return text
|
|
else:
|
|
return text.encode('utf-8', 'replace')
|
|
|
|
|
|
def cluster_failures(source, known_failures=None):
|
|
"""Returns a string key for clustering duplicate failures.
|
|
|
|
Args:
|
|
source: The original source path where the failure happened.
|
|
known_failures: Mapping from original source path to failure key.
|
|
"""
|
|
known_failures = known_failures or KNOWN_FAILURES
|
|
# No source known. Typical for manually uploaded issues. This
|
|
# requires also manual issue creation.
|
|
if not source:
|
|
return ORIGINAL_SOURCE_DEFAULT
|
|
# Source is known to produce a particular failure.
|
|
if source in known_failures:
|
|
return known_failures[source]
|
|
# Subsume all other sources from CrashTests under one key. Otherwise
|
|
# failures lead to new crash tests which in turn lead to new failures.
|
|
if source.startswith('CrashTests'):
|
|
return ORIGINAL_SOURCE_CRASHTESTS
|
|
|
|
# We map all remaining failures to a short hash of the original source.
|
|
long_key = hashlib.sha1(source.encode('utf-8')).hexdigest()
|
|
return long_key[:ORIGINAL_SOURCE_HASH_LENGTH]
|
|
|
|
|
|
class RepeatedRuns(object):
|
|
"""Helper class for storing statistical data from repeated runs."""
|
|
def __init__(self, test_case, timeout, verbose):
|
|
self.test_case = test_case
|
|
self.timeout = timeout
|
|
self.verbose = verbose
|
|
|
|
# Stores if any run has crashed or was simulated.
|
|
self.has_crashed = False
|
|
self.simulated = False
|
|
|
|
def run(self, config):
|
|
comparison_output = config.command.run(
|
|
self.test_case, timeout=self.timeout, verbose=self.verbose)
|
|
self.has_crashed = self.has_crashed or comparison_output.HasCrashed()
|
|
self.simulated = self.simulated or config.is_error_simulation
|
|
return comparison_output
|
|
|
|
@property
|
|
def crash_state(self):
|
|
return '_simulated_crash_' if self.simulated else '_unexpected_crash_'
|
|
|
|
|
|
def run_comparisons(suppress, execution_configs, test_case, timeout,
|
|
verbose=True, ignore_crashes=True, source_key=None):
|
|
"""Runs different configurations and bails out on output difference.
|
|
|
|
Args:
|
|
suppress: The helper object for textual suppressions.
|
|
execution_configs: Two or more configurations to run. The first one will be
|
|
used as baseline to compare all others to.
|
|
test_case: The test case to run.
|
|
timeout: Timeout in seconds for one run.
|
|
verbose: Prints the executed commands.
|
|
ignore_crashes: Typically we ignore crashes during fuzzing as they are
|
|
frequent. However, when running smoke tests we should not crash
|
|
and immediately flag crashes as a failure.
|
|
source_key: A fixed source key. If not given, it will be inferred from the
|
|
output.
|
|
"""
|
|
runner = RepeatedRuns(test_case, timeout, verbose)
|
|
|
|
# Run the baseline configuration.
|
|
baseline_config = execution_configs[0]
|
|
baseline_output = runner.run(baseline_config)
|
|
|
|
# Iterate over the remaining configurations, run and compare.
|
|
for comparison_config in execution_configs[1:]:
|
|
comparison_output = runner.run(comparison_config)
|
|
difference, source = suppress.diff(baseline_output, comparison_output)
|
|
|
|
if difference:
|
|
# Only bail out due to suppressed output if there was a difference. If a
|
|
# suppression doesn't show up anymore in the statistics, we might want to
|
|
# remove it.
|
|
fail_bailout(baseline_output, suppress.ignore_by_output)
|
|
fail_bailout(comparison_output, suppress.ignore_by_output)
|
|
|
|
# Check if a difference also occurs with the fallback configuration and
|
|
# give it precedence. E.g. we always prefer x64 differences.
|
|
if comparison_config.fallback:
|
|
fallback_output = runner.run(comparison_config.fallback)
|
|
fallback_difference, fallback_source = suppress.diff(
|
|
baseline_output, fallback_output)
|
|
if fallback_difference:
|
|
fail_bailout(fallback_output, suppress.ignore_by_output)
|
|
source = fallback_source
|
|
comparison_config = comparison_config.fallback
|
|
comparison_output = fallback_output
|
|
difference = fallback_difference
|
|
|
|
raise FailException(format_difference(
|
|
baseline_config, comparison_config,
|
|
baseline_output, comparison_output,
|
|
difference, source_key, source))
|
|
|
|
if runner.has_crashed:
|
|
if ignore_crashes:
|
|
# Show if a crash has happened in one of the runs and no difference was
|
|
# detected. This is only for the statistics during experiments.
|
|
raise PassException('# V8 correctness - C-R-A-S-H')
|
|
else:
|
|
# Subsume simulated and unexpected crashes (e.g. during smoke tests)
|
|
# with one failure state.
|
|
raise FailException(FAILURE_HEADER_TEMPLATE % dict(
|
|
configs='', source_key='', suppression=runner.crash_state))
|
|
|
|
|
|
def main():
|
|
options = parse_args()
|
|
suppress = v8_suppressions.get_suppression(options.skip_suppressions)
|
|
|
|
# Static bailout based on test case content or metadata.
|
|
kwargs = {}
|
|
if PYTHON3:
|
|
kwargs['encoding'] = 'utf-8'
|
|
with open(options.testcase, 'r', **kwargs) as f:
|
|
content = f.read()
|
|
content_bailout(get_meta_data(content), suppress.ignore_by_metadata)
|
|
content_bailout(content, suppress.ignore_by_content)
|
|
|
|
# Prepare the baseline, default and a secondary configuration to compare to.
|
|
# The default (turbofan) takes precedence as many of the secondary configs
|
|
# are based on the turbofan config with additional parameters.
|
|
execution_configs = [
|
|
ExecutionConfig(options, 'first'),
|
|
ExecutionConfig(options, 'default'),
|
|
ExecutionConfig(options, 'second'),
|
|
]
|
|
|
|
# First, run some fixed smoke tests in all configs to ensure nothing
|
|
# is fundamentally wrong, in order to prevent bug flooding.
|
|
if not options.skip_smoke_tests:
|
|
run_comparisons(
|
|
suppress, execution_configs,
|
|
test_case=SMOKE_TESTS,
|
|
timeout=SMOKE_TEST_TIMEOUT_SEC,
|
|
verbose=False,
|
|
# Don't accept crashes during smoke tests. A crash would hint at
|
|
# a flag that might be incompatible or a broken test file.
|
|
ignore_crashes=False,
|
|
# Special source key for smoke tests so that clusterfuzz dedupes all
|
|
# cases on this in case it's hit.
|
|
source_key = 'smoke test failed',
|
|
)
|
|
|
|
# Second, run all configs against the fuzz test case.
|
|
run_comparisons(
|
|
suppress, execution_configs,
|
|
test_case=options.testcase,
|
|
timeout=TEST_TIMEOUT_SEC,
|
|
)
|
|
|
|
# TODO(machenbach): Figure out if we could also return a bug in case
|
|
# there's no difference, but one of the line suppressions has matched -
|
|
# and without the match there would be a difference.
|
|
print('# V8 correctness - pass')
|
|
return RETURN_PASS
|
|
|
|
|
|
if __name__ == "__main__":
|
|
try:
|
|
result = main()
|
|
except FailException as e:
|
|
print(e.message)
|
|
result = RETURN_FAIL
|
|
except PassException as e:
|
|
print(e.message)
|
|
result = RETURN_PASS
|
|
except SystemExit:
|
|
# Make sure clusterfuzz reports internal errors and wrong usage.
|
|
# Use one label for all internal and usage errors.
|
|
print(FAILURE_HEADER_TEMPLATE % dict(
|
|
configs='', source_key='', suppression='wrong_usage'))
|
|
result = RETURN_FAIL
|
|
except MemoryError:
|
|
# Running out of memory happens occasionally but is not actionable.
|
|
print('# V8 correctness - pass')
|
|
result = RETURN_PASS
|
|
except Exception as e:
|
|
print(FAILURE_HEADER_TEMPLATE % dict(
|
|
configs='', source_key='', suppression='internal_error'))
|
|
print('# Internal error: %s' % e)
|
|
traceback.print_exc(file=sys.stdout)
|
|
result = RETURN_FAIL
|
|
|
|
sys.exit(result)
|