[automated_benchmarking] Make arguments optional and add --dict argument (#1968)
* Make arguments optional and add --dict argument * Removing accidental print statement * Change to more likely scenario for dictionary compression benchmark
This commit is contained in:
parent
9a71d07aa4
commit
8fe562a770
@ -238,7 +238,7 @@ versionsTest: clean
|
||||
$(PYTHON) test-zstd-versions.py
|
||||
|
||||
automated_benchmarking: clean
|
||||
$(PYTHON) automated_benchmarking.py golden-compression 1 current 1 "" 60
|
||||
$(PYTHON) automated_benchmarking.py
|
||||
|
||||
checkTag: checkTag.c $(ZSTDDIR)/zstd.h
|
||||
$(CC) $(FLAGS) $< -o $@$(EXT)
|
||||
|
@ -33,21 +33,32 @@ pull requests from the zstd repo and compare facebook:dev to all of them once, c
|
||||
will continuously get pull requests from the zstd repo and run benchmarks against facebook:dev.
|
||||
|
||||
```
|
||||
Example usage: python automated_benchmarking.py golden-compression 1 current 1 "" 60
|
||||
Example usage: python automated_benchmarking.py
|
||||
```
|
||||
|
||||
```
|
||||
usage: automated_benchmarking.py [-h] directory levels mode emails
|
||||
usage: automated_benchmarking.py [-h] [--directory DIRECTORY]
|
||||
[--levels LEVELS] [--iterations ITERATIONS]
|
||||
[--emails EMAILS] [--frequency FREQUENCY]
|
||||
[--mode MODE] [--dict DICT]
|
||||
|
||||
positional arguments:
|
||||
directory directory with files to benchmark
|
||||
levels levels to test eg ('1,2,3')
|
||||
mode 'fastmode', 'onetime', 'current' or 'continuous'
|
||||
iterations number of benchmark iterations to run
|
||||
emails email addresses of people who will be alerted upon regression.
|
||||
Only for continuous mode
|
||||
frequency specifies the number of seconds to wait before each successive
|
||||
check for new PRs in continuous mode
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--directory DIRECTORY
|
||||
directory with files to benchmark
|
||||
--levels LEVELS levels to test eg ('1,2,3')
|
||||
--iterations ITERATIONS
|
||||
number of benchmark iterations to run
|
||||
--emails EMAILS email addresses of people who will be alerted upon
|
||||
regression. Only for continuous mode
|
||||
--frequency FREQUENCY
|
||||
specifies the number of seconds to wait before each
|
||||
successive check for new PRs in continuous mode
|
||||
--mode MODE 'fastmode', 'onetime', 'current', or 'continuous' (see
|
||||
README.md for details)
|
||||
--dict DICT filename of dictionary to use (when set, this
|
||||
dictionary will be used to compress the files provided
|
||||
inside --directory)
|
||||
```
|
||||
|
||||
#### `test-zstd-speed.py` - script for testing zstd speed difference between commits
|
||||
|
@ -94,16 +94,19 @@ def clone_and_build(build):
|
||||
return "../zstd"
|
||||
|
||||
|
||||
def parse_benchmark_output(output):
    """Extract compression and decompression speeds from zstd benchmark output.

    `output` is the benchmark stderr already split on spaces; the tokens
    immediately preceding the first two "MB/s" markers are the compression
    and decompression speeds, returned as a [cspeed, dspeed] float pair.
    """
    speed_positions = [pos for pos, token in enumerate(output) if token == "MB/s"]
    first, second = speed_positions[0], speed_positions[1]
    return [float(output[first - 1]), float(output[second - 1])]
|
||||
|
||||
|
||||
def benchmark_single(executable, level, filename):
|
||||
tmp = (
|
||||
return parse_benchmark_output((
|
||||
subprocess.run(
|
||||
[executable, "-qb{}".format(level), filename], stderr=subprocess.PIPE
|
||||
)
|
||||
.stderr.decode("utf-8")
|
||||
.split(" ")
|
||||
)
|
||||
idx = [i for i, d in enumerate(tmp) if d == "MB/s"]
|
||||
return [float(tmp[idx[0] - 1]), float(tmp[idx[1] - 1])]
|
||||
))
|
||||
|
||||
|
||||
def benchmark_n(executable, level, filename, n):
|
||||
@ -129,6 +132,45 @@ def benchmark(build, filenames, levels, iterations):
|
||||
]
|
||||
|
||||
|
||||
def benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, level, iterations):
    """Benchmark dictionary compression at a single level.

    Runs `executable` in quiet benchmark mode `iterations` times over the
    files in `filenames_directory` (recursively, via -r), compressing with
    `dictionary_filename` (-D), prints a summary, and returns the best
    observed (cspeed, dspeed) pair in MB/s across all iterations.
    """
    command = [executable, "-qb{}".format(level), "-D", dictionary_filename, "-r", filenames_directory]
    samples = []
    for _ in range(iterations):
        # zstd writes benchmark results to stderr, not stdout.
        stderr_text = subprocess.run(command, stderr=subprocess.PIPE).stderr.decode("utf-8")
        samples.append(parse_benchmark_output(stderr_text.split(" ")))
    # Report the maximum over iterations to reduce scheduling noise.
    max_cspeed = max(sample[0] for sample in samples)
    max_dspeed = max(sample[1] for sample in samples)
    print(
        "Bench (executable={} level={} filenames_directory={}, dictionary_filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
            os.path.basename(executable),
            level,
            os.path.basename(filenames_directory),
            os.path.basename(dictionary_filename),
            iterations,
            max_cspeed,
            max_dspeed,
        )
    )
    return (max_cspeed, max_dspeed)
|
||||
|
||||
|
||||
def benchmark_dictionary(build, filenames_directory, dictionary_filename, levels, iterations):
    """Build `build` and run the dictionary benchmark at every level.

    Returns a list of (max_cspeed, max_dspeed) tuples, one per entry in
    `levels`, in the same order.
    """
    executable = clone_and_build(build)
    return [
        benchmark_dictionary_single(
            executable, filenames_directory, dictionary_filename, level, iterations
        )
        for level in levels
    ]
|
||||
|
||||
|
||||
def parse_regressions_and_labels(old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build):
    """Compute relative speed regressions and human-readable build labels.

    Regressions are fractional slowdowns relative to the old (baseline)
    speeds — positive values mean the new build is slower. Labels are
    "user:branch (hash)" strings derived from each build dict.
    Returns (cspeed_reg, dspeed_reg, baseline_label, test_label).
    """
    def build_label(build):
        # One-line "user:branch (hash)" identifier for a build dict.
        return "{}:{} ({})".format(build["user"], build["branch"], build["hash"])

    compression_regression = (old_cspeed - new_cspeed) / old_cspeed
    decompression_regression = (old_dspeed - new_dspeed) / old_dspeed
    return (
        compression_regression,
        decompression_regression,
        build_label(baseline_build),
        build_label(test_build),
    )
|
||||
|
||||
|
||||
def get_regressions(baseline_build, test_build, iterations, filenames, levels):
|
||||
old = benchmark(baseline_build, filenames, levels, iterations)
|
||||
new = benchmark(test_build, filenames, levels, iterations)
|
||||
@ -137,13 +179,8 @@ def get_regressions(baseline_build, test_build, iterations, filenames, levels):
|
||||
for k, filename in enumerate(filenames):
|
||||
old_cspeed, old_dspeed = old[j][k]
|
||||
new_cspeed, new_dspeed = new[j][k]
|
||||
cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed
|
||||
dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed
|
||||
baseline_label = "{}:{} ({})".format(
|
||||
baseline_build["user"], baseline_build["branch"], baseline_build["hash"]
|
||||
)
|
||||
test_label = "{}:{} ({})".format(
|
||||
test_build["user"], test_build["branch"], test_build["hash"]
|
||||
cspeed_reg, dspeed_reg, baseline_build, test_label = parse_regressions_and_labels(
|
||||
old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
|
||||
)
|
||||
if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
|
||||
regressions.append(
|
||||
@ -171,14 +208,58 @@ def get_regressions(baseline_build, test_build, iterations, filenames, levels):
|
||||
)
|
||||
return regressions
|
||||
|
||||
def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC):
|
||||
def get_regressions_dictionary(baseline_build, test_build, filenames_directory, dictionary_filename, levels, iterations):
    """Benchmark two builds with a dictionary and report speed regressions.

    Benchmarks `baseline_build` and `test_build` over the files in
    `filenames_directory`, compressed with `dictionary_filename`, at each
    level in `levels`. Returns a list of human-readable messages, one per
    regression whose compression/decompression slowdown exceeds
    CSPEED_REGRESSION_TOLERANCE / DSPEED_REGRESSION_TOLERANCE.
    """
    old = benchmark_dictionary(baseline_build, filenames_directory, dictionary_filename, levels, iterations)
    new = benchmark_dictionary(test_build, filenames_directory, dictionary_filename, levels, iterations)
    regressions = []
    for j, level in enumerate(levels):
        old_cspeed, old_dspeed = old[j]
        new_cspeed, new_dspeed = new[j]
        # BUG FIX: was misspelled "baesline_label", which left baseline_label
        # undefined and raised NameError exactly when a regression was found.
        cspeed_reg, dspeed_reg, baseline_label, test_label = parse_regressions_and_labels(
            old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
        )
        if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
            regressions.append(
                "[COMPRESSION REGRESSION] (level={} filenames_directory={} dictionary_filename={})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(
                    level,
                    filenames_directory,
                    dictionary_filename,
                    baseline_label,
                    test_label,
                    old_cspeed,
                    new_cspeed,
                    cspeed_reg * 100.0,
                )
            )
        if dspeed_reg > DSPEED_REGRESSION_TOLERANCE:
            regressions.append(
                "[DECOMPRESSION REGRESSION] (level={} filenames_directory={} dictionary_filename={})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(
                    level,
                    filenames_directory,
                    dictionary_filename,
                    baseline_label,
                    test_label,
                    old_dspeed,
                    new_dspeed,
                    dspeed_reg * 100.0,
                )
            )
    return regressions
|
||||
|
||||
|
||||
def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC, dictionary_filename=None):
|
||||
if builds == None:
|
||||
builds = get_new_open_pr_builds()
|
||||
while True:
|
||||
for test_build in builds:
|
||||
regressions = get_regressions(
|
||||
MASTER_BUILD, test_build, iterations, filenames, levels
|
||||
)
|
||||
if dictionary_filename == None:
|
||||
regressions = get_regressions(
|
||||
MASTER_BUILD, test_build, iterations, filenames, levels
|
||||
)
|
||||
else:
|
||||
regressions = get_regressions_dictionary(
|
||||
MASTER_BUILD, test_build, filenames, dictionary_filename, levels, iterations
|
||||
)
|
||||
body = "\n".join(regressions)
|
||||
if len(regressions) > 0:
|
||||
if emails != None:
|
||||
@ -198,42 +279,38 @@ def main(filenames, levels, iterations, builds=None, emails=None, continuous=Fal
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--directory", help="directory with files to benchmark", default="golden-compression")
    parser.add_argument("--levels", help="levels to test eg ('1,2,3')", default="1")
    parser.add_argument("--iterations", help="number of benchmark iterations to run", default="1")
    parser.add_argument("--emails", help="email addresses of people who will be alerted upon regression. Only for continuous mode", default=None)
    parser.add_argument("--frequency", help="specifies the number of seconds to wait before each successive check for new PRs in continuous mode", default=DEFAULT_MAX_API_CALL_FREQUENCY_SEC)
    parser.add_argument("--mode", help="'fastmode', 'onetime', 'current', or 'continuous' (see README.md for details)", default="current")
    # Typo fix in help text: "dictioanry" -> "dictionary".
    parser.add_argument("--dict", help="filename of dictionary to use (when set, this dictionary will be used to compress the files provided inside --directory)", default=None)

    args = parser.parse_args()
    filenames = args.directory
    levels = [int(l) for l in args.levels.split(",")]
    mode = args.mode
    iterations = int(args.iterations)
    emails = args.emails
    frequency = int(args.frequency)
    dictionary_filename = args.dict

    # Without a dictionary, the directory is expanded into its individual
    # files; with one, the directory path itself is passed through so the
    # benchmark can use zstd's recursive (-r) mode.
    if dictionary_filename is None:
        filenames = glob.glob("{}/**".format(filenames))

    if len(filenames) == 0:
        print("0 files found")
        quit()

    if mode == "onetime":
        # BUG FIX: was "frequency=frequenc", a NameError on the onetime path.
        main(filenames, levels, iterations, frequency=frequency, dictionary_filename=dictionary_filename)
    elif mode == "current":
        builds = [{"user": None, "branch": "None", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    elif mode == "fastmode":
        builds = [{"user": "facebook", "branch": "master", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    else:
        main(filenames, levels, iterations, None, emails, True, frequency=frequency, dictionary_filename=dictionary_filename)
|
||||
|
Loading…
Reference in New Issue
Block a user