From 1db33e5fffe792b9c63dd1a9db15a27781d74acc Mon Sep 17 00:00:00 2001
From: Tamer Tas <tmrts@chromium.org>
Date: Mon, 8 Jul 2019 14:47:55 +0200
Subject: [PATCH] [browser-stories] gate features behind {experimental} flag

Performance infra recipes use callstats.py to run web page replays. Because the
code is split across repositories (v8, infra, perf-infra), experimenting with
callstats.py is hard.

This CL adds an --experimental flag that gates experimental features, to
simplify performance infra experiments.

R=ulan@chromium.org

No-Try: true
Bug: v8:9448
Change-Id: I2c3e139f4b9d6bce1ea4fdda1a44960d74d7d414
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1690950
Auto-Submit: Tamer Tas <tmrts@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Commit-Queue: Tamer Tas <tmrts@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62565}
---
 tools/callstats.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
diff --git a/tools/callstats.py b/tools/callstats.py
index 70db89b5da..f95e4d30de 100755
--- a/tools/callstats.py
+++ b/tools/callstats.py
@@ -355,6 +355,37 @@ def statistics(data):
   return { 'samples': N, 'average': average, 'median': median,
            'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
 
+def experimental_statistics(data):
+  # TODO(tmrts): experimental copy of statistics(); will be removed once the
+  # experiments are done.
+  N = len(data)
+  average = numpy.average(data)
+  median = numpy.median(data)
+  low = numpy.min(data)
+  high= numpy.max(data)
+  if N > 1:
+    # Sample standard deviation: delta degrees of freedom (ddof) of 1 makes
+    # numpy divide by N - ddof = N - 1 instead of N.
+    stddev = numpy.std(data, ddof=1)
+    # 95% interval endpoints of Student's t with N-1 degrees of freedom.
+    t_bounds = scipy.stats.t.interval(0.95, N-1)
+    # The bounds are symmetric around zero (t_bounds[0] == -t_bounds[1]).
+    # Scale by the standard error and center the interval on the average.
+    ci = {
+        'abs': t_bounds[1] * stddev / sqrt(N),  # half-width of the interval
+        'low': average + t_bounds[0] * stddev / sqrt(N),
+        'high': average + t_bounds[1] * stddev / sqrt(N)
+    }
+  else:  # fewer than two samples: no spread to estimate
+    stddev = 0
+    ci = { 'abs': 0, 'low': average, 'high': average }
+  if abs(stddev) > 0.0001 and abs(average) > 0.0001:  # implies N > 1
+    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
+  else:
+    ci['perc'] = 0  # relative CI is not meaningful here
+  return { 'samples': N, 'average': average, 'median': median,
+           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
+
 
 def add_category_total(entries, groups, category_prefix):
   group_data = { 'time': 0, 'count': 0 }
@@ -661,6 +692,9 @@ def main():
     subparser.add_argument(
         "--sites", type=str, metavar="<URL>", nargs="*",
         help="specify benchmark website")
+    subparser.add_argument(
+        "--experimental", action="store_true", default=False,
+        help="enable the experimental mode")
   add_replay_args(subparsers["run"])
 
   # Command: replay-server
@@ -719,6 +753,8 @@ def main():
   # Execute the command.
   args = parser.parse_args()
   setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
+  if args.experimental:
+    statistics = experimental_statistics
   if args.command == "run" and coexist(args.sites_file, args.sites):
     args.error("use either option --sites-file or site URLs")
     sys.exit(1)