Add support for rebaselining from trybots.

This adds support to rebaseline_server to fetch actual results from
tryjobs run on rietveld issues.

Review URL: https://codereview.chromium.org/688353003
This commit is contained in:
bungeman 2014-11-11 08:11:55 -08:00 committed by Commit bot
parent 28246af2ef
commit d09ade4b44
2 changed files with 163 additions and 17 deletions

View File

@ -10,6 +10,8 @@ Download actual GM results for a particular builder.
"""
# System-level imports
import httplib
import logging
import optparse
import os
import posixpath
@ -97,6 +99,136 @@ def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET):
return dirs
class ActualLocation(object):
  """Pointer to a single actual-results JSON file in Google Storage.

  Attributes:
    bucket: name of the Google Storage bucket holding the file.
    path: path of the file within that bucket.
    generation: Google Storage object generation string, or None to
        refer to the latest generation of the object.
  """

  def __init__(self, bucket, path, generation):
    self.bucket = bucket
    self.path = path
    self.generation = generation

  def __repr__(self):
    # Debugging/logging aid only; no caller depends on this format.
    return '%s(%r, %r, %r)' % (
        type(self).__name__, self.bucket, self.path, self.generation)
class TipOfTreeActuals(object):
  """Source of actual GM results as uploaded at tip-of-tree."""

  def __init__(self, summaries_bucket=GM_SUMMARIES_BUCKET,
               json_filename=DEFAULT_JSON_FILENAME):
    """
    Args:
      summaries_bucket: URL pointing at the root directory
          containing all actual-results.json files, e.g.,
              http://domain.name/path/to/dir  OR
              file:///absolute/path/to/localdir
      json_filename: The JSON filename to read from within each directory.
    """
    self._summaries_bucket = summaries_bucket
    self._json_filename = json_filename

  def description(self):
    """Returns a human-readable description of this actuals source."""
    return 'gm_summaries_bucket %s' % (self._summaries_bucket,)

  def get_builders(self):
    """ Returns the list of builders we have actual results for.
    {builder:string -> ActualLocation}
    """
    builders, _ = get_builders_list(self._summaries_bucket)
    # Each builder's summary lives at <builder>/<json_filename> within the
    # summaries bucket; generation None selects the latest object version.
    return dict(
        (builder,
         ActualLocation(self._summaries_bucket,
                        "%s/%s" % (builder, self._json_filename),
                        None))
        for builder in builders)
class RietveldIssueActuals(object):
  """Source of actual GM results produced by tryjobs on a Rietveld issue."""

  def __init__(self, issue, json_filename=DEFAULT_JSON_FILENAME):
    """
    Args:
      issue: The rietveld issue from which to obtain actuals.
      json_filename: The JSON filename to read from within each directory.
    """
    self._issue = issue
    self._json_filename = json_filename

  def description(self):
    """Returns a human-readable description of this actuals source."""
    return 'rietveld issue %s' % (self._issue,)

  def get_builders(self):
    """ Returns the actuals for the given rietveld issue's tryjobs.
    {builder:string -> ActualLocation}

    e.g.
    {'Test-Android-Xoom-Tegra2-Arm7-Release': (
        'chromium-skia-gm-summaries',
        'Test-Android-Xoom-Tegra2-Arm7-Release-Trybot/actual-results.json',
        '1415041165535000')}
    """
    result = dict()
    # Matches the "Created: gs://<bucket>/<path>#<generation>" line printed
    # by the 'Upload GM Results' step for the uploaded JSON summary.
    json_filename_re = re.compile(
        r'^Created: gs://([^/]+)/((?:[^/]+/)+%s)#(\d+)$'
        % re.escape(self._json_filename), re.MULTILINE)
    codereview_api_url = 'https://codereview.chromium.org/api'
    upload_gm_step_url = '/steps/Upload GM Results/logs/stdio'

    logging.info('Fetching issue %s ...' % (self._issue,))
    json_issue_url = '%s/%s' % (codereview_api_url, self._issue)
    json_issue_data = urllib2.urlopen(json_issue_url).read()
    issue_dict = gm_json.LoadFromString(json_issue_data)

    patchsets = issue_dict.get("patchsets", [])
    if not patchsets:
      # Guard BEFORE indexing: patchsets[-1] on an empty list would raise
      # IndexError, so the old post-index check could never fire.
      logging.warning('No patchsets for rietveld issue %s.' % (self._issue,))
      return result
    # Patchsets are listed chronologically; use the most recent one.
    patchset = patchsets[-1]

    logging.info('Fetching issue %s patch %s...' % (self._issue, patchset))
    json_patchset_url = '%s/%s/%s' % (codereview_api_url, self._issue, patchset)
    json_patchset_data = urllib2.urlopen(json_patchset_url).read()
    patchset_dict = gm_json.LoadFromString(json_patchset_data)

    # try_job_results is ordered reverse chronologically
    try_job_results = patchset_dict.get('try_job_results', [])
    for try_job_result in try_job_results:
      try_builder = try_job_result.get('builder', '<bad builder>')
      if not try_builder.endswith('-Trybot'):
        logging.warning('Builder %s is not a trybot?' % (try_builder,))
        continue
      builder = try_builder[:-len('-Trybot')]
      if builder in result:
        # Results are newest-first; keep only the most recent per builder.
        continue

      logging.info('Fetching issue %s patch %s try %s...' %
                   (self._issue, patchset, try_builder))
      build_url = try_job_result.get('url', '<bad url>')
      gm_upload_output_url = build_url + urllib2.quote(upload_gm_step_url)
      logging.info('Fetching %s ...' % (gm_upload_output_url,))

      # Tryjobs might not produce the step, but don't let that fail everything.
      gm_upload_output = None
      try:
        gm_upload_output = urllib2.urlopen(gm_upload_output_url).read()
      except (urllib2.HTTPError, urllib2.URLError, httplib.HTTPException) as e:
        logging.warning(e)
      except Exception:
        logging.exception('Error opening %s .' % (gm_upload_output_url,))
      if not gm_upload_output:
        logging.warning('Could not fetch %s .' % (gm_upload_output_url,))
        continue

      json_filename_match = json_filename_re.search(gm_upload_output)
      if json_filename_match:
        logging.info('Found issue %s patch %s try %s result gs://%s/%s#%s .' %
                     (self._issue, patchset, builder,
                      json_filename_match.group(1),
                      json_filename_match.group(2),
                      json_filename_match.group(3)))
        result[builder] = ActualLocation(json_filename_match.group(1),
                                         json_filename_match.group(2),
                                         json_filename_match.group(3))
      else:
        logging.warning('Did not find %s for issue %s patch %s try %s.' %
                        (self._json_filename, self._issue, patchset,
                         try_builder))

    return result
def main():
parser = optparse.OptionParser()
required_params = []

View File

@ -260,21 +260,22 @@ class Server(object):
""" HTTP server for our HTML rebaseline viewer. """
def __init__(self,
actuals_source,
actuals_dir=DEFAULT_ACTUALS_DIR,
json_filename=DEFAULT_JSON_FILENAME,
gm_summaries_bucket=DEFAULT_GM_SUMMARIES_BUCKET,
port=DEFAULT_PORT, export=False, editable=True,
reload_seconds=0, config_pairs=None, builder_regex_list=None,
boto_file_path=None,
imagediffdb_threads=imagediffdb.DEFAULT_NUM_WORKER_THREADS):
"""
Args:
actuals_source: actuals_source.get_builders() ->
{builder:string -> [ bucket:string, path:string, generation:string ]}
If None, don't fetch new actual-results files
at all, just compare to whatever files are already in actuals_dir
actuals_dir: directory under which we will check out the latest actual
GM results
json_filename: basename of the JSON summary file to load for each builder
gm_summaries_bucket: Google Storage bucket to download json_filename
files from; if None or '', don't fetch new actual-results files
at all, just compare to whatever files are already in actuals_dir
port: which TCP port to listen on for HTTP requests
export: whether to allow HTTP clients on other hosts to access this server
editable: whether HTTP clients are allowed to submit new GM baselines
@ -292,9 +293,9 @@ class Server(object):
public GS buckets.
imagediffdb_threads: How many threads to spin up within imagediffdb.
"""
self._actuals_source = actuals_source
self._actuals_dir = actuals_dir
self._json_filename = json_filename
self._gm_summaries_bucket = gm_summaries_bucket
self._port = port
self._export = export
self._editable = editable
@ -385,28 +386,29 @@ class Server(object):
with self.results_rlock:
if invalidate:
self._results = None
if self._gm_summaries_bucket:
if self._actuals_source:
logging.info(
'Updating GM result summaries in %s from gm_summaries_bucket %s ...'
% (self._actuals_dir, self._gm_summaries_bucket))
'Updating GM result summaries in %s from %s ...'
% (self._actuals_dir, self._actuals_source.description()))
# Clean out actuals_dir first, in case some builders have gone away
# since we last ran.
if os.path.isdir(self._actuals_dir):
shutil.rmtree(self._actuals_dir)
# Get the list of builders we care about.
all_builders = download_actuals.get_builders_list(
summaries_bucket=self._gm_summaries_bucket)
# Get the list of actuals we care about.
all_actuals = self._actuals_source.get_builders()
if self._builder_regex_list:
matching_builders = []
for builder in all_builders:
for builder in all_actuals:
for regex in self._builder_regex_list:
if re.match(regex, builder):
matching_builders.append(builder)
break # go on to the next builder, no need to try more regexes
else:
matching_builders = all_builders
matching_builders = all_actuals.keys()
# Download the JSON file for each builder we care about.
#
@ -414,8 +416,9 @@ class Server(object):
# better off downloading them in parallel!
for builder in matching_builders:
self._gs.download_file(
source_bucket=self._gm_summaries_bucket,
source_path=posixpath.join(builder, self._json_filename),
source_bucket=all_actuals[builder].bucket,
source_path=all_actuals[builder].path,
source_generation=all_actuals[builder].generation,
dest_path=os.path.join(self._actuals_dir, builder,
self._json_filename),
create_subdirs_if_needed=True)
@ -899,6 +902,10 @@ def main():
'to access this server. WARNING: doing so will '
'allow users on other hosts to modify your '
'GM expectations, if combined with --editable.'))
parser.add_argument('--rietveld-issue',
help=('Download json_filename files from latest trybot'
'runs on this codereview.chromium.org issue.'
'Overrides --gm-summaries-bucket.'))
parser.add_argument('--gm-summaries-bucket',
help=('Google Cloud Storage bucket to download '
'JSON_FILENAME files from. '
@ -936,10 +943,17 @@ def main():
else:
config_pairs = None
if args.rietveld_issue:
actuals_source = download_actuals.RietveldIssueActuals(args.rietveld_issue,
args.json_filename)
else:
actuals_source = download_actuals.TipOfTreeActuals(args.gm_summaries_bucket,
args.json_filename)
global _SERVER
_SERVER = Server(actuals_dir=args.actuals_dir,
_SERVER = Server(actuals_source,
actuals_dir=args.actuals_dir,
json_filename=args.json_filename,
gm_summaries_bucket=args.gm_summaries_bucket,
port=args.port, export=args.export, editable=args.editable,
reload_seconds=args.reload, config_pairs=config_pairs,
builder_regex_list=args.builders, boto_file_path=args.boto,