skia2/infra/bots/recipe_modules/swarming/api.py
borenet 1436a09e1f Re-organize Skia recipes
Break Skia recipe module into:
  - skia_vars: defines and stores variables (e.g. paths)
  - skia_step: utilities for running Skia steps
  - skia_flavor: flavor-specific stuff
  - skia: top-level setup, checkout, test/perf steps, etc.

This establishes a saner dependency structure for the recipes; skia_vars
is at the bottom level, skia_step depends on it, skia_flavor depends on
both of them, skia depends on all of the above, and the recipes
themselves may depend on any or all of them.

Next steps:
  - Merge buildbot_spec into skia_vars
  - Move test_steps and perf_steps from skia recipe_module into
    swarm_test and swarm_perf recipes
  - Cleaner checkout_steps process

BUG=skia:5578
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2198173002

Review-Url: https://codereview.chromium.org/2198173002
2016-08-03 08:23:10 -07:00

295 lines
12 KiB
Python

# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from recipe_engine import recipe_api
import shlex
# Fallback limits applied by trigger_swarming_tasks() when the caller does not
# supply a value. Each is written as a product of 60s so the hour/minute count
# is readable.
# NOTE(review): presumably expressed in seconds -- confirm against the
# swarming recipe module.
DEFAULT_TASK_EXPIRATION = 20 * 60 * 60
DEFAULT_TASK_TIMEOUT = 4 * 60 * 60
DEFAULT_IO_TIMEOUT = 40 * 60

# URL template for a task's page on Milo; formatted with a swarming task id.
MILO_LOG_LINK = 'https://luci-milo.appspot.com/swarming/task/%s'
class SkiaSwarmingApi(recipe_api.RecipeApi):
"""Provides steps to run Skia tasks on swarming bots."""
@property
def swarming_temp_dir(self):
  """Path where artifacts like isolate file and json output will be stored.

  Returns:
    The 'swarming_temp_dir' child of the 'slave_build' path.
  """
  return self.m.path['slave_build'].join('swarming_temp_dir')
@property
def tasks_output_dir(self):
  """Directory where the outputs of the swarming tasks will be stored.

  Returns:
    The 'outputs' child of swarming_temp_dir.
  """
  return self.swarming_temp_dir.join('outputs')
def isolated_file_path(self, task_name):
  """Get the path to the given task's .isolated file.

  Args:
    task_name: str. Name of the task; embedded in the file name.

  Returns:
    Path 'skia-task-<task_name>.isolated' inside swarming_temp_dir.
  """
  return self.swarming_temp_dir.join('skia-task-%s.isolated' % task_name)
def setup(self, luci_go_dir, swarming_rev=None):
"""Performs setup steps for swarming."""
self.m.swarming_client.checkout(revision=swarming_rev)
self.m.swarming.check_client_version(step_test_data=(0, 8, 6))
self.setup_go_isolate(luci_go_dir)
self.m.swarming.add_default_tag('allow_milo:1')
# TODO(rmistry): Remove once the Go binaries are moved to recipes or buildbot.
def setup_go_isolate(self, luci_go_dir):
  """Generates and puts in place the isolate Go binary.

  Runs download_from_google_storage once per platform against the
  'chromium-luci' bucket, then replaces <slave_build>/luci-go with a copy of
  the whole luci_go_dir tree.

  Args:
    luci_go_dir: path obj. Directory whose 'linux64', 'mac64' and 'win64'
        children are passed to the downloader via -d.
  """
  depot_tools_path = self.m.depot_tools.package_repo_resource()
  # Put depot_tools first on PATH for the download steps; presumably that is
  # where download_from_google_storage lives.
  env = {'PATH': self.m.path.pathsep.join([
      str(depot_tools_path), '%(PATH)s'])}

  # (step name suffix, --platform value, subdirectory of luci_go_dir).
  platforms = (
      ('linux', 'linux*', 'linux64'),
      ('mac', 'darwin', 'mac64'),
      ('win', 'win32', 'win64'),
  )
  for label, platform, subdir in platforms:
    self.m.step(
        'download luci-go %s' % label,
        ['download_from_google_storage', '--no_resume',
         '--platform=%s' % platform, '--no_auth', '--bucket', 'chromium-luci',
         '-d', luci_go_dir.join(subdir)],
        env=env)

  # Copy binaries to the expected location.
  dest = self.m.path['slave_build'].join('luci-go')
  self.m.run.rmtree(dest)
  self.m.file.copytree('Copy Go binary',
                       source=luci_go_dir,
                       dest=dest)
def isolate_and_trigger_task(
    self, isolate_path, isolate_base_dir, task_name, isolate_vars,
    swarm_dimensions, isolate_blacklist=None, extra_isolate_hashes=None,
    idempotent=False, store_output=True, extra_args=None, expiration=None,
    hard_timeout=None, io_timeout=None, cipd_packages=None):
  """Isolate inputs and trigger the task to run.

  Args:
    isolate_path: Path to the isolate file; forwarded to isolate_task().
    isolate_base_dir: Base dir of the isolate; forwarded to isolate_task().
    task_name: str. Name of the task.
    isolate_vars: Extra variables forwarded to isolate_task().
    swarm_dimensions: dict. Dimensions to run the task on. Its 'os' entry
        selects the isolate OS type; 'linux' is used when it is absent.
    isolate_blacklist: Forwarded to isolate_task() as blacklist.
    extra_isolate_hashes: Forwarded to isolate_task() as extra_hashes.
    idempotent, store_output, extra_args, expiration, hard_timeout,
    io_timeout, cipd_packages: Forwarded unchanged to
        trigger_swarming_tasks().

  Returns:
    The single task returned by trigger_swarming_tasks().
  """
  os_type = swarm_dimensions.get('os', 'linux')
  isolated_hash = self.isolate_task(
      isolate_path, isolate_base_dir, os_type, task_name, isolate_vars,
      blacklist=isolate_blacklist, extra_hashes=extra_isolate_hashes)
  tasks = self.trigger_swarming_tasks(
      [(task_name, isolated_hash)],
      swarm_dimensions,
      idempotent=idempotent,
      store_output=store_output,
      extra_args=extra_args,
      expiration=expiration,
      hard_timeout=hard_timeout,
      io_timeout=io_timeout,
      cipd_packages=cipd_packages)
  # Exactly one (name, hash) pair went in, so exactly one task must come out.
  assert len(tasks) == 1
  return tasks[0]
def isolate_task(self, isolate_path, base_dir, os_type, task_name,
                 isolate_vars, blacklist=None, extra_hashes=None):
  """Isolate inputs for the given task.

  Args:
    isolate_path: Path to the isolate file.
    base_dir: Dir that is the base of all paths in the isolate file.
    os_type: str. The OS type to use when archiving the isolate file.
    task_name: str. Name of the task being isolated.
    isolate_vars: dict. Extra variables to pass to isolate.
    blacklist: Optional list of regular expressions for files/directories
        not to archive.
    extra_hashes: Optional list of str. Hashes to add as includes of the
        task's .isolated file.

  Returns:
    The isolated hash of the task. When extra_hashes is given this is the
    hash returned by add_isolated_includes() rather than the one from
    batcharchive().
  """
  self.create_isolated_gen_json(isolate_path, base_dir, os_type,
                                task_name, isolate_vars,
                                blacklist=blacklist)
  hashes = self.batcharchive([task_name])
  # One target was archived, so one (task_name, hash) pair is expected.
  assert len(hashes) == 1
  if extra_hashes:
    return self.add_isolated_includes(task_name, extra_hashes)
  return hashes[0][1]
def create_isolated_gen_json(self, isolate_path, base_dir, os_type,
                             task_name, extra_variables, blacklist=None):
  """Creates an isolated.gen.json file (used by the isolate recipe module).

  The file is written to swarming_temp_dir as '<task_name>.isolated.gen.json'.

  Args:
    isolate_path: path obj. Path to the isolate file.
    base_dir: path obj. Dir that is the base of all paths in the isolate file.
    os_type: str. The OS type to use when archiving the isolate file.
        Eg: linux.
    task_name: str. The isolated.gen.json file will be suffixed by this str.
    extra_variables: dict of str to str. The extra vars to pass to isolate.
        Eg: {'SLAVE_NUM': '1', 'MASTER': 'ChromiumPerfFYI'}
    blacklist: list of regular expressions indicating which files/directories
        not to archive.
  """
  self.m.file.makedirs('swarming tmp dir', self.swarming_temp_dir)
  isolated_path = self.isolated_file_path(task_name)
  isolate_args = [
      '--isolate', isolate_path,
      '--isolated', isolated_path,
      '--config-variable', 'OS', os_type,
  ]
  for pattern in blacklist or ():
    isolate_args.extend(['--blacklist', pattern])
  # items() rather than iteritems() so this also runs under Python 3.
  for key, value in extra_variables.items():
    isolate_args.extend(['--extra-variable', key, value])
  isolated_gen_dict = {
      'version': 1,
      'dir': base_dir,
      'args': isolate_args,
  }
  isolated_gen_json = self.swarming_temp_dir.join(
      '%s.isolated.gen.json' % task_name)
  self.m.file.write(
      'Write %s.isolated.gen.json' % task_name,
      isolated_gen_json,
      self.m.json.dumps(isolated_gen_dict, indent=4),
  )
def batcharchive(self, targets):
  """Calls batcharchive on the isolated.gen.json files of the given targets.

  Args:
    targets: list of str. The suffixes of the isolated.gen.json files to
             archive.

  Returns:
    list of tuples containing (task_name, swarming_hash).
  """
  step_result = self.m.isolate.isolate_tests(
      verbose=True,  # To avoid no output timeouts.
      build_dir=self.swarming_temp_dir,
      targets=targets)
  swarm_hashes = step_result.presentation.properties['swarm_hashes']
  # Materialize as a list: callers index the result (hashes[0][1]), which a
  # Python 3 dict view does not support.
  return list(swarm_hashes.items())
def add_isolated_includes(self, task_name, include_hashes):
  """Add the hashes to the task's .isolated file, return new .isolated hash.

  Args:
    task_name: str. Name of the task to which to add the given hashes.
    include_hashes: list of str. Hashes of the new includes.

  Returns:
    Updated hash of the .isolated file: the first whitespace-separated token
    printed by 'isolateserver.py archive'.
  """
  isolated_file = self.isolated_file_path(task_name)
  # The inline program appends each hash to the 'includes' list of the
  # .isolated JSON file and rewrites it in place. It is indented to match this
  # method; NOTE(review): relies on python.inline dedenting the program before
  # running it -- confirm against the recipe engine's python module.
  self.m.python.inline('add_isolated_input', program="""
    import json
    import sys
    with open(sys.argv[1]) as f:
      isolated = json.load(f)
    if not isolated.get('includes'):
      isolated['includes'] = []
    for h in sys.argv[2:]:
      isolated['includes'].append(h)
    with open(sys.argv[1], 'w') as f:
      json.dump(isolated, f, sort_keys=True)
  """, args=[isolated_file] + list(include_hashes))
  isolateserver = self.m.swarming_client.path.join('isolateserver.py')
  r = self.m.python('upload new .isolated file for %s' % task_name,
                    script=isolateserver,
                    args=['archive', '--isolate-server',
                          self.m.isolate.isolate_server, isolated_file],
                    stdout=self.m.raw_io.output())
  return shlex.split(r.stdout)[0]
def trigger_swarming_tasks(
    self, swarm_hashes, dimensions, idempotent=False, store_output=True,
    extra_args=None, expiration=None, hard_timeout=None, io_timeout=None,
    cipd_packages=None):
  """Triggers swarming tasks using swarm hashes.

  Args:
    swarm_hashes: list of (task_name, swarm_hash) tuples. One task is
        created per pair, using the hash from the isolate server.
    dimensions: dict of str to str. The dimensions to run the task on.
                Eg: {'os': 'Ubuntu', 'gpu': '10de', 'pool': 'Skia'}
    idempotent: bool. Whether or not to de-duplicate tasks.
    store_output: bool. Whether task output should be stored. When set, each
        task's output dir is tasks_output_dir/<task_name>.
    extra_args: list of str. Extra arguments to pass to the task.
    expiration: int. Task will expire if not picked up within this time.
                DEFAULT_TASK_EXPIRATION is used if this argument is None
                (or any other falsy value, e.g. 0).
    hard_timeout: int. Task will timeout if not completed within this time.
                  DEFAULT_TASK_TIMEOUT is used if this argument is None
                  (or any other falsy value, e.g. 0).
    io_timeout: int. Task will timeout if there is no output within this time.
                DEFAULT_IO_TIMEOUT is used if this argument is None
                (or any other falsy value, e.g. 0).
    cipd_packages: CIPD packages which these tasks depend on.

  Returns:
    List of swarming.SwarmingTask instances.
  """
  # The revision property does not depend on the task, so look it up once.
  revision = self.m.properties.get('revision')
  swarming_tasks = []
  for task_name, swarm_hash in swarm_hashes:
    swarming_task = self.m.swarming.task(
        title=task_name,
        cipd_packages=cipd_packages,
        isolated_hash=swarm_hash)
    if store_output:
      swarming_task.task_output_dir = self.tasks_output_dir.join(task_name)
    swarming_task.dimensions = dimensions
    swarming_task.idempotent = idempotent
    swarming_task.priority = 90
    swarming_task.expiration = (
        expiration if expiration else DEFAULT_TASK_EXPIRATION)
    swarming_task.hard_timeout = (
        hard_timeout if hard_timeout else DEFAULT_TASK_TIMEOUT)
    swarming_task.io_timeout = (
        io_timeout if io_timeout else DEFAULT_IO_TIMEOUT)
    if extra_args:
      swarming_task.extra_args = extra_args
    if revision:
      swarming_task.tags.add('revision:%s' % revision)
    swarming_tasks.append(swarming_task)
  step_results = self.m.swarming.trigger(swarming_tasks)
  for step_result in step_results:
    self._add_log_links(step_result)
  return swarming_tasks
def collect_swarming_task(self, swarming_task):
  """Collects the specified swarming task.

  Args:
    swarming_task: An instance of swarming.SwarmingTask.

  Returns:
    The result of swarming.collect_task() for the task.

  Raises:
    InfraFailure: if collection failed and the first shard's state is
        EXPIRED, TIMED_OUT, BOT_DIED or CANCELED.
    StepFailure: any other collection failure, re-raised unchanged.
  """
  try:
    return self.m.swarming.collect_task(swarming_task)
  except self.m.step.StepFailure as e:  # pragma: no cover
    step_result = self.m.step.active_result
    # Change step result to Infra failure if the swarming task failed due to
    # expiration, time outs, bot crashes or task cancelations.
    # Infra failures have step.EXCEPTION.
    states_infra_failure = (
        self.m.swarming.State.EXPIRED, self.m.swarming.State.TIMED_OUT,
        self.m.swarming.State.BOT_DIED, self.m.swarming.State.CANCELED)
    # A failed collect may carry no JSON output or no shards; treat that as
    # "state unknown" so the original StepFailure is re-raised rather than
    # being masked by a KeyError/TypeError from this handler.
    output = step_result.json.output or {}
    shards = output.get('shards') or [{}]
    if shards[0].get('state') in states_infra_failure:
      step_result.presentation.status = self.m.step.EXCEPTION
      raise self.m.step.InfraFailure(e.name, step_result)
    raise
  finally:
    # Add log link, on success and on failure alike.
    self._add_log_links(self.m.step.active_result)
def collect_swarming_task_isolate_hash(self, swarming_task):
  """Wait for the given swarming task to finish and return its output hash.

  Args:
    swarming_task: An instance of swarming.SwarmingTask.

  Returns:
    the hash of the isolate output of the task, read from the first shard's
    'isolated_out' entry in the collect step's JSON output.
  """
  res = self.collect_swarming_task(swarming_task)
  first_shard = res.json.output['shards'][0]
  return first_shard['isolated_out']['isolated']
def _add_log_links(self, step_result):
  """Add Milo log links to all shards in the step.

  Task ids are read from the 'shards' list of the step's JSON output when it
  is present and non-empty, otherwise from its 'tasks' dict. A step with no
  JSON output gets no links.

  Args:
    step_result: Step result whose json.output is inspected and whose
        presentation.links is updated in place.
  """
  # Tolerate a missing JSON output (e.g. a step that failed before producing
  # any) so that callers invoking this from a 'finally' clause do not mask
  # the original failure with an AttributeError.
  output = step_result.json.output or {}
  shards = output.get('shards')
  if shards:
    ids = [shard['id'] for shard in shards]
  else:
    # Only the task dicts are needed; values() also works on Python 3,
    # unlike iteritems().
    ids = [task['task_id'] for task in (output.get('tasks') or {}).values()]
  total = len(ids)
  for idx, task_id in enumerate(ids):
    k = 'view steps on Milo'
    # With a single id the plain label is used; otherwise each shard gets a
    # distinct label so the links do not overwrite one another.
    if total > 1:  # pragma: nocover
      k += ' (shard index %d, %d total)' % (idx, total)
    step_result.presentation.links[k] = MILO_LOG_LINK % task_id