8e4a44fb92
Bug: skia:10563 Bug: skia:10639 Change-Id: I0b500e4996f8edc0328c1d63d50ac876c231734c Reviewed-on: https://skia-review.googlesource.com/c/skia/+/313079 Commit-Queue: Ravi Mistry <rmistry@google.com> Reviewed-by: Eric Boren <borenet@google.com>
175 lines
5.7 KiB
Go
175 lines
5.7 KiB
Go
// Copyright 2020 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"time"
|
|
|
|
"cloud.google.com/go/datastore"
|
|
|
|
"go.skia.org/infra/autoroll/go/manual"
|
|
"go.skia.org/infra/go/auth"
|
|
"go.skia.org/infra/go/firestore"
|
|
"go.skia.org/infra/go/skerr"
|
|
"go.skia.org/infra/task_driver/go/lib/auth_steps"
|
|
"go.skia.org/infra/task_driver/go/lib/checkout"
|
|
"go.skia.org/infra/task_driver/go/td"
|
|
)
|
|
|
|
const (
|
|
MinWaitDuration = 4 * time.Minute
|
|
)
|
|
|
|
var (
|
|
canaryRollNotCreatedErr = errors.New("Canary roll could not be created. Ask the trooper to investigate (or directly ping rmistry@) if this happens consistently.")
|
|
canaryRollSuccessTooQuicklyErr = fmt.Errorf("Canary roll returned success in less than %s. Failing canary due to skbug.com/10563.", MinWaitDuration)
|
|
|
|
// Lets add the roll link only once to step data.
|
|
addedRollLinkStepData = false
|
|
)
|
|
|
|
func main() {
|
|
var (
|
|
projectId = flag.String("project_id", "", "ID of the Google Cloud project.")
|
|
taskId = flag.String("task_id", "", "ID of this task.")
|
|
taskName = flag.String("task_name", "", "Name of the task.")
|
|
output = flag.String("o", "", "If provided, dump a JSON blob of step data to the given file. Prints to stdout if '-' is given.")
|
|
local = flag.Bool("local", true, "True if running locally (as opposed to on the bots)")
|
|
|
|
checkoutFlags = checkout.SetupFlags(nil)
|
|
|
|
rollerName = flag.String("roller_name", "", "The roller we will use to create the canary with.")
|
|
)
|
|
ctx := td.StartRun(projectId, taskId, taskName, output, local)
|
|
defer td.EndRun(ctx)
|
|
if *rollerName == "" {
|
|
td.Fatalf(ctx, "--roller_name must be specified")
|
|
}
|
|
|
|
rs, err := checkout.GetRepoState(checkoutFlags)
|
|
if err != nil {
|
|
td.Fatal(ctx, skerr.Wrap(err))
|
|
}
|
|
if rs.Issue == "" || rs.Patchset == "" {
|
|
td.Fatalf(ctx, "This task driver should be run only as a try bot")
|
|
}
|
|
|
|
// Create token source with scope for datastore access.
|
|
ts, err := auth_steps.Init(ctx, *local, auth.SCOPE_USERINFO_EMAIL, datastore.ScopeDatastore)
|
|
if err != nil {
|
|
td.Fatal(ctx, skerr.Wrap(err))
|
|
}
|
|
|
|
// Add documentation link for canary rolls.
|
|
td.StepText(ctx, "Canary roll doc", "https://goto.google.com/autoroller-canary-bots")
|
|
|
|
// Instantiate firestore DB.
|
|
manualRollDB, err := manual.NewDBWithParams(ctx, firestore.FIRESTORE_PROJECT, "production", ts)
|
|
if err != nil {
|
|
td.Fatal(ctx, skerr.Wrap(err))
|
|
}
|
|
|
|
// Retry if canary roll could not be created or if canary roll returned
|
|
// success too quickly (skbug.com/10563).
|
|
retryAttempts := 3
|
|
for retry := 0; ; retry++ {
|
|
retryText := ""
|
|
if retry > 0 {
|
|
retryText = fmt.Sprintf(" (Retry #%d)", retry)
|
|
}
|
|
|
|
req := manual.ManualRollRequest{
|
|
Requester: *rollerName,
|
|
RollerName: *rollerName,
|
|
Status: manual.STATUS_PENDING,
|
|
Timestamp: firestore.FixTimestamp(time.Now()),
|
|
Revision: rs.GetPatchRef(),
|
|
|
|
DryRun: true,
|
|
NoEmail: true,
|
|
NoResolveRevision: true,
|
|
}
|
|
if err := td.Do(ctx, td.Props(fmt.Sprintf("Trigger canary roll%s", retryText)).Infra(), func(ctx context.Context) error {
|
|
return manualRollDB.Put(&req)
|
|
}); err != nil {
|
|
// Immediately fail for errors in triggering.
|
|
td.Fatal(ctx, skerr.Wrap(err))
|
|
}
|
|
|
|
if err := waitForCanaryRoll(ctx, manualRollDB, req.Id, fmt.Sprintf("Wait for canary roll%s", retryText)); err != nil {
|
|
// Retry these errors.
|
|
if err == canaryRollNotCreatedErr || err == canaryRollSuccessTooQuicklyErr {
|
|
if retry >= (retryAttempts - 1) {
|
|
td.Fatal(ctx, skerr.Wrapf(err, "failed inspite of 3 retries"))
|
|
}
|
|
time.Sleep(time.Minute)
|
|
continue
|
|
}
|
|
// Immediately fail for all other errors.
|
|
td.Fatal(ctx, skerr.Wrap(err))
|
|
} else {
|
|
// The canary roll was successful, break out of the
|
|
// retry loop.
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func waitForCanaryRoll(parentCtx context.Context, manualRollDB manual.DB, rollId, stepName string) error {
|
|
ctx := td.StartStep(parentCtx, td.Props(stepName))
|
|
defer td.EndStep(ctx)
|
|
startTime := time.Now()
|
|
|
|
// For writing to the step's log stream.
|
|
stdout := td.NewLogStream(ctx, "stdout", td.Info)
|
|
for {
|
|
roll, err := manualRollDB.Get(ctx, rollId)
|
|
if err != nil {
|
|
return td.FailStep(ctx, fmt.Errorf("Could not find canary roll with ID: %s", rollId))
|
|
}
|
|
cl := roll.Url
|
|
var rollStatus string
|
|
if cl == "" {
|
|
rollStatus = fmt.Sprintf("Canary roll has status %s", roll.Status)
|
|
} else {
|
|
if !addedRollLinkStepData {
|
|
// Add the roll link to both the current step and it's parent.
|
|
td.StepText(ctx, "Canary roll CL", cl)
|
|
td.StepText(parentCtx, "Canary roll CL", cl)
|
|
addedRollLinkStepData = true
|
|
}
|
|
rollStatus = fmt.Sprintf("Canary roll [ %s ] has status %s", roll.Url, roll.Status)
|
|
}
|
|
if _, err := stdout.Write([]byte(rollStatus)); err != nil {
|
|
return td.FailStep(ctx, fmt.Errorf("Could not write to stdout: %s", err))
|
|
}
|
|
|
|
if roll.Status == manual.STATUS_COMPLETE {
|
|
if roll.Result == manual.RESULT_SUCCESS {
|
|
// This is a hopefully temperory workaround for skbug.com/10563. Sometimes
|
|
// Canary-Chromium returns success immediately after creating a change and
|
|
// before the tryjobs have a chance to run. If we have waited
|
|
// for < MinWaitDuration then be cautious and assume failure.
|
|
if time.Now().Before(startTime.Add(MinWaitDuration)) {
|
|
return td.FailStep(ctx, canaryRollSuccessTooQuicklyErr)
|
|
}
|
|
return nil
|
|
} else if roll.Result == manual.RESULT_FAILURE {
|
|
if cl == "" {
|
|
return td.FailStep(ctx, canaryRollNotCreatedErr)
|
|
}
|
|
return td.FailStep(ctx, fmt.Errorf("Canary roll [ %s ] failed", cl))
|
|
} else if roll.Result == manual.RESULT_UNKNOWN {
|
|
return td.FailStep(ctx, fmt.Errorf("Canary roll [ %s ] completed with an unknown result", cl))
|
|
}
|
|
}
|
|
time.Sleep(30 * time.Second)
|
|
}
|
|
}
|