[test] Add logging to test runner
This adds Python logging to several code locations that previously used to just print. The locations aren't yet complete. The changed code locations should help for investigating hanging test runs. The default level is WARNING for running tests locally, and INFO when called from bots that pass the --swarming parameter. Bug: v8:13113 Change-Id: If3a336703e7c346a5c718f2359b1a80e37e1ca6d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3876183 Commit-Queue: Michael Achenbach <machenbach@chromium.org> Reviewed-by: Alexander Schulze <alexschulze@chromium.org> Cr-Commit-Position: refs/heads/main@{#83006}
This commit is contained in:
parent
519c430b4c
commit
b37760d2c9
@ -7,6 +7,7 @@ from functools import reduce
|
||||
from os.path import dirname as up
|
||||
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing
|
||||
import optparse
|
||||
import os
|
||||
@ -116,6 +117,14 @@ TRY_RELEASE_MODE = ModeConfig(
|
||||
status_mode="debug",
|
||||
)
|
||||
|
||||
# Set up logging. No need to log a date in timestamps as we can get that from
|
||||
# test run start times.
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s %(message)s',
|
||||
datefmt='%H:%M:%S',
|
||||
level=logging.WARNING,
|
||||
)
|
||||
|
||||
class TestRunnerError(Exception):
|
||||
pass
|
||||
|
||||
@ -142,6 +151,8 @@ class BaseTestRunner(object):
|
||||
self.options, args = self._parse_args(parser, sys_args)
|
||||
self.infra_staging = self.options.infra_staging
|
||||
if self.options.swarming:
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
# Swarming doesn't print how isolated commands are called. Lets make
|
||||
# this less cryptic by printing it ourselves.
|
||||
print(' '.join(sys.argv))
|
||||
|
@ -3,6 +3,7 @@
|
||||
# found in the LICENSE file.
|
||||
|
||||
from contextlib import contextmanager
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
@ -145,13 +146,10 @@ class BaseCommand(object):
|
||||
started_as = self.to_string(relative=True)
|
||||
process_text = 'process %d started as:\n %s\n' % (process.pid, started_as)
|
||||
try:
|
||||
print('Attempting to kill ' + process_text)
|
||||
sys.stdout.flush()
|
||||
logging.warning('Attempting to kill %s', process_text)
|
||||
self._kill_process(process)
|
||||
except OSError as e:
|
||||
print(e)
|
||||
print('Unruly ' + process_text)
|
||||
sys.stdout.flush()
|
||||
except OSError:
|
||||
logging.exception('Unruly %s', process_text)
|
||||
|
||||
def __str__(self):
|
||||
return self.to_string()
|
||||
@ -216,11 +214,10 @@ def taskkill_windows(process, verbose=False, force=True):
|
||||
)
|
||||
stdout, stderr = tk.communicate()
|
||||
if verbose:
|
||||
print('Taskkill results for %d' % process.pid)
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
print('Return code: %d' % tk.returncode)
|
||||
sys.stdout.flush()
|
||||
logging.info('Taskkill results for %d', process.pid)
|
||||
logging.info(stdout.decode('utf-8', errors='ignore'))
|
||||
logging.info(stderr.decode('utf-8', errors='ignore'))
|
||||
logging.info('Return code: %d', tk.returncode)
|
||||
|
||||
|
||||
class WindowsCommand(BaseCommand):
|
||||
|
@ -4,6 +4,7 @@
|
||||
# found in the LICENSE file.
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import traceback
|
||||
@ -76,8 +77,7 @@ def Worker(fn, work_queue, done_queue,
|
||||
# SIGINT, SIGTERM or internal hard timeout.
|
||||
break
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
print(">>> EXCEPTION: %s" % e)
|
||||
logging.exception('Unhandled error during worker execution.')
|
||||
done_queue.put(ExceptionResult(e))
|
||||
# When we reach here on normal tear down, all items have been pulled from
|
||||
# the done_queue before and this should have no effect. On fast abort, it's
|
||||
@ -245,9 +245,8 @@ class DefaultExecutionPool(ContextPool):
|
||||
self.advance(gen)
|
||||
except KeyboardInterrupt:
|
||||
assert False, 'Unreachable'
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
print(">>> EXCEPTION: %s" % e)
|
||||
except Exception:
|
||||
logging.exception('Unhandled error during pool execution.')
|
||||
finally:
|
||||
self._terminate()
|
||||
|
||||
@ -321,12 +320,17 @@ class DefaultExecutionPool(ContextPool):
|
||||
self.notify("Draining queues")
|
||||
try:
|
||||
while True: self.work_queue.get(False)
|
||||
except:
|
||||
except Empty:
|
||||
pass
|
||||
except:
|
||||
logging.exception('Error draining work queue.')
|
||||
try:
|
||||
while True: self.done_queue.get(False)
|
||||
except:
|
||||
except Empty:
|
||||
pass
|
||||
except:
|
||||
logging.exception('Error draining done queue.')
|
||||
self.notify("Pool terminated")
|
||||
|
||||
def _get_result_from_queue(self):
|
||||
"""Attempts to get the next result from the queue.
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
@ -140,13 +141,13 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
|
||||
|
||||
# TODO(machenbach): Remove this platform specific hack and implement a proper
|
||||
# feedback channel from the workers, providing which tests are currently run.
|
||||
def _print_processes(self):
|
||||
def _log_processes(self):
|
||||
procs = self.context.list_processes()
|
||||
if procs:
|
||||
self._print('List of processes:')
|
||||
logging.info('List of processes:')
|
||||
for pid, cmd in self.context.list_processes():
|
||||
# Show command with pid, but other process info cut off.
|
||||
self._print('pid: %d cmd: %s' % (pid, cmd))
|
||||
logging.info('pid: %d cmd: %s', pid, cmd)
|
||||
|
||||
def _ensure_delay(self, delay):
|
||||
return time.time() - self._last_printed_time > delay
|
||||
@ -156,11 +157,11 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
|
||||
# Print something every 30 seconds to not get killed by an output
|
||||
# timeout.
|
||||
self._print('Still working...')
|
||||
self._print_processes()
|
||||
self._log_processes()
|
||||
|
||||
def on_event(self, event):
|
||||
self._print(event)
|
||||
self._print_processes()
|
||||
logging.info(event)
|
||||
self._log_processes()
|
||||
|
||||
|
||||
class CIProgressIndicator(VerboseProgressIndicator):
|
||||
|
@ -2,6 +2,7 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import logging
|
||||
import signal
|
||||
|
||||
from . import base
|
||||
@ -16,12 +17,12 @@ class SignalProc(base.TestProcObserver):
|
||||
signal.signal(signal.SIGTERM, self._on_sigterm)
|
||||
|
||||
def _on_ctrlc(self, _signum, _stack_frame):
|
||||
print('>>> Ctrl-C detected, early abort...')
|
||||
logging.warning('Ctrl-C detected, early abort...')
|
||||
self.exit_code = utils.EXIT_CODE_INTERRUPTED
|
||||
self.stop()
|
||||
|
||||
def _on_sigterm(self, _signum, _stack_frame):
|
||||
print('>>> SIGTERM received, early abort...')
|
||||
logging.warning('SIGTERM received, early abort...')
|
||||
self.exit_code = utils.EXIT_CODE_TERMINATED
|
||||
self.stop()
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from . import base
|
||||
@ -31,5 +32,5 @@ class TimeoutProc(base.TestProcObserver):
|
||||
def __on_event(self):
|
||||
if not self.is_stopped:
|
||||
if time.time() - self._start > self._duration_sec:
|
||||
print('>>> Total timeout reached.')
|
||||
logging.info('Total timeout reached.')
|
||||
self.stop()
|
||||
|
@ -4,6 +4,7 @@
|
||||
# found in the LICENSE file.
|
||||
|
||||
import heapq
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
@ -29,10 +30,11 @@ def list_processes_linux():
|
||||
]
|
||||
# Filter strange process with name as out dir.
|
||||
return [p for p in processes if p[1] != OUT_DIR]
|
||||
except Exception as e:
|
||||
# TODO(https://crbug.com/v8/13101): Remove after investigation.
|
||||
print('Fetching process list failed.')
|
||||
print(e)
|
||||
except subprocess.CalledProcessError as e:
|
||||
# Return code 1 means no processes found.
|
||||
if e.returncode != 1:
|
||||
# TODO(https://crbug.com/v8/13101): Remove after investigation.
|
||||
logging.exception('Fetching process list failed.')
|
||||
return []
|
||||
|
||||
|
||||
@ -45,10 +47,10 @@ def kill_processes_linux():
|
||||
return
|
||||
for pid, cmd in list_processes_linux():
|
||||
try:
|
||||
print('Attempting to kill %d - %s' % (pid, cmd))
|
||||
logging.warning('Attempting to kill %d - %s', pid, cmd)
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
except:
|
||||
pass
|
||||
logging.exception('Failed to kill process')
|
||||
|
||||
|
||||
class FixedSizeTopList():
|
||||
|
Loading…
Reference in New Issue
Block a user