Reland "[test] Print hanging tests on linux on test-runner termination"
This is a reland of 3fc9663159
The private method _on_event in TimeoutProc is now renamed to __on_event to be
truly private, so it no longer overrides the new TestProc._on_event(event) hook.
Original change's description:
> [test] Print hanging tests on linux on test-runner termination
>
> This will print the list of processes still running before and after
> joining workers during termination. This will help debugging hanging
> tests during flake-bisect or with num-fuzzer, which both terminate
> on total timeout and currently still sometimes hang without printing
> processes.
>
> Bug: v8:8292
> Change-Id: I124b65fa35b8d7a6aa198fcf50f2c20df94dc51a
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1735312
> Reviewed-by: Tamer Tas <tmrts@chromium.org>
> Commit-Queue: Michael Achenbach <machenbach@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63065}
Bug: v8:8292
Change-Id: Ibad1172666d6f4d2c07884a54edfe9d6499b57fe
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1735318
Reviewed-by: Tamer Tas <tmrts@chromium.org>
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63072}
This commit is contained in:
parent
8c3da74f18
commit
cdfadf4a99
@ -115,7 +115,15 @@ class Pool():
|
||||
# Necessary to not overflow the queue's pipe if a keyboard interrupt happens.
|
||||
BUFFER_FACTOR = 4
|
||||
|
||||
def __init__(self, num_workers, heartbeat_timeout=1):
|
||||
def __init__(self, num_workers, heartbeat_timeout=1, notify_fun=None):
|
||||
"""
|
||||
Args:
|
||||
num_workers: Number of worker processes to run in parallel.
|
||||
heartbeat_timeout: Timeout in seconds for waiting for results. Each time
|
||||
the timeout is reached, a heartbeat is signalled and timeout is reset.
|
||||
notify_fun: Callable called to signal some events like termination. The
|
||||
event name is passed as string.
|
||||
"""
|
||||
self.num_workers = num_workers
|
||||
self.processes = []
|
||||
self.terminated = False
|
||||
@ -130,6 +138,7 @@ class Pool():
|
||||
# work_queue.
|
||||
self.processing_count = 0
|
||||
self.heartbeat_timeout = heartbeat_timeout
|
||||
self.notify = notify_fun or (lambda x: x)
|
||||
|
||||
# Disable sigint and sigterm to prevent subprocesses from capturing the
|
||||
# signals.
|
||||
@ -261,11 +270,13 @@ class Pool():
|
||||
for p in self.processes:
|
||||
os.kill(p.pid, signal.SIGTERM)
|
||||
|
||||
self.notify("Joining workers")
|
||||
for p in self.processes:
|
||||
p.join()
|
||||
|
||||
# Drain the queues to prevent stderr chatter when queues are garbage
|
||||
# collected.
|
||||
self.notify("Draining queues")
|
||||
try:
|
||||
while True: self.work_queue.get(False)
|
||||
except:
|
||||
|
@ -109,6 +109,19 @@ class TestProc(object):
|
||||
|
||||
### Communication
|
||||
|
||||
def notify_previous(self, event):
|
||||
self._on_event(event)
|
||||
if self._prev_proc:
|
||||
self._prev_proc.notify_previous(event)
|
||||
|
||||
def _on_event(self, event):
|
||||
"""Called when processors to the right signal events, e.g. termination.
|
||||
|
||||
Args:
|
||||
event: A text describing the signalled event.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _send_test(self, test):
|
||||
"""Helper method for sending test to the next processor."""
|
||||
return self._next_proc.next_test(test)
|
||||
@ -120,7 +133,6 @@ class TestProc(object):
|
||||
self._prev_proc.result_for(test, result)
|
||||
|
||||
|
||||
|
||||
class TestProcObserver(TestProc):
|
||||
"""Processor used for observing the data."""
|
||||
def __init__(self):
|
||||
|
@ -45,7 +45,7 @@ class ExecutionProc(base.TestProc):
|
||||
|
||||
def __init__(self, jobs, outproc_factory=None):
|
||||
super(ExecutionProc, self).__init__()
|
||||
self._pool = pool.Pool(jobs)
|
||||
self._pool = pool.Pool(jobs, notify_fun=self.notify_previous)
|
||||
self._outproc_factory = outproc_factory or (lambda t: t.output_proc)
|
||||
self._tests = {}
|
||||
|
||||
|
@ -149,6 +149,10 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
|
||||
self._print('Still working...')
|
||||
self._print_processes_linux()
|
||||
|
||||
def _on_event(self, event):
|
||||
self._print(event)
|
||||
self._print_processes_linux()
|
||||
|
||||
|
||||
class DotsProgressIndicator(SimpleProgressIndicator):
|
||||
def __init__(self):
|
||||
|
@ -14,15 +14,15 @@ class TimeoutProc(base.TestProcObserver):
|
||||
self._start = time.time()
|
||||
|
||||
def _on_next_test(self, test):
|
||||
self._on_event()
|
||||
self.__on_event()
|
||||
|
||||
def _on_result_for(self, test, result):
|
||||
self._on_event()
|
||||
self.__on_event()
|
||||
|
||||
def _on_heartbeat(self):
|
||||
self._on_event()
|
||||
self.__on_event()
|
||||
|
||||
def _on_event(self):
|
||||
def __on_event(self):
|
||||
if not self.is_stopped:
|
||||
if time.time() - self._start > self._duration_sec:
|
||||
print('>>> Total timeout reached.')
|
||||
|
Loading…
Reference in New Issue
Block a user