From 2274f7ab8b3c0fb09c0cf82fbc9d40f06115d4d9 Mon Sep 17 00:00:00 2001 From: Xavier-Do Date: Thu, 16 Jan 2020 14:25:02 +0100 Subject: [PATCH] [FIX] runbot: handle externally killed docker. If a docker is killed from outside, the start file will still be there but not the end file (when restarting the docker service, for instance). This commit adds a docker_state specific to a non-running docker with a start file, which should be handled like the unknown state: this state is acceptable for a while, but the build should be killed if this state remains for too long. --- runbot/container.py | 11 ++++++++++- runbot/models/build.py | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/runbot/container.py b/runbot/container.py index 1b51fa50..bc01bcf5 100644 --- a/runbot/container.py +++ b/runbot/container.py @@ -185,7 +185,16 @@ def docker_is_running(container_name): def docker_state(container_name, build_dir): started = os.path.exists(os.path.join(build_dir, 'start-%s' % container_name)) ended = os.path.exists(os.path.join(build_dir, 'end-%s' % container_name)) - return 'END' if ended else 'RUNNING' if started else 'UNKNOWN' + if ended: + return 'END' + + if started: + if docker_is_running(container_name): + return 'RUNNING' + else: + return 'GHOST' + + return 'UNKNOWN' def docker_clear_state(container_name, build_dir): """Return True if container is still running""" diff --git a/runbot/models/build.py b/runbot/models/build.py index 40be4d06..63ada6ac 100644 --- a/runbot/models/build.py +++ b/runbot/models/build.py @@ -666,14 +666,14 @@ class runbot_build(models.Model): build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time)) build._kill(result='killed') continue - elif _docker_state == 'UNKNOWN' and (build.local_state == 'running' or build.active_step._is_docker_step()): + elif _docker_state in ('UNKNOWN', 'GHOST') and (build.local_state == 'running' or build.active_step._is_docker_step()): if 
build.job_time < 5: continue elif build.job_time < 60: - _logger.debug('container "%s" seems too take a while to start', build._get_docker_name()) + _logger.debug('container "%s" seems too take a while to start :%s' % (build.job_time, build._get_docker_name())) continue else: - build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR') + build._log('_schedule', 'Docker with state %s not started after 60 seconds, skipping' % _docker_state, level='ERROR') # No job running, make result and select nex job build_values = { 'job_end': now(),