[FIX] runbot: handle externally killed docker.

If a docker container is killed from outside (for instance when restarting
the docker service), the start file will still be there but not the end file.
This commit adds a docker_state ('GHOST') specific to a non-running docker
container with a start file, which should be handled like the unknown state:
this state is acceptable for a while, but the build should be killed
if this state remains for too long.
This commit is contained in:
Xavier-Do 2020-01-16 14:25:02 +01:00 committed by Christophe Monniez
parent d75c4f085f
commit 2274f7ab8b
2 changed files with 13 additions and 4 deletions

View File

@ -185,7 +185,16 @@ def docker_is_running(container_name):
def docker_state(container_name, build_dir):
    """Return the state of a container, derived from its marker files.

    The state is computed from the presence of the ``start-<container_name>``
    and ``end-<container_name>`` files in ``build_dir``; when only the start
    file exists, ``docker_is_running`` is consulted as well.

    :param container_name: name of the container, used as marker file suffix
    :param build_dir: directory in which the marker files are looked up
    :return: one of
        ``'END'``     -- the end file exists;
        ``'RUNNING'`` -- the start file exists, no end file, and
        ``docker_is_running`` reports the container as running;
        ``'GHOST'``   -- the start file exists, no end file, but the container
        is not running (e.g. it was killed from outside);
        ``'UNKNOWN'`` -- neither marker file exists.
    """
    started = os.path.exists(os.path.join(build_dir, 'start-%s' % container_name))
    ended = os.path.exists(os.path.join(build_dir, 'end-%s' % container_name))
    # The end file takes precedence: once it exists, the start file and the
    # actual container status are not looked at.
    if ended:
        return 'END'
    if started:
        # A start file without an end file is not enough to say the container
        # is alive; ask docker to tell a live container from a ghost.
        if docker_is_running(container_name):
            return 'RUNNING'
        return 'GHOST'
    return 'UNKNOWN'
def docker_clear_state(container_name, build_dir):
"""Return True if container is still running"""

View File

@ -666,14 +666,14 @@ class runbot_build(models.Model):
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
build._kill(result='killed')
continue
elif _docker_state == 'UNKNOWN' and (build.local_state == 'running' or build.active_step._is_docker_step()):
elif _docker_state in ('UNKNOWN', 'GHOST') and (build.local_state == 'running' or build.active_step._is_docker_step()):
if build.job_time < 5:
continue
elif build.job_time < 60:
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
_logger.debug('container "%s" seems too take a while to start :%s' % (build.job_time, build._get_docker_name()))
continue
else:
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
build._log('_schedule', 'Docker with state %s not started after 60 seconds, skipping' % _docker_state, level='ERROR')
# No job running, make result and select nex job
build_values = {
'job_end': now(),