[IMP] runbot: add docker state

docker_is_running is ambiguous since we dont know if it was started once.
This new feature tries to add tools to know if a docker was started or not.

The main reason of this is that sometimes docker_run may take more than 15 seconds
creating unpredictable errors on build when the second step is launched and
the previous one is still running. Hopefully this fix will help to solve this
issue and detect late docker run.
This commit is contained in:
Xavier-Do 2019-12-05 13:47:38 +01:00
parent 83651c408e
commit a3abdf8505
3 changed files with 27 additions and 11 deletions

View File

@ -128,7 +128,8 @@ def docker_run(run_cmd, log_path, build_dir, container_name, exposed_ports=None,
cmd_object = Command([], run_cmd.split(' '), [])
_logger.debug('Docker run command: %s', run_cmd)
logs = open(log_path, 'w')
run_cmd = 'cd /data/build && %s' % run_cmd
run_cmd = 'cd /data/build;touch start-%s;%s;cd /data/build;touch end-%s' % (container_name, run_cmd, container_name)
docker_clear_state(container_name, build_dir) # ensure that no state are remaining
logs.write("Docker command:\n%s\n=================================================\n" % cmd_object)
# create start script
docker_command = [
@ -173,10 +174,21 @@ def docker_stop(container_name):
dstop = subprocess.run(['docker', 'stop', container_name])
def docker_is_running(container_name):
"""Return True if container is still running"""
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
return True if dinspect.returncode == 0 else False
def docker_state(container_name, build_dir):
started = os.path.exists(os.path.join(build_dir, 'start-%s' % container_name))
ended = os.path.exists(os.path.join(build_dir, 'end-%s' % container_name))
return 'END' if ended else 'RUNNING' if started else 'UNKNOWN'
def docker_clear_state(container_name, build_dir):
"""Return True if container is still running"""
if os.path.exists(os.path.join(build_dir, 'start-%s' % container_name)):
os.remove(os.path.join(build_dir, 'start-%s' % container_name))
if os.path.exists(os.path.join(build_dir, 'end-%s' % container_name)):
os.remove(os.path.join(build_dir, 'end-%s' % container_name))
def docker_get_gateway_ip():
"""Return the host ip of the docker default bridge gateway"""
docker_net_inspect = subprocess.run(['docker', 'network', 'inspect', 'bridge'], stdout=subprocess.PIPE)

View File

@ -9,7 +9,7 @@ import subprocess
import time
import datetime
from ..common import dt2time, fqdn, now, grep, uniq_list, local_pgadmin_cursor, s2human, Commit, dest_reg, os
from ..container import docker_build, docker_stop, docker_is_running, Command
from ..container import docker_build, docker_stop, docker_state, Command
from odoo.addons.runbot.models.repo import RunbotException
from odoo import models, fields, api
from odoo.exceptions import UserError, ValidationError
@ -587,7 +587,7 @@ class runbot_build(models.Model):
continue
if build.requested_action == 'wake_up':
if docker_is_running(build._get_docker_name()):
if docker_state(build._get_docker_name(), build._path()) == 'RUNNING':
build.write({'requested_action': False, 'local_state': 'running'})
build._log('wake_up', 'Waking up failed, docker is already running', level='SEPARATOR')
elif not os.path.exists(build._path()):
@ -651,15 +651,19 @@ class runbot_build(models.Model):
build.local_result = build.triggered_result
build._github_status() # failfast
# check if current job is finished
if docker_is_running(build._get_docker_name()):
_docker_state = docker_state(build._get_docker_name(), build._path())
if _docker_state == 'RUNNING':
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
if build.local_state != 'running' and build.job_time > timeout:
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
build._kill(result='killed')
continue
elif build.active_step._is_docker_step() and build.job_time < 15:
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
continue
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
if build.job_time < 60:
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
continue
else:
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
# No job running, make result and select nex job
build_values = {
'job_end': now(),

View File

@ -22,8 +22,8 @@ class TestSchedule(RunbotCase):
})
@patch('odoo.addons.runbot.models.build.os.path.getmtime')
@patch('odoo.addons.runbot.models.build.docker_is_running')
def test_schedule_mark_done(self, mock_running, mock_getmtime):
@patch('odoo.addons.runbot.models.build.docker_state')
def test_schedule_mark_done(self, mock_docker_state, mock_getmtime):
""" Test that results are set even when job_30_run is skipped """
job_end_time = datetime.datetime.now()
mock_getmtime.return_value = job_end_time.timestamp()
@ -41,7 +41,7 @@ class TestSchedule(RunbotCase):
domain = [('repo_id', 'in', (self.repo.id, ))]
domain_host = domain + [('host', '=', 'runbotxx')]
build_ids = self.Build.search(domain_host + [('local_state', 'in', ['testing', 'running'])])
mock_running.return_value = False
mock_docker_state.return_value = 'RUNNING'
self.assertEqual(build.local_state, 'testing')
build_ids._schedule() # too fast, docker not started
self.assertEqual(build.local_state, 'testing')