diff --git a/runbot/container.py b/runbot/container.py index e18a45a5..e58779ce 100644 --- a/runbot/container.py +++ b/runbot/container.py @@ -128,7 +128,8 @@ def docker_run(run_cmd, log_path, build_dir, container_name, exposed_ports=None, cmd_object = Command([], run_cmd.split(' '), []) _logger.debug('Docker run command: %s', run_cmd) logs = open(log_path, 'w') - run_cmd = 'cd /data/build && %s' % run_cmd + run_cmd = 'cd /data/build;touch start-%s;%s;cd /data/build;touch end-%s' % (container_name, run_cmd, container_name) + docker_clear_state(container_name, build_dir) # ensure that no state are remaining logs.write("Docker command:\n%s\n=================================================\n" % cmd_object) # create start script docker_command = [ @@ -173,10 +174,21 @@ def docker_stop(container_name): dstop = subprocess.run(['docker', 'stop', container_name]) def docker_is_running(container_name): - """Return True if container is still running""" dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) return True if dinspect.returncode == 0 else False +def docker_state(container_name, build_dir): + started = os.path.exists(os.path.join(build_dir, 'start-%s' % container_name)) + ended = os.path.exists(os.path.join(build_dir, 'end-%s' % container_name)) + return 'END' if ended else 'RUNNING' if started else 'UNKNOWN' + +def docker_clear_state(container_name, build_dir): + """Return True if container is still running""" + if os.path.exists(os.path.join(build_dir, 'start-%s' % container_name)): + os.remove(os.path.join(build_dir, 'start-%s' % container_name)) + if os.path.exists(os.path.join(build_dir, 'end-%s' % container_name)): + os.remove(os.path.join(build_dir, 'end-%s' % container_name)) + def docker_get_gateway_ip(): """Return the host ip of the docker default bridge gateway""" docker_net_inspect = subprocess.run(['docker', 'network', 'inspect', 'bridge'], stdout=subprocess.PIPE) diff --git a/runbot/models/build.py b/runbot/models/build.py index d593de38..b3e1b14d 100644 --- a/runbot/models/build.py +++ b/runbot/models/build.py @@ -9,7 +9,7 @@ import subprocess import time import datetime from ..common import dt2time, fqdn, now, grep, uniq_list, local_pgadmin_cursor, s2human, Commit, dest_reg, os -from ..container import docker_build, docker_stop, docker_is_running, Command +from ..container import docker_build, docker_stop, docker_state, Command from odoo.addons.runbot.models.repo import RunbotException from odoo import models, fields, api from odoo.exceptions import UserError, ValidationError @@ -587,7 +587,7 @@ class runbot_build(models.Model): continue if build.requested_action == 'wake_up': - if docker_is_running(build._get_docker_name()): + if docker_state(build._get_docker_name(), build._path()) == 'RUNNING': build.write({'requested_action': False, 'local_state': 'running'}) build._log('wake_up', 'Waking up failed, docker is already running', level='SEPARATOR') elif not os.path.exists(build._path()): @@ -651,15 +651,19 @@ class runbot_build(models.Model): build.local_result = build.triggered_result build._github_status() # failfast # check if current job is finished - if docker_is_running(build._get_docker_name()): + _docker_state = docker_state(build._get_docker_name(), build._path()) + if _docker_state == 'RUNNING': timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000))) if build.local_state != 'running' and build.job_time > timeout: build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time)) build._kill(result='killed') continue - elif build.active_step._is_docker_step() and build.job_time < 15: - _logger.debug('container "%s" seems too take a while to start', build._get_docker_name()) - continue + elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step(): + if build.job_time < 60: + _logger.debug('container "%s" seems too take a while to start', build._get_docker_name()) + continue + else: + build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR') # No job running, make result and select nex job build_values = { 'job_end': now(), diff --git a/runbot/tests/test_schedule.py b/runbot/tests/test_schedule.py index 194a5d4f..f08b3642 100644 --- a/runbot/tests/test_schedule.py +++ b/runbot/tests/test_schedule.py @@ -22,8 +22,8 @@ class TestSchedule(RunbotCase): }) @patch('odoo.addons.runbot.models.build.os.path.getmtime') - @patch('odoo.addons.runbot.models.build.docker_is_running') - def test_schedule_mark_done(self, mock_running, mock_getmtime): + @patch('odoo.addons.runbot.models.build.docker_state') + def test_schedule_mark_done(self, mock_docker_state, mock_getmtime): """ Test that results are set even when job_30_run is skipped """ job_end_time = datetime.datetime.now() mock_getmtime.return_value = job_end_time.timestamp() @@ -41,7 +41,7 @@ class TestSchedule(RunbotCase): domain = [('repo_id', 'in', (self.repo.id, ))] domain_host = domain + [('host', '=', 'runbotxx')] build_ids = self.Build.search(domain_host + [('local_state', 'in', ['testing', 'running'])]) - mock_running.return_value = False + mock_docker_state.return_value = 'RUNNING' self.assertEqual(build.local_state, 'testing') build_ids._schedule() # too fast, docker not started self.assertEqual(build.local_state, 'testing')