mirror of
https://github.com/odoo/runbot.git
synced 2025-03-27 13:25:47 +07:00
[IMP] runbot: add docker state
docker_is_running is ambiguous since we dont know if it was started once. This new feature tries to add tools to know if a docker was started or not. The main reason of this is that sometimes docker_run may take more than 15 seconds creating unpredictable errors on build when the second step is launched and the previous one is still running. Hopefully this fix will help to solve this issue and detect late docker run.
This commit is contained in:
parent
83651c408e
commit
a3abdf8505
@ -128,7 +128,8 @@ def docker_run(run_cmd, log_path, build_dir, container_name, exposed_ports=None,
|
|||||||
cmd_object = Command([], run_cmd.split(' '), [])
|
cmd_object = Command([], run_cmd.split(' '), [])
|
||||||
_logger.debug('Docker run command: %s', run_cmd)
|
_logger.debug('Docker run command: %s', run_cmd)
|
||||||
logs = open(log_path, 'w')
|
logs = open(log_path, 'w')
|
||||||
run_cmd = 'cd /data/build && %s' % run_cmd
|
run_cmd = 'cd /data/build;touch start-%s;%s;cd /data/build;touch end-%s' % (container_name, run_cmd, container_name)
|
||||||
|
docker_clear_state(container_name, build_dir) # ensure that no state are remaining
|
||||||
logs.write("Docker command:\n%s\n=================================================\n" % cmd_object)
|
logs.write("Docker command:\n%s\n=================================================\n" % cmd_object)
|
||||||
# create start script
|
# create start script
|
||||||
docker_command = [
|
docker_command = [
|
||||||
@ -173,10 +174,21 @@ def docker_stop(container_name):
|
|||||||
dstop = subprocess.run(['docker', 'stop', container_name])
|
dstop = subprocess.run(['docker', 'stop', container_name])
|
||||||
|
|
||||||
def docker_is_running(container_name):
|
def docker_is_running(container_name):
|
||||||
"""Return True if container is still running"""
|
|
||||||
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
|
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
|
||||||
return True if dinspect.returncode == 0 else False
|
return True if dinspect.returncode == 0 else False
|
||||||
|
|
||||||
|
def docker_state(container_name, build_dir):
|
||||||
|
started = os.path.exists(os.path.join(build_dir, 'start-%s' % container_name))
|
||||||
|
ended = os.path.exists(os.path.join(build_dir, 'end-%s' % container_name))
|
||||||
|
return 'END' if ended else 'RUNNING' if started else 'UNKNOWN'
|
||||||
|
|
||||||
|
def docker_clear_state(container_name, build_dir):
|
||||||
|
"""Return True if container is still running"""
|
||||||
|
if os.path.exists(os.path.join(build_dir, 'start-%s' % container_name)):
|
||||||
|
os.remove(os.path.join(build_dir, 'start-%s' % container_name))
|
||||||
|
if os.path.exists(os.path.join(build_dir, 'end-%s' % container_name)):
|
||||||
|
os.remove(os.path.join(build_dir, 'end-%s' % container_name))
|
||||||
|
|
||||||
def docker_get_gateway_ip():
|
def docker_get_gateway_ip():
|
||||||
"""Return the host ip of the docker default bridge gateway"""
|
"""Return the host ip of the docker default bridge gateway"""
|
||||||
docker_net_inspect = subprocess.run(['docker', 'network', 'inspect', 'bridge'], stdout=subprocess.PIPE)
|
docker_net_inspect = subprocess.run(['docker', 'network', 'inspect', 'bridge'], stdout=subprocess.PIPE)
|
||||||
|
@ -9,7 +9,7 @@ import subprocess
|
|||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
from ..common import dt2time, fqdn, now, grep, uniq_list, local_pgadmin_cursor, s2human, Commit, dest_reg, os
|
from ..common import dt2time, fqdn, now, grep, uniq_list, local_pgadmin_cursor, s2human, Commit, dest_reg, os
|
||||||
from ..container import docker_build, docker_stop, docker_is_running, Command
|
from ..container import docker_build, docker_stop, docker_state, Command
|
||||||
from odoo.addons.runbot.models.repo import RunbotException
|
from odoo.addons.runbot.models.repo import RunbotException
|
||||||
from odoo import models, fields, api
|
from odoo import models, fields, api
|
||||||
from odoo.exceptions import UserError, ValidationError
|
from odoo.exceptions import UserError, ValidationError
|
||||||
@ -587,7 +587,7 @@ class runbot_build(models.Model):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if build.requested_action == 'wake_up':
|
if build.requested_action == 'wake_up':
|
||||||
if docker_is_running(build._get_docker_name()):
|
if docker_state(build._get_docker_name(), build._path()) == 'RUNNING':
|
||||||
build.write({'requested_action': False, 'local_state': 'running'})
|
build.write({'requested_action': False, 'local_state': 'running'})
|
||||||
build._log('wake_up', 'Waking up failed, docker is already running', level='SEPARATOR')
|
build._log('wake_up', 'Waking up failed, docker is already running', level='SEPARATOR')
|
||||||
elif not os.path.exists(build._path()):
|
elif not os.path.exists(build._path()):
|
||||||
@ -651,15 +651,19 @@ class runbot_build(models.Model):
|
|||||||
build.local_result = build.triggered_result
|
build.local_result = build.triggered_result
|
||||||
build._github_status() # failfast
|
build._github_status() # failfast
|
||||||
# check if current job is finished
|
# check if current job is finished
|
||||||
if docker_is_running(build._get_docker_name()):
|
_docker_state = docker_state(build._get_docker_name(), build._path())
|
||||||
|
if _docker_state == 'RUNNING':
|
||||||
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
||||||
if build.local_state != 'running' and build.job_time > timeout:
|
if build.local_state != 'running' and build.job_time > timeout:
|
||||||
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
||||||
build._kill(result='killed')
|
build._kill(result='killed')
|
||||||
continue
|
continue
|
||||||
elif build.active_step._is_docker_step() and build.job_time < 15:
|
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
|
||||||
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
if build.job_time < 60:
|
||||||
continue
|
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
|
||||||
# No job running, make result and select nex job
|
# No job running, make result and select nex job
|
||||||
build_values = {
|
build_values = {
|
||||||
'job_end': now(),
|
'job_end': now(),
|
||||||
|
@ -22,8 +22,8 @@ class TestSchedule(RunbotCase):
|
|||||||
})
|
})
|
||||||
|
|
||||||
@patch('odoo.addons.runbot.models.build.os.path.getmtime')
|
@patch('odoo.addons.runbot.models.build.os.path.getmtime')
|
||||||
@patch('odoo.addons.runbot.models.build.docker_is_running')
|
@patch('odoo.addons.runbot.models.build.docker_state')
|
||||||
def test_schedule_mark_done(self, mock_running, mock_getmtime):
|
def test_schedule_mark_done(self, mock_docker_state, mock_getmtime):
|
||||||
""" Test that results are set even when job_30_run is skipped """
|
""" Test that results are set even when job_30_run is skipped """
|
||||||
job_end_time = datetime.datetime.now()
|
job_end_time = datetime.datetime.now()
|
||||||
mock_getmtime.return_value = job_end_time.timestamp()
|
mock_getmtime.return_value = job_end_time.timestamp()
|
||||||
@ -41,7 +41,7 @@ class TestSchedule(RunbotCase):
|
|||||||
domain = [('repo_id', 'in', (self.repo.id, ))]
|
domain = [('repo_id', 'in', (self.repo.id, ))]
|
||||||
domain_host = domain + [('host', '=', 'runbotxx')]
|
domain_host = domain + [('host', '=', 'runbotxx')]
|
||||||
build_ids = self.Build.search(domain_host + [('local_state', 'in', ['testing', 'running'])])
|
build_ids = self.Build.search(domain_host + [('local_state', 'in', ['testing', 'running'])])
|
||||||
mock_running.return_value = False
|
mock_docker_state.return_value = 'RUNNING'
|
||||||
self.assertEqual(build.local_state, 'testing')
|
self.assertEqual(build.local_state, 'testing')
|
||||||
build_ids._schedule() # too fast, docker not started
|
build_ids._schedule() # too fast, docker not started
|
||||||
self.assertEqual(build.local_state, 'testing')
|
self.assertEqual(build.local_state, 'testing')
|
||||||
|
Loading…
Reference in New Issue
Block a user