mirror of
https://github.com/odoo/runbot.git
synced 2025-03-27 13:25:47 +07:00
[REF] runbot: refactor sheduler
This commit is contained in:
parent
ae5f2906bf
commit
426b7af2cb
@ -166,12 +166,13 @@ def docker_run(run_cmd, log_path, build_dir, container_name, exposed_ports=None,
|
|||||||
docker_command.extend(['odoo:runbot_tests', '/bin/bash', '-c', "%s" % run_cmd])
|
docker_command.extend(['odoo:runbot_tests', '/bin/bash', '-c', "%s" % run_cmd])
|
||||||
docker_run = subprocess.Popen(docker_command, stdout=logs, stderr=logs, preexec_fn=preexec_fn, close_fds=False, cwd=build_dir)
|
docker_run = subprocess.Popen(docker_command, stdout=logs, stderr=logs, preexec_fn=preexec_fn, close_fds=False, cwd=build_dir)
|
||||||
_logger.info('Started Docker container %s', container_name)
|
_logger.info('Started Docker container %s', container_name)
|
||||||
return docker_run.pid
|
return
|
||||||
|
|
||||||
def docker_stop(container_name):
|
def docker_stop(container_name):
|
||||||
"""Stops the container named container_name"""
|
"""Stops the container named container_name"""
|
||||||
_logger.info('Stopping container %s', container_name)
|
_logger.info('Stopping container %s', container_name)
|
||||||
dstop = subprocess.run(['docker', 'stop', container_name])
|
dstop = subprocess.run(['docker', 'stop', container_name])
|
||||||
|
# todo delete os.path.join(build_dir, 'end-%s' % container_name)
|
||||||
|
|
||||||
def docker_is_running(container_name):
|
def docker_is_running(container_name):
|
||||||
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
|
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
|
||||||
|
@ -67,7 +67,6 @@ class runbot_build(models.Model):
|
|||||||
nb_running = fields.Integer("Number of test slot use", default=0)
|
nb_running = fields.Integer("Number of test slot use", default=0)
|
||||||
|
|
||||||
# should we add a stored field for children results?
|
# should we add a stored field for children results?
|
||||||
pid = fields.Integer('Pid')
|
|
||||||
active_step = fields.Many2one('runbot.build.config.step', 'Active step')
|
active_step = fields.Many2one('runbot.build.config.step', 'Active step')
|
||||||
job = fields.Char('Active step display name', compute='_compute_job')
|
job = fields.Char('Active step display name', compute='_compute_job')
|
||||||
job_start = fields.Datetime('Job start')
|
job_start = fields.Datetime('Job start')
|
||||||
@ -573,14 +572,40 @@ class runbot_build(models.Model):
|
|||||||
self.ensure_one()
|
self.ensure_one()
|
||||||
return '%s_%s' % (self.dest, self.active_step.name)
|
return '%s_%s' % (self.dest, self.active_step.name)
|
||||||
|
|
||||||
def _schedule(self):
|
def _init_pendings(self, host):
|
||||||
"""schedule the build"""
|
for build in self:
|
||||||
icp = self.env['ir.config_parameter']
|
if build.local_state != 'pending':
|
||||||
# For retro-compatibility, keep this parameter in seconds
|
raise UserError("Build %s is not pending" % build.id)
|
||||||
|
if build.host != host.name:
|
||||||
|
raise UserError("Build %s does not have correct host" % build.id)
|
||||||
|
# allocate port and schedule first job
|
||||||
|
values = {
|
||||||
|
'port': self._find_port(),
|
||||||
|
'job_start': now(),
|
||||||
|
'build_start': now(),
|
||||||
|
'job_end': False,
|
||||||
|
}
|
||||||
|
values.update(build._next_job_values())
|
||||||
|
build.write(values)
|
||||||
|
if not build.active_step:
|
||||||
|
build._log('_schedule', 'No job in config, doing nothing')
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
build._log('_schedule', 'Init build environment with config %s ' % build.config_id.name)
|
||||||
|
# notify pending build - avoid confusing users by saying nothing
|
||||||
|
build._github_status()
|
||||||
|
os.makedirs(build._path('logs'), exist_ok=True)
|
||||||
|
build._log('_schedule', 'Building docker image')
|
||||||
|
docker_build(build._path('logs', 'docker_build.txt'), build._path())
|
||||||
|
except Exception:
|
||||||
|
_logger.exception('Failed initiating build %s', build.dest)
|
||||||
|
build._log('_schedule', 'Failed initiating build')
|
||||||
|
build._kill(result='ko')
|
||||||
|
continue
|
||||||
|
build._run_job()
|
||||||
|
|
||||||
|
def _process_requested_actions(self):
|
||||||
for build in self:
|
for build in self:
|
||||||
self.env.cr.commit() # commit between each build to minimise transactionnal errors due to state computations
|
|
||||||
self.invalidate_cache()
|
|
||||||
if build.requested_action == 'deathrow':
|
if build.requested_action == 'deathrow':
|
||||||
result = None
|
result = None
|
||||||
if build.local_state != 'running' and build.global_result not in ('warn', 'ko'):
|
if build.local_state != 'running' and build.global_result not in ('warn', 'ko'):
|
||||||
@ -617,97 +642,76 @@ class runbot_build(models.Model):
|
|||||||
build.write({'requested_action': False, 'local_state': 'done'})
|
build.write({'requested_action': False, 'local_state': 'done'})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if build.local_state == 'pending':
|
def _schedule(self):
|
||||||
# allocate port and schedule first job
|
"""schedule the build"""
|
||||||
port = self._find_port()
|
icp = self.env['ir.config_parameter']
|
||||||
values = {
|
for build in self:
|
||||||
'host': fqdn(), # or ip? of false?
|
if build.local_state not in ['testing', 'running']:
|
||||||
'port': port,
|
raise UserError("Build %s is not testing/running: %s" % (build.id, build.local_state))
|
||||||
'job_start': now(),
|
if build.local_state == 'testing':
|
||||||
'build_start': now(),
|
# failfast in case of docker error (triggered in database)
|
||||||
'job_end': False,
|
if build.triggered_result and not build.active_step.ignore_triggered_result:
|
||||||
}
|
worst_result = self._get_worst_result([build.triggered_result, build.local_result])
|
||||||
values.update(build._next_job_values())
|
if worst_result != build.local_result:
|
||||||
build.write(values)
|
build.local_result = build.triggered_result
|
||||||
if not build.active_step:
|
build._github_status() # failfast
|
||||||
build._log('_schedule', 'No job in config, doing nothing')
|
# check if current job is finished
|
||||||
|
_docker_state = docker_state(build._get_docker_name(), build._path())
|
||||||
|
if _docker_state == 'RUNNING':
|
||||||
|
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
||||||
|
if build.local_state != 'running' and build.job_time > timeout:
|
||||||
|
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
||||||
|
build._kill(result='killed')
|
||||||
|
continue
|
||||||
|
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
|
||||||
|
if build.job_time < 60:
|
||||||
|
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
||||||
continue
|
continue
|
||||||
try:
|
else:
|
||||||
build._log('_schedule', 'Init build environment with config %s ' % build.config_id.name)
|
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
|
||||||
# notify pending build - avoid confusing users by saying nothing
|
# No job running, make result and select nex job
|
||||||
build._github_status()
|
build_values = {
|
||||||
os.makedirs(build._path('logs'), exist_ok=True)
|
'job_end': now(),
|
||||||
build._log('_schedule', 'Building docker image')
|
}
|
||||||
docker_build(build._path('logs', 'docker_build.txt'), build._path())
|
# make result of previous job
|
||||||
except Exception:
|
try:
|
||||||
_logger.exception('Failed initiating build %s', build.dest)
|
results = build.active_step._make_results(build)
|
||||||
build._log('_schedule', 'Failed initiating build')
|
except Exception as e:
|
||||||
build._kill(result='ko')
|
if isinstance(e, RunbotException):
|
||||||
continue
|
message = e.args[0]
|
||||||
else: # testing/running build
|
else:
|
||||||
if build.local_state == 'testing':
|
message = 'An error occured while computing results of %s:\n %s' % (build.job, str(e).replace('\\n', '\n').replace("\\'", "'"))
|
||||||
# failfast in case of docker error (triggered in database)
|
_logger.exception(message)
|
||||||
if build.triggered_result and not build.active_step.ignore_triggered_result:
|
build._log('_make_results', message, level='ERROR')
|
||||||
worst_result = self._get_worst_result([build.triggered_result, build.local_result])
|
results = {'local_result': 'ko'}
|
||||||
if worst_result != build.local_result:
|
|
||||||
build.local_result = build.triggered_result
|
|
||||||
build._github_status() # failfast
|
|
||||||
# check if current job is finished
|
|
||||||
_docker_state = docker_state(build._get_docker_name(), build._path())
|
|
||||||
if _docker_state == 'RUNNING':
|
|
||||||
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
|
||||||
if build.local_state != 'running' and build.job_time > timeout:
|
|
||||||
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
|
||||||
build._kill(result='killed')
|
|
||||||
continue
|
|
||||||
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
|
|
||||||
if build.job_time < 60:
|
|
||||||
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
|
|
||||||
# No job running, make result and select nex job
|
|
||||||
build_values = {
|
|
||||||
'job_end': now(),
|
|
||||||
}
|
|
||||||
# make result of previous job
|
|
||||||
try:
|
|
||||||
results = build.active_step._make_results(build)
|
|
||||||
except Exception as e:
|
|
||||||
if isinstance(e, RunbotException):
|
|
||||||
message = e.args[0]
|
|
||||||
else:
|
|
||||||
message = 'An error occured while computing results of %s:\n %s' % (build.job, str(e).replace('\\n', '\n').replace("\\'", "'"))
|
|
||||||
_logger.exception(message)
|
|
||||||
build._log('_make_results', message, level='ERROR')
|
|
||||||
results = {'local_result': 'ko'}
|
|
||||||
|
|
||||||
build_values.update(results)
|
build_values.update(results)
|
||||||
|
|
||||||
build.active_step.log_end(build)
|
build.active_step.log_end(build)
|
||||||
|
|
||||||
build_values.update(build._next_job_values()) # find next active_step or set to done
|
build_values.update(build._next_job_values()) # find next active_step or set to done
|
||||||
|
|
||||||
ending_build = build.local_state not in ('done', 'running') and build_values.get('local_state') in ('done', 'running')
|
ending_build = build.local_state not in ('done', 'running') and build_values.get('local_state') in ('done', 'running')
|
||||||
if ending_build:
|
if ending_build:
|
||||||
build.update_build_end()
|
build.update_build_end()
|
||||||
|
|
||||||
build.write(build_values)
|
build.write(build_values)
|
||||||
if ending_build:
|
if ending_build:
|
||||||
build._github_status()
|
build._github_status()
|
||||||
if not build.local_result: # Set 'ok' result if no result set (no tests job on build)
|
if not build.local_result: # Set 'ok' result if no result set (no tests job on build)
|
||||||
build.local_result = 'ok'
|
build.local_result = 'ok'
|
||||||
build._logger("No result set, setting ok by default")
|
build._logger("No result set, setting ok by default")
|
||||||
|
build._run_job()
|
||||||
|
|
||||||
# run job
|
def _run_job(self):
|
||||||
pid = None
|
# run job
|
||||||
|
for build in self:
|
||||||
if build.local_state != 'done':
|
if build.local_state != 'done':
|
||||||
build._logger('running %s', build.active_step.name)
|
build._logger('running %s', build.active_step.name)
|
||||||
os.makedirs(build._path('logs'), exist_ok=True)
|
os.makedirs(build._path('logs'), exist_ok=True)
|
||||||
os.makedirs(build._path('datadir'), exist_ok=True)
|
os.makedirs(build._path('datadir'), exist_ok=True)
|
||||||
try:
|
try:
|
||||||
pid = build.active_step._run(build) # run should be on build?
|
build.active_step._run(build) # run should be on build?
|
||||||
build.write({'pid': pid}) # no really usefull anymore with dockers
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if isinstance(e, RunbotException):
|
if isinstance(e, RunbotException):
|
||||||
message = e.args[0]
|
message = e.args[0]
|
||||||
@ -716,10 +720,6 @@ class runbot_build(models.Model):
|
|||||||
_logger.exception(message)
|
_logger.exception(message)
|
||||||
build._log("run", message, level='ERROR')
|
build._log("run", message, level='ERROR')
|
||||||
build._kill(result='ko')
|
build._kill(result='ko')
|
||||||
continue
|
|
||||||
|
|
||||||
self.env.cr.commit()
|
|
||||||
self.invalidate_cache()
|
|
||||||
|
|
||||||
def _path(self, *l, **kw):
|
def _path(self, *l, **kw):
|
||||||
"""Return the repo build path"""
|
"""Return the repo build path"""
|
||||||
@ -844,16 +844,6 @@ class runbot_build(models.Model):
|
|||||||
'line': '0',
|
'line': '0',
|
||||||
})
|
})
|
||||||
|
|
||||||
def _reap(self):
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
pid, status, rusage = os.wait3(os.WNOHANG)
|
|
||||||
except OSError:
|
|
||||||
break
|
|
||||||
if pid == 0:
|
|
||||||
break
|
|
||||||
_logger.debug('reaping: pid: %s status: %s', pid, status)
|
|
||||||
|
|
||||||
def _kill(self, result=None):
|
def _kill(self, result=None):
|
||||||
host = fqdn()
|
host = fqdn()
|
||||||
for build in self:
|
for build in self:
|
||||||
|
@ -52,6 +52,10 @@ class RunboHost(models.Model):
|
|||||||
icp = self.env['ir.config_parameter']
|
icp = self.env['ir.config_parameter']
|
||||||
return self.nb_worker or int(icp.sudo().get_param('runbot.runbot_workers', default=6))
|
return self.nb_worker or int(icp.sudo().get_param('runbot.runbot_workers', default=6))
|
||||||
|
|
||||||
|
def get_running_max(self):
|
||||||
|
icp = self.env['ir.config_parameter']
|
||||||
|
return int(icp.get_param('runbot.runbot_running_max', default=75))
|
||||||
|
|
||||||
def set_psql_conn_count(self):
|
def set_psql_conn_count(self):
|
||||||
self.ensure_one()
|
self.ensure_one()
|
||||||
with local_pgadmin_cursor() as local_cr:
|
with local_pgadmin_cursor() as local_cr:
|
||||||
|
@ -17,9 +17,11 @@ from odoo.tools.misc import DEFAULT_SERVER_DATETIME_FORMAT
|
|||||||
from odoo import models, fields, api, registry
|
from odoo import models, fields, api, registry
|
||||||
from odoo.modules.module import get_module_resource
|
from odoo.modules.module import get_module_resource
|
||||||
from odoo.tools import config
|
from odoo.tools import config
|
||||||
|
from odoo.osv import expression
|
||||||
from ..common import fqdn, dt2time, Commit, dest_reg, os
|
from ..common import fqdn, dt2time, Commit, dest_reg, os
|
||||||
from ..container import docker_ps, docker_stop
|
from ..container import docker_ps, docker_stop
|
||||||
from psycopg2.extensions import TransactionRollbackError
|
from psycopg2.extensions import TransactionRollbackError
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class RunbotException(Exception):
|
class RunbotException(Exception):
|
||||||
@ -456,97 +458,130 @@ class runbot_repo(models.Model):
|
|||||||
except Exception:
|
except Exception:
|
||||||
_logger.exception('Fail to update repo %s', repo.name)
|
_logger.exception('Fail to update repo %s', repo.name)
|
||||||
|
|
||||||
@api.multi
|
def _commit(self):
|
||||||
def _scheduler(self, host=None):
|
|
||||||
"""Schedule builds for the repository"""
|
|
||||||
ids = self.ids
|
|
||||||
if not ids:
|
|
||||||
return
|
|
||||||
icp = self.env['ir.config_parameter']
|
|
||||||
host = host or self.env['runbot.host']._get_current()
|
|
||||||
workers = host.get_nb_worker()
|
|
||||||
running_max = int(icp.get_param('runbot.runbot_running_max', default=75))
|
|
||||||
assigned_only = host.assigned_only
|
|
||||||
|
|
||||||
Build = self.env['runbot.build']
|
|
||||||
domain = [('repo_id', 'in', ids)]
|
|
||||||
domain_host = domain + [('host', '=', host.name)]
|
|
||||||
|
|
||||||
# schedule jobs (transitions testing -> running, kill jobs, ...)
|
|
||||||
build_ids = Build.search(domain_host + ['|', ('local_state', 'in', ['testing', 'running']), ('requested_action', 'in', ['wake_up', 'deathrow'])])
|
|
||||||
build_ids._schedule()
|
|
||||||
self.env.cr.commit()
|
self.env.cr.commit()
|
||||||
self.invalidate_cache()
|
self.invalidate_cache()
|
||||||
|
self.env.reset()
|
||||||
|
|
||||||
# launch new tests
|
@api.multi
|
||||||
|
def _scheduler(self, host):
|
||||||
|
nb_workers = host.get_nb_worker()
|
||||||
|
|
||||||
nb_testing = Build.search_count(domain_host + [('local_state', '=', 'testing')])
|
for build in self._get_builds_with_requested_actions(host):
|
||||||
available_slots = workers - nb_testing
|
build._process_requested_actions()
|
||||||
reserved_slots = Build.search_count(domain_host + [('local_state', '=', 'pending')])
|
self._commit()
|
||||||
assignable_slots = (available_slots - reserved_slots) if not assigned_only else 0
|
for build in self._get_builds_to_schedule(host):
|
||||||
if available_slots > 0:
|
build._schedule()
|
||||||
if assignable_slots > 0: # note: slots have been addapt to be able to force host on pending build. Normally there is no pending with host.
|
self._commit()
|
||||||
# commit transaction to reduce the critical section duration
|
self._assign_pending_builds(host, nb_workers, [('build_type', '!=', 'scheduled')])
|
||||||
def allocate_builds(where_clause, limit):
|
self._commit()
|
||||||
self.env.cr.commit()
|
self._assign_pending_builds(host, nb_workers-1 or nb_workers)
|
||||||
self.invalidate_cache()
|
self._commit()
|
||||||
# self-assign to be sure that another runbot instance cannot self assign the same builds
|
for build in self._get_builds_to_init(host):
|
||||||
query = """UPDATE
|
build._init_pendings(host)
|
||||||
runbot_build
|
self._commit()
|
||||||
SET
|
self._gc_running(host)
|
||||||
host = %%(host)s
|
self._commit()
|
||||||
WHERE
|
self._reload_nginx()
|
||||||
runbot_build.id IN (
|
|
||||||
SELECT runbot_build.id
|
|
||||||
FROM runbot_build
|
|
||||||
LEFT JOIN runbot_branch
|
|
||||||
ON runbot_branch.id = runbot_build.branch_id
|
|
||||||
WHERE
|
|
||||||
runbot_build.repo_id IN %%(repo_ids)s
|
|
||||||
AND runbot_build.local_state = 'pending'
|
|
||||||
AND runbot_build.host IS NULL
|
|
||||||
%s
|
|
||||||
ORDER BY
|
|
||||||
array_position(array['normal','rebuild','indirect','scheduled']::varchar[], runbot_build.build_type) ASC,
|
|
||||||
runbot_branch.sticky DESC,
|
|
||||||
runbot_branch.priority DESC,
|
|
||||||
runbot_build.sequence ASC
|
|
||||||
FOR UPDATE OF runbot_build SKIP LOCKED
|
|
||||||
LIMIT %%(limit)s
|
|
||||||
)
|
|
||||||
RETURNING id""" % where_clause
|
|
||||||
|
|
||||||
self.env.cr.execute(query, {'repo_ids': tuple(ids), 'host': host.name, 'limit': limit})
|
def build_domain_host(self, host, domain=None):
|
||||||
return self.env.cr.fetchall()
|
domain = domain or []
|
||||||
|
return [('repo_id', 'in', self.ids), ('host', '=', host.name)] + domain
|
||||||
|
|
||||||
allocated = allocate_builds("""AND runbot_build.build_type != 'scheduled'""", assignable_slots)
|
def _get_builds_with_requested_actions(self, host):
|
||||||
if allocated:
|
return self.env['runbot.build'].search(self.build_domain_host(host, [('requested_action', 'in', ['wake_up', 'deathrow'])]))
|
||||||
_logger.debug('Normal builds %s where allocated to runbot' % allocated)
|
|
||||||
weak_slot = assignable_slots - len(allocated) - 1
|
|
||||||
if weak_slot > 0:
|
|
||||||
allocated = allocate_builds('', weak_slot)
|
|
||||||
if allocated:
|
|
||||||
_logger.debug('Scheduled builds %s where allocated to runbot' % allocated)
|
|
||||||
|
|
||||||
pending_build = Build.search(domain_host + [('local_state', '=', 'pending')], limit=available_slots)
|
def _get_builds_to_schedule(self, host):
|
||||||
if pending_build:
|
return self.env['runbot.build'].search(self.build_domain_host(host, [('local_state', 'in', ['testing', 'running'])]))
|
||||||
pending_build._schedule()
|
|
||||||
|
|
||||||
|
def _assign_pending_builds(self, host, nb_workers, domain=None):
|
||||||
|
if not self.ids or host.assigned_only or nb_workers <= 0:
|
||||||
|
return
|
||||||
|
domain_host = self.build_domain_host(host)
|
||||||
|
reserved_slots = self.env['runbot.build'].search_count(domain_host + [('local_state', 'in', ('testing', 'pending'))])
|
||||||
|
assignable_slots = (nb_workers - reserved_slots)
|
||||||
|
if assignable_slots > 0:
|
||||||
|
allocated = self._allocate_builds(host, assignable_slots, domain)
|
||||||
|
if allocated:
|
||||||
|
_logger.debug('Builds %s where allocated to runbot' % allocated)
|
||||||
|
|
||||||
|
def _get_builds_to_init(self, host):
|
||||||
|
domain_host = self.build_domain_host(host)
|
||||||
|
used_slots = self.env['runbot.build'].search_count(domain_host + [('local_state', '=', 'testing')])
|
||||||
|
available_slots = host.get_nb_worker() - used_slots
|
||||||
|
if available_slots <= 0:
|
||||||
|
return self.env['runbot.build']
|
||||||
|
return self.env['runbot.build'].search(domain_host + [('local_state', '=', 'pending')], limit=available_slots)
|
||||||
|
|
||||||
|
def _gc_running(self, host):
|
||||||
|
running_max = host.get_running_max()
|
||||||
# terminate and reap doomed build
|
# terminate and reap doomed build
|
||||||
build_ids = Build.search(domain_host + [('local_state', '=', 'running'), ('keep_running', '!=', True)], order='job_start desc').ids
|
domain_host = self.build_domain_host(host)
|
||||||
# sort builds: the last build of each sticky branch then the rest
|
Build = self.env['runbot.build']
|
||||||
sticky = {}
|
# some builds are marked as keep running
|
||||||
non_sticky = []
|
cannot_be_killed_ids = Build.search(domain_host + [('keep_running', '!=', True)]).ids
|
||||||
for build in Build.browse(build_ids):
|
# we want to keep one build running per sticky, no mather which host
|
||||||
if build.branch_id.sticky and build.branch_id.id not in sticky:
|
sticky_branches_ids = self.env['runbot.branch'].search([('sticky', '=', True)]).ids
|
||||||
sticky[build.branch_id.id] = build.id
|
# search builds on host on sticky branches, order by position in branch history
|
||||||
else:
|
if sticky_branches_ids:
|
||||||
non_sticky.append(build.id)
|
self.env.cr.execute("""
|
||||||
build_ids = list(sticky.values())
|
SELECT
|
||||||
build_ids += non_sticky
|
id
|
||||||
# terminate extra running builds
|
FROM (
|
||||||
|
SELECT
|
||||||
|
bu.id AS id,
|
||||||
|
bu.host as host,
|
||||||
|
row_number() OVER (PARTITION BY branch_id order by bu.id desc) AS row
|
||||||
|
FROM
|
||||||
|
runbot_branch br INNER JOIN runbot_build bu ON br.id=bu.branch_id
|
||||||
|
WHERE
|
||||||
|
br.id in %s AND (bu.hidden = 'f' OR bu.hidden IS NULL)
|
||||||
|
) AS br_bu
|
||||||
|
WHERE
|
||||||
|
row <= 4 AND host = %s
|
||||||
|
ORDER BY row, id desc
|
||||||
|
""", [tuple(sticky_branches_ids), host.name]
|
||||||
|
)
|
||||||
|
cannot_be_killed_ids += self.env.cr.fetchall()
|
||||||
|
cannot_be_killed_ids = cannot_be_killed_ids[:running_max] # ensure that we don't try to keep more than we can handle
|
||||||
|
|
||||||
|
build_ids = Build.search(domain_host + [('local_state', '=', 'running'), ('id', 'not in', cannot_be_killed_ids)], order='job_start desc').ids
|
||||||
Build.browse(build_ids)[running_max:]._kill()
|
Build.browse(build_ids)[running_max:]._kill()
|
||||||
Build.browse(build_ids)._reap()
|
|
||||||
|
def _allocate_builds(self, host, nb_slots, domain=None):
|
||||||
|
if nb_slots <= 0:
|
||||||
|
return []
|
||||||
|
non_allocated_domain = [('repo_id', 'in', self.ids), ('local_state', '=', 'pending'), ('host', '=', False)]
|
||||||
|
if domain:
|
||||||
|
non_allocated_domain = expression.AND([non_allocated_domain, domain])
|
||||||
|
e = expression.expression(non_allocated_domain, self.env['runbot.build'])
|
||||||
|
assert e.get_tables() == ['"runbot_build"']
|
||||||
|
where_clause, where_params = e.to_sql()
|
||||||
|
|
||||||
|
# self-assign to be sure that another runbot instance cannot self assign the same builds
|
||||||
|
query = """UPDATE
|
||||||
|
runbot_build
|
||||||
|
SET
|
||||||
|
host = %%s
|
||||||
|
WHERE
|
||||||
|
runbot_build.id IN (
|
||||||
|
SELECT runbot_build.id
|
||||||
|
FROM runbot_build
|
||||||
|
LEFT JOIN runbot_branch
|
||||||
|
ON runbot_branch.id = runbot_build.branch_id
|
||||||
|
WHERE
|
||||||
|
%s
|
||||||
|
ORDER BY
|
||||||
|
array_position(array['normal','rebuild','indirect','scheduled']::varchar[], runbot_build.build_type) ASC,
|
||||||
|
runbot_branch.sticky DESC,
|
||||||
|
runbot_branch.priority DESC,
|
||||||
|
runbot_build.sequence ASC
|
||||||
|
FOR UPDATE OF runbot_build SKIP LOCKED
|
||||||
|
LIMIT %%s
|
||||||
|
)
|
||||||
|
RETURNING id""" % where_clause
|
||||||
|
self.env.cr.execute(query, [host.name] + where_params + [nb_slots])
|
||||||
|
return self.env.cr.fetchall()
|
||||||
|
|
||||||
def _domain(self):
|
def _domain(self):
|
||||||
return self.env.get('ir.config_parameter').get_param('runbot.runbot_domain', fqdn())
|
return self.env.get('ir.config_parameter').get_param('runbot.runbot_domain', fqdn())
|
||||||
@ -613,9 +648,7 @@ class runbot_repo(models.Model):
|
|||||||
repos = self.search([('mode', '!=', 'disabled')])
|
repos = self.search([('mode', '!=', 'disabled')])
|
||||||
repos._update(force=False)
|
repos._update(force=False)
|
||||||
repos._create_pending_builds()
|
repos._create_pending_builds()
|
||||||
|
self._commit()
|
||||||
self.env.cr.commit()
|
|
||||||
self.invalidate_cache()
|
|
||||||
time.sleep(update_frequency)
|
time.sleep(update_frequency)
|
||||||
|
|
||||||
def _cron_fetch_and_build(self, hostname):
|
def _cron_fetch_and_build(self, hostname):
|
||||||
@ -629,7 +662,8 @@ class runbot_repo(models.Model):
|
|||||||
host = self.env['runbot.host']._get_current()
|
host = self.env['runbot.host']._get_current()
|
||||||
host.set_psql_conn_count()
|
host.set_psql_conn_count()
|
||||||
host.last_start_loop = fields.Datetime.now()
|
host.last_start_loop = fields.Datetime.now()
|
||||||
self.env.cr.commit()
|
|
||||||
|
self._commit()
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
# 1. source cleanup
|
# 1. source cleanup
|
||||||
# -> Remove sources when no build is using them
|
# -> Remove sources when no build is using them
|
||||||
@ -638,53 +672,41 @@ class runbot_repo(models.Model):
|
|||||||
# 2. db and log cleanup
|
# 2. db and log cleanup
|
||||||
# -> Keep them as long as possible
|
# -> Keep them as long as possible
|
||||||
self.env['runbot.build']._local_cleanup()
|
self.env['runbot.build']._local_cleanup()
|
||||||
|
|
||||||
# 3. docker cleanup
|
# 3. docker cleanup
|
||||||
docker_ps_result = docker_ps()
|
self.env['runbot.repo']._docker_cleanup()
|
||||||
containers = {int(dc.split('-', 1)[0]):dc for dc in docker_ps_result if dest_reg.match(dc)}
|
|
||||||
if containers:
|
|
||||||
candidates = self.env['runbot.build'].search([('id', 'in', list(containers.keys())), ('local_state', '=', 'done')])
|
|
||||||
for c in candidates:
|
|
||||||
_logger.info('container %s found running with build state done', containers[c.id])
|
|
||||||
docker_stop(containers[c.id])
|
|
||||||
ignored = {dc for dc in docker_ps_result if not dest_reg.match(dc)}
|
|
||||||
if ignored:
|
|
||||||
_logger.debug('docker (%s) not deleted because not dest format', " ".join(list(ignored)))
|
|
||||||
|
|
||||||
timeout = self._get_cron_period()
|
timeout = self._get_cron_period()
|
||||||
icp = self.env['ir.config_parameter']
|
icp = self.env['ir.config_parameter']
|
||||||
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
|
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
|
||||||
while time.time() - start_time < timeout:
|
while time.time() - start_time < timeout:
|
||||||
repos = self.search([('mode', '!=', 'disabled')])
|
time.sleep(self._scheduler_loop_turn(host, update_frequency))
|
||||||
try:
|
|
||||||
repos._scheduler(host)
|
|
||||||
host.last_success = fields.Datetime.now()
|
|
||||||
self.env.cr.commit()
|
|
||||||
self.env.reset()
|
|
||||||
self = self.env()[self._name]
|
|
||||||
self._reload_nginx()
|
|
||||||
time.sleep(update_frequency)
|
|
||||||
except TransactionRollbackError: # can lead to psycopg2.InternalError'>: "current transaction is aborted, commands ignored until end of transaction block
|
|
||||||
_logger.exception('Trying to rollback')
|
|
||||||
self.env.cr.rollback()
|
|
||||||
self.env.reset()
|
|
||||||
time.sleep(random.uniform(0, 3))
|
|
||||||
except Exception as e:
|
|
||||||
with registry(self._cr.dbname).cursor() as cr: # user another cursor since transaction will be rollbacked
|
|
||||||
message = str(e)
|
|
||||||
chost = host.with_env(self.env(cr=cr))
|
|
||||||
if chost.last_exception == message:
|
|
||||||
chost.exception_count += 1
|
|
||||||
else:
|
|
||||||
chost.with_env(self.env(cr=cr)).last_exception = str(e)
|
|
||||||
chost.exception_count = 1
|
|
||||||
raise
|
|
||||||
|
|
||||||
if host.last_exception:
|
|
||||||
host.last_exception = ""
|
|
||||||
host.exception_count = 0
|
|
||||||
host.last_end_loop = fields.Datetime.now()
|
host.last_end_loop = fields.Datetime.now()
|
||||||
|
|
||||||
|
def _scheduler_loop_turn(self, host, default_sleep=1):
|
||||||
|
repos = self.search([('mode', '!=', 'disabled')])
|
||||||
|
try:
|
||||||
|
repos._scheduler(host)
|
||||||
|
host.last_success = fields.Datetime.now()
|
||||||
|
self._commit()
|
||||||
|
except Exception as e:
|
||||||
|
self.env.cr.rollback()
|
||||||
|
self.env.reset()
|
||||||
|
_logger.exception(e)
|
||||||
|
message = str(e)
|
||||||
|
if host.last_exception == message:
|
||||||
|
host.exception_count += 1
|
||||||
|
else:
|
||||||
|
host.last_exception = str(e)
|
||||||
|
host.exception_count = 1
|
||||||
|
self._commit()
|
||||||
|
return random.uniform(0, 3)
|
||||||
|
else:
|
||||||
|
if host.last_exception:
|
||||||
|
host.last_exception = ""
|
||||||
|
host.exception_count = 0
|
||||||
|
return default_sleep
|
||||||
|
|
||||||
def _source_cleanup(self):
|
def _source_cleanup(self):
|
||||||
try:
|
try:
|
||||||
if self.pool._init:
|
if self.pool._init:
|
||||||
@ -721,23 +743,34 @@ class runbot_repo(models.Model):
|
|||||||
assert 'static' in source_dir
|
assert 'static' in source_dir
|
||||||
shutil.rmtree(source_dir)
|
shutil.rmtree(source_dir)
|
||||||
_logger.info('%s/%s source folder where deleted (%s kept)' % (len(to_delete), len(to_delete+to_keep), len(to_keep)))
|
_logger.info('%s/%s source folder where deleted (%s kept)' % (len(to_delete), len(to_delete+to_keep), len(to_keep)))
|
||||||
|
|
||||||
except:
|
except:
|
||||||
_logger.error('An exception occured while cleaning sources')
|
_logger.error('An exception occured while cleaning sources')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _docker_cleanup(self):
|
||||||
class RefTime(models.Model):
|
docker_ps_result = docker_ps()
|
||||||
_name = "runbot.repo.reftime"
|
containers = {int(dc.split('-', 1)[0]):dc for dc in docker_ps_result if dest_reg.match(dc)}
|
||||||
_log_access = False
|
if containers:
|
||||||
|
candidates = self.env['runbot.build'].search([('id', 'in', list(containers.keys())), ('local_state', '=', 'done')])
|
||||||
time = fields.Float('Time', index=True, required=True)
|
for c in candidates:
|
||||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
_logger.info('container %s found running with build state done', containers[c.id])
|
||||||
|
docker_stop(containers[c.id])
|
||||||
|
ignored = {dc for dc in docker_ps_result if not dest_reg.match(dc)}
|
||||||
|
if ignored:
|
||||||
|
_logger.debug('docker (%s) not deleted because not dest format', " ".join(list(ignored)))
|
||||||
|
|
||||||
|
|
||||||
class HookTime(models.Model):
|
class RefTime(models.Model):
|
||||||
_name = "runbot.repo.hooktime"
|
_name = "runbot.repo.reftime"
|
||||||
_log_access = False
|
_log_access = False
|
||||||
|
|
||||||
time = fields.Float('Time')
|
time = fields.Float('Time', index=True, required=True)
|
||||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
||||||
|
|
||||||
|
|
||||||
|
class HookTime(models.Model):
|
||||||
|
_name = "runbot.repo.hooktime"
|
||||||
|
_log_access = False
|
||||||
|
|
||||||
|
time = fields.Float('Time')
|
||||||
|
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
@ -36,6 +36,7 @@ class RunbotCase(TransactionCase):
|
|||||||
self.start_patcher('isdir', 'odoo.addons.runbot.common.os.path.isdir', True)
|
self.start_patcher('isdir', 'odoo.addons.runbot.common.os.path.isdir', True)
|
||||||
self.start_patcher('isfile', 'odoo.addons.runbot.common.os.path.isfile', True)
|
self.start_patcher('isfile', 'odoo.addons.runbot.common.os.path.isfile', True)
|
||||||
self.start_patcher('docker_run', 'odoo.addons.runbot.models.build_config.docker_run')
|
self.start_patcher('docker_run', 'odoo.addons.runbot.models.build_config.docker_run')
|
||||||
|
self.start_patcher('docker_build', 'odoo.addons.runbot.models.build.docker_build')
|
||||||
self.start_patcher('docker_ps', 'odoo.addons.runbot.models.repo.docker_ps', [])
|
self.start_patcher('docker_ps', 'odoo.addons.runbot.models.repo.docker_ps', [])
|
||||||
self.start_patcher('docker_stop', 'odoo.addons.runbot.models.repo.docker_stop')
|
self.start_patcher('docker_stop', 'odoo.addons.runbot.models.repo.docker_stop')
|
||||||
|
|
||||||
|
@ -56,7 +56,6 @@ class Test_Cron(RunbotCase):
|
|||||||
ret = self.Repo._cron_fetch_and_build(hostname)
|
ret = self.Repo._cron_fetch_and_build(hostname)
|
||||||
self.assertEqual(None, ret)
|
self.assertEqual(None, ret)
|
||||||
mock_scheduler.assert_called()
|
mock_scheduler.assert_called()
|
||||||
self.assertTrue(mock_reload.called)
|
|
||||||
host = self.env['runbot.host'].search([('name', '=', hostname)])
|
host = self.env['runbot.host'].search([('name', '=', hostname)])
|
||||||
self.assertEqual(host.name, hostname, 'A new host should have been created')
|
self.assertEqual(host.name, hostname, 'A new host should have been created')
|
||||||
self.assertGreater(host.psql_conn_count, 0, 'A least one connection should exist on the current psql instance')
|
self.assertGreater(host.psql_conn_count, 0, 'A least one connection should exist on the current psql instance')
|
||||||
|
@ -257,10 +257,10 @@ class Test_Repo_Scheduler(RunbotCase):
|
|||||||
'name': 'refs/head/foo'
|
'name': 'refs/head/foo'
|
||||||
})
|
})
|
||||||
|
|
||||||
@patch('odoo.addons.runbot.models.build.runbot_build._reap')
|
|
||||||
@patch('odoo.addons.runbot.models.build.runbot_build._kill')
|
@patch('odoo.addons.runbot.models.build.runbot_build._kill')
|
||||||
@patch('odoo.addons.runbot.models.build.runbot_build._schedule')
|
@patch('odoo.addons.runbot.models.build.runbot_build._schedule')
|
||||||
def test_repo_scheduler(self, mock_schedule, mock_kill, mock_reap):
|
@patch('odoo.addons.runbot.models.build.runbot_build._init_pendings')
|
||||||
|
def test_repo_scheduler(self, mock_init_pendings, mock_schedule, mock_kill):
|
||||||
self.env['ir.config_parameter'].set_param('runbot.runbot_workers', 6)
|
self.env['ir.config_parameter'].set_param('runbot.runbot_workers', 6)
|
||||||
builds = []
|
builds = []
|
||||||
# create 6 builds that are testing on the host to verify that
|
# create 6 builds that are testing on the host to verify that
|
||||||
@ -293,8 +293,8 @@ class Test_Repo_Scheduler(RunbotCase):
|
|||||||
'local_state': 'pending',
|
'local_state': 'pending',
|
||||||
})
|
})
|
||||||
builds.append(build)
|
builds.append(build)
|
||||||
|
host = self.env['runbot.host']._get_current()
|
||||||
self.foo_repo._scheduler()
|
self.foo_repo._scheduler(host)
|
||||||
|
|
||||||
build.invalidate_cache()
|
build.invalidate_cache()
|
||||||
scheduled_build.invalidate_cache()
|
scheduled_build.invalidate_cache()
|
||||||
@ -304,7 +304,7 @@ class Test_Repo_Scheduler(RunbotCase):
|
|||||||
# give some room for the pending build
|
# give some room for the pending build
|
||||||
self.Build.search([('name', '=', 'a')]).write({'local_state': 'done'})
|
self.Build.search([('name', '=', 'a')]).write({'local_state': 'done'})
|
||||||
|
|
||||||
self.foo_repo._scheduler()
|
self.foo_repo._scheduler(host)
|
||||||
build.invalidate_cache()
|
build.invalidate_cache()
|
||||||
scheduled_build.invalidate_cache()
|
scheduled_build.invalidate_cache()
|
||||||
self.assertEqual(build.host, 'host.runbot.com')
|
self.assertEqual(build.host, 'host.runbot.com')
|
||||||
|
@ -22,7 +22,6 @@
|
|||||||
<field name="local_result"/>
|
<field name="local_result"/>
|
||||||
<field name="global_result"/>
|
<field name="global_result"/>
|
||||||
<field name="triggered_result" groups="base.group_no_one"/>
|
<field name="triggered_result" groups="base.group_no_one"/>
|
||||||
<field name="pid"/>
|
|
||||||
<field name="host"/>
|
<field name="host"/>
|
||||||
<field name="job_start" groups="base.group_no_one"/>
|
<field name="job_start" groups="base.group_no_one"/>
|
||||||
<field name="job_end" groups="base.group_no_one"/>
|
<field name="job_end" groups="base.group_no_one"/>
|
||||||
@ -58,7 +57,6 @@
|
|||||||
<field name="port"/>
|
<field name="port"/>
|
||||||
<field name="job"/>
|
<field name="job"/>
|
||||||
<field name="coverage_result"/>
|
<field name="coverage_result"/>
|
||||||
<field name="pid"/>
|
|
||||||
<field name="host"/>
|
<field name="host"/>
|
||||||
<field name="build_time"/>
|
<field name="build_time"/>
|
||||||
<field name="build_age"/>
|
<field name="build_age"/>
|
||||||
|
@ -18,10 +18,8 @@ _logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
class RunbotClient():
|
class RunbotClient():
|
||||||
|
|
||||||
def __init__(self, env, args):
|
def __init__(self, env):
|
||||||
self.env = env
|
self.env = env
|
||||||
self.args = args
|
|
||||||
self.fqdn = socket.getfqdn()
|
|
||||||
self.ask_interrupt = threading.Event()
|
self.ask_interrupt = threading.Event()
|
||||||
|
|
||||||
def main_loop(self):
|
def main_loop(self):
|
||||||
@ -31,15 +29,17 @@ class RunbotClient():
|
|||||||
host = self.env['runbot.host']._get_current()
|
host = self.env['runbot.host']._get_current()
|
||||||
count = 0
|
count = 0
|
||||||
while True:
|
while True:
|
||||||
|
host.last_start_loop = fields.Datetime.now()
|
||||||
count = count % 60
|
count = count % 60
|
||||||
if count == 0:
|
if count == 0:
|
||||||
logging.info('Host %s running with %s slots on pid %s%s', host.name, host.get_nb_worker(), os.getpid(), ' (assigned only)' if host.assigned_only else '')
|
logging.info('Host %s running with %s slots on pid %s%s', host.name, host.get_nb_worker(), os.getpid(), ' (assigned only)' if host.assigned_only else '')
|
||||||
self.env['runbot.repo']._source_cleanup()
|
self.env['runbot.repo']._source_cleanup()
|
||||||
self.env['runbot.build']._local_cleanup()
|
self.env['runbot.build']._local_cleanup()
|
||||||
host.last_end_loop = host.last_start_loop = fields.Datetime.now()
|
self.env['runbot.repo']._docker_cleanup()
|
||||||
host.set_psql_conn_count()
|
host.set_psql_conn_count()
|
||||||
count += 1
|
count += 1
|
||||||
sleep_time = self.env['runbot.repo']._scheduler_loop_turn(host)
|
sleep_time = self.env['runbot.repo']._scheduler_loop_turn(host)
|
||||||
|
host.last_end_loop = fields.Datetime.now()
|
||||||
self.env.cr.commit()
|
self.env.cr.commit()
|
||||||
self.env.reset()
|
self.env.reset()
|
||||||
self.sleep(sleep_time)
|
self.sleep(sleep_time)
|
||||||
@ -97,7 +97,7 @@ def run():
|
|||||||
with odoo.api.Environment.manage():
|
with odoo.api.Environment.manage():
|
||||||
with registry.cursor() as cr:
|
with registry.cursor() as cr:
|
||||||
env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
|
env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
|
||||||
runbot_client = RunbotClient(env, args)
|
runbot_client = RunbotClient(env)
|
||||||
# run main loop
|
# run main loop
|
||||||
runbot_client.main_loop()
|
runbot_client.main_loop()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user