mirror of
https://github.com/odoo/runbot.git
synced 2025-03-15 15:35:46 +07:00
[REF] runbot: refactor sheduler
This commit is contained in:
parent
ae5f2906bf
commit
426b7af2cb
@ -166,12 +166,13 @@ def docker_run(run_cmd, log_path, build_dir, container_name, exposed_ports=None,
|
||||
docker_command.extend(['odoo:runbot_tests', '/bin/bash', '-c', "%s" % run_cmd])
|
||||
docker_run = subprocess.Popen(docker_command, stdout=logs, stderr=logs, preexec_fn=preexec_fn, close_fds=False, cwd=build_dir)
|
||||
_logger.info('Started Docker container %s', container_name)
|
||||
return docker_run.pid
|
||||
return
|
||||
|
||||
def docker_stop(container_name):
    """Stop the docker container named ``container_name``.

    Logs the request and then runs ``docker stop <container_name>``. The exit
    status of the command is not inspected, so a failing ``docker stop`` is
    silent from the caller's point of view.

    :param container_name: name of the container to stop
    """
    _logger.info('Stopping container %s', container_name)
    # The CompletedProcess result is deliberately not bound: nothing reads it.
    subprocess.run(['docker', 'stop', container_name])
    # todo delete os.path.join(build_dir, 'end-%s' % container_name)
|
||||
|
||||
def docker_is_running(container_name):
|
||||
dinspect = subprocess.run(['docker', 'container', 'inspect', container_name], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
|
||||
|
@ -67,7 +67,6 @@ class runbot_build(models.Model):
|
||||
nb_running = fields.Integer("Number of test slot use", default=0)
|
||||
|
||||
# should we add a stored field for children results?
|
||||
pid = fields.Integer('Pid')
|
||||
active_step = fields.Many2one('runbot.build.config.step', 'Active step')
|
||||
job = fields.Char('Active step display name', compute='_compute_job')
|
||||
job_start = fields.Datetime('Job start')
|
||||
@ -573,14 +572,40 @@ class runbot_build(models.Model):
|
||||
self.ensure_one()
|
||||
return '%s_%s' % (self.dest, self.active_step.name)
|
||||
|
||||
def _schedule(self):
|
||||
"""schedule the build"""
|
||||
icp = self.env['ir.config_parameter']
|
||||
# For retro-compatibility, keep this parameter in seconds
|
||||
|
||||
def _init_pendings(self, host):
    """Initialise the pending builds of this recordset and start their first job.

    For each build: check that it is pending and assigned to ``host``, allocate
    a port, write the start timestamps merged with ``_next_job_values()``,
    build the docker image and finally call ``_run_job()``.

    :param host: host record; ``host.name`` must equal the ``host`` of each build
    :raises UserError: when a build is not pending or belongs to another host
    """
    for pending in self:
        if pending.local_state != 'pending':
            raise UserError("Build %s is not pending" % pending.id)
        if pending.host != host.name:
            raise UserError("Build %s does not have correct host" % pending.id)

        # allocate port and schedule first job
        port = self._find_port()
        job_start = now()
        build_start = now()
        start_values = dict(port=port, job_start=job_start, build_start=build_start, job_end=False)
        start_values.update(pending._next_job_values())
        pending.write(start_values)

        if pending.active_step:
            try:
                pending._log('_schedule', 'Init build environment with config %s ' % pending.config_id.name)
                # notify pending build - avoid confusing users by saying nothing
                pending._github_status()
                os.makedirs(pending._path('logs'), exist_ok=True)
                pending._log('_schedule', 'Building docker image')
                docker_build(pending._path('logs', 'docker_build.txt'), pending._path())
            except Exception:
                # Any failure while preparing the environment ends the build as 'ko'.
                _logger.exception('Failed initiating build %s', pending.dest)
                pending._log('_schedule', 'Failed initiating build')
                pending._kill(result='ko')
            else:
                pending._run_job()
        else:
            # The config yielded no step to run: nothing to start for this build.
            pending._log('_schedule', 'No job in config, doing nothing')
|
||||
|
||||
def _process_requested_actions(self):
|
||||
for build in self:
|
||||
self.env.cr.commit() # commit between each build to minimise transactionnal errors due to state computations
|
||||
self.invalidate_cache()
|
||||
if build.requested_action == 'deathrow':
|
||||
result = None
|
||||
if build.local_state != 'running' and build.global_result not in ('warn', 'ko'):
|
||||
@ -617,97 +642,76 @@ class runbot_build(models.Model):
|
||||
build.write({'requested_action': False, 'local_state': 'done'})
|
||||
continue
|
||||
|
||||
if build.local_state == 'pending':
|
||||
# allocate port and schedule first job
|
||||
port = self._find_port()
|
||||
values = {
|
||||
'host': fqdn(), # or ip? of false?
|
||||
'port': port,
|
||||
'job_start': now(),
|
||||
'build_start': now(),
|
||||
'job_end': False,
|
||||
}
|
||||
values.update(build._next_job_values())
|
||||
build.write(values)
|
||||
if not build.active_step:
|
||||
build._log('_schedule', 'No job in config, doing nothing')
|
||||
def _schedule(self):
|
||||
"""schedule the build"""
|
||||
icp = self.env['ir.config_parameter']
|
||||
for build in self:
|
||||
if build.local_state not in ['testing', 'running']:
|
||||
raise UserError("Build %s is not testing/running: %s" % (build.id, build.local_state))
|
||||
if build.local_state == 'testing':
|
||||
# failfast in case of docker error (triggered in database)
|
||||
if build.triggered_result and not build.active_step.ignore_triggered_result:
|
||||
worst_result = self._get_worst_result([build.triggered_result, build.local_result])
|
||||
if worst_result != build.local_result:
|
||||
build.local_result = build.triggered_result
|
||||
build._github_status() # failfast
|
||||
# check if current job is finished
|
||||
_docker_state = docker_state(build._get_docker_name(), build._path())
|
||||
if _docker_state == 'RUNNING':
|
||||
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
||||
if build.local_state != 'running' and build.job_time > timeout:
|
||||
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
||||
build._kill(result='killed')
|
||||
continue
|
||||
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
|
||||
if build.job_time < 60:
|
||||
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
||||
continue
|
||||
try:
|
||||
build._log('_schedule', 'Init build environment with config %s ' % build.config_id.name)
|
||||
# notify pending build - avoid confusing users by saying nothing
|
||||
build._github_status()
|
||||
os.makedirs(build._path('logs'), exist_ok=True)
|
||||
build._log('_schedule', 'Building docker image')
|
||||
docker_build(build._path('logs', 'docker_build.txt'), build._path())
|
||||
except Exception:
|
||||
_logger.exception('Failed initiating build %s', build.dest)
|
||||
build._log('_schedule', 'Failed initiating build')
|
||||
build._kill(result='ko')
|
||||
continue
|
||||
else: # testing/running build
|
||||
if build.local_state == 'testing':
|
||||
# failfast in case of docker error (triggered in database)
|
||||
if build.triggered_result and not build.active_step.ignore_triggered_result:
|
||||
worst_result = self._get_worst_result([build.triggered_result, build.local_result])
|
||||
if worst_result != build.local_result:
|
||||
build.local_result = build.triggered_result
|
||||
build._github_status() # failfast
|
||||
# check if current job is finished
|
||||
_docker_state = docker_state(build._get_docker_name(), build._path())
|
||||
if _docker_state == 'RUNNING':
|
||||
timeout = min(build.active_step.cpu_limit, int(icp.get_param('runbot.runbot_timeout', default=10000)))
|
||||
if build.local_state != 'running' and build.job_time > timeout:
|
||||
build._log('_schedule', '%s time exceeded (%ss)' % (build.active_step.name if build.active_step else "?", build.job_time))
|
||||
build._kill(result='killed')
|
||||
continue
|
||||
elif _docker_state == 'UNKNOWN' and build.active_step._is_docker_step():
|
||||
if build.job_time < 60:
|
||||
_logger.debug('container "%s" seems too take a while to start', build._get_docker_name())
|
||||
continue
|
||||
else:
|
||||
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
|
||||
# No job running, make result and select nex job
|
||||
build_values = {
|
||||
'job_end': now(),
|
||||
}
|
||||
# make result of previous job
|
||||
try:
|
||||
results = build.active_step._make_results(build)
|
||||
except Exception as e:
|
||||
if isinstance(e, RunbotException):
|
||||
message = e.args[0]
|
||||
else:
|
||||
message = 'An error occured while computing results of %s:\n %s' % (build.job, str(e).replace('\\n', '\n').replace("\\'", "'"))
|
||||
_logger.exception(message)
|
||||
build._log('_make_results', message, level='ERROR')
|
||||
results = {'local_result': 'ko'}
|
||||
else:
|
||||
build._log('_schedule', 'Docker not started after 60 seconds, skipping', level='ERROR')
|
||||
# No job running, make result and select nex job
|
||||
build_values = {
|
||||
'job_end': now(),
|
||||
}
|
||||
# make result of previous job
|
||||
try:
|
||||
results = build.active_step._make_results(build)
|
||||
except Exception as e:
|
||||
if isinstance(e, RunbotException):
|
||||
message = e.args[0]
|
||||
else:
|
||||
message = 'An error occured while computing results of %s:\n %s' % (build.job, str(e).replace('\\n', '\n').replace("\\'", "'"))
|
||||
_logger.exception(message)
|
||||
build._log('_make_results', message, level='ERROR')
|
||||
results = {'local_result': 'ko'}
|
||||
|
||||
build_values.update(results)
|
||||
build_values.update(results)
|
||||
|
||||
build.active_step.log_end(build)
|
||||
build.active_step.log_end(build)
|
||||
|
||||
build_values.update(build._next_job_values()) # find next active_step or set to done
|
||||
build_values.update(build._next_job_values()) # find next active_step or set to done
|
||||
|
||||
ending_build = build.local_state not in ('done', 'running') and build_values.get('local_state') in ('done', 'running')
|
||||
if ending_build:
|
||||
build.update_build_end()
|
||||
ending_build = build.local_state not in ('done', 'running') and build_values.get('local_state') in ('done', 'running')
|
||||
if ending_build:
|
||||
build.update_build_end()
|
||||
|
||||
build.write(build_values)
|
||||
if ending_build:
|
||||
build._github_status()
|
||||
if not build.local_result: # Set 'ok' result if no result set (no tests job on build)
|
||||
build.local_result = 'ok'
|
||||
build._logger("No result set, setting ok by default")
|
||||
build.write(build_values)
|
||||
if ending_build:
|
||||
build._github_status()
|
||||
if not build.local_result: # Set 'ok' result if no result set (no tests job on build)
|
||||
build.local_result = 'ok'
|
||||
build._logger("No result set, setting ok by default")
|
||||
build._run_job()
|
||||
|
||||
# run job
|
||||
pid = None
|
||||
def _run_job(self):
|
||||
# run job
|
||||
for build in self:
|
||||
if build.local_state != 'done':
|
||||
build._logger('running %s', build.active_step.name)
|
||||
os.makedirs(build._path('logs'), exist_ok=True)
|
||||
os.makedirs(build._path('datadir'), exist_ok=True)
|
||||
try:
|
||||
pid = build.active_step._run(build) # run should be on build?
|
||||
build.write({'pid': pid}) # no really usefull anymore with dockers
|
||||
build.active_step._run(build) # run should be on build?
|
||||
except Exception as e:
|
||||
if isinstance(e, RunbotException):
|
||||
message = e.args[0]
|
||||
@ -716,10 +720,6 @@ class runbot_build(models.Model):
|
||||
_logger.exception(message)
|
||||
build._log("run", message, level='ERROR')
|
||||
build._kill(result='ko')
|
||||
continue
|
||||
|
||||
self.env.cr.commit()
|
||||
self.invalidate_cache()
|
||||
|
||||
def _path(self, *l, **kw):
|
||||
"""Return the repo build path"""
|
||||
@ -844,16 +844,6 @@ class runbot_build(models.Model):
|
||||
'line': '0',
|
||||
})
|
||||
|
||||
def _reap(self):
    """Collect every already-terminated child process without blocking.

    Calls ``os.wait3(os.WNOHANG)`` repeatedly, logging each reaped pid and
    status at debug level, and stops as soon as no child is ready (pid 0) or
    ``os.wait3`` raises ``OSError``.
    """
    reaped_pid = -1
    while reaped_pid != 0:
        try:
            reaped_pid, exit_status, _rusage = os.wait3(os.WNOHANG)
        except OSError:
            return
        if reaped_pid:
            _logger.debug('reaping: pid: %s status: %s', reaped_pid, exit_status)
|
||||
|
||||
def _kill(self, result=None):
|
||||
host = fqdn()
|
||||
for build in self:
|
||||
|
@ -52,6 +52,10 @@ class RunboHost(models.Model):
|
||||
icp = self.env['ir.config_parameter']
|
||||
return self.nb_worker or int(icp.sudo().get_param('runbot.runbot_workers', default=6))
|
||||
|
||||
def get_running_max(self):
    """Return the ``runbot.runbot_running_max`` config parameter as an int.

    Falls back to 75 when the parameter lookup returns the default.
    """
    raw_limit = self.env['ir.config_parameter'].get_param('runbot.runbot_running_max', default=75)
    return int(raw_limit)
|
||||
|
||||
def set_psql_conn_count(self):
|
||||
self.ensure_one()
|
||||
with local_pgadmin_cursor() as local_cr:
|
||||
|
@ -17,9 +17,11 @@ from odoo.tools.misc import DEFAULT_SERVER_DATETIME_FORMAT
|
||||
from odoo import models, fields, api, registry
|
||||
from odoo.modules.module import get_module_resource
|
||||
from odoo.tools import config
|
||||
from odoo.osv import expression
|
||||
from ..common import fqdn, dt2time, Commit, dest_reg, os
|
||||
from ..container import docker_ps, docker_stop
|
||||
from psycopg2.extensions import TransactionRollbackError
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
class RunbotException(Exception):
|
||||
@ -456,97 +458,130 @@ class runbot_repo(models.Model):
|
||||
except Exception:
|
||||
_logger.exception('Fail to update repo %s', repo.name)
|
||||
|
||||
@api.multi
|
||||
def _scheduler(self, host=None):
|
||||
"""Schedule builds for the repository"""
|
||||
ids = self.ids
|
||||
if not ids:
|
||||
return
|
||||
icp = self.env['ir.config_parameter']
|
||||
host = host or self.env['runbot.host']._get_current()
|
||||
workers = host.get_nb_worker()
|
||||
running_max = int(icp.get_param('runbot.runbot_running_max', default=75))
|
||||
assigned_only = host.assigned_only
|
||||
|
||||
Build = self.env['runbot.build']
|
||||
domain = [('repo_id', 'in', ids)]
|
||||
domain_host = domain + [('host', '=', host.name)]
|
||||
|
||||
# schedule jobs (transitions testing -> running, kill jobs, ...)
|
||||
build_ids = Build.search(domain_host + ['|', ('local_state', 'in', ['testing', 'running']), ('requested_action', 'in', ['wake_up', 'deathrow'])])
|
||||
build_ids._schedule()
|
||||
def _commit(self):
    """Commit the current cursor, then invalidate the cache and reset the environment."""
    environment = self.env
    environment.cr.commit()
    self.invalidate_cache()
    environment.reset()
|
||||
|
||||
# launch new tests
|
||||
@api.multi
|
||||
def _scheduler(self, host):
|
||||
nb_workers = host.get_nb_worker()
|
||||
|
||||
nb_testing = Build.search_count(domain_host + [('local_state', '=', 'testing')])
|
||||
available_slots = workers - nb_testing
|
||||
reserved_slots = Build.search_count(domain_host + [('local_state', '=', 'pending')])
|
||||
assignable_slots = (available_slots - reserved_slots) if not assigned_only else 0
|
||||
if available_slots > 0:
|
||||
if assignable_slots > 0: # note: slots have been addapt to be able to force host on pending build. Normally there is no pending with host.
|
||||
# commit transaction to reduce the critical section duration
|
||||
def allocate_builds(where_clause, limit):
|
||||
self.env.cr.commit()
|
||||
self.invalidate_cache()
|
||||
# self-assign to be sure that another runbot instance cannot self assign the same builds
|
||||
query = """UPDATE
|
||||
runbot_build
|
||||
SET
|
||||
host = %%(host)s
|
||||
WHERE
|
||||
runbot_build.id IN (
|
||||
SELECT runbot_build.id
|
||||
FROM runbot_build
|
||||
LEFT JOIN runbot_branch
|
||||
ON runbot_branch.id = runbot_build.branch_id
|
||||
WHERE
|
||||
runbot_build.repo_id IN %%(repo_ids)s
|
||||
AND runbot_build.local_state = 'pending'
|
||||
AND runbot_build.host IS NULL
|
||||
%s
|
||||
ORDER BY
|
||||
array_position(array['normal','rebuild','indirect','scheduled']::varchar[], runbot_build.build_type) ASC,
|
||||
runbot_branch.sticky DESC,
|
||||
runbot_branch.priority DESC,
|
||||
runbot_build.sequence ASC
|
||||
FOR UPDATE OF runbot_build SKIP LOCKED
|
||||
LIMIT %%(limit)s
|
||||
)
|
||||
RETURNING id""" % where_clause
|
||||
for build in self._get_builds_with_requested_actions(host):
|
||||
build._process_requested_actions()
|
||||
self._commit()
|
||||
for build in self._get_builds_to_schedule(host):
|
||||
build._schedule()
|
||||
self._commit()
|
||||
self._assign_pending_builds(host, nb_workers, [('build_type', '!=', 'scheduled')])
|
||||
self._commit()
|
||||
self._assign_pending_builds(host, nb_workers-1 or nb_workers)
|
||||
self._commit()
|
||||
for build in self._get_builds_to_init(host):
|
||||
build._init_pendings(host)
|
||||
self._commit()
|
||||
self._gc_running(host)
|
||||
self._commit()
|
||||
self._reload_nginx()
|
||||
|
||||
self.env.cr.execute(query, {'repo_ids': tuple(ids), 'host': host.name, 'limit': limit})
|
||||
return self.env.cr.fetchall()
|
||||
def build_domain_host(self, host, domain=None):
    """Return a search domain restricted to these repos and to ``host``.

    :param host: host record; only its ``name`` is used
    :param domain: optional list of extra domain leaves appended to the result
    :return: a new list; ``domain`` itself is not modified
    """
    host_domain = [('repo_id', 'in', self.ids), ('host', '=', host.name)]
    if domain:
        return host_domain + domain
    return host_domain
|
||||
|
||||
allocated = allocate_builds("""AND runbot_build.build_type != 'scheduled'""", assignable_slots)
|
||||
if allocated:
|
||||
_logger.debug('Normal builds %s where allocated to runbot' % allocated)
|
||||
weak_slot = assignable_slots - len(allocated) - 1
|
||||
if weak_slot > 0:
|
||||
allocated = allocate_builds('', weak_slot)
|
||||
if allocated:
|
||||
_logger.debug('Scheduled builds %s where allocated to runbot' % allocated)
|
||||
def _get_builds_with_requested_actions(self, host):
    """Search the builds on ``host`` whose requested action is 'wake_up' or 'deathrow'."""
    action_filter = [('requested_action', 'in', ['wake_up', 'deathrow'])]
    build_model = self.env['runbot.build']
    return build_model.search(self.build_domain_host(host, action_filter))
|
||||
|
||||
pending_build = Build.search(domain_host + [('local_state', '=', 'pending')], limit=available_slots)
|
||||
if pending_build:
|
||||
pending_build._schedule()
|
||||
def _get_builds_to_schedule(self, host):
    """Search the builds on ``host`` whose local state is 'testing' or 'running'."""
    state_filter = [('local_state', 'in', ['testing', 'running'])]
    build_model = self.env['runbot.build']
    return build_model.search(self.build_domain_host(host, state_filter))
|
||||
|
||||
def _assign_pending_builds(self, host, nb_workers, domain=None):
    """Assign unallocated pending builds to ``host`` while it has free slots.

    Nothing is done when the recordset is empty, when the host is flagged
    ``assigned_only`` or when ``nb_workers`` is not positive. Otherwise the
    number of builds of this host already in 'testing' or 'pending' state is
    subtracted from ``nb_workers`` and the remainder, if positive, is handed
    to ``_allocate_builds``.

    :param host: host record the builds are assigned to
    :param nb_workers: number of slots to consider for this host
    :param domain: optional extra domain forwarded to ``_allocate_builds``
    """
    can_assign = self.ids and not host.assigned_only and nb_workers > 0
    if not can_assign:
        return

    busy_leaf = ('local_state', 'in', ('testing', 'pending'))
    busy_count = self.env['runbot.build'].search_count(self.build_domain_host(host) + [busy_leaf])
    free_slots = nb_workers - busy_count
    if free_slots <= 0:
        return

    allocated = self._allocate_builds(host, free_slots, domain)
    if allocated:
        _logger.debug('Builds %s where allocated to runbot' % allocated)
|
||||
|
||||
def _get_builds_to_init(self, host):
    """Return the pending builds of ``host`` that can be started now.

    The number of free slots is ``host.get_nb_worker()`` minus the number of
    builds of this host in 'testing' state. With no free slot the bare
    ``runbot.build`` model object is returned; otherwise a search for pending
    builds limited to the number of free slots.
    """
    build_model = self.env['runbot.build']
    base_domain = self.build_domain_host(host)
    testing_count = build_model.search_count(base_domain + [('local_state', '=', 'testing')])
    free_slots = host.get_nb_worker() - testing_count
    if free_slots > 0:
        return build_model.search(base_domain + [('local_state', '=', 'pending')], limit=free_slots)
    return build_model
|
||||
|
||||
def _gc_running(self, host):
    """Kill the running builds of ``host`` that exceed the running limit.

    Protected builds are never considered for killing:

    * builds of this host flagged ``keep_running``;
    * builds hosted on ``host`` that are among the 4 most recent non-hidden
      builds of a sticky branch.

    The protected list is truncated to ``host.get_running_max()`` entries.
    The remaining running builds of the host are ordered by ``job_start``
    (most recent first) and every build past the first ``running_max`` ones
    is killed.

    :param host: host record providing ``name`` and ``get_running_max()``
    """
    running_max = host.get_running_max()
    domain_host = self.build_domain_host(host)
    Build = self.env['runbot.build']

    # some builds are marked as keep running: those must be protected, hence
    # the '=' operator (a '!=' here would protect every *other* build instead)
    cannot_be_killed_ids = Build.search(domain_host + [('keep_running', '=', True)]).ids

    # we want to keep one build running per sticky, no matter which host
    sticky_branches_ids = self.env['runbot.branch'].search([('sticky', '=', True)]).ids
    # search builds on host on sticky branches, order by position in branch history
    if sticky_branches_ids:
        self.env.cr.execute("""
            SELECT
                id
            FROM (
                SELECT
                    bu.id AS id,
                    bu.host as host,
                    row_number() OVER (PARTITION BY branch_id order by bu.id desc) AS row
                FROM
                    runbot_branch br INNER JOIN runbot_build bu ON br.id=bu.branch_id
                WHERE
                    br.id in %s AND (bu.hidden = 'f' OR bu.hidden IS NULL)
                ) AS br_bu
            WHERE
                row <= 4 AND host = %s
            ORDER BY row, id desc
            """, [tuple(sticky_branches_ids), host.name]
        )
        # fetchall() yields one-column rows; unwrap them so the list only
        # contains plain ids usable in the 'not in' domain below
        cannot_be_killed_ids = cannot_be_killed_ids + [row[0] for row in self.env.cr.fetchall()]
    cannot_be_killed_ids = cannot_be_killed_ids[:running_max]  # ensure that we don't try to keep more than we can handle

    build_ids = Build.search(domain_host + [('local_state', '=', 'running'), ('id', 'not in', cannot_be_killed_ids)], order='job_start desc').ids
    Build.browse(build_ids)[running_max:]._kill()
|
||||
|
||||
def _allocate_builds(self, host, nb_slots, domain=None):
    """Assign up to ``nb_slots`` unallocated pending builds of these repos to ``host``.

    The candidate builds (pending, no host, optionally narrowed by ``domain``)
    are selected and updated in a single ``UPDATE ... RETURNING`` statement
    whose sub-select uses ``FOR UPDATE OF runbot_build SKIP LOCKED``.

    :param host: host record; its ``name`` is written on the selected builds
    :param nb_slots: maximum number of builds to assign; ``<= 0`` returns ``[]``
    :param domain: optional extra domain AND-ed with the base selection
    :return: the rows returned by ``RETURNING id`` (as given by ``fetchall()``)
    """
    if nb_slots <= 0:
        return []

    pending_unassigned = [('repo_id', 'in', self.ids), ('local_state', '=', 'pending'), ('host', '=', False)]
    search_domain = expression.AND([pending_unassigned, domain]) if domain else pending_unassigned
    compiled = expression.expression(search_domain, self.env['runbot.build'])
    # The hand-written FROM clause below only provides runbot_build (+ branch join).
    assert compiled.get_tables() == ['"runbot_build"']
    where_clause, where_params = compiled.to_sql()

    # self-assign to be sure that another runbot instance cannot self assign the same builds
    query = """UPDATE
                    runbot_build
                SET
                    host = %%s
                WHERE
                    runbot_build.id IN (
                        SELECT runbot_build.id
                        FROM runbot_build
                        LEFT JOIN runbot_branch
                        ON runbot_branch.id = runbot_build.branch_id
                        WHERE
                            %s
                        ORDER BY
                            array_position(array['normal','rebuild','indirect','scheduled']::varchar[], runbot_build.build_type) ASC,
                            runbot_branch.sticky DESC,
                            runbot_branch.priority DESC,
                            runbot_build.sequence ASC
                        FOR UPDATE OF runbot_build SKIP LOCKED
                        LIMIT %%s
                    )
                RETURNING id""" % where_clause
    cursor = self.env.cr
    cursor.execute(query, [host.name, *where_params, nb_slots])
    return cursor.fetchall()
|
||||
|
||||
def _domain(self):
    """Return the ``runbot.runbot_domain`` config parameter, defaulting to ``fqdn()``."""
    config_params = self.env.get('ir.config_parameter')
    return config_params.get_param('runbot.runbot_domain', fqdn())
|
||||
@ -613,9 +648,7 @@ class runbot_repo(models.Model):
|
||||
repos = self.search([('mode', '!=', 'disabled')])
|
||||
repos._update(force=False)
|
||||
repos._create_pending_builds()
|
||||
|
||||
self.env.cr.commit()
|
||||
self.invalidate_cache()
|
||||
self._commit()
|
||||
time.sleep(update_frequency)
|
||||
|
||||
def _cron_fetch_and_build(self, hostname):
|
||||
@ -629,7 +662,8 @@ class runbot_repo(models.Model):
|
||||
host = self.env['runbot.host']._get_current()
|
||||
host.set_psql_conn_count()
|
||||
host.last_start_loop = fields.Datetime.now()
|
||||
self.env.cr.commit()
|
||||
|
||||
self._commit()
|
||||
start_time = time.time()
|
||||
# 1. source cleanup
|
||||
# -> Remove sources when no build is using them
|
||||
@ -638,53 +672,41 @@ class runbot_repo(models.Model):
|
||||
# 2. db and log cleanup
|
||||
# -> Keep them as long as possible
|
||||
self.env['runbot.build']._local_cleanup()
|
||||
|
||||
# 3. docker cleanup
|
||||
docker_ps_result = docker_ps()
|
||||
containers = {int(dc.split('-', 1)[0]):dc for dc in docker_ps_result if dest_reg.match(dc)}
|
||||
if containers:
|
||||
candidates = self.env['runbot.build'].search([('id', 'in', list(containers.keys())), ('local_state', '=', 'done')])
|
||||
for c in candidates:
|
||||
_logger.info('container %s found running with build state done', containers[c.id])
|
||||
docker_stop(containers[c.id])
|
||||
ignored = {dc for dc in docker_ps_result if not dest_reg.match(dc)}
|
||||
if ignored:
|
||||
_logger.debug('docker (%s) not deleted because not dest format', " ".join(list(ignored)))
|
||||
self.env['runbot.repo']._docker_cleanup()
|
||||
|
||||
timeout = self._get_cron_period()
|
||||
icp = self.env['ir.config_parameter']
|
||||
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
|
||||
while time.time() - start_time < timeout:
|
||||
repos = self.search([('mode', '!=', 'disabled')])
|
||||
try:
|
||||
repos._scheduler(host)
|
||||
host.last_success = fields.Datetime.now()
|
||||
self.env.cr.commit()
|
||||
self.env.reset()
|
||||
self = self.env()[self._name]
|
||||
self._reload_nginx()
|
||||
time.sleep(update_frequency)
|
||||
except TransactionRollbackError: # can lead to psycopg2.InternalError'>: "current transaction is aborted, commands ignored until end of transaction block
|
||||
_logger.exception('Trying to rollback')
|
||||
self.env.cr.rollback()
|
||||
self.env.reset()
|
||||
time.sleep(random.uniform(0, 3))
|
||||
except Exception as e:
|
||||
with registry(self._cr.dbname).cursor() as cr: # user another cursor since transaction will be rollbacked
|
||||
message = str(e)
|
||||
chost = host.with_env(self.env(cr=cr))
|
||||
if chost.last_exception == message:
|
||||
chost.exception_count += 1
|
||||
else:
|
||||
chost.with_env(self.env(cr=cr)).last_exception = str(e)
|
||||
chost.exception_count = 1
|
||||
raise
|
||||
time.sleep(self._scheduler_loop_turn(host, update_frequency))
|
||||
|
||||
if host.last_exception:
|
||||
host.last_exception = ""
|
||||
host.exception_count = 0
|
||||
host.last_end_loop = fields.Datetime.now()
|
||||
|
||||
def _scheduler_loop_turn(self, host, default_sleep=1):
    """Run one scheduler pass over all non-disabled repos and return a sleep time.

    On success ``host.last_success`` is updated and committed, any previously
    recorded exception on the host is cleared, and ``default_sleep`` is
    returned. On failure the transaction is rolled back, the exception is
    logged and recorded on the host (incrementing ``exception_count`` when the
    message repeats), the record is committed and a random delay between 0 and
    3 is returned.

    :param host: host record the scheduler runs for
    :param default_sleep: value returned after a successful pass
    :return: the time the caller should sleep before the next pass
    """
    enabled_repos = self.search([('mode', '!=', 'disabled')])
    try:
        enabled_repos._scheduler(host)
        host.last_success = fields.Datetime.now()
        self._commit()
    except Exception as error:
        # Drop whatever the failed pass left in the transaction before
        # writing the failure details on the host.
        self.env.cr.rollback()
        self.env.reset()
        _logger.exception(error)
        error_text = str(error)
        if host.last_exception != error_text:
            host.last_exception = error_text
            host.exception_count = 1
        else:
            host.exception_count += 1
        self._commit()
        return random.uniform(0, 3)

    # Only reached when the pass succeeded (the handler above always returns).
    if host.last_exception:
        host.last_exception = ""
        host.exception_count = 0
    return default_sleep
|
||||
|
||||
def _source_cleanup(self):
|
||||
try:
|
||||
if self.pool._init:
|
||||
@ -721,23 +743,34 @@ class runbot_repo(models.Model):
|
||||
assert 'static' in source_dir
|
||||
shutil.rmtree(source_dir)
|
||||
_logger.info('%s/%s source folder where deleted (%s kept)' % (len(to_delete), len(to_delete+to_keep), len(to_keep)))
|
||||
|
||||
except:
|
||||
_logger.error('An exception occured while cleaning sources')
|
||||
pass
|
||||
|
||||
|
||||
class RefTime(models.Model):
|
||||
_name = "runbot.repo.reftime"
|
||||
_log_access = False
|
||||
|
||||
time = fields.Float('Time', index=True, required=True)
|
||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
||||
def _docker_cleanup(self):
    """Stop containers that are still listed although their build is done.

    Container names returned by ``docker_ps()`` that match ``dest_reg`` are
    keyed by the integer before their first '-' (used as the build id). Every
    such build found in 'done' local state gets its container stopped. Names
    that do not match ``dest_reg`` are left alone and only reported at debug
    level.
    """
    listed_names = docker_ps()  # presumably a list of container names — confirm against docker_ps
    containers = {}
    ignored = set()
    for container_name in listed_names:
        if dest_reg.match(container_name):
            containers[int(container_name.split('-', 1)[0])] = container_name
        else:
            ignored.add(container_name)

    if containers:
        done_builds = self.env['runbot.build'].search([('id', 'in', list(containers)), ('local_state', '=', 'done')])
        for done_build in done_builds:
            stale_container = containers[done_build.id]
            _logger.info('container %s found running with build state done', stale_container)
            docker_stop(stale_container)

    if ignored:
        _logger.debug('docker (%s) not deleted because not dest format', " ".join(ignored))
|
||||
|
||||
|
||||
class HookTime(models.Model):
|
||||
_name = "runbot.repo.hooktime"
|
||||
_log_access = False
|
||||
class RefTime(models.Model):
|
||||
_name = "runbot.repo.reftime"
|
||||
_log_access = False
|
||||
|
||||
time = fields.Float('Time')
|
||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
||||
time = fields.Float('Time', index=True, required=True)
|
||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
||||
|
||||
|
||||
class HookTime(models.Model):
|
||||
_name = "runbot.repo.hooktime"
|
||||
_log_access = False
|
||||
|
||||
time = fields.Float('Time')
|
||||
repo_id = fields.Many2one('runbot.repo', 'Repository', required=True, ondelete='cascade')
|
@ -36,6 +36,7 @@ class RunbotCase(TransactionCase):
|
||||
self.start_patcher('isdir', 'odoo.addons.runbot.common.os.path.isdir', True)
|
||||
self.start_patcher('isfile', 'odoo.addons.runbot.common.os.path.isfile', True)
|
||||
self.start_patcher('docker_run', 'odoo.addons.runbot.models.build_config.docker_run')
|
||||
self.start_patcher('docker_build', 'odoo.addons.runbot.models.build.docker_build')
|
||||
self.start_patcher('docker_ps', 'odoo.addons.runbot.models.repo.docker_ps', [])
|
||||
self.start_patcher('docker_stop', 'odoo.addons.runbot.models.repo.docker_stop')
|
||||
|
||||
|
@ -56,7 +56,6 @@ class Test_Cron(RunbotCase):
|
||||
ret = self.Repo._cron_fetch_and_build(hostname)
|
||||
self.assertEqual(None, ret)
|
||||
mock_scheduler.assert_called()
|
||||
self.assertTrue(mock_reload.called)
|
||||
host = self.env['runbot.host'].search([('name', '=', hostname)])
|
||||
self.assertEqual(host.name, hostname, 'A new host should have been created')
|
||||
self.assertGreater(host.psql_conn_count, 0, 'A least one connection should exist on the current psql instance')
|
||||
|
@ -257,10 +257,10 @@ class Test_Repo_Scheduler(RunbotCase):
|
||||
'name': 'refs/head/foo'
|
||||
})
|
||||
|
||||
@patch('odoo.addons.runbot.models.build.runbot_build._reap')
|
||||
@patch('odoo.addons.runbot.models.build.runbot_build._kill')
|
||||
@patch('odoo.addons.runbot.models.build.runbot_build._schedule')
|
||||
def test_repo_scheduler(self, mock_schedule, mock_kill, mock_reap):
|
||||
@patch('odoo.addons.runbot.models.build.runbot_build._init_pendings')
|
||||
def test_repo_scheduler(self, mock_init_pendings, mock_schedule, mock_kill):
|
||||
self.env['ir.config_parameter'].set_param('runbot.runbot_workers', 6)
|
||||
builds = []
|
||||
# create 6 builds that are testing on the host to verify that
|
||||
@ -293,8 +293,8 @@ class Test_Repo_Scheduler(RunbotCase):
|
||||
'local_state': 'pending',
|
||||
})
|
||||
builds.append(build)
|
||||
|
||||
self.foo_repo._scheduler()
|
||||
host = self.env['runbot.host']._get_current()
|
||||
self.foo_repo._scheduler(host)
|
||||
|
||||
build.invalidate_cache()
|
||||
scheduled_build.invalidate_cache()
|
||||
@ -304,7 +304,7 @@ class Test_Repo_Scheduler(RunbotCase):
|
||||
# give some room for the pending build
|
||||
self.Build.search([('name', '=', 'a')]).write({'local_state': 'done'})
|
||||
|
||||
self.foo_repo._scheduler()
|
||||
self.foo_repo._scheduler(host)
|
||||
build.invalidate_cache()
|
||||
scheduled_build.invalidate_cache()
|
||||
self.assertEqual(build.host, 'host.runbot.com')
|
||||
|
@ -22,7 +22,6 @@
|
||||
<field name="local_result"/>
|
||||
<field name="global_result"/>
|
||||
<field name="triggered_result" groups="base.group_no_one"/>
|
||||
<field name="pid"/>
|
||||
<field name="host"/>
|
||||
<field name="job_start" groups="base.group_no_one"/>
|
||||
<field name="job_end" groups="base.group_no_one"/>
|
||||
@ -58,7 +57,6 @@
|
||||
<field name="port"/>
|
||||
<field name="job"/>
|
||||
<field name="coverage_result"/>
|
||||
<field name="pid"/>
|
||||
<field name="host"/>
|
||||
<field name="build_time"/>
|
||||
<field name="build_age"/>
|
||||
|
@ -18,10 +18,8 @@ _logger = logging.getLogger(__name__)
|
||||
|
||||
class RunbotClient():
|
||||
|
||||
def __init__(self, env, args):
|
||||
def __init__(self, env):
|
||||
self.env = env
|
||||
self.args = args
|
||||
self.fqdn = socket.getfqdn()
|
||||
self.ask_interrupt = threading.Event()
|
||||
|
||||
def main_loop(self):
|
||||
@ -31,15 +29,17 @@ class RunbotClient():
|
||||
host = self.env['runbot.host']._get_current()
|
||||
count = 0
|
||||
while True:
|
||||
host.last_start_loop = fields.Datetime.now()
|
||||
count = count % 60
|
||||
if count == 0:
|
||||
logging.info('Host %s running with %s slots on pid %s%s', host.name, host.get_nb_worker(), os.getpid(), ' (assigned only)' if host.assigned_only else '')
|
||||
self.env['runbot.repo']._source_cleanup()
|
||||
self.env['runbot.build']._local_cleanup()
|
||||
host.last_end_loop = host.last_start_loop = fields.Datetime.now()
|
||||
self.env['runbot.repo']._docker_cleanup()
|
||||
host.set_psql_conn_count()
|
||||
count += 1
|
||||
sleep_time = self.env['runbot.repo']._scheduler_loop_turn(host)
|
||||
host.last_end_loop = fields.Datetime.now()
|
||||
self.env.cr.commit()
|
||||
self.env.reset()
|
||||
self.sleep(sleep_time)
|
||||
@ -97,7 +97,7 @@ def run():
|
||||
with odoo.api.Environment.manage():
|
||||
with registry.cursor() as cr:
|
||||
env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
|
||||
runbot_client = RunbotClient(env, args)
|
||||
runbot_client = RunbotClient(env)
|
||||
# run main loop
|
||||
runbot_client.main_loop()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user