mirror of
https://github.com/odoo/runbot.git
synced 2025-03-16 07:55:45 +07:00
[REF] runbot: split cron to speed up builds
The Runbot Cron is executed on each runbot instance. When the number of instances scales, the time needed for an instance to obtain the cron increases. With this commit, the original runbot_cron is removed. Instead, a cron have to be created to run the _cron_fetch_and_schedule method. This method will fetch the repo and create pending builds. This cron is intended to run only on one runbot instance. This method needs a host parameter to specify which runbot instance will be in charge of this task. On the other hand, a dedicated cron have to be manually created for each runbot instance that will have the build task. Those cron's only have to call the _cron_fetch_and_build method with the runbot hostname as a parameter. This method will then self assign pending builds if there are slots available. All available build slots are reserved in a single LOCKED SQL query. Both methods are intended to last a large amount of time, just a few minutes below the cron timeout to maximize the cron productivity. The timeout is randomized to avoid deadlocks if the runbot instances are started at the same time. So the --limit-time-real parameter have to be set to a minimum of 180 sec (600 or 1200 are probably better targets).
This commit is contained in:
parent
ffd27739a4
commit
fe018aeefa
@ -6,7 +6,7 @@
|
||||
'author': "Odoo SA",
|
||||
'website': "http://runbot.odoo.com",
|
||||
'category': 'Website',
|
||||
'version': '2.6',
|
||||
'version': '3.0',
|
||||
'depends': ['website', 'base'],
|
||||
'data': [
|
||||
'security/runbot_security.xml',
|
||||
@ -23,6 +23,5 @@
|
||||
'templates/nginx.xml',
|
||||
'templates/badge.xml',
|
||||
'templates/branch.xml',
|
||||
'data/runbot_cron.xml'
|
||||
],
|
||||
}
|
||||
|
@ -1,13 +0,0 @@
|
||||
<?xml version="1.0" encoding='UTF-8'?>
|
||||
<odoo>
|
||||
<record model="ir.cron" id="runbot_repo_cron">
|
||||
<field name="name">Runbot Cron</field>
|
||||
<field name="model_id" ref="model_runbot_repo"/>
|
||||
<field name="state">code</field>
|
||||
<field name="code">model._cron()</field>
|
||||
<field name="interval_number">1</field>
|
||||
<field name="interval_type">minutes</field>
|
||||
<field name="numbercall">-1</field>
|
||||
<field name="doall" eval="False"/>
|
||||
</record>
|
||||
</odoo>
|
@ -103,7 +103,7 @@ class runbot_build(models.Model):
|
||||
if branch.job_type == 'none' or vals.get('job_type', '') == 'none':
|
||||
return self.env['runbot.build']
|
||||
build_id = super(runbot_build, self).create(vals)
|
||||
extra_info = {'sequence': build_id.id}
|
||||
extra_info = {'sequence': build_id.id if not build_id.sequence else build_id.sequence}
|
||||
job_type = vals['job_type'] if 'job_type' in vals else build_id.branch_id.job_type
|
||||
extra_info.update({'job_type': job_type})
|
||||
context = self.env.context
|
||||
@ -462,8 +462,6 @@ class runbot_build(models.Model):
|
||||
build._log(build.job, "failed running job method, see runbot log")
|
||||
build._kill(result='ko')
|
||||
continue
|
||||
# needed to prevent losing pids if multiple jobs are started and one them raise an exception
|
||||
self.env.cr.commit()
|
||||
|
||||
if pid == -2:
|
||||
# no process to wait, directly call next job
|
||||
|
@ -4,6 +4,7 @@ import dateutil
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import requests
|
||||
import signal
|
||||
@ -115,36 +116,14 @@ class runbot_repo(models.Model):
|
||||
else:
|
||||
raise
|
||||
|
||||
def _update_git(self):
|
||||
""" Update the git repo on FS """
|
||||
def _find_new_commits(self, repo):
|
||||
""" Find new commits in bare repo """
|
||||
self.ensure_one()
|
||||
repo = self
|
||||
_logger.debug('repo %s updating branches', repo.name)
|
||||
|
||||
Branch = self.env['runbot.branch']
|
||||
Build = self.env['runbot.build']
|
||||
icp = self.env['ir.config_parameter']
|
||||
max_age = int(icp.get_param('runbot.runbot_max_age', default=30))
|
||||
|
||||
Build = self.env['runbot.build']
|
||||
Branch = self.env['runbot.branch']
|
||||
|
||||
if not os.path.isdir(os.path.join(repo.path)):
|
||||
os.makedirs(repo.path)
|
||||
if not os.path.isdir(os.path.join(repo.path, 'refs')):
|
||||
_logger.info("Cloning repository '%s' in '%s'" % (repo.name, repo.path))
|
||||
subprocess.call(['git', 'clone', '--bare', repo.name, repo.path])
|
||||
|
||||
# check for mode == hook
|
||||
fname_fetch_head = os.path.join(repo.path, 'FETCH_HEAD')
|
||||
if os.path.isfile(fname_fetch_head):
|
||||
fetch_time = os.path.getmtime(fname_fetch_head)
|
||||
if repo.mode == 'hook' and repo.hook_time and dt2time(repo.hook_time) < fetch_time:
|
||||
t0 = time.time()
|
||||
_logger.debug('repo %s skip hook fetch fetch_time: %ss ago hook_time: %ss ago',
|
||||
repo.name, int(t0 - fetch_time), int(t0 - dt2time(repo.hook_time)))
|
||||
return
|
||||
|
||||
repo._git(['fetch', '-p', 'origin', '+refs/heads/*:refs/heads/*', '+refs/pull/*/head:refs/pull/*'])
|
||||
|
||||
fields = ['refname', 'objectname', 'committerdate:iso8601', 'authorname', 'authoremail', 'subject', 'committername', 'committeremail']
|
||||
fmt = "%00".join(["%(" + field + ")" for field in fields])
|
||||
git_refs = repo._git(['for-each-ref', '--format', fmt, '--sort=-committerdate', 'refs/heads', 'refs/pull'])
|
||||
@ -162,14 +141,6 @@ class runbot_repo(models.Model):
|
||||
|
||||
for name, sha, date, author, author_email, subject, committer, committer_email in refs:
|
||||
# create or get branch
|
||||
# branch = repo.branch_ids.search([('name', '=', name), ('repo_id', '=', repo.id)])
|
||||
# if not branch:
|
||||
# _logger.debug('repo %s found new branch %s', repo.name, name)
|
||||
# branch = self.branch_ids.create({
|
||||
# 'repo_id': repo.id,
|
||||
# 'name': name})
|
||||
# keep for next version with a branch_ids field
|
||||
|
||||
if ref_branches.get(name):
|
||||
branch_id = ref_branches[name]
|
||||
else:
|
||||
@ -232,6 +203,44 @@ class runbot_repo(models.Model):
|
||||
builds_to_be_skipped = Build.search(skippable_domain, order='sequence desc', offset=running_max)
|
||||
builds_to_be_skipped._skip()
|
||||
|
||||
def _create_pending_builds(self, repos):
|
||||
""" Find new commits in physical repos"""
|
||||
for repo in repos:
|
||||
try:
|
||||
repo._find_new_commits(repo)
|
||||
except Exception:
|
||||
_logger.exception('Fail to find new commits in repo %s', repo.name)
|
||||
|
||||
def _clone(self):
|
||||
""" Clone the remote repo if needed """
|
||||
self.ensure_one()
|
||||
repo = self
|
||||
if not os.path.isdir(os.path.join(repo.path, 'refs')):
|
||||
_logger.info("Cloning repository '%s' in '%s'" % (repo.name, repo.path))
|
||||
subprocess.call(['git', 'clone', '--bare', repo.name, repo.path])
|
||||
|
||||
def _update_git(self):
|
||||
""" Update the git repo on FS """
|
||||
self.ensure_one()
|
||||
repo = self
|
||||
_logger.debug('repo %s updating branches', repo.name)
|
||||
|
||||
if not os.path.isdir(os.path.join(repo.path)):
|
||||
os.makedirs(repo.path)
|
||||
self._clone()
|
||||
|
||||
# check for mode == hook
|
||||
fname_fetch_head = os.path.join(repo.path, 'FETCH_HEAD')
|
||||
if os.path.isfile(fname_fetch_head):
|
||||
fetch_time = os.path.getmtime(fname_fetch_head)
|
||||
if repo.mode == 'hook' and repo.hook_time and dt2time(repo.hook_time) < fetch_time:
|
||||
t0 = time.time()
|
||||
_logger.debug('repo %s skip hook fetch fetch_time: %ss ago hook_time: %ss ago',
|
||||
repo.name, int(t0 - fetch_time), int(t0 - dt2time(repo.hook_time)))
|
||||
return
|
||||
|
||||
repo._git(['fetch', '-p', 'origin', '+refs/heads/*:refs/heads/*', '+refs/pull/*/head:refs/pull/*'])
|
||||
|
||||
def _update(self, repos):
|
||||
""" Update the physical git reposotories on FS"""
|
||||
for repo in repos:
|
||||
@ -242,6 +251,8 @@ class runbot_repo(models.Model):
|
||||
|
||||
def _scheduler(self, ids=None):
|
||||
"""Schedule builds for the repository"""
|
||||
if not ids:
|
||||
return
|
||||
icp = self.env['ir.config_parameter']
|
||||
workers = int(icp.get_param('runbot.runbot_workers', default=6))
|
||||
running_max = int(icp.get_param('runbot.runbot_running_max', default=75))
|
||||
@ -256,22 +267,40 @@ class runbot_repo(models.Model):
|
||||
build_ids._schedule()
|
||||
|
||||
# launch new tests
|
||||
testing = Build.search_count(domain_host + [('state', '=', 'testing')])
|
||||
pending = Build.search_count(domain + [('state', '=', 'pending')])
|
||||
nb_testing = Build.search_count(domain_host + [('state', '=', 'testing')])
|
||||
available_slots = workers - nb_testing
|
||||
if available_slots > 0:
|
||||
# commit transaction to reduce the critical section duration
|
||||
self.env.cr.commit()
|
||||
# self-assign to be sure that another runbot instance cannot self assign the same builds
|
||||
query = """UPDATE
|
||||
runbot_build
|
||||
SET
|
||||
host = %(host)s
|
||||
WHERE
|
||||
runbot_build.id IN (
|
||||
SELECT
|
||||
runbot_build.id
|
||||
FROM
|
||||
runbot_build
|
||||
LEFT JOIN runbot_branch ON runbot_branch.id = runbot_build.branch_id
|
||||
WHERE
|
||||
runbot_build.repo_id IN %(repo_ids)s
|
||||
AND runbot_build.state = 'pending'
|
||||
AND runbot_branch.job_type != 'none'
|
||||
AND runbot_build.host IS NULL
|
||||
ORDER BY
|
||||
runbot_branch.sticky DESC,
|
||||
runbot_branch.priority DESC,
|
||||
runbot_build.sequence ASC
|
||||
FOR UPDATE OF runbot_build SKIP LOCKED
|
||||
LIMIT %(available_slots)s)"""
|
||||
|
||||
while testing < workers and pending > 0:
|
||||
|
||||
# find sticky / priority pending build if any, otherwise, last pending (by id, not by sequence) will do the job
|
||||
|
||||
pending_ids = Build.search(domain + [('state', '=', 'pending'), '|', ('branch_id.sticky', '=', True), ('branch_id.priority', '=', True)], limit=1)
|
||||
if not pending_ids:
|
||||
pending_ids = Build.search(domain + [('state', '=', 'pending')], order="sequence", limit=1)
|
||||
|
||||
pending_ids._schedule()
|
||||
|
||||
# compute the number of testing and pending jobs again
|
||||
testing = Build.search_count(domain_host + [('state', '=', 'testing')])
|
||||
pending = Build.search_count(domain + [('state', '=', 'pending')])
|
||||
self.env.cr.execute(query, {'repo_ids': tuple(ids), 'host': fqdn(), 'available_slots': available_slots})
|
||||
pending_build = Build.search(domain + domain_host + [('state', '=', 'pending')])
|
||||
if pending_build:
|
||||
pending_build._schedule()
|
||||
self.env.cr.commit()
|
||||
|
||||
# terminate and reap doomed build
|
||||
build_ids = Build.search(domain_host + [('state', '=', 'running')]).ids
|
||||
@ -321,8 +350,46 @@ class runbot_repo(models.Model):
|
||||
else:
|
||||
_logger.debug('failed to start nginx - failed to kill orphan worker - oh well')
|
||||
|
||||
def _cron(self):
|
||||
repos = self.search([('mode', '!=', 'disabled')])
|
||||
self._update(repos)
|
||||
self._scheduler(repos.ids)
|
||||
self._reload_nginx()
|
||||
def _get_cron_period(self, min_margin=120):
|
||||
""" Compute a randomized cron period with a 2 min margin below
|
||||
real cron timeout from config.
|
||||
"""
|
||||
cron_limit = config.get('limit_time_real_cron')
|
||||
req_limit = config.get('limit_time_real')
|
||||
cron_timeout = cron_limit if cron_limit > -1 else req_limit
|
||||
return cron_timeout - (min_margin + random.randint(1, 60))
|
||||
|
||||
def _cron_fetch_and_schedule(self, hostname):
|
||||
"""This method have to be called from a dedicated cron on a runbot
|
||||
in charge of orchestration.
|
||||
"""
|
||||
if hostname != fqdn():
|
||||
return 'Not for me'
|
||||
start_time = time.time()
|
||||
timeout = self._get_cron_period()
|
||||
icp = self.env['ir.config_parameter']
|
||||
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
|
||||
while time.time() - start_time < timeout:
|
||||
repos = self.search([('mode', '!=', 'disabled')])
|
||||
self._update(repos)
|
||||
self._create_pending_builds(repos)
|
||||
self.env.cr.commit()
|
||||
time.sleep(update_frequency)
|
||||
|
||||
def _cron_fetch_and_build(self, hostname):
|
||||
""" This method have to be called from a dedicated cron
|
||||
created on each runbot instance.
|
||||
"""
|
||||
if hostname != fqdn():
|
||||
return 'Not for me'
|
||||
start_time = time.time()
|
||||
timeout = self._get_cron_period()
|
||||
icp = self.env['ir.config_parameter']
|
||||
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
|
||||
while time.time() - start_time < timeout:
|
||||
repos = self.search([('mode', '!=', 'disabled')])
|
||||
self._update(repos)
|
||||
self._scheduler(repos.ids)
|
||||
self.env.cr.commit()
|
||||
self._reload_nginx()
|
||||
time.sleep(update_frequency)
|
||||
|
@ -14,6 +14,7 @@ class ResConfigSettings(models.TransientModel):
|
||||
runbot_domain = fields.Char('Runbot domain')
|
||||
runbot_max_age = fields.Integer('Max branch age (in days)')
|
||||
runbot_logdb_uri = fields.Char('Runbot URI for build logs')
|
||||
runbot_update_frequency = fields.Integer('Update frequency (in seconds)')
|
||||
|
||||
@api.model
|
||||
def get_values(self):
|
||||
@ -26,6 +27,7 @@ class ResConfigSettings(models.TransientModel):
|
||||
runbot_domain=get_param('runbot.runbot_domain', default=common.fqdn()),
|
||||
runbot_max_age=int(get_param('runbot.runbot_max_age', default=30)),
|
||||
runbot_logdb_uri=get_param('runbot.runbot_logdb_uri', default=False),
|
||||
runbot_update_frequency=get_param('runbot.runbot_update_frequency', default=10),
|
||||
)
|
||||
return res
|
||||
|
||||
@ -40,3 +42,4 @@ class ResConfigSettings(models.TransientModel):
|
||||
set_param("runbot.runbot_domain", self.runbot_domain)
|
||||
set_param("runbot.runbot_max_age", self.runbot_max_age)
|
||||
set_param("runbot.runbot_logdb_uri", self.runbot_logdb_uri)
|
||||
set_param('runbot.runbot_update_frequency', self.runbot_update_frequency)
|
||||
|
@ -5,3 +5,4 @@ from . import test_jobs
|
||||
from . import test_frontend
|
||||
from . import test_job_types
|
||||
from . import test_schedule
|
||||
from . import test_cron
|
||||
|
62
runbot/tests/test_cron.py
Normal file
62
runbot/tests/test_cron.py
Normal file
@ -0,0 +1,62 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from unittest.mock import patch
|
||||
from odoo.tests import common
|
||||
|
||||
|
||||
class Test_Cron(common.TransactionCase):
|
||||
|
||||
def setUp(self):
|
||||
super(Test_Cron, self).setUp()
|
||||
self.Repo = self.env['runbot.repo']
|
||||
|
||||
@patch('odoo.addons.runbot.models.repo.config.get')
|
||||
def test_cron_period(self, mock_config_get):
|
||||
""" Test that the random cron period stays below margin
|
||||
Assuming a configuration of 10 minutes cron limit
|
||||
"""
|
||||
mock_config_get.return_value = 600
|
||||
period = self.Repo._get_cron_period(min_margin=200)
|
||||
for i in range(200):
|
||||
self.assertLess(period, 400)
|
||||
|
||||
@patch('odoo.addons.runbot.models.repo.fqdn')
|
||||
def test_crons_returns(self, mock_fqdn):
|
||||
""" test that cron_fetch_and_schedule and _cron_fetch_and_build
|
||||
return directly when called on wrong host
|
||||
"""
|
||||
mock_fqdn.return_value = 'runboty.foo.com'
|
||||
ret = self.Repo._cron_fetch_and_schedule('runbotx.foo.com')
|
||||
self.assertEqual(ret, 'Not for me')
|
||||
|
||||
ret = self.Repo._cron_fetch_and_build('runbotx.foo.com')
|
||||
self.assertEqual(ret, 'Not for me')
|
||||
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._get_cron_period')
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._create_pending_builds')
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._update')
|
||||
@patch('odoo.addons.runbot.models.repo.fqdn')
|
||||
def test_cron_schedule(self, mock_fqdn, mock_update, mock_create, mock_cron_period):
|
||||
""" test that cron_fetch_and_schedule do its work """
|
||||
mock_fqdn.return_value = 'runbotx.foo.com'
|
||||
mock_cron_period.return_value = 2
|
||||
self.env['ir.config_parameter'].sudo().set_param('runbot.runbot_update_frequency', 1)
|
||||
ret = self.Repo._cron_fetch_and_schedule('runbotx.foo.com')
|
||||
self.assertEqual(None, ret)
|
||||
mock_update.assert_called_with(self.Repo)
|
||||
mock_create.assert_called_with(self.Repo)
|
||||
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._get_cron_period')
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._reload_nginx')
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._scheduler')
|
||||
@patch('odoo.addons.runbot.models.repo.runbot_repo._update')
|
||||
@patch('odoo.addons.runbot.models.repo.fqdn')
|
||||
def test_cron_build(self, mock_fqdn, mock_update, mock_scheduler, mock_reload, mock_cron_period):
|
||||
""" test that cron_fetch_and_build do its work """
|
||||
mock_fqdn.return_value = 'runbotx.foo.com'
|
||||
mock_cron_period.return_value = 2
|
||||
self.env['ir.config_parameter'].sudo().set_param('runbot.runbot_update_frequency', 1)
|
||||
ret = self.Repo._cron_fetch_and_build('runbotx.foo.com')
|
||||
self.assertEqual(None, ret)
|
||||
mock_update.assert_called_with(self.Repo)
|
||||
mock_scheduler.assert_called_with([])
|
||||
self.assertTrue(mock_reload.called)
|
@ -46,6 +46,10 @@
|
||||
<label for="runbot_logdb_uri" class="col-xs-3 o_light_label" style="width: 60%;"/>
|
||||
<field name="runbot_logdb_uri" style="width: 30%;"/>
|
||||
</div>
|
||||
<div class="mt-16 row">
|
||||
<label for="runbot_update_frequency" class="col-xs-3 o_light_label" style="width: 60%;"/>
|
||||
<field name="runbot_update_frequency" style="width: 30%;"/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
Loading…
Reference in New Issue
Block a user