[IMP] runbot: split branch creation and pending builds creation

When searching for new builds by parsing git refs, the new branches are
created as well as the pending builds in the same _find_new_commits
method.

With this commit, this behavior is splitted into two methods, that way,
it's now possible to create missing branches without creating new
builds. The closest_branch detection is enhanced because all the new
branches are created before the builds (separated loops).

The find_new_commits method uses an optimized way to search for
existsing builds. Before this commit, a build search was performed for
each git reference, potentially a huge number.

With this commit, a raw sql query is performed to create a set of tuples
(branch_id, sha) which is enough to decide if a build already exists.

A test was added to verify that new refs leads to pending builds.

Also, a performance test was added but is skipped by default because it
needs an existing repo with 20000 branches in the database so it will
not work with an empty database. This test showed a gain of performance
from 8sec to 2sec when creating builds from new commits.

co-authored by @Xavier-Do
This commit is contained in:
Christophe Monniez 2019-04-30 15:31:01 +02:00
parent 4206d75256
commit 5dd889de3c
4 changed files with 197 additions and 35 deletions

View File

@ -471,7 +471,7 @@ class runbot_build(models.Model):
# update repo if needed
if not build.repo_id._hash_exists(build.name):
build.repo_id._update(build.repo_id)
build.repo_id._update()
# checkout branch
build.branch_id.repo_id._git_export(build.name, build._path())
@ -512,7 +512,7 @@ class runbot_build(models.Model):
'%s match branch %s of %s' % (build_dependency.match_type, closest_name, repo.name)
)
if not repo._hash_exists(latest_commit):
repo._update(repo, force=True)
repo._update(force=True)
if not repo._hash_exists(latest_commit):
repo._git(['fetch', 'origin', latest_commit])
if not repo._hash_exists(latest_commit):

View File

@ -133,21 +133,26 @@ class runbot_repo(models.Model):
else:
raise
def _find_new_commits(self):
""" Find new commits in bare repo """
def _get_refs(self):
"""Find new refs
:return: list of tuples with following refs informations:
name, sha, date, author, author_email, subject, committer, committer_email
"""
self.ensure_one()
Branch = self.env['runbot.branch']
Build = self.env['runbot.build']
icp = self.env['ir.config_parameter']
max_age = int(icp.get_param('runbot.runbot_max_age', default=30))
fields = ['refname', 'objectname', 'committerdate:iso8601', 'authorname', 'authoremail', 'subject', 'committername', 'committeremail']
fmt = "%00".join(["%(" + field + ")" for field in fields])
git_refs = self._git(['for-each-ref', '--format', fmt, '--sort=-committerdate', 'refs/heads', 'refs/pull'])
git_refs = git_refs.strip()
return [tuple(field for field in line.split('\x00')) for line in git_refs.split('\n')]
refs = [[field for field in line.split('\x00')] for line in git_refs.split('\n')]
def _find_or_create_branches(self, refs):
"""Parse refs and create branches that does not exists yet
:param refs: list of tuples returned by _get_refs()
:return: dict {branch.name: branch.id}
The returned structure contains all the branches from refs newly created
or older ones.
"""
Branch = self.env['runbot.branch']
self.env.cr.execute("""
WITH t (branch) AS (SELECT unnest(%s))
SELECT t.branch, b.id
@ -157,22 +162,42 @@ class runbot_repo(models.Model):
ref_branches = {r[0]: r[1] for r in self.env.cr.fetchall()}
for name, sha, date, author, author_email, subject, committer, committer_email in refs:
# create or get branch
if ref_branches.get(name):
branch_id = ref_branches[name]
else:
if not ref_branches.get(name):
_logger.debug('repo %s found new branch %s', self.name, name)
branch_id = Branch.create({'repo_id': self.id, 'name': name}).id
branch = Branch.browse([branch_id])[0]
new_branch = Branch.create({'repo_id': self.id, 'name': name})
ref_branches[name] = new_branch.id
return ref_branches
def _find_new_commits(self, refs, ref_branches):
"""Find new commits in bare repo
:param refs: list of tuples returned by _get_refs()
:param ref_branches: dict structure {branch.name: branch.id}
described in _find_or_create_branches
"""
self.ensure_one()
Branch = self.env['runbot.branch']
Build = self.env['runbot.build']
icp = self.env['ir.config_parameter']
max_age = int(icp.get_param('runbot.runbot_max_age', default=30))
self.env.cr.execute("""
WITH t (build, branch_id) AS (SELECT unnest(%s), unnest(%s))
SELECT b.name, b.branch_id
FROM t LEFT JOIN runbot_build b ON (b.name = t.build) AND (b.branch_id = t.branch_id)
""", ([r[1] for r in refs], [ref_branches[r[0]] for r in refs]))
# generate a set of tuples (branch_id, sha)
builds_candidates = {(r[1], r[0]) for r in self.env.cr.fetchall()}
for name, sha, date, author, author_email, subject, committer, committer_email in refs:
branch = Branch.browse(ref_branches[name])
# skip the build for old branches (Could be checked before creating the branch in DB ?)
if dateutil.parser.parse(date[:19]) + datetime.timedelta(days=max_age) < datetime.datetime.now():
continue
# create build (and mark previous builds as skipped) if not found
build_ids = Build.search([('branch_id', '=', branch.id), ('name', '=', sha)])
if not build_ids:
_logger.debug('repo %s branch %s new build found revno %s', branch.repo_id.name, branch.name, sha)
if not (branch.id, sha) in builds_candidates:
_logger.debug('repo %s branch %s new build found revno %s', self.name, branch.name, sha)
build_info = {
'branch_id': branch.id,
'name': sha,
@ -220,13 +245,24 @@ class runbot_repo(models.Model):
builds_to_be_skipped = Build.search(skippable_domain, order='sequence desc', offset=running_max)
builds_to_be_skipped._skip()
def _create_pending_builds(self, repos):
@api.multi
def _create_pending_builds(self):
""" Find new commits in physical repos"""
for repo in repos:
refs = {}
ref_branches = {}
for repo in self:
try:
repo._find_new_commits()
refs[repo] = repo._get_refs()
except Exception:
_logger.exception('Fail to find new commits in repo %s', repo.name)
_logger.exception('Fail to get refs for repo %s', repo.name)
if repo in refs:
ref_branches[repo] = repo._find_or_create_branches(refs[repo])
# keep _find_or_create_branches separated from build creation to ease
# closest branch detection
for repo in self:
if repo in refs:
repo._find_new_commits(refs[repo], ref_branches[repo])
def _clone(self):
""" Clone the remote repo if needed """
@ -258,16 +294,19 @@ class runbot_repo(models.Model):
repo._git(['fetch', '-p', 'origin', '+refs/heads/*:refs/heads/*', '+refs/pull/*/head:refs/pull/*'])
def _update(self, repos, force=True):
@api.multi
def _update(self, force=True):
""" Update the physical git reposotories on FS"""
for repo in repos:
for repo in self:
try:
repo._update_git(force)
except Exception:
_logger.exception('Fail to update repo %s', repo.name)
def _scheduler(self, ids=None):
@api.multi
def _scheduler(self):
"""Schedule builds for the repository"""
ids = self.ids
if not ids:
return
icp = self.env['ir.config_parameter']
@ -389,8 +428,8 @@ class runbot_repo(models.Model):
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
while time.time() - start_time < timeout:
repos = self.search([('mode', '!=', 'disabled')])
self._update(repos, force=False)
self._create_pending_builds(repos)
repos._update(force=False)
repos._create_pending_builds()
self.env.cr.commit()
self.invalidate_cache()
@ -408,7 +447,7 @@ class runbot_repo(models.Model):
update_frequency = int(icp.get_param('runbot.runbot_update_frequency', default=10))
while time.time() - start_time < timeout:
repos = self.search([('mode', '!=', 'disabled')])
self._scheduler(repos.ids)
repos._scheduler()
self.env.cr.commit()
self.env.reset()
self = self.env()[self._name]

View File

@ -42,8 +42,8 @@ class Test_Cron(common.TransactionCase):
self.env['ir.config_parameter'].sudo().set_param('runbot.runbot_update_frequency', 1)
ret = self.Repo._cron_fetch_and_schedule('runbotx.foo.com')
self.assertEqual(None, ret)
mock_update.assert_called_with(self.Repo, force=False)
mock_create.assert_called_with(self.Repo)
mock_update.assert_called_with(force=False)
mock_create.assert_called_with()
@patch('odoo.addons.runbot.models.repo.runbot_repo._get_cron_period')
@patch('odoo.addons.runbot.models.repo.runbot_repo._reload_nginx')
@ -56,5 +56,5 @@ class Test_Cron(common.TransactionCase):
self.env['ir.config_parameter'].sudo().set_param('runbot.runbot_update_frequency', 1)
ret = self.Repo._cron_fetch_and_build('runbotx.foo.com')
self.assertEqual(None, ret)
mock_scheduler.assert_called_with([])
mock_scheduler.assert_called()
self.assertTrue(mock_reload.called)

View File

@ -1,13 +1,27 @@
# -*- coding: utf-8 -*-
from unittest import skip
from unittest.mock import patch
from odoo.tests import common
import logging
import odoo
import time
_logger = logging.getLogger(__name__)
class Test_Repo(common.TransactionCase):
def setUp(self):
super(Test_Repo, self).setUp()
self.Repo = self.env['runbot.repo']
self.commit_list = []
def mock_git_helper(self):
"""Helper that returns a mock for repo._git()"""
def mock_git(repo, cmd):
if cmd[0] == 'for-each-ref' and self.commit_list:
return '\n'.join(['\0'.join(commit_fields) for commit_fields in self.commit_list])
return mock_git
@patch('odoo.addons.runbot.models.repo.runbot_repo._root')
def test_base_fields(self, mock_root):
@ -24,6 +38,115 @@ class Test_Repo(common.TransactionCase):
local_repo = self.Repo.create({'name': '/path/somewhere/rep.git'})
self.assertEqual(local_repo.short_name, 'somewhere/rep')
@patch('odoo.addons.runbot.models.repo.runbot_repo._root')
def test_repo_create_pending_builds(self, mock_root):
""" Test that when finding new refs in a repo, the missing branches
are created and new builds are created in pending state
"""
mock_root.return_value = '/tmp/static'
repo = self.Repo.create({'name': 'bla@example.com:foo/bar'})
# create another repo and branch to ensure there is no mismatch
other_repo = self.Repo.create({'name': 'bla@example.com:foo/foo'})
self.env['runbot.branch'].create({
'repo_id': other_repo.id,
'name': 'refs/heads/bidon'
})
self.commit_list = [('refs/heads/bidon',
'd0d0caca',
'2019-04-29 13:03:17 +0200',
'Marc Bidule',
'<marc.bidule@somewhere.com>',
'A nice subject',
'Marc Bidule',
'<marc.bidule@somewhere.com>')]
with patch('odoo.addons.runbot.models.repo.runbot_repo._git', new=self.mock_git_helper()):
repo._create_pending_builds()
branch = self.env['runbot.branch'].search([('repo_id', '=', repo.id)])
self.assertEqual(branch.name, 'refs/heads/bidon', 'A new branch should have been created')
build = self.env['runbot.build'].search([('repo_id', '=', repo.id), ('branch_id', '=', branch.id)])
self.assertEqual(build.subject, 'A nice subject')
self.assertEqual(build.state, 'pending')
self.assertFalse(build.result)
# Simulate that a new commit is found in the other repo
self.commit_list = [('refs/heads/bidon',
'deadbeef',
'2019-04-29 13:05:30 +0200',
'Marc Bidule',
'<marc.bidule@somewhere.com>',
'A better subject',
'Marc Bidule',
'<marc.bidule@somewhere.com>')]
with patch('odoo.addons.runbot.models.repo.runbot_repo._git', new=self.mock_git_helper()):
other_repo._create_pending_builds()
branch_count = self.env['runbot.branch'].search_count([('repo_id', '=', repo.id)])
self.assertEqual(branch_count, 1, 'No new branch should have been created')
build = self.env['runbot.build'].search([('repo_id', '=', repo.id), ('branch_id', '=', branch.id)])
self.assertEqual(build.subject, 'A nice subject')
self.assertEqual(build.state, 'pending')
self.assertFalse(build.result)
# A new commit is found in the first repo, the previous pending build should be skipped
self.commit_list = [('refs/heads/bidon',
'b00b',
'2019-04-29 13:07:30 +0200',
'Marc Bidule',
'<marc.bidule@somewhere.com>',
'Another subject',
'Marc Bidule',
'<marc.bidule@somewhere.com>')]
with patch('odoo.addons.runbot.models.repo.runbot_repo._git', new=self.mock_git_helper()):
repo._create_pending_builds()
branch_count = self.env['runbot.branch'].search_count([('repo_id', '=', repo.id)])
self.assertEqual(branch_count, 1, 'No new branch should have been created')
build = self.env['runbot.build'].search([('repo_id', '=', repo.id), ('branch_id', '=', branch.id), ('name', '=', 'b00b')])
self.assertEqual(build.subject, 'Another subject')
self.assertEqual(build.state, 'pending')
self.assertFalse(build.result)
previous_build = self.env['runbot.build'].search([('repo_id', '=', repo.id), ('branch_id', '=', branch.id), ('name', '=', 'd0d0caca')])
self.assertEqual(previous_build.state, 'done', 'Previous pending build should be done')
self.assertEqual(previous_build.result, 'skipped', 'Previous pending build result should be skipped')
@skip('This test is for performances. It needs a lot of real branches in DB to mean something')
@patch('odoo.addons.runbot.models.repo.runbot_repo._root')
def test_repo_perf_find_new_commits(self, mock_root):
mock_root.return_value = '/tmp/static'
repo = self.env['runbot.repo'].search([('name', '=', 'blabla')])
self.commit_list = []
# create 20000 branches and refs
start_time = time.time()
self.env['runbot.build'].search([], limit=5).write({'name': 'jflsdjflj'})
for i in range(20005):
self.commit_list.append(['refs/heads/bidon-%05d' % i,
'd0d0caca %s' % i,
'2019-04-29 13:03:17 +0200',
'Marc Bidule',
'<marc.bidule@somewhere.com>',
'A nice subject',
'Marc Bidule',
'<marc.bidule@somewhere.com>'])
inserted_time = time.time()
_logger.info('Insert took: %ssec', (inserted_time - start_time))
with patch('odoo.addons.runbot.models.repo.runbot_repo._git', new=self.mock_git_helper()):
repo._create_pending_builds()
_logger.info('Create pending builds took: %ssec', (time.time() - inserted_time))
class Test_Repo_Scheduler(common.TransactionCase):
@ -83,7 +206,7 @@ class Test_Repo_Scheduler(common.TransactionCase):
'state': 'pending',
})
builds.append(build)
self.env['runbot.repo']._scheduler(ids=[self.foo_repo.id, ])
self.foo_repo._scheduler()
build.invalidate_cache()
scheduled_build.invalidate_cache()
@ -93,7 +216,7 @@ class Test_Repo_Scheduler(common.TransactionCase):
# give some room for the pending build
Build_model.search([('name', '=', 'a')]).write({'state': 'done'})
self.env['runbot.repo']._scheduler(ids=[self.foo_repo.id, ])
self.foo_repo._scheduler()
build.invalidate_cache()
scheduled_build.invalidate_cache()
self.assertEqual(build.host, 'test_host')