[IMP] runbot: kill build only when needed

On non sticky branches, when a new build is found while another is
already testing, the older build is killed. This happens during when the
main runbot instance is discovering new commits and create new builds.
As a result, concurrent updates may occur while the builders access the
concerned build.

With this commit, this garbage collecting procedure occurs during the
scheduler loop that runs on runbot builder hosts.

Also, the logic changed in a way that the kill is requested only if the
host needs room to handle pending builds.
This commit is contained in:
Christophe Monniez 2020-01-08 15:46:05 +01:00 committed by XavierDo
parent 149ae4a074
commit baea2e73be
3 changed files with 105 additions and 18 deletions

View File

@ -178,6 +178,13 @@ class runbot_build(models.Model):
max_days += int(build.gc_delay if build.gc_delay else 0)
build.gc_date = ref_date + datetime.timedelta(days=(max_days))
def _get_top_parent(self):
self.ensure_one()
build = self
while build.parent_id:
build = build.parent_id
return build
def _get_youngest_state(self, states):
index = min([self._get_state_score(state) for state in states])
return state_order[index]
@ -898,25 +905,25 @@ class runbot_build(models.Model):
build._github_status()
self.invalidate_cache()
def _ask_kill(self, lock=True):
def _ask_kill(self, lock=True, message=None):
if lock:
self.env.cr.execute("""SELECT id FROM runbot_build WHERE parent_path like %s FOR UPDATE""", ['%s%%' % self.parent_path])
self.ensure_one()
user = request.env.user if request else self.env.user
uid = user.id
build = self
message = message or 'Killing build %s, requested by %s (user #%s)' % (build.dest, user.name, uid)
build._log('_ask_kill', message)
if build.duplicate_id:
if build.duplicate_id.branch_id.sticky:
if self.branch_id.pull_branch_name == self.duplicate_id.branch_id.pull_branch_name:
build = build.duplicate_id
else:
build._skip()
build._log('_ask_kill', 'Skipping build %s, requested by %s (user #%s)(duplicate of sticky, kill duplicate)' % (build.dest, user.name, uid))
return
build = build.duplicate_id # if duplicate is not sticky, most likely a pr, kill other build
if build.local_state == 'pending':
build._skip()
build._log('_ask_kill', 'Skipping build %s, requested by %s (user #%s)' % (build.dest, user.name, uid))
elif build.local_state in ['testing', 'running']:
build.requested_action = 'deathrow'
build._log('_ask_kill', 'Killing build %s, requested by %s (user #%s)' % (build.dest, user.name, uid))
for child in build.children_ids: # should we filter build that are target of a duplicate_id?
if not child.duplicate_id:
child._ask_kill(lock=False)

View File

@ -366,15 +366,6 @@ class runbot_repo(models.Model):
builds_to_skip._skip(reason='New ref found')
if builds_to_skip:
build_info['sequence'] = builds_to_skip[0].sequence
# testing builds are killed
builds_to_kill = Build.search([
('branch_id', '=', branch.id),
('local_state', '=', 'testing'),
('committer', '=', committer)
])
for btk in builds_to_kill:
btk._log('repo._update_git', 'Build automatically killed, newer build found.', level='WARNING')
builds_to_kill.write({'requested_action': 'deathrow'})
new_build = Build.create(build_info)
# create a reverse dependency build if needed
@ -466,6 +457,8 @@ class runbot_repo(models.Model):
def _scheduler(self, host):
nb_workers = host.get_nb_worker()
self._gc_testing(host)
self._commit()
for build in self._get_builds_with_requested_actions(host):
build._process_requested_actions()
self._commit()
@ -547,6 +540,33 @@ class runbot_repo(models.Model):
build_ids = Build.search(domain_host + [('local_state', '=', 'running'), ('id', 'not in', cannot_be_killed_ids)], order='job_start desc').ids
Build.browse(build_ids)[running_max:]._kill()
def _gc_testing(self, host):
"""garbage collect builds that could be killed"""
# decide if we need room
Build = self.env['runbot.build']
domain_host = self.build_domain_host(host)
testing_builds = Build.search(domain_host + [('local_state', 'in', ['testing', 'pending'])])
used_slots = len(testing_builds)
available_slots = host.get_nb_worker() - used_slots
nb_pending = Build.search_count([('local_state', '=', 'pending'), ('host', '=', False)])
if available_slots > 0 or nb_pending == 0:
return
builds_to_kill = self.env['runbot.build']
builds_to_skip = self.env['runbot.build']
for build in testing_builds:
top_parent = build._get_top_parent()
if not build.branch_id.sticky:
newer_candidates = Build.search([
('id', '>', build.id),
('branch_id', '=', build.branch_id.id),
('build_type', '=', 'normal'),
('parent_id', '=', False),
('hidden', '=', False),
('config_id', '=', top_parent.config_id.id)
])
if newer_candidates:
top_parent._ask_kill(message='Build automatically killed, newer build found %s.' % newer_candidates.ids)
def _allocate_builds(self, host, nb_slots, domain=None):
if nb_slots <= 0:
return []

View File

@ -344,6 +344,55 @@ class Test_Build(RunbotCase):
build._local_cleanup()
self.patchers['_local_pg_dropdb_patcher'].assert_called_with(dbname)
def test_repo_gc_testing(self):
""" test that builds are killed when room is needed on a host """
host = self.env['runbot.host'].create({
'name': 'runbot_xxx',
'nb_worker': 2
})
build_other_host = self.create_build({
'branch_id': self.branch.id,
'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
'local_state': 'testing',
'host': 'runbot_yyy'
})
child_build = self.create_build({
'branch_id': self.branch.id,
'name': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'local_state': 'testing',
'host': 'runbot_xxx',
'extra_params': '2',
'parent_id': build_other_host.id
})
self.branch.repo_id._gc_testing(host)
self.assertFalse(build_other_host.requested_action)
self.assertFalse(child_build.requested_action)
build_same_branch = self.create_build({
'branch_id': self.branch_11.id,
'name': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'local_state': 'testing',
'host': 'runbot_xxx',
})
self.branch.repo_id._gc_testing(host)
self.assertFalse(build_same_branch.requested_action)
build_pending = self.create_build({
'branch_id': self.branch.id,
'name': 'deadbeafffffffffffffffffffffffffffffffff',
'local_state': 'pending',
})
self.branch.repo_id._gc_testing(host)
self.assertEqual(build_other_host.requested_action, 'deathrow')
self.assertEqual(child_build.requested_action, 'deathrow')
self.assertFalse(build_same_branch.requested_action)
self.assertFalse(build_pending.requested_action)
@patch('odoo.addons.runbot.models.build._logger')
def test_build_skip(self, mock_logger):
"""test build is skipped"""
@ -368,14 +417,25 @@ class Test_Build(RunbotCase):
mock_logger.debug.assert_called_with(log_first_part, 'A good reason')
def test_ask_kill_duplicate(self):
""" Test that the _ask_kill method works on duplicate"""
""" Test that the _ask_kill method works on duplicate when they are related PR/branch"""
branch = self.Branch.create({
'repo_id': self.repo.id,
'name': 'refs/heads/master-test-branch-xxx'
})
pr = self.Branch.create({
'repo_id': self.repo.id,
'name': 'refs/pull/1234',
'pull_head_name': 'odoo:master-test-branch-xxx'
})
build1 = self.create_build({
'branch_id': self.branch_10.id,
'branch_id': branch.id,
'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
})
build2 = self.create_build({
'branch_id': self.branch_10.id,
'branch_id': pr.id,
'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
})
build2.write({'local_state': 'duplicate', 'duplicate_id': build1.id}) # this may not be usefull if we detect duplicate in same repo.