[IMP] runbot: kill build only when needed

On non sticky branches, when a new build is found while another is already testing, the older build is killed. This happens during when the main runbot instance is discovering new commits and create new builds. As a result, concurrent updates may occur while the builders access the concerned build. With this commit, this garbage collecting procedure occurs during the scheduler loop that runs on runbot builder hosts. Also, the logic changed in a way that the kill is requested only if the host needs room to handle pending builds.
2025-05-30 21:28:00 +07:00 · 2020-01-08 15:46:05 +01:00 · 2020-01-08 15:46:05 +01:00 · baea2e73be
commit baea2e73be
parent 149ae4a074
3 changed files with 105 additions and 18 deletions
--- a/runbot/models/build.py
+++ b/runbot/models/build.py
@ -178,6 +178,13 @@ class runbot_build(models.Model):
            max_days += int(build.gc_delay if build.gc_delay else 0)
            build.gc_date = ref_date + datetime.timedelta(days=(max_days))

+    def _get_top_parent(self):
+        self.ensure_one()
+        build = self
+        while build.parent_id:
+            build = build.parent_id
+        return build
+
    def _get_youngest_state(self, states):
        index = min([self._get_state_score(state) for state in states])
        return state_order[index]
@ -898,25 +905,25 @@ class runbot_build(models.Model):
            build._github_status()
            self.invalidate_cache()

-    def _ask_kill(self, lock=True):
+    def _ask_kill(self, lock=True, message=None):
        if lock:
            self.env.cr.execute("""SELECT id FROM runbot_build WHERE parent_path like %s FOR UPDATE""", ['%s%%' % self.parent_path])
        self.ensure_one()
        user = request.env.user if request else self.env.user
        uid = user.id
        build = self
+        message = message or 'Killing build %s, requested by %s (user #%s)' % (build.dest, user.name, uid)
+        build._log('_ask_kill', message)
        if build.duplicate_id:
-            if build.duplicate_id.branch_id.sticky:
+            if self.branch_id.pull_branch_name == self.duplicate_id.branch_id.pull_branch_name:
+                build = build.duplicate_id
+            else:
                build._skip()
-                build._log('_ask_kill', 'Skipping build %s, requested by %s (user #%s)(duplicate of sticky, kill duplicate)' % (build.dest, user.name, uid))
                return
-            build = build.duplicate_id  # if duplicate is not sticky, most likely a pr, kill other build
        if build.local_state == 'pending':
            build._skip()
-            build._log('_ask_kill', 'Skipping build %s, requested by %s (user #%s)' % (build.dest, user.name, uid))
        elif build.local_state in ['testing', 'running']:
            build.requested_action = 'deathrow'
-            build._log('_ask_kill', 'Killing build %s, requested by %s (user #%s)' % (build.dest, user.name, uid))
        for child in build.children_ids:  # should we filter build that are target of a duplicate_id?
            if not child.duplicate_id:
                child._ask_kill(lock=False)
--- a/runbot/models/repo.py
+++ b/runbot/models/repo.py
@ -366,15 +366,6 @@ class runbot_repo(models.Model):
                    builds_to_skip._skip(reason='New ref found')
                    if builds_to_skip:
                        build_info['sequence'] = builds_to_skip[0].sequence
-                    # testing builds are killed
-                    builds_to_kill = Build.search([
-                        ('branch_id', '=', branch.id),
-                        ('local_state', '=', 'testing'),
-                        ('committer', '=', committer)
-                    ])
-                    for btk in builds_to_kill:
-                        btk._log('repo._update_git', 'Build automatically killed, newer build found.', level='WARNING')
-                    builds_to_kill.write({'requested_action': 'deathrow'})

                new_build = Build.create(build_info)
                # create a reverse dependency build if needed
@ -466,6 +457,8 @@ class runbot_repo(models.Model):
    def _scheduler(self, host):
        nb_workers = host.get_nb_worker()

+        self._gc_testing(host)
+        self._commit()
        for build in self._get_builds_with_requested_actions(host):
            build._process_requested_actions()
            self._commit()
@ -547,6 +540,33 @@ class runbot_repo(models.Model):
        build_ids = Build.search(domain_host + [('local_state', '=', 'running'), ('id', 'not in', cannot_be_killed_ids)], order='job_start desc').ids
        Build.browse(build_ids)[running_max:]._kill()

+    def _gc_testing(self, host):
+        """garbage collect builds that could be killed"""
+        # decide if we need room
+        Build = self.env['runbot.build']
+        domain_host = self.build_domain_host(host)
+        testing_builds = Build.search(domain_host + [('local_state', 'in', ['testing', 'pending'])])
+        used_slots = len(testing_builds)
+        available_slots = host.get_nb_worker() - used_slots
+        nb_pending = Build.search_count([('local_state', '=', 'pending'), ('host', '=', False)])
+        if available_slots > 0 or nb_pending == 0:
+            return
+        builds_to_kill = self.env['runbot.build']
+        builds_to_skip = self.env['runbot.build']
+        for build in testing_builds:
+            top_parent = build._get_top_parent()
+            if not build.branch_id.sticky:
+                newer_candidates = Build.search([
+                    ('id', '>', build.id),
+                    ('branch_id', '=', build.branch_id.id),
+                    ('build_type', '=', 'normal'),
+                    ('parent_id', '=', False),
+                    ('hidden', '=', False),
+                    ('config_id', '=', top_parent.config_id.id)
+                ])
+                if newer_candidates:
+                    top_parent._ask_kill(message='Build automatically killed, newer build found %s.' % newer_candidates.ids)
+
    def _allocate_builds(self, host, nb_slots, domain=None):
        if nb_slots <= 0:
            return []
--- a/runbot/tests/test_build.py
+++ b/runbot/tests/test_build.py
@ -344,6 +344,55 @@ class Test_Build(RunbotCase):
        build._local_cleanup()
        self.patchers['_local_pg_dropdb_patcher'].assert_called_with(dbname)

+    def test_repo_gc_testing(self):
+        """ test that builds are killed when room is needed on a host """
+        host = self.env['runbot.host'].create({
+            'name': 'runbot_xxx',
+            'nb_worker': 2
+        })
+
+        build_other_host = self.create_build({
+            'branch_id': self.branch.id,
+            'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
+            'local_state': 'testing',
+            'host': 'runbot_yyy'
+        })
+
+        child_build = self.create_build({
+            'branch_id': self.branch.id,
+            'name': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
+            'local_state': 'testing',
+            'host': 'runbot_xxx',
+            'extra_params': '2',
+            'parent_id': build_other_host.id
+        })
+
+        self.branch.repo_id._gc_testing(host)
+        self.assertFalse(build_other_host.requested_action)
+        self.assertFalse(child_build.requested_action)
+
+        build_same_branch = self.create_build({
+            'branch_id': self.branch_11.id,
+            'name': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
+            'local_state': 'testing',
+            'host': 'runbot_xxx',
+        })
+
+        self.branch.repo_id._gc_testing(host)
+        self.assertFalse(build_same_branch.requested_action)
+
+        build_pending = self.create_build({
+            'branch_id': self.branch.id,
+            'name': 'deadbeafffffffffffffffffffffffffffffffff',
+            'local_state': 'pending',
+        })
+
+        self.branch.repo_id._gc_testing(host)
+        self.assertEqual(build_other_host.requested_action, 'deathrow')
+        self.assertEqual(child_build.requested_action, 'deathrow')
+        self.assertFalse(build_same_branch.requested_action)
+        self.assertFalse(build_pending.requested_action)
+
    @patch('odoo.addons.runbot.models.build._logger')
    def test_build_skip(self, mock_logger):
        """test build is skipped"""
@ -368,14 +417,25 @@ class Test_Build(RunbotCase):
        mock_logger.debug.assert_called_with(log_first_part, 'A good reason')

    def test_ask_kill_duplicate(self):
-        """ Test that the _ask_kill method works on duplicate"""
+        """ Test that the _ask_kill method works on duplicate when they are related PR/branch"""
+
+        branch = self.Branch.create({
+            'repo_id': self.repo.id,
+            'name': 'refs/heads/master-test-branch-xxx'
+        })
+
+        pr = self.Branch.create({
+            'repo_id': self.repo.id,
+            'name': 'refs/pull/1234',
+            'pull_head_name': 'odoo:master-test-branch-xxx'
+        })

        build1 = self.create_build({
-            'branch_id': self.branch_10.id,
+            'branch_id': branch.id,
            'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
        })
        build2 = self.create_build({
-            'branch_id': self.branch_10.id,
+            'branch_id': pr.id,
            'name': 'd0d0caca0000ffffffffffffffffffffffffffff',
        })
        build2.write({'local_state': 'duplicate', 'duplicate_id': build1.id}) # this may not be usefull if we detect duplicate in same repo.