[FIX] runbot_merge: try to fix race condition again

Original issue (staging would get cancelled just as it was being merged) was not really fixed but traded for a new one: serialization errors which can lock up the mergebot for a long time, stopping handling of all incoming signals (possibly/probably because all of them try to write on the PR which is locked?). Splitting the tagging cron out should already way improve things as the status update cron should be way shorter (and thus hold its locks for a smaller amount of time). This should also avoid the "close" handler waiting on the extant transaction, and make the "pr update" transaction be much shorter as each staging gets its own tnx.
2025-03-15 23:45:44 +07:00 · 2018-10-16 14:49:57 +02:00 · 2018-10-16 14:49:57 +02:00 · fe8a4588d3
commit fe8a4588d3
parent eef9ede686
3 changed files with 16 additions and 2 deletions
--- a/runbot_merge/controllers/init.py
+++ b/runbot_merge/controllers/init.py
@ -166,6 +166,16 @@ def handle_pr(env, event):

    # don't marked merged PRs as closed (!!!)
    if event['action'] == 'closed' and pr_obj.state != 'merged':
+        # ignore if the PR is already being updated in a separate transaction
+        # (most likely being merged?)
+        env.cr.execute('''
+        SELECT id FROM runbot_merge_pull_requests
+        WHERE id = %s AND state != 'merged'
+        FOR UPDATE SKIP LOCKED;
+        ''', [pr_obj.id])
+        if not env.cr.fetchall():
+            return 'Ignored: could not lock rows (probably being merged)'
+
        env.cr.execute('''
        UPDATE runbot_merge_pull_requests
        SET state = 'closed'
--- a/runbot_merge/data/merge_cron.xml
+++ b/runbot_merge/data/merge_cron.xml
@ -3,7 +3,7 @@
    <field name="name">Check for progress of PRs &amp; Stagings</field>
    <field name="model_id" ref="model_runbot_merge_project"/>
    <field name="state">code</field>
-    <field name="code">model._check_progress()</field>
+    <field name="code">model._check_progress(True)</field>
    <field name="interval_number">1</field>
    <field name="interval_type">minutes</field>
    <field name="numbercall">-1</field>
--- a/runbot_merge/models/pull_requests.py
+++ b/runbot_merge/models/pull_requests.py
@ -61,13 +61,17 @@ class Project(models.Model):
             "will lead to webhook rejection. Should only use ASCII."
    )

-    def _check_progress(self):
+    def _check_progress(self, commit=False):
        for project in self.search([]):
            for staging in project.mapped('branch_ids.active_staging_id'):
                staging.check_status()
+                if commit:
+                    self.env.cr.commit()

            for branch in project.branch_ids:
                branch.try_staging()
+                if commit:
+                    self.env.cr.commit()

        # I have no idea why this is necessary for tests to pass, the only
        # DB update done not through the ORM is when receiving a notification