From fe8a4588d3af70ff2569dc470af3431ea789a2ea Mon Sep 17 00:00:00 2001
From: Xavier Morel <xmo@odoo.com>
Date: Tue, 16 Oct 2018 14:49:57 +0200
Subject: [PATCH] [FIX] runbot_merge: try to fix race condition again

Original issue (staging would get cancelled just as it was being
merged) was not really fixed but traded for a new one: serialization
errors which can lock up the mergebot for a long time, stopping
handling of all incoming signals (possibly/probably because all of
them try to write on the PR which is locked?).

Splitting the tagging cron out should already way improve things as
the status update cron should be way shorter (and thus hold its locks
for a smaller amount of time). This should also avoid the "close"
handler waiting on the extant transaction, and make the "pr update"
transaction be much shorter as each staging gets its own tnx.
---
 runbot_merge/controllers/__init__.py | 10 ++++++++++
 runbot_merge/data/merge_cron.xml     |  2 +-
 runbot_merge/models/pull_requests.py |  6 +++++-
 3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/runbot_merge/controllers/__init__.py b/runbot_merge/controllers/__init__.py
index 49a6323b..3c15a50c 100644
--- a/runbot_merge/controllers/__init__.py
+++ b/runbot_merge/controllers/__init__.py
@@ -166,6 +166,16 @@ def handle_pr(env, event):
 
     # don't marked merged PRs as closed (!!!)
     if event['action'] == 'closed' and pr_obj.state != 'merged':
+        # ignore if the PR is already being updated in a separate transaction
+        # (most likely being merged?)
+        env.cr.execute('''
+        SELECT id FROM runbot_merge_pull_requests
+        WHERE id = %s AND state != 'merged'
+        FOR UPDATE SKIP LOCKED;
+        ''', [pr_obj.id])
+        if not env.cr.fetchall():
+            return 'Ignored: could not lock rows (probably being merged)'
+
         env.cr.execute('''
         UPDATE runbot_merge_pull_requests
         SET state = 'closed'
diff --git a/runbot_merge/data/merge_cron.xml b/runbot_merge/data/merge_cron.xml
index c3e225a3..091efa08 100644
--- a/runbot_merge/data/merge_cron.xml
+++ b/runbot_merge/data/merge_cron.xml
@@ -3,7 +3,7 @@
     <field name="name">Check for progress of PRs &amp; Stagings</field>
     <field name="model_id" ref="model_runbot_merge_project"/>
     <field name="state">code</field>
-    <field name="code">model._check_progress()</field>
+    <field name="code">model._check_progress(True)</field>
     <field name="interval_number">1</field>
     <field name="interval_type">minutes</field>
     <field name="numbercall">-1</field>
diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py
index 4285e9bd..d1998fc0 100644
--- a/runbot_merge/models/pull_requests.py
+++ b/runbot_merge/models/pull_requests.py
@@ -61,13 +61,17 @@ class Project(models.Model):
              "will lead to webhook rejection. Should only use ASCII."
     )
 
-    def _check_progress(self):
+    def _check_progress(self, commit=False):
         for project in self.search([]):
             for staging in project.mapped('branch_ids.active_staging_id'):
                 staging.check_status()
+                if commit:
+                    self.env.cr.commit()
 
             for branch in project.branch_ids:
                 branch.try_staging()
+                if commit:
+                    self.env.cr.commit()
 
         # I have no idea why this is necessary for tests to pass, the only
         # DB update done not through the ORM is when receiving a notification