From be4c8bd4910a21eade86cbdd2e3a6394e9ed91f2 Mon Sep 17 00:00:00 2001
From: Xavier Morel <xmo@odoo.com>
Date: Wed, 17 Oct 2018 14:18:49 +0200
Subject: [PATCH] [FIX] runbot_merge: forcefully rollback ref after a failed
 rebase()

rebase() can fail after merge(), during set_ref(), having already
updated the target.

Under the pre-rebase model, stage() assumed on a staging failure on a
given repo it only had to rollback stagings having succeeded. This
assumption fails in a post-rebase model as even a failed staging can
have modified the target, leading to the next staging (if multiple
batches are ready) containing the failed one.

Things can get really strange if the set_ref failure was (as it
probably is) some sort of transient failure, as the following staging
will likely succeed (under the assumption that most PRs/batches pass
staging) as PR1's content gets merged as part of PR2, and PR2 is
merged empty of content later on.
---
 runbot_merge/models/pull_requests.py |  5 ++-
 runbot_merge/tests/local.py          |  4 ++
 runbot_merge/tests/remote.py         |  4 ++
 runbot_merge/tests/test_basic.py     | 66 +++++++++++++++++++++++++++-
 4 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py
index d1998fc0..9f17c184 100644
--- a/runbot_merge/models/pull_requests.py
+++ b/runbot_merge/models/pull_requests.py
@@ -1227,7 +1227,10 @@ class Batch(models.Model):
                 pr.state = 'error'
                 gh.comment(pr.number, "Unable to stage PR (%s)" % e)
 
-                # reset other PRs
+                # reset the head which failed, as rebase() may have partially
+                # updated it (despite later steps failing)
+                gh.set_ref(target, original_head)
+                # then reset every previous update
                 for to_revert in new_heads.keys():
                     it = meta[to_revert.repository]
                     it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head'])
diff --git a/runbot_merge/tests/local.py b/runbot_merge/tests/local.py
index 269a5c10..abb06a14 100644
--- a/runbot_merge/tests/local.py
+++ b/runbot_merge/tests/local.py
@@ -3,6 +3,10 @@ import odoo
 import pytest
 import fake_github
 
+@pytest.fixture(scope='session')
+def remote_p():
+    return False
+
 @pytest.fixture
 def gh():
     with fake_github.Github() as gh:
diff --git a/runbot_merge/tests/remote.py b/runbot_merge/tests/remote.py
index a518e51e..2e2b4f59 100644
--- a/runbot_merge/tests/remote.py
+++ b/runbot_merge/tests/remote.py
@@ -113,6 +113,10 @@ def wait_for_server(db, timeout=120):
             if time.time() > limit:
                 raise socket.timeout()
 
+@pytest.fixture(scope='session')
+def remote_p():
+    return True
+
 @pytest.fixture
 def env(request):
     """
diff --git a/runbot_merge/tests/test_basic.py b/runbot_merge/tests/test_basic.py
index 1b573558..10a32f7d 100644
--- a/runbot_merge/tests/test_basic.py
+++ b/runbot_merge/tests/test_basic.py
@@ -1,11 +1,11 @@
 import datetime
+import itertools
 import json
 import re
 import time
-import requests
+from unittest import mock
 
 import pytest
-from requests import HTTPError
 
 import odoo
 
@@ -582,6 +582,68 @@ def test_forward_port(env, repo):
     commits = {c['sha'] for c in repo.log('heads/master')}
     assert len(commits) == 112
 
+def test_rebase_failure(env, repo, users, remote_p):
+    """ It looks like gh.rebase() can fail in the final ref-setting after
+    the merging & commits creation has been performed. At this point, the
+    staging will fail (yay) but the target branch (tmp) would not get reset,
+    leading to the next PR being staged *on top* of the one being staged
+    right there, and pretty much integrating it, leading to very, very
+    strange results if the entire thing passes staging.
+
+    Seen: https://github.com/odoo/odoo/pull/27835#issuecomment-430505429
+    PR 27835 was merged to tmp at df0ae6c00e085dbaabcfec821208c9ace2f4b02d
+    then the set_ref failed, following which PR 27840 is merged to tmp at
+    819b5414c27a92031a9ce3f159a8f466a4fd698c note that the first (left)
+    parent is the merge commit from PR 27835. The set_ref of PR 27840
+    succeeded resulting in PR 27835 being integrated into the squashing of
+    27840 (without any renaming or anything, just the content), following
+    which PR 27835 was merged and squashed as a "no-content" commit.
+
+    Problem: I need to make try_staging > stage > rebase > set_ref fail
+    but only the first time, and not the set_ref in try_staging itself, and
+    that call is performed *in a subprocess* when running <remote> tests.
+    """
+    # FIXME: remote mode
+    if remote_p:
+        pytest.skip("Needs to find a way to make set_ref fail on *second* call in remote mode.")
+
+    m = repo.make_commit(None, 'initial', None, tree={'m': 'm'})
+    repo.make_ref('heads/master', m)
+
+    commit_a = repo.make_commit(m, 'A', None, tree={'m': 'm', 'a': 'a'})
+    pr_a = repo.make_pr('A', None, target='master', ctid=commit_a, user='user', label='a')
+    repo.post_status(pr_a.head, 'success', 'ci/runbot')
+    repo.post_status(pr_a.head, 'success', 'legal/cla')
+    pr_a.post_comment('hansen r+', 'reviewer')
+
+    commit_b = repo.make_commit(m, 'B', None, tree={'m': 'm', 'b': 'b'})
+    pr_b = repo.make_pr('B', None, target='master', ctid=commit_b, user='user', label='b')
+    repo.post_status(pr_b.head, 'success', 'ci/runbot')
+    repo.post_status(pr_b.head, 'success', 'legal/cla')
+    pr_b.post_comment('hansen r+', 'reviewer')
+
+    from odoo.addons.runbot_merge.github import GH
+    original = GH.set_ref
+    counter = itertools.count(start=1)
+    def wrapper(*args):
+        assert next(counter) != 2, "make it seem like updating the branch post-rebase fails"
+        return original(*args)
+
+    with mock.patch.object(GH, 'set_ref', autospec=True, side_effect=wrapper) as m:
+        env['runbot_merge.project']._check_progress()
+
+    assert pr_a.comments == [
+        (users['reviewer'], 'hansen r+'),
+        (users['user'], re_matches(r'^Unable to stage PR')),
+    ]
+    assert pr_b.comments == [
+        (users['reviewer'], 'hansen r+'),
+    ]
+    assert repo.read_tree(repo.commit('heads/staging.master')) == {
+        'm': b'm',
+        'b': b'b',
+    }
+
 class TestRetry:
     @pytest.mark.xfail(reason="This may not be a good idea as it could lead to tons of rebuild spam")
     def test_auto_retry_push(self, env, repo):