From 10ef6b82f0c02c867bf49e25c49c619b2fac9f74 Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Thu, 7 Nov 2019 07:39:20 +0100 Subject: [PATCH] [IMP] runbot_merge: sanity check PATCH git/ref/heads Turns out not only can that operation fail, that operation can succeed but have its effect delayed. To try and guard against that, immediately check that we get the correct ref' after having reset it. This is the cause of the November 6 mess: when preparing a staging, the mergebot does the following, 1. get the head of 2. hard-reset tmp. to that 3. start merging PRs, which requires getting the current state of tmp. back On the 6ths, these steps looked like this ```text 2019-11-06 10:03:21,588 head(odoo/odoo, master) -> ab6d0c38512e4944458b0b6f80f38d6c26b6b597 2019-11-06 10:03:22,375 set_ref(update, odoo/odoo, tmp.master, ab6d0c38512e4944458b0b6f80f38d6c26b6b597 -> 200 (OK) 2019-11-06 10:03:28,674 head(odoo/odoo, tmp.master) -> de2a852e7cc1f390e50190cfc497bc253687fba8 2019-11-06 10:03:30,292 head(odoo/odoo, tmp.master) -> de2a852e7cc1f390e50190cfc497bc253687fba8 ``` So the 'bot fetched the commit at the head of master (ab6d0c), reset tmp.master to that... and then got a different commit when it fetched the tmp head to stage a PR on it. That different head being of course a previous rejected staging. When the new staging succeeded, it brought the entire thing in and made a mess. This was compounded by an issue I still have to investigate: the staging of the new PR took the wrong base commit *but the right base tree*, as a result the first thing it did was *reverse the entire previous commit* (without that we could probably have left it as-is rather than need to force-push master -- twice). --- runbot_merge/github.py | 14 ++++++++++++++ runbot_merge/models/pull_requests.py | 2 ++ 2 files changed, 16 insertions(+) diff --git a/runbot_merge/github.py b/runbot_merge/github.py index 0a073233..f8d9e2f0 100644 --- a/runbot_merge/github.py +++ b/runbot_merge/github.py @@ -114,6 +114,12 @@ class GH(object): try: self('patch', 'git/refs/heads/{}'.format(branch), json={'sha': sha}) _logger.debug('fast_forward(%s, %s, %s) -> OK', self._repo, branch, sha) + head = self.head(branch) + if head != sha: + _logger.error("Sanity check ref update of %s, expected %s got %s", + branch, sha, head + ) + raise exceptions.FastForwardError(self._repo) except requests.HTTPError: _logger.debug('fast_forward(%s, %s, %s) -> ERROR', self._repo, branch, sha, exc_info=True) raise exceptions.FastForwardError(self._repo) @@ -132,6 +138,10 @@ class GH(object): 'OK' if status0 == 200 else r.text or r.reason ) if status0 == 200: + head = self.head(branch) + assert head == sha, "Sanity check ref update of %s, expected %s got %s" % ( + branch, sha, head + ) return # 422 makes no sense but that's what github returns, leaving 404 just @@ -150,6 +160,10 @@ class GH(object): 'OK' if status1 == 201 else r.text or r.reason ) if status1 == 201: + head = self.head(branch) + assert head == sha, "Sanity check ref update of %s, expected %s got %s" % ( + branch, sha, head + ) return raise AssertionError("set_ref failed(%s, %s)" % (status0, status1)) diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index efb5e79e..1c98e074 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -1623,6 +1623,8 @@ class Stagings(models.Model): time.sleep(pause) continue raise + else: + break first = False return repo_name