From f6aff3e71c680295e5c5db2b08def1f8e49c318b Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Tue, 3 Aug 2021 13:45:21 +0200 Subject: [PATCH] [FIX] *: prevent merging conflicts commits with loss of authorship Proper attribution is important in general, but especially for external contributors. Before this change, and the previous change fixing authorship deduplication, it was rather easy for a "squashed" conflict commit (attributed to the 'bot for lack of a really clean option) to get merged by mistake. This commit changes two things: * The mergebot now refuses to stage commits without an email set, the github API rejects those commits anyway so any integration mode other than `merge` would fail, just with a very unclear error * The forwardbot now creates commits with an empty author / committer email when the pull request as a whole has multiple authors / committers. This leverages the mergebot update. Also clean up the staging process to provide richer error reporting using bespoke exceptions instead of simple assertions. I'm not sure we've ever encountered most of these errors so they're really sanity checks but the old reporting would be... less than great. Fixes #505 --- forwardport/models/project.py | 2 +- forwardport/tests/test_conflicts.py | 44 ++++++++-- runbot_merge/exceptions.py | 4 +- runbot_merge/github.py | 7 +- runbot_merge/models/pull_requests.py | 120 ++++++++++++++++----------- 5 files changed, 118 insertions(+), 59 deletions(-) diff --git a/forwardport/models/project.py b/forwardport/models/project.py index 3fd0ee42..86b11e5e 100644 --- a/forwardport/models/project.py +++ b/forwardport/models/project.py @@ -873,7 +873,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr for c in (c['commit'] for c in root_commits): authors.add(to_tuple(c['author'])) committers.add(to_tuple(c['committer'])) - fp_authorship = (project_id.fp_github_name, project_id.fp_github_email, '') + fp_authorship = (project_id.fp_github_name, '', '') author = fp_authorship if len(authors) != 1\ else authors.pop() + (head_commit['author']['date'],) committer = fp_authorship if len(committers) != 1 \ diff --git a/forwardport/tests/test_conflicts.py b/forwardport/tests/test_conflicts.py index 3b13a210..42523c06 100644 --- a/forwardport/tests/test_conflicts.py +++ b/forwardport/tests/test_conflicts.py @@ -268,11 +268,9 @@ def test_multiple_commits_same_authorship(env, config, make_repo): assert get(c.author) == get(author) assert get(c.committer) == get(committer) - def test_multiple_commits_different_authorship(env, config, make_repo, users, rolemap): - """ When a PR has multiple commits by the same author and its - forward-porting triggers a conflict, the resulting (squashed) conflict - commit should have the original author (same with the committer). + """ When a PR has multiple commits by different authors, the resulting + (squashed) conflict commit should have """ author = {'name': 'George Pearce', 'email': 'gp@example.org'} committer = {'name': 'G. P. W. Meredith', 'email': 'gpwm@example.org'} @@ -315,7 +313,41 @@ def test_multiple_commits_different_authorship(env, config, make_repo, users, ro assert 0, "timed out" c = prod.commit(pr2_id.head) + assert len(c.parents) == 1 get = itemgetter('name', 'email') rm = rolemap['user'] - assert get(c.author) == (rm['login'], rm['email']) - assert get(c.committer) == (rm['login'], rm['email']) + assert get(c.author) == (rm['login'], ''), \ + "In a multi-author PR, the squashed conflict commit should have the " \ + "author set to the bot but an empty email" + assert get(c.committer) == (rm['login'], '') + + assert re.match(r'''<<<\x3c<<< HEAD +b +======= +2 +>>>\x3e>>> [0-9a-f]{7,}.* +''', prod.read_tree(c)['g']) + + # I'd like to fix the conflict so everything is clean and proper *but* + # github's API apparently rejects creating commits with an empty email. + # + # So fuck that, I'll just "merge the conflict". Still works at simulating + # a resolution error as technically that's the sort of things people do. + + pr2 = prod.get_pr(pr2_id.number) + with prod: + prod.post_status(pr2_id.head, 'success', 'legal/cla') + prod.post_status(pr2_id.head, 'success', 'ci/runbot') + pr2.post_comment('hansen r+', config['role_reviewer']['token']) + env.run_crons() + + assert pr2.comments == [ + seen(env, pr2, users), + (users['user'], re_matches(r'Ping.*CONFLICT', re.DOTALL)), + (users['reviewer'], 'hansen r+'), + (users['user'], f"All commits must have author and committer email, " + f"missing email on {pr2_id.head} indicates the " + f"authorship is most likely incorrect."), + ] + assert pr2_id.state == 'error' + assert not pr2_id.staging_id, "staging should have been rejected" diff --git a/runbot_merge/exceptions.py b/runbot_merge/exceptions.py index 4ee61061..4ef79f2e 100644 --- a/runbot_merge/exceptions.py +++ b/runbot_merge/exceptions.py @@ -2,5 +2,7 @@ class MergeError(Exception): pass class FastForwardError(Exception): pass -class Skip(MergeError): +class Mismatch(MergeError): pass +class Unmergeable(MergeError): + ... diff --git a/runbot_merge/github.py b/runbot_merge/github.py index ead3ca2c..dca0e50f 100644 --- a/runbot_merge/github.py +++ b/runbot_merge/github.py @@ -16,6 +16,7 @@ import odoo.netsvc from odoo.tools import topological_sort, config from . import exceptions, utils +class MergeError(Exception): ... def _is_json(r): return r and r.headers.get('content-type', '').startswith(('application/json', 'application/javascript')) @@ -258,11 +259,11 @@ class GH(object): 'base': dest, 'head': sha, 'commit_message': message, - }, check={409: exceptions.MergeError}) + }, check={409: MergeError}) try: r = r.json() except Exception: - raise exceptions.MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text)) + raise MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text)) _logger.debug( "merge(%s, %s (%s), %s) -> %s", self._repo, dest, r['parents'][0]['sha'], @@ -312,7 +313,7 @@ class GH(object): 'parents': [prev], 'author': c['commit']['author'], 'committer': c['commit']['committer'], - }, check={409: exceptions.MergeError}).json() + }, check={409: MergeError}).json() logger.debug('copied %s to %s (parent: %s)', c['sha'], copy['sha'], prev) prev = mapping[c['sha']] = copy['sha'] diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index 0245563e..c017c3c7 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -533,15 +533,20 @@ class Branch(models.Model): try: staged |= Batch.stage(meta, batch) except exceptions.MergeError as e: - if first: + if first or isinstance(e, exceptions.Unmergeable): pr = e.args[0] - _logger.exception("Failed to merge %s into staging branch", - pr.display_name) + if len(e.args) > 1 and e.args[1]: + message = e.args[1] + else: + message = "Unable to stage PR (%s)" % e.__context__ + _logger.exception( + "Failed to merge %s into staging branch", + pr.display_name) pr.state = 'error' self.env['runbot_merge.pull_requests.feedback'].create({ 'repository': pr.repository.id, 'pull_request': pr.number, - 'message': "Unable to stage PR (%s)" % e.__context__, + 'message': message, }) else: first = False @@ -1354,13 +1359,25 @@ class PullRequests(models.Model): _, prdict = gh.pr(self.number) commits = prdict['commits'] method = self.merge_method or ('rebase-ff' if commits == 1 else None) - assert commits < 50 or not method.startswith('rebase'), \ - "rebasing a PR of more than 50 commits is a tad excessive" - assert commits < 250, "merging PRs of 250+ commits is not supported (https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" + if commits > 50 and method.startswith('rebase'): + raise exceptions.Unmergeable(self, "Rebasing 50 commits is too much.") + if commits > 250: + raise exceptions.Unmergeable( + self, "Merging PRs of 250 or more commits is not supported " + "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" + ) pr_commits = gh.commits(self.number) + for c in pr_commits: + if not (c['commit']['author']['email'] and c['commit']['committer']['email']): + raise exceptions.Unmergeable( + self, + f"All commits must have author and committer email, " + f"missing email on {c['sha']} indicates the authorship is " + f"most likely incorrect." + ) pr_head = pr_commits[-1]['sha'] if pr_head != self.head: - raise exceptions.Skip(self.head, pr_head, commits == 1) + raise exceptions.Mismatch(self.head, pr_head, commits == 1) if self.reviewed_by and self.reviewed_by.name == self.reviewed_by.github_login: # XXX: find other trigger(s) to sync github name? @@ -1397,9 +1414,15 @@ class PullRequests(models.Model): # look for parent(s?) of pr_head not in PR, means it's # from target (so we merged target in pr) merge = head_parents - {c['sha'] for c in commits} - assert len(merge) <= 1, \ - ">1 parent from base in PR's head is not supported" - if len(merge) == 1: + external_parents = len(merge) + if external_parents > 1: + raise exceptions.Unmergeable( + "The PR head can only have one parent from the base branch " + "(not part of the PR itself), found %d: %s" % ( + external_parents, + ', '.join(merge) + )) + if external_parents == 1: [base_commit] = merge commits_map = {c['sha']: c['sha'] for c in commits} @@ -2008,44 +2031,45 @@ class Batch(models.Model): target = 'tmp.{}'.format(pr.target.name) original_head = gh.head(target) try: - method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr)) - _logger.info( - "Staged pr %s to %s by %s: %s -> %s", - pr.display_name, pr.target.name, method, - original_head, new_heads[pr] - ) - except (exceptions.MergeError, AssertionError) as e: - # reset the head which failed, as rebase() may have partially - # updated it (despite later steps failing) - gh.set_ref(target, original_head) - # then reset every previous update - for to_revert in new_heads.keys(): - it = meta[to_revert.repository] - it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head']) - - if isinstance(e, exceptions.Skip): - old_head, new_head, to_squash = e.args - pr.write({ - 'state': 'opened', - 'squash': to_squash, - 'head': new_head, - }) - _logger.warning( - "head mismatch on %s: had %s but found %s", - pr.display_name, old_head, new_head + try: + method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr)) + _logger.info( + "Staged pr %s to %s by %s: %s -> %s", + pr.display_name, pr.target.name, method, + original_head, new_heads[pr] ) - self.env['runbot_merge.pull_requests.feedback'].create({ - 'repository': pr.repository.id, - 'pull_request': pr.number, - 'message': "We apparently missed an update to this PR " - "and tried to stage it in a state which " - "might not have been approved. PR has been " - "updated to %s, please check and approve or " - "re-approve." % new_head - }) - return self.env['runbot_merge.batch'] - else: - raise exceptions.MergeError(pr) + except Exception: + # reset the head which failed, as rebase() may have partially + # updated it (despite later steps failing) + gh.set_ref(target, original_head) + # then reset every previous update + for to_revert in new_heads.keys(): + it = meta[to_revert.repository] + it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head']) + raise + except github.MergeError: + raise exceptions.MergeError(pr) + except exceptions.Mismatch as e: + old_head, new_head, to_squash = e.args + pr.write({ + 'state': 'opened', + 'squash': to_squash, + 'head': new_head, + }) + _logger.warning( + "head mismatch on %s: had %s but found %s", + pr.display_name, old_head, new_head + ) + self.env['runbot_merge.pull_requests.feedback'].create({ + 'repository': pr.repository.id, + 'pull_request': pr.number, + 'message': "We apparently missed an update to this PR " + "and tried to stage it in a state which " + "might not have been approved. PR has been " + "updated to %s, please check and approve or " + "re-approve." % new_head + }) + return self.env['runbot_merge.batch'] # update meta to new heads for pr, head in new_heads.items():