[FIX] *: prevent merging conflicts commits with loss of authorship

Proper attribution is important in general, but especially for
external contributors. Before this change, and the previous change
fixing authorship deduplication, it was rather easy for a "squashed"
conflict commit (attributed to the 'bot for lack of a really clean
option) to get merged by mistake.

This commit changes two things:

* The mergebot now refuses to stage commits without an email set, the
  github API rejects those commits anyway so any integration mode
  other than `merge` would fail, just with a very unclear error
* The forwardbot now creates commits with an empty author / committer
  email when the pull request as a whole has multiple authors /
  committers. This leverages the mergebot update.

Also clean up the staging process to provide richer error reporting
using bespoke exceptions instead of simple assertions. I'm not sure
we've ever encountered most of these errors so they're really sanity
checks but the old reporting would be... less than great.

Fixes #505
This commit is contained in:
Xavier Morel 2021-08-03 13:45:21 +02:00 committed by xmo-odoo
parent d249417ceb
commit f6aff3e71c
5 changed files with 118 additions and 59 deletions

View File

@ -873,7 +873,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
for c in (c['commit'] for c in root_commits):
authors.add(to_tuple(c['author']))
committers.add(to_tuple(c['committer']))
fp_authorship = (project_id.fp_github_name, project_id.fp_github_email, '')
fp_authorship = (project_id.fp_github_name, '', '')
author = fp_authorship if len(authors) != 1\
else authors.pop() + (head_commit['author']['date'],)
committer = fp_authorship if len(committers) != 1 \

View File

@ -268,11 +268,9 @@ def test_multiple_commits_same_authorship(env, config, make_repo):
assert get(c.author) == get(author)
assert get(c.committer) == get(committer)
def test_multiple_commits_different_authorship(env, config, make_repo, users, rolemap):
""" When a PR has multiple commits by the same author and its
forward-porting triggers a conflict, the resulting (squashed) conflict
commit should have the original author (same with the committer).
""" When a PR has multiple commits by different authors, the resulting
(squashed) conflict commit should have
"""
author = {'name': 'George Pearce', 'email': 'gp@example.org'}
committer = {'name': 'G. P. W. Meredith', 'email': 'gpwm@example.org'}
@ -315,7 +313,41 @@ def test_multiple_commits_different_authorship(env, config, make_repo, users, ro
assert 0, "timed out"
c = prod.commit(pr2_id.head)
assert len(c.parents) == 1
get = itemgetter('name', 'email')
rm = rolemap['user']
assert get(c.author) == (rm['login'], rm['email'])
assert get(c.committer) == (rm['login'], rm['email'])
assert get(c.author) == (rm['login'], ''), \
"In a multi-author PR, the squashed conflict commit should have the " \
"author set to the bot but an empty email"
assert get(c.committer) == (rm['login'], '')
assert re.match(r'''<<<\x3c<<< HEAD
b
=======
2
>>>\x3e>>> [0-9a-f]{7,}.*
''', prod.read_tree(c)['g'])
# I'd like to fix the conflict so everything is clean and proper *but*
# github's API apparently rejects creating commits with an empty email.
#
# So fuck that, I'll just "merge the conflict". Still works at simulating
# a resolution error as technically that's the sort of things people do.
pr2 = prod.get_pr(pr2_id.number)
with prod:
prod.post_status(pr2_id.head, 'success', 'legal/cla')
prod.post_status(pr2_id.head, 'success', 'ci/runbot')
pr2.post_comment('hansen r+', config['role_reviewer']['token'])
env.run_crons()
assert pr2.comments == [
seen(env, pr2, users),
(users['user'], re_matches(r'Ping.*CONFLICT', re.DOTALL)),
(users['reviewer'], 'hansen r+'),
(users['user'], f"All commits must have author and committer email, "
f"missing email on {pr2_id.head} indicates the "
f"authorship is most likely incorrect."),
]
assert pr2_id.state == 'error'
assert not pr2_id.staging_id, "staging should have been rejected"

View File

@ -2,5 +2,7 @@ class MergeError(Exception):
pass
class FastForwardError(Exception):
pass
class Skip(MergeError):
class Mismatch(MergeError):
pass
class Unmergeable(MergeError):
...

View File

@ -16,6 +16,7 @@ import odoo.netsvc
from odoo.tools import topological_sort, config
from . import exceptions, utils
class MergeError(Exception): ...
def _is_json(r):
return r and r.headers.get('content-type', '').startswith(('application/json', 'application/javascript'))
@ -258,11 +259,11 @@ class GH(object):
'base': dest,
'head': sha,
'commit_message': message,
}, check={409: exceptions.MergeError})
}, check={409: MergeError})
try:
r = r.json()
except Exception:
raise exceptions.MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text))
raise MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text))
_logger.debug(
"merge(%s, %s (%s), %s) -> %s",
self._repo, dest, r['parents'][0]['sha'],
@ -312,7 +313,7 @@ class GH(object):
'parents': [prev],
'author': c['commit']['author'],
'committer': c['commit']['committer'],
}, check={409: exceptions.MergeError}).json()
}, check={409: MergeError}).json()
logger.debug('copied %s to %s (parent: %s)', c['sha'], copy['sha'], prev)
prev = mapping[c['sha']] = copy['sha']

View File

@ -533,15 +533,20 @@ class Branch(models.Model):
try:
staged |= Batch.stage(meta, batch)
except exceptions.MergeError as e:
if first:
if first or isinstance(e, exceptions.Unmergeable):
pr = e.args[0]
_logger.exception("Failed to merge %s into staging branch",
pr.display_name)
if len(e.args) > 1 and e.args[1]:
message = e.args[1]
else:
message = "Unable to stage PR (%s)" % e.__context__
_logger.exception(
"Failed to merge %s into staging branch",
pr.display_name)
pr.state = 'error'
self.env['runbot_merge.pull_requests.feedback'].create({
'repository': pr.repository.id,
'pull_request': pr.number,
'message': "Unable to stage PR (%s)" % e.__context__,
'message': message,
})
else:
first = False
@ -1354,13 +1359,25 @@ class PullRequests(models.Model):
_, prdict = gh.pr(self.number)
commits = prdict['commits']
method = self.merge_method or ('rebase-ff' if commits == 1 else None)
assert commits < 50 or not method.startswith('rebase'), \
"rebasing a PR of more than 50 commits is a tad excessive"
assert commits < 250, "merging PRs of 250+ commits is not supported (https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)"
if commits > 50 and method.startswith('rebase'):
raise exceptions.Unmergeable(self, "Rebasing 50 commits is too much.")
if commits > 250:
raise exceptions.Unmergeable(
self, "Merging PRs of 250 or more commits is not supported "
"(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)"
)
pr_commits = gh.commits(self.number)
for c in pr_commits:
if not (c['commit']['author']['email'] and c['commit']['committer']['email']):
raise exceptions.Unmergeable(
self,
f"All commits must have author and committer email, "
f"missing email on {c['sha']} indicates the authorship is "
f"most likely incorrect."
)
pr_head = pr_commits[-1]['sha']
if pr_head != self.head:
raise exceptions.Skip(self.head, pr_head, commits == 1)
raise exceptions.Mismatch(self.head, pr_head, commits == 1)
if self.reviewed_by and self.reviewed_by.name == self.reviewed_by.github_login:
# XXX: find other trigger(s) to sync github name?
@ -1397,9 +1414,15 @@ class PullRequests(models.Model):
# look for parent(s?) of pr_head not in PR, means it's
# from target (so we merged target in pr)
merge = head_parents - {c['sha'] for c in commits}
assert len(merge) <= 1, \
">1 parent from base in PR's head is not supported"
if len(merge) == 1:
external_parents = len(merge)
if external_parents > 1:
raise exceptions.Unmergeable(
"The PR head can only have one parent from the base branch "
"(not part of the PR itself), found %d: %s" % (
external_parents,
', '.join(merge)
))
if external_parents == 1:
[base_commit] = merge
commits_map = {c['sha']: c['sha'] for c in commits}
@ -2008,44 +2031,45 @@ class Batch(models.Model):
target = 'tmp.{}'.format(pr.target.name)
original_head = gh.head(target)
try:
method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr))
_logger.info(
"Staged pr %s to %s by %s: %s -> %s",
pr.display_name, pr.target.name, method,
original_head, new_heads[pr]
)
except (exceptions.MergeError, AssertionError) as e:
# reset the head which failed, as rebase() may have partially
# updated it (despite later steps failing)
gh.set_ref(target, original_head)
# then reset every previous update
for to_revert in new_heads.keys():
it = meta[to_revert.repository]
it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head'])
if isinstance(e, exceptions.Skip):
old_head, new_head, to_squash = e.args
pr.write({
'state': 'opened',
'squash': to_squash,
'head': new_head,
})
_logger.warning(
"head mismatch on %s: had %s but found %s",
pr.display_name, old_head, new_head
try:
method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr))
_logger.info(
"Staged pr %s to %s by %s: %s -> %s",
pr.display_name, pr.target.name, method,
original_head, new_heads[pr]
)
self.env['runbot_merge.pull_requests.feedback'].create({
'repository': pr.repository.id,
'pull_request': pr.number,
'message': "We apparently missed an update to this PR "
"and tried to stage it in a state which "
"might not have been approved. PR has been "
"updated to %s, please check and approve or "
"re-approve." % new_head
})
return self.env['runbot_merge.batch']
else:
raise exceptions.MergeError(pr)
except Exception:
# reset the head which failed, as rebase() may have partially
# updated it (despite later steps failing)
gh.set_ref(target, original_head)
# then reset every previous update
for to_revert in new_heads.keys():
it = meta[to_revert.repository]
it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head'])
raise
except github.MergeError:
raise exceptions.MergeError(pr)
except exceptions.Mismatch as e:
old_head, new_head, to_squash = e.args
pr.write({
'state': 'opened',
'squash': to_squash,
'head': new_head,
})
_logger.warning(
"head mismatch on %s: had %s but found %s",
pr.display_name, old_head, new_head
)
self.env['runbot_merge.pull_requests.feedback'].create({
'repository': pr.repository.id,
'pull_request': pr.number,
'message': "We apparently missed an update to this PR "
"and tried to stage it in a state which "
"might not have been approved. PR has been "
"updated to %s, please check and approve or "
"re-approve." % new_head
})
return self.env['runbot_merge.batch']
# update meta to new heads
for pr, head in new_heads.items():