[FIX] *: prevent merging conflict commits with loss of authorship

Proper attribution is important in general, but especially for
external contributors. Before this change, and the previous change
fixing authorship deduplication, it was rather easy for a "squashed"
conflict commit (attributed to the 'bot for lack of a really clean
option) to get merged by mistake.

This commit changes two things:

* The mergebot now refuses to stage commits without an email set: the
  GitHub API rejects those commits anyway, so any integration mode
  other than `merge` would fail, just with a very unclear error (see
  the sketch below)
* The forwardbot now creates commits with an empty author / committer
  email when the pull request as a whole has multiple authors /
  committers. This leverages the mergebot update.
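
For illustration, a minimal sketch of the staging-time check, outside
the Odoo model context (`pr_commits` is assumed to be the commit list
GitHub returns for the PR, and `Unmergeable` stands in for the bespoke
exception introduced below):

    class Unmergeable(Exception):
        """Raised when a PR cannot be staged in its current state."""

    def check_commit_emails(pr_commits):
        # Refuse to stage when any commit lacks an author or committer
        # email: the forwardbot leaves the email empty on "squashed"
        # conflict commits with mixed authorship, so a missing email
        # means the authorship still needs to be fixed up by hand.
        for c in pr_commits:
            commit = c['commit']
            if not (commit['author'].get('email') and commit['committer'].get('email')):
                raise Unmergeable(
                    "All commits must have author and committer email, "
                    "missing email on %s indicates the authorship is "
                    "most likely incorrect." % c['sha']
                )

An empty email is exactly what the forwardbot now leaves on
multi-author conflict commits, so the two changes dovetail: such a
commit cannot be merged until someone fixes the authorship.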

Also clean up the staging process to provide richer error reporting,
using bespoke exceptions instead of simple assertions. I'm not sure
we've ever encountered most of these errors, so they're really sanity
checks, but the old reporting would be... less than great.
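
As a rough sketch of the reporting side (simplified; `post_feedback` is
a hypothetical stand-in for creating a runbot_merge.pull_requests.feedback
record), a bespoke exception can carry a user-facing reason in its args,
which the staging loop now prefers over the generic message:

    def report_staging_failure(pr, exc, post_feedback):
        # Prefer the human-readable reason carried by the bespoke
        # exception (args are (pr, reason)); fall back to the old
        # generic message when no reason was provided.
        if len(exc.args) > 1 and exc.args[1]:
            message = exc.args[1]
        else:
            message = "Unable to stage PR (%s)" % exc.__context__
        post_feedback(pr, message)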

Fixes #505
Xavier Morel 2021-08-03 13:45:21 +02:00 committed by xmo-odoo
parent d249417ceb
commit f6aff3e71c
5 changed files with 118 additions and 59 deletions


@@ -873,7 +873,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
             for c in (c['commit'] for c in root_commits):
                 authors.add(to_tuple(c['author']))
                 committers.add(to_tuple(c['committer']))
-            fp_authorship = (project_id.fp_github_name, project_id.fp_github_email, '')
+            fp_authorship = (project_id.fp_github_name, '', '')
             author = fp_authorship if len(authors) != 1\
                 else authors.pop() + (head_commit['author']['date'],)
             committer = fp_authorship if len(committers) != 1 \


@@ -268,11 +268,9 @@ def test_multiple_commits_same_authorship(env, config, make_repo):
     assert get(c.author) == get(author)
     assert get(c.committer) == get(committer)

 def test_multiple_commits_different_authorship(env, config, make_repo, users, rolemap):
-    """ When a PR has multiple commits by the same author and its
-    forward-porting triggers a conflict, the resulting (squashed) conflict
-    commit should have the original author (same with the committer).
+    """ When a PR has multiple commits by different authors, the resulting
+    (squashed) conflict commit should have an empty email
     """
     author = {'name': 'George Pearce', 'email': 'gp@example.org'}
     committer = {'name': 'G. P. W. Meredith', 'email': 'gpwm@example.org'}
@@ -315,7 +313,41 @@ def test_multiple_commits_different_authorship(env, config, make_repo, users, rolemap):
         assert 0, "timed out"

     c = prod.commit(pr2_id.head)
+    assert len(c.parents) == 1
     get = itemgetter('name', 'email')
     rm = rolemap['user']
-    assert get(c.author) == (rm['login'], rm['email'])
-    assert get(c.committer) == (rm['login'], rm['email'])
+    assert get(c.author) == (rm['login'], ''), \
+        "In a multi-author PR, the squashed conflict commit should have the " \
+        "author set to the bot but an empty email"
+    assert get(c.committer) == (rm['login'], '')
+
+    assert re.match(r'''<<<\x3c<<< HEAD
+b
+=======
+2
+>>>\x3e>>> [0-9a-f]{7,}.*
+''', prod.read_tree(c)['g'])
+
+    # I'd like to fix the conflict so everything is clean and proper *but*
+    # github's API apparently rejects creating commits with an empty email.
+    #
+    # So fuck that, I'll just "merge the conflict". Still works at simulating
+    # a resolution error as technically that's the sort of things people do.
+    pr2 = prod.get_pr(pr2_id.number)
+    with prod:
+        prod.post_status(pr2_id.head, 'success', 'legal/cla')
+        prod.post_status(pr2_id.head, 'success', 'ci/runbot')
+        pr2.post_comment('hansen r+', config['role_reviewer']['token'])
+    env.run_crons()
+
+    assert pr2.comments == [
+        seen(env, pr2, users),
+        (users['user'], re_matches(r'Ping.*CONFLICT', re.DOTALL)),
+        (users['reviewer'], 'hansen r+'),
+        (users['user'], f"All commits must have author and committer email, "
+                        f"missing email on {pr2_id.head} indicates the "
+                        f"authorship is most likely incorrect."),
+    ]
+    assert pr2_id.state == 'error'
+    assert not pr2_id.staging_id, "staging should have been rejected"


@@ -2,5 +2,7 @@ class MergeError(Exception):
     pass
 class FastForwardError(Exception):
     pass
-class Skip(MergeError):
+class Mismatch(MergeError):
     pass
+class Unmergeable(MergeError):
+    ...


@@ -16,6 +16,7 @@ import odoo.netsvc
 from odoo.tools import topological_sort, config

 from . import exceptions, utils

+class MergeError(Exception): ...

 def _is_json(r):
     return r and r.headers.get('content-type', '').startswith(('application/json', 'application/javascript'))

@@ -258,11 +259,11 @@ class GH(object):
             'base': dest,
             'head': sha,
             'commit_message': message,
-        }, check={409: exceptions.MergeError})
+        }, check={409: MergeError})
         try:
             r = r.json()
         except Exception:
-            raise exceptions.MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text))
+            raise MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.text))
         _logger.debug(
             "merge(%s, %s (%s), %s) -> %s",
             self._repo, dest, r['parents'][0]['sha'],

@@ -312,7 +313,7 @@ class GH(object):
                 'parents': [prev],
                 'author': c['commit']['author'],
                 'committer': c['commit']['committer'],
-            }, check={409: exceptions.MergeError}).json()
+            }, check={409: MergeError}).json()
             logger.debug('copied %s to %s (parent: %s)', c['sha'], copy['sha'], prev)
             prev = mapping[c['sha']] = copy['sha']


@@ -533,15 +533,20 @@ class Branch(models.Model):
             try:
                 staged |= Batch.stage(meta, batch)
             except exceptions.MergeError as e:
-                if first:
+                if first or isinstance(e, exceptions.Unmergeable):
                     pr = e.args[0]
-                    _logger.exception("Failed to merge %s into staging branch",
-                                      pr.display_name)
+                    if len(e.args) > 1 and e.args[1]:
+                        message = e.args[1]
+                    else:
+                        message = "Unable to stage PR (%s)" % e.__context__
+                    _logger.exception(
+                        "Failed to merge %s into staging branch",
+                        pr.display_name)
                     pr.state = 'error'
                     self.env['runbot_merge.pull_requests.feedback'].create({
                         'repository': pr.repository.id,
                         'pull_request': pr.number,
-                        'message': "Unable to stage PR (%s)" % e.__context__,
+                        'message': message,
                     })
                 else:
                     first = False
@@ -1354,13 +1359,25 @@
         _, prdict = gh.pr(self.number)
         commits = prdict['commits']
         method = self.merge_method or ('rebase-ff' if commits == 1 else None)
-        assert commits < 50 or not method.startswith('rebase'), \
-            "rebasing a PR of more than 50 commits is a tad excessive"
-        assert commits < 250, "merging PRs of 250+ commits is not supported (https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)"
+        if commits > 50 and method.startswith('rebase'):
+            raise exceptions.Unmergeable(self, "Rebasing 50 commits is too much.")
+        if commits > 250:
+            raise exceptions.Unmergeable(
+                self, "Merging PRs of 250 or more commits is not supported "
+                      "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)"
+            )
         pr_commits = gh.commits(self.number)
+        for c in pr_commits:
+            if not (c['commit']['author']['email'] and c['commit']['committer']['email']):
+                raise exceptions.Unmergeable(
+                    self,
+                    f"All commits must have author and committer email, "
+                    f"missing email on {c['sha']} indicates the authorship is "
+                    f"most likely incorrect."
+                )
         pr_head = pr_commits[-1]['sha']
         if pr_head != self.head:
-            raise exceptions.Skip(self.head, pr_head, commits == 1)
+            raise exceptions.Mismatch(self.head, pr_head, commits == 1)

         if self.reviewed_by and self.reviewed_by.name == self.reviewed_by.github_login:
             # XXX: find other trigger(s) to sync github name?
@@ -1397,9 +1414,15 @@
             # look for parent(s?) of pr_head not in PR, means it's
             # from target (so we merged target in pr)
             merge = head_parents - {c['sha'] for c in commits}
-            assert len(merge) <= 1, \
-                ">1 parent from base in PR's head is not supported"
-            if len(merge) == 1:
+            external_parents = len(merge)
+            if external_parents > 1:
+                raise exceptions.Unmergeable(
+                    "The PR head can only have one parent from the base branch "
+                    "(not part of the PR itself), found %d: %s" % (
+                        external_parents,
+                        ', '.join(merge)
+                ))
+            if external_parents == 1:
                 [base_commit] = merge

         commits_map = {c['sha']: c['sha'] for c in commits}
@@ -2008,44 +2031,45 @@ class Batch(models.Model):
             target = 'tmp.{}'.format(pr.target.name)
             original_head = gh.head(target)
             try:
-                method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr))
-                _logger.info(
-                    "Staged pr %s to %s by %s: %s -> %s",
-                    pr.display_name, pr.target.name, method,
-                    original_head, new_heads[pr]
-                )
-            except (exceptions.MergeError, AssertionError) as e:
-                # reset the head which failed, as rebase() may have partially
-                # updated it (despite later steps failing)
-                gh.set_ref(target, original_head)
-                # then reset every previous update
-                for to_revert in new_heads.keys():
-                    it = meta[to_revert.repository]
-                    it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head'])
-
-                if isinstance(e, exceptions.Skip):
-                    old_head, new_head, to_squash = e.args
-                    pr.write({
-                        'state': 'opened',
-                        'squash': to_squash,
-                        'head': new_head,
-                    })
-                    _logger.warning(
-                        "head mismatch on %s: had %s but found %s",
-                        pr.display_name, old_head, new_head
-                    )
-                    self.env['runbot_merge.pull_requests.feedback'].create({
-                        'repository': pr.repository.id,
-                        'pull_request': pr.number,
-                        'message': "We apparently missed an update to this PR "
-                                   "and tried to stage it in a state which "
-                                   "might not have been approved. PR has been "
-                                   "updated to %s, please check and approve or "
-                                   "re-approve." % new_head
-                    })
-                    return self.env['runbot_merge.batch']
-                else:
-                    raise exceptions.MergeError(pr)
+                try:
+                    method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr))
+                    _logger.info(
+                        "Staged pr %s to %s by %s: %s -> %s",
+                        pr.display_name, pr.target.name, method,
+                        original_head, new_heads[pr]
+                    )
+                except Exception:
+                    # reset the head which failed, as rebase() may have partially
+                    # updated it (despite later steps failing)
+                    gh.set_ref(target, original_head)
+                    # then reset every previous update
+                    for to_revert in new_heads.keys():
+                        it = meta[to_revert.repository]
+                        it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head'])
+                    raise
+            except github.MergeError:
+                raise exceptions.MergeError(pr)
+            except exceptions.Mismatch as e:
+                old_head, new_head, to_squash = e.args
+                pr.write({
+                    'state': 'opened',
+                    'squash': to_squash,
+                    'head': new_head,
+                })
+                _logger.warning(
+                    "head mismatch on %s: had %s but found %s",
+                    pr.display_name, old_head, new_head
+                )
+                self.env['runbot_merge.pull_requests.feedback'].create({
+                    'repository': pr.repository.id,
+                    'pull_request': pr.number,
+                    'message': "We apparently missed an update to this PR "
+                               "and tried to stage it in a state which "
+                               "might not have been approved. PR has been "
+                               "updated to %s, please check and approve or "
+                               "re-approve." % new_head
+                })
+                return self.env['runbot_merge.batch']

         # update meta to new heads
         for pr, head in new_heads.items():