runbot/runbot_merge/github.py
Xavier Morel a1384b3868 [FIX] runbot_merge: iterate commits in topological order
Previously, runbot_merge assumed github would return commits in
topological order (from base to head of PR). However as in the UI
github sorts commits using the author's date field, so depending on
rebasing/cherrypick/... it's possible to have the head of the commit
be "younger" than the rest. In that case robodoo will try to merge
it *first*, then attempt to merge the rest on top of it (-ish, it'd
probably make a hash of it if that worked), at which point github
replies with a 204 (nothing to merge) because the PR head has already
included everything which topologically precedes it.

Fix: re-sort commits topologically when fetching the PR's log. That
way they're rebased in the proper order and correctly linked to one
another.

Example problematic PR: odoo/enterprise#2794, the commits are

773aef03a59d50b33221d7cdcdf54cd0cbe0c914
    author.date: 2018-10-01T14:58:38Z
879547c8dd37e7f413a97393a82f92377785b50b (parent: 773aef03)
    author.date: 2018-10-01T12:02:08Z

Because 879547c8 is "older" than 773aef03, github returns it first,
both in the UI and via the API.

Also fixed up support for committer & author metadata in fake_github
so the local tests would both expose the issue properly and allow
fixing it.
2018-10-09 15:08:13 +02:00

222 lines
7.9 KiB
Python

import collections
import itertools
import json as json_
import logging
import requests
from odoo.tools import topological_sort
from . import exceptions
_logger = logging.getLogger(__name__)
class GH(object):
def __init__(self, token, repo):
self._url = 'https://api.github.com'
self._repo = repo
session = self._session = requests.Session()
session.headers['Authorization'] = 'token {}'.format(token)
def __call__(self, method, path, params=None, json=None, check=True):
"""
:type check: bool | dict[int:Exception]
"""
r = self._session.request(
method,
'{}/repos/{}/{}'.format(self._url, self._repo, path),
params=params,
json=json
)
if check:
if isinstance(check, collections.Mapping):
exc = check.get(r.status_code)
if exc:
raise exc(r.content)
if r.status_code == 422:
# dump & format body if it's a 422 as GH's HTTP Reason is
# completely useless (only states
# "Unprocessable Entity for URL: <endpoint>" which is not
# exactly great for debugging what went wrong
raise requests.HTTPError(
json_.dumps(r.json(), indent=4),
response=r
)
r.raise_for_status()
return r
def head(self, branch):
d = self('get', 'git/refs/heads/{}'.format(branch)).json()
assert d['ref'] == 'refs/heads/{}'.format(branch)
assert d['object']['type'] == 'commit'
_logger.debug("head(%s, %s) -> %s", self._repo, branch, d['object']['sha'])
return d['object']['sha']
def commit(self, sha):
c = self('GET', 'git/commits/{}'.format(sha)).json()
_logger.debug('commit(%s, %s) -> %s', self._repo, sha, shorten(c['message']))
return c
def comment(self, pr, message):
self('POST', 'issues/{}/comments'.format(pr), json={'body': message})
_logger.debug('comment(%s, %s, %s)', self._repo, pr, shorten(message))
def close(self, pr, message):
self.comment(pr, message)
self('PATCH', 'pulls/{}'.format(pr), json={'state': 'closed'})
def change_tags(self, pr, from_, to_):
to_add, to_remove = to_ - from_, from_ - to_
for t in to_remove:
r = self('DELETE', 'issues/{}/labels/{}'.format(pr, t), check=False)
# successful deletion or attempt to delete a tag which isn't there
# is fine, otherwise trigger an error
if r.status_code not in (200, 404):
r.raise_for_status()
if to_add:
self('POST', 'issues/{}/labels'.format(pr), json=list(to_add))
_logger.debug('change_tags(%s, %s, remove=%s, add=%s)', self._repo, pr, to_remove, to_add)
def fast_forward(self, branch, sha):
try:
self('patch', 'git/refs/heads/{}'.format(branch), json={'sha': sha})
_logger.debug('fast_forward(%s, %s, %s) -> OK', self._repo, branch, sha)
except requests.HTTPError:
_logger.debug('fast_forward(%s, %s, %s) -> ERROR', self._repo, branch, sha, exc_info=True)
raise exceptions.FastForwardError()
def set_ref(self, branch, sha):
# force-update ref
r = self('patch', 'git/refs/heads/{}'.format(branch), json={
'sha': sha,
'force': True,
}, check=False)
if r.status_code == 200:
_logger.debug('set_ref(update, %s, %s, %s) -> OK', self._repo, branch, sha)
return
# 422 makes no sense but that's what github returns, leaving 404 just
# in case
if r.status_code in (404, 422):
# fallback: create ref
r = self('post', 'git/refs', json={
'ref': 'refs/heads/{}'.format(branch),
'sha': sha,
}, check=False)
if r.status_code == 201:
_logger.debug('set_ref(create, %s, %s, %s) -> OK', self._repo, branch, sha)
return
raise AssertionError("{}: {}".format(r.status_code, r.json()))
def merge(self, sha, dest, message):
r = self('post', 'merges', json={
'base': dest,
'head': sha,
'commit_message': message,
}, check={409: exceptions.MergeError})
try:
r = r.json()
except Exception:
raise exceptions.MergeError("Got non-JSON reponse from github: %s %s (%s)" % (r.status_code, r.reason, r.content.decode('iso-8859-1')))
_logger.debug("merge(%s, %s, %s) -> %s", self._repo, dest, shorten(message), r['sha'])
return dict(r['commit'], sha=r['sha'])
def rebase(self, pr, dest, reset=False, commits=None):
""" Rebase pr's commits on top of dest, updates dest unless ``reset``
is set.
Returns the hash of the rebased head.
"""
original_head = self.head(dest)
if commits is None:
commits = self.commits(pr)
assert commits, "can't rebase a PR with no commits"
for c in commits:
assert len(c['parents']) == 1, "can't rebase commits with more than one parent"
tmp_msg = 'temp rebasing PR %s (%s)' % (pr, c['sha'])
c['new_tree'] = self.merge(c['sha'], dest, tmp_msg)['tree']['sha']
prev = original_head
for c in commits:
copy = self('post', 'git/commits', json={
'message': c['commit']['message'],
'tree': c['new_tree'],
'parents': [prev],
'author': c['commit']['author'],
'committer': c['commit']['committer'],
}, check={409: exceptions.MergeError}).json()
prev = copy['sha']
if reset:
self.set_ref(dest, original_head)
else:
self.set_ref(dest, prev)
_logger.debug('%s, %s, %s, reset=%s, commits=%s) -> %s',
self._repo, pr, dest, reset, commits and len(commits),
prev)
# prev is updated after each copy so it's the rebased PR head
return prev
# fetch various bits of issues / prs to load them
def pr(self, number):
return (
self('get', 'issues/{}'.format(number)).json(),
self('get', 'pulls/{}'.format(number)).json()
)
def comments(self, number):
for page in itertools.count(1):
r = self('get', 'issues/{}/comments'.format(number), params={'page': page})
yield from r.json()
if not r.links.get('next'):
return
def reviews(self, number):
for page in itertools.count(1):
r = self('get', 'pulls/{}/reviews'.format(number), params={'page': page})
yield from r.json()
if not r.links.get('next'):
return
def commits_lazy(self, pr):
for page in itertools.count(1):
r = self('get', 'pulls/{}/commits'.format(pr), params={'page': page})
yield from r.json()
if not r.links.get('next'):
return
def commits(self, pr):
""" Returns a PR's commits oldest first (that's what GH does &
is what we want)
"""
commits = list(self.commits_lazy(pr))
# map shas to the position the commit *should* have
idx = {
c: i
for i, c in enumerate(topological_sort({
c['sha']: [p['sha'] for p in c['parents']]
for c in commits
}))
}
return sorted(commits, key=lambda c: idx[c['sha']])
def statuses(self, h):
r = self('get', 'commits/{}/status'.format(h)).json()
return [{
'sha': r['sha'],
**s,
} for s in r['statuses']]
def shorten(s):
if not s:
return s
line1 = s.split('\n', 1)[0]
if len(line1) < 50:
return line1
return line1[:47] + '...'