From 3885ca244cf09d0fccae303005a589138b65c1d7 Mon Sep 17 00:00:00 2001
From: Xavier Morel <xmo@odoo.com>
Date: Thu, 20 Sep 2018 15:02:18 +0200
Subject: [PATCH] [FIX] runbot_merge: handling of PRs of >50 commits

A limitation to 50 commits PRs was put in place to avoid rebasing
huge PRs (as a rebase means 1 merge + 1 commit *per source commit*),
however the way it was done would also limit regular merges, and the
way the limitation was implemented was not clear.

* explicitly check that limit in the rebase case
* make error message on PR sizes (rebase 50 or merge 250) clearer
* remove limit from commits-fetching (check it beforehand)
* add a test to merge >50 commits PRs
* fix the local implementation of pulls/:number/commits to properly
  paginate
---
 runbot_merge/controllers/dashboard.py      |  2 +-
 runbot_merge/github.py                     | 12 ++++++---
 runbot_merge/models/pull_requests.py       |  9 +++++--
 runbot_merge/tests/fake_github/__init__.py | 29 +++++++++++++++++---
 runbot_merge/tests/remote.py               |  4 +--
 runbot_merge/tests/test_basic.py           | 31 +++++++++++++++++++++-
 runbot_merge/tests/test_multirepo.py       |  2 +-
 7 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/runbot_merge/controllers/dashboard.py b/runbot_merge/controllers/dashboard.py
index 4d5f0d2e..5934958a 100644
--- a/runbot_merge/controllers/dashboard.py
+++ b/runbot_merge/controllers/dashboard.py
@@ -6,5 +6,5 @@ class MergebotDashboard(Controller):
     @route('/runbot_merge', auth="public", type="http", website=True)
     def dashboard(self):
         return request.render('runbot_merge.dashboard', {
-            'projects': request.env['runbot_merge.project'].sudo().search([]),
+            'projects': request.env['runbot_merge.project'].with_context(active_test=False).sudo().search([]),
         })
diff --git a/runbot_merge/github.py b/runbot_merge/github.py
index 7e52cf17..76314692 100644
--- a/runbot_merge/github.py
+++ b/runbot_merge/github.py
@@ -169,13 +169,18 @@ class GH(object):
             if not r.links.get('next'):
                 return
 
+    def commits_lazy(self, pr):
+        for page in itertools.count(1):
+            r = self('get', 'pulls/{}/commits'.format(pr), params={'page': page})
+            yield from r.json()
+            if not r.links.get('next'):
+                return
+
     def commits(self, pr):
         """ Returns a PR's commits oldest first (that's what GH does &
         is what we want)
         """
-        r = self('get', 'pulls/{}/commits'.format(pr), params={'per_page': PR_COMMITS_MAX})
-        assert not r.links.get('next'), "more than {} commits".format(PR_COMMITS_MAX)
-        return r.json()
+        return list(self.commits_lazy(pr))
 
     def statuses(self, h):
         r = self('get', 'commits/{}/status'.format(h)).json()
@@ -184,7 +189,6 @@ class GH(object):
             **s,
         } for s in r['statuses']]
 
-PR_COMMITS_MAX = 50
 def shorten(s):
     if not s:
         return s
diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py
index 5e9fc653..72bedba4 100644
--- a/runbot_merge/models/pull_requests.py
+++ b/runbot_merge/models/pull_requests.py
@@ -950,9 +950,14 @@ class Batch(models.Model):
             original_head = gh.head(target)
             try:
                 # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1]
+                _, prdict = gh.pr(pr.number)
+                commits = prdict['commits']
+                assert commits < 50 or not pr.rebase, \
+                    "rebasing a PR or more than 50 commits is a tad excessive"
+                assert commits < 250, "merging PRs of 250+ commits is not supported (https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)"
                 pr_commits = gh.commits(pr.number)
                 rebase_and_merge = pr.rebase
-                squash = rebase_and_merge and len(pr_commits) == 1
+                squash = rebase_and_merge and commits == 1
                 if squash:
                     method = 'squash'
                     msg = build_message(pr_commits[0]['commit']['message'], pr)
@@ -1007,7 +1012,7 @@ class Batch(models.Model):
             except (exceptions.MergeError, AssertionError) as e:
                 _logger.exception("Failed to merge %s:%s into staging branch (error: %s)", pr.repository.name, pr.number, e)
                 pr.state = 'error'
-                gh.comment(pr.number, "Unable to stage PR (merge conflict)")
+                gh.comment(pr.number, "Unable to stage PR (%s)" % e)
 
                 # reset other PRs
                 for to_revert in new_heads.keys():
diff --git a/runbot_merge/tests/fake_github/__init__.py b/runbot_merge/tests/fake_github/__init__.py
index 47f5d529..695aeeee 100644
--- a/runbot_merge/tests/fake_github/__init__.py
+++ b/runbot_merge/tests/fake_github/__init__.py
@@ -10,6 +10,7 @@ import responses
 import werkzeug.urls
 import werkzeug.test
 import werkzeug.wrappers
+from werkzeug.urls import url_parse, url_encode
 
 from . import git
 
@@ -32,8 +33,11 @@ class APIResponse(responses.BaseResponse):
     def get_response(self, request):
         m = self.url.match(request.url)
 
-        (status, r) = self.sim.repos[m.group('repo')].api(m.group('path'), request)
+        r = self.sim.repos[m.group('repo')].api(m.group('path'), request)
+        if isinstance(r, responses.HTTPResponse):
+            return r
 
+        (status, r) = r
         headers = self.get_headers()
         body = io.BytesIO(b'')
         if r is not None:
@@ -138,7 +142,7 @@ class Repo(object):
         c.statuses.append({'state': state, 'context': context, **kw})
         self.notify('status', self.name, context, state, c.id, kw)
 
-    def make_commit(self, ref, message, author, committer=None, tree=None):
+    def make_commit(self, ref, message, author, committer=None, tree=None, wait=True):
         assert tree, "a commit must provide either a full tree"
 
         refs = ref or []
@@ -420,8 +424,27 @@ class Repo(object):
         if not isinstance(pr, PR):
             return (404, None)
 
-        return (200, [c.to_json() for c in pr.commits])
+        url = url_parse(r.url)
+        qs = url.decode_query()
+        # github pages are 1-indexeds
+        page = int(qs.get('page') or 1) - 1
+        per_page = int(qs.get('per_page') or 100)
 
+        offset = page * per_page
+        limit = page + 1 * per_page
+        headers = {'Content-Type': 'application/json'}
+        if len(pr.commits) > limit:
+            nextlink = url.replace(query=url_encode(dict(qs, page=page+1)))
+            headers['Link'] = '<%s>; rel="next"' % str(nextlink)
+
+        commits = [c.to_json() for c in pr.commits[offset:limit]]
+        body = io.BytesIO(json.dumps(commits).encode('utf-8'))
+
+        return responses.HTTPResponse(
+            status=200, reason="OK",
+            headers=headers,
+            body=body, preload_content=False,
+        )
 
     def _add_labels(self, r, number):
         try:
diff --git a/runbot_merge/tests/remote.py b/runbot_merge/tests/remote.py
index e0f313d0..fb99905c 100644
--- a/runbot_merge/tests/remote.py
+++ b/runbot_merge/tests/remote.py
@@ -451,7 +451,7 @@ class Repo:
         assert 200 <= r.status_code < 300, r.json()
         wait_for_hook()
 
-    def make_commit(self, ref, message, author, committer=None, tree=None):
+    def make_commit(self, ref, message, author, committer=None, tree=None, wait=True):
         assert tree, "not supporting changes/updates"
         assert not (author or committer)
 
@@ -496,7 +496,7 @@ class Repo:
         # if the first parent is an actual ref (rather than a hash) update it
         if parents[0] != refs[0]:
             self.update_ref(refs[0], commit_sha)
-        else:
+        elif wait:
             wait_for_hook()
         return commit_sha
 
diff --git a/runbot_merge/tests/test_basic.py b/runbot_merge/tests/test_basic.py
index a8cadd5e..aa446acb 100644
--- a/runbot_merge/tests/test_basic.py
+++ b/runbot_merge/tests/test_basic.py
@@ -1,6 +1,7 @@
 import datetime
 import json
 import re
+import time
 
 import pytest
 
@@ -311,7 +312,7 @@ def test_staging_merge_fail(env, repo, users):
     assert prx.labels == {'seen 🙂', 'error 🙅'}
     assert prx.comments == [
         (users['reviewer'], 'hansen r+'),
-        (users['user'], 'Unable to stage PR (merge conflict)'),
+        (users['user'], re_matches('^Unable to stage PR')),
     ]
 
 def test_staging_ci_timeout(env, repo, users):
@@ -544,6 +545,34 @@ def test_close_staged(env, repo):
     assert not pr.staging_id
     assert not env['runbot_merge.stagings'].search([])
 
+def test_forward_port(env, repo):
+    m = repo.make_commit(None, 'initial', None, tree={'m': 'm'})
+    repo.make_ref('heads/master', m)
+
+    head = m
+    for i in range(110):
+        head = repo.make_commit(head, 'c_%03d' % i, None, tree={'m': 'm', 'f': str(i)}, wait=False)
+    # for remote since we're not waiting in commit creation
+    time.sleep(10)
+    pr = repo.make_pr('PR', None, target='master', ctid=head, user='user')
+    repo.post_status(pr.head, 'success', 'legal/cla')
+    repo.post_status(pr.head, 'success', 'ci/runbot')
+    pr.post_comment('hansen rebase- r+', "reviewer")
+    env['runbot_merge.project']._check_progress()
+
+    st = repo.commit('heads/staging.master')
+    assert st.message.startswith('force rebuild')
+
+    repo.post_status(st.id, 'success', 'legal/cla')
+    repo.post_status(st.id, 'success', 'ci/runbot')
+    env['runbot_merge.project']._check_progress()
+
+    h = repo.commit('heads/master')
+    assert set(st.parents) == {h.id}
+    assert set(h.parents) == {m, pr.head}
+    commits = {c['sha'] for c in repo.log('heads/master')}
+    assert len(commits) == 112
+
 class TestRetry:
     @pytest.mark.xfail(reason="This may not be a good idea as it could lead to tons of rebuild spam")
     def test_auto_retry_push(self, env, repo):
diff --git a/runbot_merge/tests/test_multirepo.py b/runbot_merge/tests/test_multirepo.py
index dee975ee..21e42631 100644
--- a/runbot_merge/tests/test_multirepo.py
+++ b/runbot_merge/tests/test_multirepo.py
@@ -186,7 +186,7 @@ def test_merge_fail(env, project, repo_a, repo_b, users):
     assert failed.state == 'error'
     assert pr1b.comments == [
         (users['reviewer'], 'hansen r+'),
-        (users['user'], 'Unable to stage PR (merge conflict)'),
+        (users['user'], re_matches('^Unable to stage PR')),
     ]
     other = to_pr(env, pr1a)
     assert not other.staging_id