From 4b12d88b3e5b9edc6d1b829889c0ad37c902edd1 Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Mon, 9 Aug 2021 13:21:24 +0200 Subject: [PATCH] [IMP] runbot_merge: remove unnecessary uniquifier dummy commits "Uniquifier" commits were introduced to ensure branches of a staging on which nothing had been staged would still be rebuilt properly. This means technically the branches on which something had been staged never *needed* a uniquifier, strictly speaking. And those lead to extra building, because once the actually staged PRs get pushed from staging to their final destination it's an unknown commit to the runbot, which needs to rebuild it instead of being able to just use the staging it already has. Thus only add the uniquifier where it *might* be necessary: technically the runbot should be able to manage this use case on its own, however there are still issues like an ancillary build working with the same branch tip (e.g. the "current master") and sending a failure result which would fail the entire staging. The uniquifier guards against this issue. Also update rebase semantics to always update the *commit date* of the rebased commits: this ensures the tip commit is always "recent" in the case of a rebase-ff (which is common as that's what single-commit PRs do), as the runbot may skip commits it considers "old". Also update some of the utility methods around repos / commits to be simpler, and avoid assuming the result is JSON-decodable (sometimes it is not). Also update the handling of commit statuses using postgres' ON CONFLICT and jsonb support, hopefully this improves (or even fixes) the serialization errors. Should be compatible with PostgreSQL 9.5 onwards, which is *ancient* at this point. 
Fixes #509 --- conftest.py | 8 +-- forwardport/tests/test_simple.py | 7 ++- runbot_merge/controllers/__init__.py | 39 ++++++-------- runbot_merge/github.py | 6 ++- runbot_merge/models/pull_requests.py | 30 +++++++---- runbot_merge/tests/test_basic.py | 81 +++++++++++++++------------- runbot_merge/tests/test_multirepo.py | 27 ++++++---- 7 files changed, 108 insertions(+), 90 deletions(-) diff --git a/conftest.py b/conftest.py index 678deec3..ed8526d3 100644 --- a/conftest.py +++ b/conftest.py @@ -577,12 +577,12 @@ class Repo: if force and r.status_code == 422: self.update_ref(name, commit, force=force) return - assert 200 <= r.status_code < 300, r.json() + assert r.ok, r.text def update_ref(self, name, commit, force=False): assert self.hook r = self._session.patch('https://api.github.com/repos/{}/git/refs/{}'.format(self.name, name), json={'sha': commit, 'force': force}) - assert 200 <= r.status_code < 300, r.json() + assert r.ok, r.text def protect(self, branch): assert self.hook @@ -638,7 +638,7 @@ class Repo: ], 'base_tree': tree }) - assert 200 <= r.status_code < 300, r.json() + assert r.ok, r.text tree = r.json()['sha'] data = { @@ -652,7 +652,7 @@ class Repo: data['committer'] = commit.committer r = self._session.post('https://api.github.com/repos/{}/git/commits'.format(self.name), json=data) - assert 200 <= r.status_code < 300, r.json() + assert r.ok, r.text hashes.append(r.json()['sha']) parents = [hashes[-1]] diff --git a/forwardport/tests/test_simple.py b/forwardport/tests/test_simple.py index 3377bfa4..818f176b 100644 --- a/forwardport/tests/test_simple.py +++ b/forwardport/tests/test_simple.py @@ -474,11 +474,10 @@ def test_access_rights(env, config, make_repo, users, author, reviewer, delegate assert pr1.staging_id and pr2.staging_id,\ "%s should have approved FP of PRs by %s" % (reviewer, author) st = prod.commit('staging.b') - c = prod.commit(st.parents[0]) # Should be signed-off by both original reviewer and forward port reviewer - original_signoff = 
signoff(config['role_user'], c.message) - forward_signoff = signoff(config['role_' + reviewer], c.message) - assert c.message.index(original_signoff) <= c.message.index(forward_signoff),\ + original_signoff = signoff(config['role_user'], st.message) + forward_signoff = signoff(config['role_' + reviewer], st.message) + assert st.message.index(original_signoff) <= st.message.index(forward_signoff),\ "Check that FP approver is after original PR approver as that's " \ "the review path for the PR" else: diff --git a/runbot_merge/controllers/__init__.py b/runbot_merge/controllers/__init__.py index 70765449..c91afcb2 100644 --- a/runbot_merge/controllers/__init__.py +++ b/runbot_merge/controllers/__init__.py @@ -237,30 +237,23 @@ def handle_status(env, event): 'status on %(sha)s %(context)s:%(state)s (%(target_url)s) [%(description)r]', event ) - Commits = env['runbot_merge.commit'] - env.cr.execute('SELECT id FROM runbot_merge_commit WHERE sha=%s FOR UPDATE', [event['sha']]) - c = Commits.browse(env.cr.fetchone()) - if c: - old = json.loads(c.statuses) - new = { - **old, - event['context']: { - 'state': event['state'], - 'target_url': event['target_url'], - 'description': event['description'] - } + status_value = json.dumps({ + event['context']: { + 'state': event['state'], + 'target_url': event['target_url'], + 'description': event['description'] } - if new != old: # don't update the commit if nothing's changed (e.g dupe status) - c.statuses = json.dumps(new) - else: - Commits.create({ - 'sha': event['sha'], - 'statuses': json.dumps({event['context']: { - 'state': event['state'], - 'target_url': event['target_url'], - 'description': event['description'] - }}) - }) + }) + # create status, or merge update into commit *unless* the update is already + # part of the status (dupe status) + env.cr.execute(""" + INSERT INTO runbot_merge_commit AS c (sha, to_check, statuses) + VALUES (%s, true, %s) + ON CONFLICT (sha) DO UPDATE + SET to_check = true, + statuses = 
c.statuses::jsonb || EXCLUDED.statuses::jsonb + WHERE NOT c.statuses::jsonb @> EXCLUDED.statuses::jsonb + """, [event['sha'], status_value]) return 'ok' diff --git a/runbot_merge/github.py b/runbot_merge/github.py index dca0e50f..89a873e4 100644 --- a/runbot_merge/github.py +++ b/runbot_merge/github.py @@ -8,6 +8,7 @@ import pathlib import pprint import textwrap import unicodedata +from datetime import datetime, timezone import requests import werkzeug.urls @@ -312,7 +313,10 @@ class GH(object): 'tree': c['new_tree'], 'parents': [prev], 'author': c['commit']['author'], - 'committer': c['commit']['committer'], + 'committer': dict( + c['commit']['committer'], + date=datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + ), }, check={409: MergeError}).json() logger.debug('copied %s to %s (parent: %s)', c['sha'], copy['sha'], prev) prev = mapping[c['sha']] = copy['sha'] diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index 84233d0c..19d0f2b0 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -518,10 +518,11 @@ class Branch(models.Model): Batch = self.env['runbot_merge.batch'] staged = Batch + original_heads = {} meta = {repo: {} for repo in self.project_id.repo_ids.having_branch(self)} for repo, it in meta.items(): gh = it['gh'] = repo.github() - it['head'] = gh.head(self.name) + it['head'] = original_heads[repo] = gh.head(self.name) # create tmp staging branch gh.set_ref('tmp.{}'.format(self.name), it['head']) @@ -567,23 +568,30 @@ class Branch(models.Model): for repo, h in heads.items() if not repo.endswith('^') ) - dummy_head = it['gh']('post', 'git/commits', json={ - 'message': '''force rebuild + dummy_head = {'sha': it['head']} + if it['head'] == original_heads[repo]: + # if the repo has not been updated by the staging, create a + # dummy commit to force rebuild + dummy_head = it['gh']('post', 'git/commits', json={ + 'message': '''force rebuild uniquifier: %s For-Commit-Id: %s %s''' 
% (r, it['head'], trailer), - 'tree': tree['sha'], - 'parents': [it['head']], - }).json() + 'tree': tree['sha'], + 'parents': [it['head']], + }).json() - # $repo is the head to check, $repo^ is the head to merge + # $repo is the head to check, $repo^ is the head to merge (they + # might be the same) heads[repo.name + '^'] = it['head'] heads[repo.name] = dummy_head['sha'] - self.env['runbot_merge.commit'].create({ - 'sha': dummy_head['sha'], - 'statuses': '{}', - }) + self.env.cr.execute( + "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " + "VALUES (%s, true, '{}') " + "ON CONFLICT (sha) DO UPDATE SET to_check=true", + [dummy_head['sha']] + ) # create actual staging object st = self.env['runbot_merge.stagings'].create({ diff --git a/runbot_merge/tests/test_basic.py b/runbot_merge/tests/test_basic.py index 17b011ba..08730262 100644 --- a/runbot_merge/tests/test_basic.py +++ b/runbot_merge/tests/test_basic.py @@ -98,8 +98,6 @@ def test_trivial_flow(env, repo, page, users, config): assert s[1].get('class') == 'bg-success' assert s[1][0].text.strip() == '{}: ci/runbot'.format(repo.name) - assert re.match('^force rebuild', staging_head.message) - assert st.state == 'success' assert pr_id.state == 'merged' assert pr_page(page, pr).cssselect('.alert-success') @@ -846,18 +844,17 @@ def test_forward_port(env, repo, config): pr.post_comment('hansen r+ merge', config['role_reviewer']['token']) env.run_crons() - st = repo.commit('heads/staging.master') - assert st.message.startswith('force rebuild') + st = repo.commit('staging.master') with repo: repo.post_status(st.id, 'success', 'legal/cla') repo.post_status(st.id, 'success', 'ci/runbot') env.run_crons() - h = repo.commit('heads/master') - assert set(st.parents) == {h.id} + h = repo.commit('master') + assert st.id == h.id assert set(h.parents) == {m, pr.head} - commits = {c['sha'] for c in repo.log('heads/master')} + commits = {c['sha'] for c in repo.log('master')} assert len(commits) == 112 
@pytest.mark.skip("Needs to find a way to make set_ref fail on *second* call.") @@ -1231,13 +1228,10 @@ class TestMergeMethod: staging = repo.commit('heads/staging.master') assert not repo.is_ancestor(prx.head, of=staging.id),\ "the pr head should not be an ancestor of the staging branch in a squash merge" - assert re.match('^force rebuild', staging.message) assert repo.read_tree(staging) == { 'm': 'c1', 'm2': 'm2', }, "the tree should still be correctly merged" - [actual_sha] = staging.parents - actual = repo.commit(actual_sha) - assert actual.parents == [m2],\ + assert staging.parents == [m2],\ "dummy commit aside, the previous master's tip should be the sole parent of the staging commit" with repo: @@ -1251,8 +1245,8 @@ class TestMergeMethod: assert pr.state == 'merged' assert prx.state == 'closed' assert json.loads(pr.commits_map) == { - c1: actual_sha, - '': actual_sha, + c1: staging.id, + '': staging.id, }, "for a squash, the one PR commit should be mapped to the one rebased commit" def test_pr_update_to_many_commits(self, repo, env): @@ -1443,8 +1437,7 @@ class TestMergeMethod: f'title\n\nbody\n\ncloses {pr_id.display_name}\n\nSigned-off-by: {reviewer}', frozenset([nm2, nb1]) ) - expected = (re_matches('^force rebuild'), frozenset([merge_head])) - assert staging == expected + assert staging == merge_head with repo: repo.post_status('heads/staging.master', 'success', 'legal/cla') @@ -1497,13 +1490,20 @@ class TestMergeMethod: +------+ +--^---+ """ with repo: - m0 = repo.make_commit(None, 'M0', None, tree={'m': '0'}) - m1 = repo.make_commit(m0, 'M1', None, tree={'m': '1'}) - m2 = repo.make_commit(m1, 'M2', None, tree={'m': '2'}) - repo.make_ref('heads/master', m2) + _, m1, m2 = repo.make_commits( + None, + Commit('M0', tree={'m': '0'}), + Commit('M1', tree={'m': '1'}), + Commit('M2', tree={'m': '2'}), + ref='heads/master' + ) + + b0, b1 = repo.make_commits( + m1, + Commit('B0', tree={'b': '0'}, author={'name': 'Maarten Tromp', 'email': 'm.tromp@example.nl', 
'date': '1651-03-30T12:00:00Z'}), + Commit('B1', tree={'b': '1'}, author={'name': 'Rein Huydecoper', 'email': 'r.huydecoper@example.nl', 'date': '1986-04-17T12:00:00Z'}), + ) - b0 = repo.make_commit(m1, 'B0', None, tree={'m': '1', 'b': '0'}) - b1 = repo.make_commit(b0, 'B1', None, tree={'m': '1', 'b': '1'}) prx = repo.make_pr(title='title', body='body', target='master', head=b1) repo.post_status(prx.head, 'success', 'legal/cla') repo.post_status(prx.head, 'success', 'ci/runbot') @@ -1518,8 +1518,7 @@ class TestMergeMethod: reviewer = get_partner(env, users["reviewer"]).formatted_email nb1 = node(f'B1\n\ncloses {pr_id.display_name}\n\nSigned-off-by: {reviewer}', node(part_of('B0', pr_id), nm2)) - expected = node(re_matches('^force rebuild'), nb1) - assert staging == expected + assert staging == nb1 with repo: repo.post_status('heads/staging.master', 'success', 'legal/cla') @@ -1547,6 +1546,23 @@ class TestMergeMethod: b0: m0.id, # first PR's commit } assert m0.parents == [m2], "can't hurt to check the parent of our root commit" + assert m0.author['date'] != m0.committer['date'], "commit date should have been rewritten" + assert m1.author['date'] != m1.committer['date'], "commit date should have been rewritten" + + utcday = datetime.datetime.utcnow().date() + def parse(dt): + return datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%SZ") + + # FIXME: actual commit creation could run before the date rollover and + # local datetime.utcnow() after + assert parse(m0.committer['date']).date() == utcday + # FIXME: git date storage is unreliable and non-portable outside of an + # unsigned 31b epoch range so the m0 event may get flung in the + # future (compared to the literal datum), this test unexpectedly + # becoming true if run on the exact wrong day + assert parse(m0.author['date']).date() != utcday + assert parse(m1.committer['date']).date() == utcday + assert parse(m0.author['date']).date() != utcday @pytest.mark.skip(reason="what do if the PR contains merge commits???") 
def test_pr_contains_merges(self, repo, env): @@ -2397,11 +2413,7 @@ class TestBatching(object): p1, node(part_of('commit_PR2_01', pr2), node(part_of('commit_PR2_00', pr2), p1)) ) - expected = (re_matches('^force rebuild'), frozenset([p2])) - import pprint - pprint.pprint(staging) - pprint.pprint(expected) - assert staging == expected + assert staging == p2 def test_staging_batch_norebase(self, env, repo, users, config): """ If multiple PRs are ready for the same target at the same point, @@ -2426,7 +2438,7 @@ class TestBatching(object): assert pr2.staging_id assert pr1.staging_id == pr2.staging_id - log = list(repo.log('heads/staging.master')) + log = list(repo.log('staging.master')) staging = log_to_node(log) reviewer = get_partner(env, users["reviewer"]).formatted_email @@ -2441,8 +2453,7 @@ class TestBatching(object): p1, node('commit_PR2_01', node('commit_PR2_00', node('initial'))) ) - expected = (re_matches('^force rebuild'), frozenset([p2])) - assert staging == expected + assert staging == p2 def test_staging_batch_squash(self, env, repo, users, config): """ If multiple PRs are ready for the same target at the same point, @@ -2467,11 +2478,9 @@ class TestBatching(object): staging = log_to_node(log) reviewer = get_partner(env, users["reviewer"]).formatted_email - expected = node( - re_matches('^force rebuild'), - node('commit_PR2_00\n\ncloses {}#{}\n\nSigned-off-by: {}'.format(repo.name, pr2.number, reviewer), - node('commit_PR1_00\n\ncloses {}#{}\n\nSigned-off-by: {}'.format(repo.name, pr1.number, reviewer), - node('initial')))) + expected = node('commit_PR2_00\n\ncloses {}#{}\n\nSigned-off-by: {}'.format(repo.name, pr2.number, reviewer), + node('commit_PR1_00\n\ncloses {}#{}\n\nSigned-off-by: {}'.format(repo.name, pr1.number, reviewer), + node('initial'))) assert staging == expected def test_batching_pressing(self, env, repo, config): diff --git a/runbot_merge/tests/test_multirepo.py b/runbot_merge/tests/test_multirepo.py index 99d3f48b..ebc90c61 100644 --- 
a/runbot_merge/tests/test_multirepo.py +++ b/runbot_merge/tests/test_multirepo.py @@ -108,10 +108,15 @@ def test_stage_one(env, project, repo_a, repo_b, config): ) env.run_crons() - assert to_pr(env, pr_a).state == 'ready' - assert to_pr(env, pr_a).staging_id - assert to_pr(env, pr_b).state == 'ready' - assert not to_pr(env, pr_b).staging_id + pra_id = to_pr(env, pr_a) + assert pra_id.state == 'ready' + assert pra_id.staging_id + assert repo_a.commit('staging.master').message.startswith('commit_A_00') + assert repo_b.commit('staging.master').message.startswith('force rebuild') + + prb_id = to_pr(env, pr_b) + assert prb_id.state == 'ready' + assert not prb_id.staging_id get_related_pr_labels = XPath('.//*[normalize-space(text()) = "Linked pull requests"]//a/text()') def test_stage_match(env, project, repo_a, repo_b, config, page): @@ -364,15 +369,16 @@ def test_sub_match(env, project, repo_a, repo_b, repo_c, config): "branch-matched PRs should be part of the same staging" st = pr_b.staging_id - b_staging = repo_b.commit('heads/staging.master') - c_staging = repo_c.commit('heads/staging.master') + a_staging = repo_a.commit('staging.master') + b_staging = repo_b.commit('staging.master') + c_staging = repo_c.commit('staging.master') assert json.loads(st.heads) == { - repo_a.name: repo_a.commit('heads/staging.master').id, - repo_a.name + '^': repo_a.commit('heads/master').id, + repo_a.name: a_staging.id, + repo_a.name + '^': a_staging.parents[0], repo_b.name: b_staging.id, - repo_b.name + '^': b_staging.parents[0], + repo_b.name + '^': b_staging.id, repo_c.name: c_staging.id, - repo_c.name + '^': c_staging.parents[0], + repo_c.name + '^': c_staging.id, } def test_merge_fail(env, project, repo_a, repo_b, users, config): @@ -432,7 +438,6 @@ def test_merge_fail(env, project, repo_a, repo_b, users, config): c['commit']['message'] for c in repo_a.log('heads/staging.master') ] == [ - re_matches('^force rebuild'), """commit_do-b-thing_00 closes %s