[IMP] forwardport: handling of missing magic refs

Github can fail to create the magic refs on PRs
(`refs/pull/?/head`). Since forwardport relies on these refs to fetch
PR content this is an issue when it occurs, as the forward ports fail
in a loop.

After discussion with Github support, it turns out Github enabled
`allowReachableSHA1InWant` a while back, meaning it's possible to
fetch content by commit (rather than ref) as long as the content is
"in network".

Use this property as fallback when checking if we can see the PR head
before forward porting.

Also:

- remove explicit configuration of GC during fetch, it doesn't disable
  the autogc (yet?) but that's likely going to happen anyway
- update logging and logger hierarchy during forward port to make
  things clearer and easier to extract, although based on PR id rather
  than number
- rate limit failing forward ports to avoid running them on every cron
(~ every minute), run them every ~30min instead, this provides higher
  odds of recovery with less log garbage in case of transient github
  failure, and if the PR is stuck it limits the log pollution

Fixes #658
This commit is contained in:
Xavier Morel 2022-10-28 08:03:12 +02:00
parent fb8f44dd01
commit b45ecf08f9
4 changed files with 108 additions and 20 deletions

View File

@ -2,7 +2,7 @@
import logging
import uuid
from contextlib import ExitStack
from datetime import datetime
from datetime import datetime, timedelta
from dateutil import relativedelta
@ -30,7 +30,11 @@ class Queue:
except Exception:
_logger.exception("Error while processing %s, skipping", b)
self.env.cr.rollback()
self.clear_caches()
b._on_failure()
self.env.cr.commit()
def _on_failure(self):
pass
def _search_domain(self):
return []
@ -47,6 +51,16 @@ class ForwardPortTasks(models.Model, Queue):
('fp', 'Forward Port Followup'),
('insert', 'New branch port')
], required=True)
retry_after = fields.Datetime(required=True, default='1900-01-01 01:01:01')
def _search_domain(self):
    """Only select batches whose ``retry_after`` timestamp has elapsed,
    rate-limiting failed forward ports instead of retrying them on
    every cron run.
    """
    cutoff = fields.Datetime.to_string(fields.Datetime.now())
    return super()._search_domain() + [('retry_after', '<=', cutoff)]
def _on_failure(self):
    """On processing failure, push ``retry_after`` 30 minutes into the
    future so a stuck batch is retried roughly twice an hour rather
    than on every cron run (~every minute), limiting log pollution.
    """
    super()._on_failure()
    retry_at = fields.Datetime.now() + timedelta(minutes=30)
    self.retry_after = fields.Datetime.to_string(retry_at)
def _process_item(self):
batch = self.batch_id

View File

@ -26,6 +26,7 @@ import re
import subprocess
import tempfile
import typing
from functools import partial
import dateutil.relativedelta
import requests
@ -805,21 +806,30 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
cherrypick and a list of all PR commit hashes
:rtype: (None | (str, str, str, list[str]), Repo)
"""
source = self._get_local_directory()
logger = _logger.getChild(str(self.id))
root = self._get_root()
# update all the branches & PRs
r = source.with_params('gc.pruneExpire=1.day.ago')\
.with_config(
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)\
.fetch('-p', 'origin')
_logger.info("Updated %s:\n%s", source._directory, r.stdout.decode())
source.cat_file(e=root.head)
# create working copy
_logger.info(
"Create working copy to forward-port %s (really %s) to %s",
self.display_name, root.display_name, target_branch.name)
logger.info(
"Forward-porting %s (%s) to %s",
self.display_name, root.display_name, target_branch.name
)
source = self._get_local_directory()
check_commit = partial(source.check(False).cat_file, e=root.head)
r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\
.fetch('origin')
logger.info("Updated %s:\n%s", source._directory, r.stdout.decode())
if check_commit().returncode:
# fallback: try to fetch the commit directly if the magic ref doesn't work?
r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT) \
.fetch('origin', root.head)
logger.info("Updated %s:\n%s", source._directory, r.stdout.decode())
if check_commit().returncode:
raise ForwardPortError(
f"During forward port of {self.display_name}, unable to find "
f"expected head of {root.display_name} ({root.head})"
)
logger.info("Create working copy...")
working_copy = source.clone(
cleanup.enter_context(
tempfile.TemporaryDirectory(
@ -840,7 +850,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
p=project_id
)
)
_logger.info("Create FP branch %s", fp_branch_name)
logger.info("Create FP branch %s in %s", fp_branch_name, working_copy._directory)
working_copy.checkout(b=fp_branch_name)
try:
@ -921,7 +931,7 @@ stderr:
:return: ``True`` if the cherrypick was successful, ``False`` otherwise
"""
# <xxx>.cherrypick.<number>
logger = _logger.getChild('cherrypick').getChild(str(self.number))
logger = _logger.getChild(str(self.id)).getChild('cherrypick')
# original head so we can reset
prev = original_head = working_copy.stdout().rev_parse('HEAD').stdout.decode().strip()
@ -1208,6 +1218,9 @@ class GitCommand:
class CherrypickError(Exception):
    """Raised when the cherry-pick step of a forward port fails."""
class ForwardPortError(Exception):
    """Raised when a forward port cannot proceed at all, e.g. the
    expected head commit of the source PR cannot be fetched (missing
    magic ref and direct commit fetch both failed).
    """
def _clean_rename(s):
""" Filters out the "inexact rename detection" spam of cherry-pick: it's
useless but there seems to be no good way to silence these messages.

View File

@ -271,7 +271,7 @@ def test_multiple_commits_same_authorship(env, config, make_repo):
def test_multiple_commits_different_authorship(env, config, make_repo, users, rolemap):
""" When a PR has multiple commits by different authors, the resulting
(squashed) conflict commit should have
(squashed) conflict commit should have an empty email
"""
author = {'name': 'George Pearce', 'email': 'gp@example.org'}
committer = {'name': 'G. P. W. Meredith', 'email': 'gpwm@example.org'}

View File

@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import pytest
from utils import seen, Commit, to_pr
@ -812,3 +814,62 @@ def test_freeze(env, config, make_repo, users):
assert not env['runbot_merge.pull_requests'].search([
('state', '!=', 'merged')
]), "the release PRs should not be forward-ported"
def test_missing_magic_ref(env, config, make_repo):
"""There are cases where github fails to create / publish or fails to update
the magic refs in refs/pull/*.
In that case, pulling from the regular remote does not bring in the contents
of the PR we're trying to forward port, and the forward porting process
fails.
Emulate this behaviour by updating the PR with a commit which lives in the
repo but has no ref.
"""
_, prod, _ = make_basic(env, config, make_repo, fp_token=True, fp_remote=True)
a_head = prod.commit('refs/heads/a')
# set up a simple PR targeting branch `a`, get it CI-approved and reviewed
with prod:
[c] = prod.make_commits(a_head.id, Commit('x', tree={'x': '0'}), ref='heads/change')
pr = prod.make_pr(target='a', head='change')
prod.post_status(c, 'success', 'legal/cla')
prod.post_status(c, 'success', 'ci/runbot')
pr.post_comment('hansen r+', config['role_reviewer']['token'])
env.run_crons()
# create variant of pr head in fork, update PR with that commit as head so
# it's not found after a fetch, simulating an outdated or missing magic ref
pr_id = to_pr(env, pr)
assert pr_id.staging_id
# the all-zeroes sha can never be fetched, emulating the missing ref
pr_id.head = '0'*40
with prod:
prod.post_status('staging.a', 'success', 'legal/cla')
prod.post_status('staging.a', 'success', 'ci/runbot')
env.run_crons()
assert not pr_id.staging_id
assert pr_id.state == 'merged'
# check that the fw failed
assert not env['runbot_merge.pull_requests'].search([('source_id', '=', pr_id.id)]),\
"forward port should not have been created"
# check that the batch is still here and targeted for the future
req = env['forwardport.batches'].search([])
assert len(req) == 1
assert req.retry_after > datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
# reset retry_after
req.retry_after = '1900-01-01 01:01:01'
# add a real commit
with prod:
[c2] = prod.make_commits(a_head.id, Commit('y', tree={'x': '0'}))
assert c2 != c
# point the PR at a commit that exists in the repo but has no ref:
# the direct-fetch-by-commit fallback should now let the port succeed
pr_id.head = c2
env.run_crons()
fp_id = env['runbot_merge.pull_requests'].search([('source_id', '=', pr_id.id)])
assert fp_id
# the cherrypick process fetches the commits on the PR in order to know
# what they are (rather than e.g. diff the HEAD of its branch with the target)
# as a result it doesn't forwardport our fake, we'd have to reset the PR's
# branch for that to happen