From e6057afde7dc1837b9cb2aa6ce2a4a12971bbb52 Mon Sep 17 00:00:00 2001
From: Xavier Morel <xmo@odoo.com>
Date: Tue, 25 Feb 2025 14:00:45 +0100
Subject: [PATCH] [FIX] runbot_merge: handle missing patch commits

Commits can take some time to propagate through the network (I guess),
or human error can lead to the wrong commit being set.

Either way, because the entire thing was done using a single fetch in
`check=True` the cron job would fail entirely if any of the patch
commits was yet unavailable.

Update the updater to:

- fallback on individual fetches
- remove the patch from the set of applicable patch if we (still)
  can't find its commit

I'd have hoped `fetch` could retrieve whatever it found, but
apparently the server just crashes out when it doesn't find the commit
we ask for, and `fetch` doesn't update anything.

No linked issue because I apparently forgot to jot it down (and only
remembered about this issue with the #1063 patching issue) but this
was reported by mat last week (2025-02-21) when they were wondering
why one of their patches was taking a while:

- At 0832 patch was created by automated script.
- At 0947, an attempt to apply was made, the commit was not found.
- At 1126, a second attempt was made but an other patch had been
  created whose commit was not found, failing both.
- At 1255, there was a concurrency error ("cannot lock ref" on the
  target branch).
- Finally at 1427 the patch was applied.

All in all it took 6 hours to apply the patch, which is 3-4 staging
cycles.
---
 runbot_merge/models/patcher.py      | 36 ++++++++++++++++++++++-------
 runbot_merge/tests/test_patching.py | 32 +++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/runbot_merge/models/patcher.py b/runbot_merge/models/patcher.py
index 889075a5..feeff440 100644
--- a/runbot_merge/models/patcher.py
+++ b/runbot_merge/models/patcher.py
@@ -6,6 +6,7 @@ overhead, or FBI backdoors oh wait forget about that last one.
 """
 from __future__ import annotations
 
+import collections
 import contextlib
 import logging
 import pathlib
@@ -14,6 +15,7 @@ import subprocess
 import tarfile
 import tempfile
 from dataclasses import dataclass
+from datetime import timedelta
 from email import message_from_string, policy
 from email.utils import parseaddr
 from typing import Union
@@ -241,24 +243,42 @@ class Patch(models.Model):
 
     def _apply_patches(self, target: Branch) -> bool:
         patches = self.search([('target', '=', target.id)], order='id asc')
-        if not patches:
+        selected = len(patches)
+        if not selected:
             return True
 
-        commits = {}
+        commits = collections.defaultdict(set)
         repos = {}
         for p in patches:
-            repos[p.repository] = git.get_local(p.repository).check(True)
-            commits.setdefault(p.repository, set())
+            if p.repository not in repos:
+                repos[p.repository] = git.get_local(p.repository).check(True)
             if p.commit:
-                commits[p.repository].add(p.commit)
+                commits[p.repository].add(p)
+
+        for repo, ps in commits.items():
+            r = repos[repo].check(False)
+            remote = git.source_url(repo)
+            if r.fetch(
+                remote,
+                f"+refs/heads/{target.name}:refs/heads/{target.name}",
+                *(p.commit for p in ps),
+                no_tags=True
+            ).returncode:
+                r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", no_tags=True)
+                for p in ps:
+                    if r.fetch(remote, p.commit, no_tags=True).returncode:
+                        patches -= p
+                        p.message_post(body=f"Unable to apply patch: commit {p.commit} not found.")
+        # if some of the commits are not available (yet) schedule a new staging
+        # in case this is a low traffic period (so there might not be staging
+        # triggers every other minute
+        if len(patches) < selected:
+            self.env.ref('runbot_merge.staging_cron')._trigger(fields.Datetime.now() + timedelta(minutes=30))
 
         for patch in patches:
             patch.active = False
             r = repos[patch.repository]
             remote = git.source_url(patch.repository)
-            if (cs := commits.pop(patch.repository, None)) is not None:
-                # first time encountering a repo, fetch the branch and commits
-                r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", *cs, no_tags=True)
 
             _logger.info(
                 "Applying %s to %r (in %s)",
diff --git a/runbot_merge/tests/test_patching.py b/runbot_merge/tests/test_patching.py
index 0c03bf27..6867f2cf 100644
--- a/runbot_merge/tests/test_patching.py
+++ b/runbot_merge/tests/test_patching.py
@@ -193,6 +193,38 @@ CONFLICT (content): Merge conflict in b<br></p>\
         (False, '', [('active', 1, 0)]),
     ]
 
+def test_apply_not_found(env, project, repo, users):
+    """ Github can take some time to propagate commits through the network,
+    resulting in patches getting not found and killing the application.
+
+    Commits which are not found should just be skipped (and trigger a new
+    staging?).
+    """
+    with repo:
+        [c] = repo.make_commits("x", Commit("c", tree={"b": "2"}), ref="heads/abranch")
+        repo.delete_ref('heads/abranch')
+
+    p1 = env['runbot_merge.patch'].create({
+        'target': project.branch_ids.id,
+        'repository': project.repo_ids.id,
+        'commit': c,
+    })
+    # simulate commit which hasn't propagated yet
+    p2 = env['runbot_merge.patch'].create({
+        'target': project.branch_ids.id,
+        'repository': project.repo_ids.id,
+        'commit': "0123456789012345678901234567890123456789",
+    })
+
+    env.run_crons()
+
+    assert not p1.active
+    assert p2.active
+    assert p2.message_ids.mapped('body')[::-1] == [
+        "<p>Unstaged direct-application patch created</p>",
+        "<p>Unable to apply patch: commit 0123456789012345678901234567890123456789 not found.</p>",
+    ]
+
 def test_apply_udiff(env, project, repo, users):
     p = env['runbot_merge.patch'].create({
         'target': project.branch_ids.id,