From e6057afde7dc1837b9cb2aa6ce2a4a12971bbb52 Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Tue, 25 Feb 2025 14:00:45 +0100 Subject: [PATCH] [FIX] runbot_merge: handle missing patch commits Commits can take some time to propagate through the network (I guess), or human error can lead to the wrong commit being set. Either way, because the entire thing was done using a single fetch in `check=True` the cron job would fail entirely if any of the patch commits was yet unavailable. Update the updater to: - fallback on individual fetches - remove the patch from the set of applicable patch if we (still) can't find its commit I'd have hoped `fetch` could retrieve whatever it found, but apparently the server just crashes out when it doesn't find the commit we ask for, and `fetch` doesn't update anything. No linked issue because I apparently forgot to jot it down (and only remembered about this issue with the #1063 patching issue) but this was reported by mat last week (2025-02-21) when they were wondering why one of their patches was taking a while: - At 0832 patch was created by automated script. - At 0947, an attempt to apply was made, the commit was not found. - At 1126, a second attempt was made but an other patch had been created whose commit was not found, failing both. - At 1255, there was a concurrency error ("cannot lock ref" on the target branch). - Finally at 1427 the patch was applied. All in all it took 6 hours to apply the patch, which is 3-4 staging cycles. --- runbot_merge/models/patcher.py | 36 ++++++++++++++++++++++------- runbot_merge/tests/test_patching.py | 32 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/runbot_merge/models/patcher.py b/runbot_merge/models/patcher.py index 889075a5..feeff440 100644 --- a/runbot_merge/models/patcher.py +++ b/runbot_merge/models/patcher.py @@ -6,6 +6,7 @@ overhead, or FBI backdoors oh wait forget about that last one. """ from __future__ import annotations +import collections import contextlib import logging import pathlib @@ -14,6 +15,7 @@ import subprocess import tarfile import tempfile from dataclasses import dataclass +from datetime import timedelta from email import message_from_string, policy from email.utils import parseaddr from typing import Union @@ -241,24 +243,42 @@ class Patch(models.Model): def _apply_patches(self, target: Branch) -> bool: patches = self.search([('target', '=', target.id)], order='id asc') - if not patches: + selected = len(patches) + if not selected: return True - commits = {} + commits = collections.defaultdict(set) repos = {} for p in patches: - repos[p.repository] = git.get_local(p.repository).check(True) - commits.setdefault(p.repository, set()) + if p.repository not in repos: + repos[p.repository] = git.get_local(p.repository).check(True) if p.commit: - commits[p.repository].add(p.commit) + commits[p.repository].add(p) + + for repo, ps in commits.items(): + r = repos[repo].check(False) + remote = git.source_url(repo) + if r.fetch( + remote, + f"+refs/heads/{target.name}:refs/heads/{target.name}", + *(p.commit for p in ps), + no_tags=True + ).returncode: + r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", no_tags=True) + for p in ps: + if r.fetch(remote, p.commit, no_tags=True).returncode: + patches -= p + p.message_post(body=f"Unable to apply patch: commit {p.commit} not found.") + # if some of the commits are not available (yet) schedule a new staging + # in case this is a low traffic period (so there might not be staging + # triggers every other minute + if len(patches) < selected: + self.env.ref('runbot_merge.staging_cron')._trigger(fields.Datetime.now() + timedelta(minutes=30)) for patch in patches: patch.active = False r = repos[patch.repository] remote = git.source_url(patch.repository) - if (cs := commits.pop(patch.repository, None)) is not None: - # first time encountering a repo, fetch the branch and commits - r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", *cs, no_tags=True) _logger.info( "Applying %s to %r (in %s)", diff --git a/runbot_merge/tests/test_patching.py b/runbot_merge/tests/test_patching.py index 0c03bf27..6867f2cf 100644 --- a/runbot_merge/tests/test_patching.py +++ b/runbot_merge/tests/test_patching.py @@ -193,6 +193,38 @@ CONFLICT (content): Merge conflict in b

\ (False, '', [('active', 1, 0)]), ] +def test_apply_not_found(env, project, repo, users): + """ Github can take some time to propagate commits through the network, + resulting in patches getting not found and killing the application. + + Commits which are not found should just be skipped (and trigger a new + staging?). + """ + with repo: + [c] = repo.make_commits("x", Commit("c", tree={"b": "2"}), ref="heads/abranch") + repo.delete_ref('heads/abranch') + + p1 = env['runbot_merge.patch'].create({ + 'target': project.branch_ids.id, + 'repository': project.repo_ids.id, + 'commit': c, + }) + # simulate commit which hasn't propagated yet + p2 = env['runbot_merge.patch'].create({ + 'target': project.branch_ids.id, + 'repository': project.repo_ids.id, + 'commit': "0123456789012345678901234567890123456789", + }) + + env.run_crons() + + assert not p1.active + assert p2.active + assert p2.message_ids.mapped('body')[::-1] == [ + "

Unstaged direct-application patch created

", + "

Unable to apply patch: commit 0123456789012345678901234567890123456789 not found.

", + ] + def test_apply_udiff(env, project, repo, users): p = env['runbot_merge.patch'].create({ 'target': project.branch_ids.id,