[FIX] runbot_merge: handle missing patch commits

Commits can take some time to propagate through the network (I guess),
or human error can lead to the wrong commit being set.

Either way, because the entire thing was done using a single fetch in
`check=True` the cron job would fail entirely if any of the patch
commits was yet unavailable.

Update the updater to:

- fallback on individual fetches
- remove the patch from the set of applicable patch if we (still)
  can't find its commit

I'd have hoped `fetch` could retrieve whatever it found, but
apparently the server just crashes out when it doesn't find the commit
we ask for, and `fetch` doesn't update anything.

No linked issue because I apparently forgot to jot it down (and only
remembered about this issue with the #1063 patching issue) but this
was reported by mat last week (2025-02-21) when they were wondering
why one of their patches was taking a while:

- At 0832 patch was created by automated script.
- At 0947, an attempt to apply was made, the commit was not found.
- At 1126, a second attempt was made but an other patch had been
  created whose commit was not found, failing both.
- At 1255, there was a concurrency error ("cannot lock ref" on the
  target branch).
- Finally at 1427 the patch was applied.

All in all it took 6 hours to apply the patch, which is 3-4 staging
cycles.
This commit is contained in:
Xavier Morel 2025-02-25 14:00:45 +01:00
parent 5d08b79550
commit e6057afde7
2 changed files with 60 additions and 8 deletions

View File

@ -6,6 +6,7 @@ overhead, or FBI backdoors oh wait forget about that last one.
"""
from __future__ import annotations
import collections
import contextlib
import logging
import pathlib
@ -14,6 +15,7 @@ import subprocess
import tarfile
import tempfile
from dataclasses import dataclass
from datetime import timedelta
from email import message_from_string, policy
from email.utils import parseaddr
from typing import Union
@ -241,24 +243,42 @@ class Patch(models.Model):
def _apply_patches(self, target: Branch) -> bool:
patches = self.search([('target', '=', target.id)], order='id asc')
if not patches:
selected = len(patches)
if not selected:
return True
commits = {}
commits = collections.defaultdict(set)
repos = {}
for p in patches:
repos[p.repository] = git.get_local(p.repository).check(True)
commits.setdefault(p.repository, set())
if p.repository not in repos:
repos[p.repository] = git.get_local(p.repository).check(True)
if p.commit:
commits[p.repository].add(p.commit)
commits[p.repository].add(p)
for repo, ps in commits.items():
r = repos[repo].check(False)
remote = git.source_url(repo)
if r.fetch(
remote,
f"+refs/heads/{target.name}:refs/heads/{target.name}",
*(p.commit for p in ps),
no_tags=True
).returncode:
r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", no_tags=True)
for p in ps:
if r.fetch(remote, p.commit, no_tags=True).returncode:
patches -= p
p.message_post(body=f"Unable to apply patch: commit {p.commit} not found.")
# if some of the commits are not available (yet) schedule a new staging
# in case this is a low traffic period (so there might not be staging
# triggers every other minute
if len(patches) < selected:
self.env.ref('runbot_merge.staging_cron')._trigger(fields.Datetime.now() + timedelta(minutes=30))
for patch in patches:
patch.active = False
r = repos[patch.repository]
remote = git.source_url(patch.repository)
if (cs := commits.pop(patch.repository, None)) is not None:
# first time encountering a repo, fetch the branch and commits
r.fetch(remote, f"+refs/heads/{target.name}:refs/heads/{target.name}", *cs, no_tags=True)
_logger.info(
"Applying %s to %r (in %s)",

View File

@ -193,6 +193,38 @@ CONFLICT (content): Merge conflict in b<br></p>\
(False, '', [('active', 1, 0)]),
]
def test_apply_not_found(env, project, repo, users):
""" Github can take some time to propagate commits through the network,
resulting in patches getting not found and killing the application.
Commits which are not found should just be skipped (and trigger a new
staging?).
"""
with repo:
[c] = repo.make_commits("x", Commit("c", tree={"b": "2"}), ref="heads/abranch")
repo.delete_ref('heads/abranch')
p1 = env['runbot_merge.patch'].create({
'target': project.branch_ids.id,
'repository': project.repo_ids.id,
'commit': c,
})
# simulate commit which hasn't propagated yet
p2 = env['runbot_merge.patch'].create({
'target': project.branch_ids.id,
'repository': project.repo_ids.id,
'commit': "0123456789012345678901234567890123456789",
})
env.run_crons()
assert not p1.active
assert p2.active
assert p2.message_ids.mapped('body')[::-1] == [
"<p>Unstaged direct-application patch created</p>",
"<p>Unable to apply patch: commit 0123456789012345678901234567890123456789 not found.</p>",
]
def test_apply_udiff(env, project, repo, users):
p = env['runbot_merge.patch'].create({
'target': project.branch_ids.id,