From 86a1b5523e91f1c2a1e08d781f73b8906fefaa38 Mon Sep 17 00:00:00 2001
From: Xavier Morel
Date: Wed, 16 Aug 2023 15:31:42 +0200
Subject: [PATCH] [MOV] runbot_merge: all the staging creation code to a
 separate module

Move *almost* all of the staging code to free functions in a separate
module, and type it extensively.

The only bits which didn't move are:

- the entry point (the cron hook), because it has to be a model method
  in order to be called
- the `_build_merge_message` method, because it needs to be overridable

There's also a bit of an import mess: the cron and
`_build_merge_message` need to call into the new module, but the new
module wants the types they belong to, so the dependency is somewhat
circular.
---
 forwardport/models/project.py          |   3 +-
 runbot_merge/github.py                 |  45 +-
 runbot_merge/models/__init__.py        |   1 +
 runbot_merge/models/project.py         |   7 +-
 runbot_merge/models/pull_requests.py   | 676 +----------------------
 runbot_merge/models/stagings_create.py | 727 ++++++++++++++++++++++++
 runbot_merge/tests/test_multirepo.py   |   8 -
 7 files changed, 791 insertions(+), 676 deletions(-)
 create mode 100644 runbot_merge/models/stagings_create.py

diff --git a/forwardport/models/project.py b/forwardport/models/project.py
index 573d0086..b71a08f5 100644
--- a/forwardport/models/project.py
+++ b/forwardport/models/project.py
@@ -37,6 +37,7 @@ from odoo.tools.sql import reverse_order
 from odoo.tools.appdirs import user_cache_dir
 from odoo.addons.runbot_merge import git, utils
 from odoo.addons.runbot_merge.models.pull_requests import RPLUS
+from odoo.addons.runbot_merge.models.stagings_create import Message
 
 footer = '\nMore info at https://github.com/odoo/odoo/wiki/Mergebot#forward-port\n'
 
@@ -1064,7 +1065,7 @@ stderr:
 
     def _make_fp_message(self, commit):
         cmap = json.loads(self.commits_map)
-        msg = self._parse_commit_message(commit['commit']['message'])
+        msg = Message.from_message(commit['commit']['message'])
         # write the *merged* commit as "original", not the PR's
         msg.headers['x-original-commit'] = cmap.get(commit['sha'], commit['sha'])
         # don't stringify so caller can still perform alterations
diff --git a/runbot_merge/github.py b/runbot_merge/github.py
index ed3950b0..3fdec209 100644
--- a/runbot_merge/github.py
+++ b/runbot_merge/github.py
@@ -8,6 +8,7 @@ import pathlib
 import pprint
 import time
 import unicodedata
+from typing import Iterable, List, TypedDict, Literal
 
 import requests
 import werkzeug.urls
@@ -47,6 +48,42 @@ def _init_gh_logger():
 if odoo.netsvc._logger_init:
     _init_gh_logger()
 
+SimpleUser = TypedDict('SimpleUser', {
+    'login': str,
+    'url': str,
+    'type': Literal['User', 'Organization'],
+})
+Authorship = TypedDict('Authorship', {
+    'name': str,
+    'email': str,
+})
+Commit = TypedDict('Commit', {
+    'tree': str,
+    'url': str,
+    'message': str,
+    # optional when creating a commit
+    'author': Authorship,
+    'committer': Authorship,
+    'comments_count': int,
+})
+CommitLink = TypedDict('CommitLink', {
+    'html_url': str,
+    'sha': str,
+    'url': str,
+})
+PrCommit = TypedDict('PrCommit', {
+    'url': str,
+    'sha': str,
+    'commit': Commit,
+    # optional when creating a commit (in which case it uses the current user)
+    'author': SimpleUser,
+    'committer': SimpleUser,
+    'parents': List[CommitLink],
+    # not actually true but we're smuggling stuff via that key
+    'new_tree': str,
+})
+
+
 GH_LOG_PATTERN = """=> {method} {path}{qs}{body}
 
 <= {r.status_code} {r.reason}
@@ -137,7 +174,7 @@ class GH(object):
             r.raise_for_status()
             return r.json()
 
-    def head(self, branch):
+    def head(self, branch: str) -> str:
         d = 
utils.backoff( lambda: self('get', 'git/refs/heads/{}'.format(branch)).json(), exc=requests.HTTPError @@ -383,14 +420,14 @@ class GH(object): if not r.links.get('next'): return - def commits_lazy(self, pr): + def commits_lazy(self, pr: int) -> Iterable[PrCommit]: for page in itertools.count(1): - r = self('get', 'pulls/{}/commits'.format(pr), params={'page': page}) + r = self('get', f'pulls/{pr}/commits', params={'page': page}) yield from r.json() if not r.links.get('next'): return - def commits(self, pr): + def commits(self, pr: int) -> List[PrCommit]: """ Returns a PR's commits oldest first (that's what GH does & is what we want) """ diff --git a/runbot_merge/models/__init__.py b/runbot_merge/models/__init__.py index 9ca405e4..6cbd92cf 100644 --- a/runbot_merge/models/__init__.py +++ b/runbot_merge/models/__init__.py @@ -3,5 +3,6 @@ from . import res_partner from . import project from . import pull_requests from . import project_freeze +from . import stagings_create from . import staging_cancel from . import crons diff --git a/runbot_merge/models/project.py b/runbot_merge/models/project.py index 8578aeb5..67222ea8 100644 --- a/runbot_merge/models/project.py +++ b/runbot_merge/models/project.py @@ -32,8 +32,9 @@ class Project(models.Model): required=True, default="hanson", # mergebot du bot du bot du~ help="Prefix (~bot name) used when sending commands from PR " - "comments e.g. [hanson retry] or [hanson r+ p=1]" + "comments e.g. [hanson retry] or [hanson r+ p=1]", ) + github_name = fields.Char(related='github_prefix') batch_limit = fields.Integer( default=8, group_operator=None, help="Maximum number of PRs staged together") @@ -64,6 +65,8 @@ class Project(models.Model): self.env.cr.commit() def _create_stagings(self, commit=False): + from .stagings_create import try_staging + # look up branches which can be staged on and have no active staging for branch in self.env['runbot_merge.branch'].search([ ('active_staging_id', '=', False), @@ -74,7 +77,7 @@ class Project(models.Model): with self.env.cr.savepoint(), \ sentry_sdk.start_span(description=f'create staging {branch.name}') as span: span.set_tag('branch', branch.name) - branch.try_staging() + try_staging(branch) except Exception: _logger.exception("Failed to create staging for branch %r", branch.name) else: diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index 0005704b..a392b880 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -1,37 +1,24 @@ -# coding: utf-8 - import ast -import base64 import collections import contextlib import datetime -import io import itertools import json import logging -import os import pprint import re import time +from typing import Optional, Union -from difflib import Differ -from itertools import takewhile -from typing import Optional - -import requests import sentry_sdk import werkzeug -from werkzeug.datastructures import Headers from odoo import api, fields, models, tools from odoo.exceptions import ValidationError from odoo.osv import expression -from odoo.tools import OrderedSet from .. 
import github, exceptions, controllers, utils -WAIT_FOR_VISIBILITY = [10, 10, 10, 10] - _logger = logging.getLogger(__name__) @@ -67,6 +54,8 @@ class Repository(models.Model): _name = _description = 'runbot_merge.repository' _order = 'sequence, id' + id: int + sequence = fields.Integer(default=50, group_operator=None) name = fields.Char(required=True) project_id = fields.Many2one('runbot_merge.project', required=True, index=True) @@ -98,7 +87,7 @@ All substitutions are tentatively applied sequentially to the input. vals['status_ids'] = [(5, 0, {})] + [(0, 0, {'context': c}) for c in st.split(',')] return super().write(vals) - def github(self, token_field='github_token'): + def github(self, token_field='github_token') -> github.GH: return github.GH(self.project_id[token_field], self.name) def _auto_init(self): @@ -245,6 +234,8 @@ class Branch(models.Model): _name = _description = 'runbot_merge.branch' _order = 'sequence, name' + id: int + name = fields.Char(required=True) project_id = fields.Many2one('runbot_merge.project', required=True, index=True) @@ -298,235 +289,6 @@ class Branch(models.Model): for b in self: b.active_staging_id = b.with_context(active_test=True).staging_ids - def _ready(self): - self.env.cr.execute(""" - SELECT - min(pr.priority) as priority, - array_agg(pr.id) AS match - FROM runbot_merge_pull_requests pr - WHERE pr.target = any(%s) - -- exclude terminal states (so there's no issue when - -- deleting branches & reusing labels) - AND pr.state != 'merged' - AND pr.state != 'closed' - GROUP BY - pr.target, - CASE - WHEN pr.label SIMILAR TO '%%:patch-[[:digit:]]+' - THEN pr.id::text - ELSE pr.label - END - HAVING - bool_or(pr.state = 'ready') or bool_or(pr.priority = 0) - ORDER BY min(pr.priority), min(pr.id) - """, [self.ids]) - browse = self.env['runbot_merge.pull_requests'].browse - return [(p, browse(ids)) for p, ids in self.env.cr.fetchall()] - - def _stageable(self): - return [ - (p, prs) - for p, prs in self._ready() - if not any(prs.mapped('blocked')) - ] - - def try_staging(self): - """ Tries to create a staging if the current branch does not already - have one. Returns None if the branch already has a staging or there - is nothing to stage, the newly created staging otherwise. - """ - logger = _logger.getChild('cron') - - logger.info( - "Checking %s (%s) for staging: %s, skip? %s", - self, self.name, - self.active_staging_id, - bool(self.active_staging_id) - ) - if self.active_staging_id: - return - - rows = self._stageable() - priority = rows[0][0] if rows else -1 - if priority == 0 or priority == 1: - # p=0 take precedence over all else - # p=1 allows merging a fix inside / ahead of a split (e.g. 
branch - # is broken or widespread false positive) without having to cancel - # the existing staging - batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] - elif self.split_ids: - split_ids = self.split_ids[0] - logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs')) - batched_prs = [batch.prs for batch in split_ids.batch_ids] - split_ids.unlink() - else: # p=2 - batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] - - if not batched_prs: - return - - Batch = self.env['runbot_merge.batch'] - staged = Batch - original_heads = {} - meta = {repo: {} for repo in self.project_id.repo_ids.having_branch(self)} - for repo, it in meta.items(): - gh = it['gh'] = repo.github() - it['head'] = original_heads[repo] = gh.head(self.name) - # create tmp staging branch - gh.set_ref('tmp.{}'.format(self.name), it['head']) - - batch_limit = self.project_id.batch_limit - first = True - for batch in batched_prs: - if len(staged) >= batch_limit: - break - try: - staged |= Batch.stage(meta, batch) - except exceptions.MergeError as e: - pr = e.args[0] - _logger.exception("Failed to merge %s into staging branch", pr.display_name) - if first or isinstance(e, exceptions.Unmergeable): - if len(e.args) > 1 and e.args[1]: - reason = e.args[1] - else: - reason = e.__cause__ or e.__context__ - # if the reason is a json document, assume it's a github - # error and try to extract the error message to give it to - # the user - with contextlib.suppress(Exception): - reason = json.loads(str(reason))['message'].lower() - - pr.state = 'error' - self.env.ref('runbot_merge.pr.merge.failed')._send( - repository=pr.repository, - pull_request=pr.number, - format_args= {'pr': pr, 'reason': reason, 'exc': e}, - ) - else: - first = False - - if not staged: - return - - heads = [] - heads_map = {} - commits = [] - for repo, it in meta.items(): - tree = it['gh'].commit(it['head'])['tree'] - # ensures staging branches are unique and always - # rebuilt - r = base64.b64encode(os.urandom(12)).decode('ascii') - trailer = '' - if heads_map: - trailer = '\n'.join( - 'Runbot-dependency: %s:%s' % (repo, h) - for repo, h in heads_map.items() - ) - dummy_head = {'sha': it['head']} - if it['head'] == original_heads[repo]: - # if the repo has not been updated by the staging, create a - # dummy commit to force rebuild - dummy_head = it['gh']('post', 'git/commits', json={ - 'message': '''force rebuild - -uniquifier: %s -For-Commit-Id: %s -%s''' % (r, it['head'], trailer), - 'tree': tree['sha'], - 'parents': [it['head']], - }).json() - - # special case if the two commits are identical because otherwise - # postgres raises error "ensure that no rows proposed for insertion - # within the same command have duplicate constained values" - if it['head'] == dummy_head['sha']: - self.env.cr.execute( - "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " - "VALUES (%s, true, '{}') " - "ON CONFLICT (sha) DO UPDATE SET to_check=true " - "RETURNING id", - [it['head']] - ) - [commit] = [head] = self.env.cr.fetchone() - else: - self.env.cr.execute( - "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " - "VALUES (%s, false, '{}'), (%s, true, '{}') " - "ON CONFLICT (sha) DO UPDATE SET to_check=true " - "RETURNING id", - [it['head'], dummy_head['sha']] - ) - ([commit], [head]) = self.env.cr.fetchall() - - heads_map[repo.name] = dummy_head['sha'] - heads.append(fields.Command.create({ - 'repository_id': repo.id, - 'commit_id': head, - })) - 
commits.append(fields.Command.create({ - 'repository_id': repo.id, - 'commit_id': commit, - })) - - # create actual staging object - st = self.env['runbot_merge.stagings'].create({ - 'target': self.id, - 'batch_ids': [(4, batch.id, 0) for batch in staged], - 'heads': heads, - 'commits': commits, - }) - # create staging branch from tmp - token = self.project_id.github_token - for r in self.project_id.repo_ids.having_branch(self): - it = meta[r] - staging_head = heads_map[r.name] - _logger.info( - "%s: create staging for %s:%s at %s", - self.project_id.name, r.name, self.name, - staging_head - ) - refname = 'staging.{}'.format(self.name) - it['gh'].set_ref(refname, staging_head) - - i = itertools.count() - @utils.backoff(delays=WAIT_FOR_VISIBILITY, exc=TimeoutError) - def wait_for_visibility(): - if self._check_visibility(r, refname, staging_head, token): - _logger.info( - "[repo] updated %s:%s to %s: ok (at %d/%d)", - r.name, refname, staging_head, - next(i), len(WAIT_FOR_VISIBILITY) - ) - return - _logger.warning( - "[repo] updated %s:%s to %s: failed (at %d/%d)", - r.name, refname, staging_head, - next(i), len(WAIT_FOR_VISIBILITY) - ) - raise TimeoutError("Staged head not updated after %d seconds" % sum(WAIT_FOR_VISIBILITY)) - - logger.info("Created staging %s (%s) to %s", st, ', '.join( - '%s[%s]' % (batch, batch.prs) - for batch in staged - ), st.target.name) - return st - - def _check_visibility(self, repo, branch_name, expected_head, token): - """ Checks the repository actual to see if the new / expected head is - now visible - """ - # v1 protocol provides URL for ref discovery: https://github.com/git/git/blob/6e0cc6776106079ed4efa0cc9abace4107657abf/Documentation/technical/http-protocol.txt#L187 - # for more complete client this is also the capabilities discovery and - # the "entry point" for the service - url = 'https://github.com/{}.git/info/refs?service=git-upload-pack'.format(repo.name) - with requests.get(url, stream=True, auth=(token, '')) as resp: - if not resp.ok: - return False - for head, ref in parse_refs_smart(resp.raw.read): - if ref != ('refs/heads/' + branch_name): - continue - return head == expected_head - return False ACL = collections.namedtuple('ACL', 'is_admin is_reviewer is_author') class PullRequests(models.Model): @@ -534,6 +296,9 @@ class PullRequests(models.Model): _order = 'number desc' _rec_name = 'number' + id: int + display_name: str + target = fields.Many2one('runbot_merge.branch', required=True, index=True) repository = fields.Many2one('runbot_merge.repository', required=True) # NB: check that target & repo have same project & provide project related? @@ -1265,35 +1030,14 @@ class PullRequests(models.Model): if commit: self.env.cr.commit() - def _parse_commit_message(self, message): - """ Parses a commit message to split out the pseudo-headers (which - should be at the end) from the body, and serialises back with a - predefined pseudo-headers ordering. 
- """ - return Message.from_message(message) - - def _is_mentioned(self, message, *, full_reference=False): - """Returns whether ``self`` is mentioned in ``message``` - - :param str | PullRequest message: - :param bool full_reference: whether the repository name must be present - :rtype: bool - """ - if full_reference: - pattern = fr'\b{re.escape(self.display_name)}\b' - else: - repository = self.repository.name # .replace('/', '\\/') - pattern = fr'( |\b{repository})#{self.number}\b' - return bool(re.search(pattern, message if isinstance(message, str) else message.message)) - - def _build_merge_message(self, message, related_prs=()): + def _build_merge_message(self, message: Union['PullRequests', str], related_prs=()) -> 'Message': # handle co-authored commits (https://help.github.com/articles/creating-a-commit-with-multiple-authors/) - m = self._parse_commit_message(message) - if not self._is_mentioned(message): - m.body += '\n\ncloses {pr.display_name}'.format(pr=self) + m = Message.from_message(message) + if not is_mentioned(message, self): + m.body += f'\n\ncloses {self.display_name}' for r in related_prs: - if not r._is_mentioned(message, full_reference=True): + if not is_mentioned(message, r, full_reference=True): m.headers.add('Related', r.display_name) if self.reviewed_by: @@ -1301,190 +1045,6 @@ class PullRequests(models.Model): return m - def _add_self_references(self, commits): - """Adds a footer reference to ``self`` to all ``commits`` if they don't - already refer to the PR. - """ - for c in (c['commit'] for c in commits): - if not self._is_mentioned(c['message']): - m = self._parse_commit_message(c['message']) - m.headers.pop('Part-Of', None) - m.headers.add('Part-Of', self.display_name) - c['message'] = str(m) - - def _stage(self, gh, target, related_prs=()): - # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1] - _, prdict = gh.pr(self.number) - commits = prdict['commits'] - method = self.merge_method or ('rebase-ff' if commits == 1 else None) - if commits > 50 and method.startswith('rebase'): - raise exceptions.Unmergeable(self, "Rebasing 50 commits is too much.") - if commits > 250: - raise exceptions.Unmergeable( - self, "Merging PRs of 250 or more commits is not supported " - "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" - ) - pr_commits = gh.commits(self.number) - for c in pr_commits: - if not (c['commit']['author']['email'] and c['commit']['committer']['email']): - raise exceptions.Unmergeable( - self, - f"All commits must have author and committer email, " - f"missing email on {c['sha']} indicates the authorship is " - f"most likely incorrect." 
- ) - - # sync and signal possibly missed updates - invalid = {} - diff = [] - pr_head = pr_commits[-1]['sha'] - if self.head != pr_head: - invalid['head'] = pr_head - diff.append(('Head', self.head, pr_head)) - - if self.target.name != prdict['base']['ref']: - branch = self.env['runbot_merge.branch'].with_context(active_test=False).search([ - ('name', '=', prdict['base']['ref']), - ('project_id', '=', self.repository.project_id.id), - ]) - if not branch: - self.unlink() - raise exceptions.Unmergeable(self, "While staging, found this PR had been retargeted to an un-managed branch.") - invalid['target'] = branch.id - diff.append(('Target branch', self.target.name, branch.name)) - - if self.squash != commits == 1: - invalid['squash'] = commits == 1 - diff.append(('Single commit', self.squash, commits == 1)) - - msg = utils.make_message(prdict) - if self.message != msg: - invalid['message'] = msg - diff.append(('Message', self.message, msg)) - - if invalid: - self.write({**invalid, 'state': 'opened', 'head': pr_head}) - raise exceptions.Mismatch(invalid, diff) - - if self.reviewed_by and self.reviewed_by.name == self.reviewed_by.github_login: - # XXX: find other trigger(s) to sync github name? - gh_name = gh.user(self.reviewed_by.github_login)['name'] - if gh_name: - self.reviewed_by.name = gh_name - - # NOTE: lost merge v merge/copy distinction (head being - # a merge commit reused instead of being re-merged) - return method, getattr(self, '_stage_' + method.replace('-', '_'))( - gh, target, pr_commits, related_prs=related_prs) - - def _stage_squash(self, gh, target, commits, related_prs=()): - msg = self._build_merge_message(self, related_prs=related_prs) - authorship = {} - - authors = { - (c['commit']['author']['name'], c['commit']['author']['email']) - for c in commits - } - if len(authors) == 1: - name, email = authors.pop() - authorship['author'] = {'name': name, 'email': email} - else: - msg.headers.extend(sorted( - ('Co-Authored-By', "%s <%s>" % author) - for author in authors - )) - - committers = { - (c['commit']['committer']['name'], c['commit']['committer']['email']) - for c in commits - } - if len(committers) == 1: - name, email = committers.pop() - authorship['committer'] = {'name': name, 'email': email} - # should committers also be added to co-authors? 
- - original_head = gh.head(target) - merge_tree = gh.merge(self.head, target, 'temp merge')['tree']['sha'] - head = gh('post', 'git/commits', json={ - **authorship, - 'message': str(msg), - 'tree': merge_tree, - 'parents': [original_head], - }).json()['sha'] - gh.set_ref(target, head) - - commits_map = {c['sha']: head for c in commits} - commits_map[''] = head - self.commits_map = json.dumps(commits_map) - - return head - - def _stage_rebase_ff(self, gh, target, commits, related_prs=()): - # updates head commit with PR number (if necessary) then rebases - # on top of target - msg = self._build_merge_message(commits[-1]['commit']['message'], related_prs=related_prs) - commits[-1]['commit']['message'] = str(msg) - self._add_self_references(commits[:-1]) - head, mapping = gh.rebase(self.number, target, commits=commits) - self.commits_map = json.dumps({**mapping, '': head}) - return head - - def _stage_rebase_merge(self, gh, target, commits, related_prs=()): - self._add_self_references(commits) - h, mapping = gh.rebase(self.number, target, reset=True, commits=commits) - msg = self._build_merge_message(self, related_prs=related_prs) - merge_head = gh.merge(h, target, str(msg))['sha'] - self.commits_map = json.dumps({**mapping, '': merge_head}) - return merge_head - - def _stage_merge(self, gh, target, commits, related_prs=()): - pr_head = commits[-1] # oldest to newest - base_commit = None - head_parents = {p['sha'] for p in pr_head['parents']} - if len(head_parents) > 1: - # look for parent(s?) of pr_head not in PR, means it's - # from target (so we merged target in pr) - merge = head_parents - {c['sha'] for c in commits} - external_parents = len(merge) - if external_parents > 1: - raise exceptions.Unmergeable( - "The PR head can only have one parent from the base branch " - "(not part of the PR itself), found %d: %s" % ( - external_parents, - ', '.join(merge) - )) - if external_parents == 1: - [base_commit] = merge - - commits_map = {c['sha']: c['sha'] for c in commits} - if base_commit: - # replicate pr_head with base_commit replaced by - # the current head - original_head = gh.head(target) - merge_tree = gh.merge(pr_head['sha'], target, 'temp merge')['tree']['sha'] - new_parents = [original_head] + list(head_parents - {base_commit}) - msg = self._build_merge_message(pr_head['commit']['message'], related_prs=related_prs) - copy = gh('post', 'git/commits', json={ - 'message': str(msg), - 'tree': merge_tree, - 'author': pr_head['commit']['author'], - 'committer': pr_head['commit']['committer'], - 'parents': new_parents, - }).json() - gh.set_ref(target, copy['sha']) - # merge commit *and old PR head* map to the pr head replica - commits_map[''] = commits_map[pr_head['sha']] = copy['sha'] - self.commits_map = json.dumps(commits_map) - return copy['sha'] - else: - # otherwise do a regular merge - msg = self._build_merge_message(self) - merge_head = gh.merge(self.head, target, str(msg))['sha'] - # and the merge commit is the normal merge head - commits_map[''] = merge_head - self.commits_map = json.dumps(commits_map) - return merge_head - def unstage(self, reason, *args): """ If the PR is staged, cancel the staging. 
If the PR is split and waiting, remove it from the split (possibly delete the split entirely) @@ -2241,82 +1801,6 @@ class Batch(models.Model): raise ValidationError("All prs of a batch must have different target repositories, got a duplicate %s on %s" % (pr.repository, pr)) repos |= pr.repository - def stage(self, meta, prs): - """ - Updates meta[*][head] on success - - :return: () or Batch object (if all prs successfully staged) - """ - new_heads = {} - pr_fields = self.env['runbot_merge.pull_requests']._fields - for pr in prs: - gh = meta[pr.repository]['gh'] - - _logger.info( - "Staging pr %s for target %s; method=%s", - pr.display_name, pr.target.name, - pr.merge_method or (pr.squash and 'single') or None - ) - - target = 'tmp.{}'.format(pr.target.name) - original_head = gh.head(target) - try: - try: - method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr)) - _logger.info( - "Staged pr %s to %s by %s: %s -> %s", - pr.display_name, pr.target.name, method, - original_head, new_heads[pr] - ) - except Exception: - # reset the head which failed, as rebase() may have partially - # updated it (despite later steps failing) - gh.set_ref(target, original_head) - # then reset every previous update - for to_revert in new_heads.keys(): - it = meta[to_revert.repository] - it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head']) - raise - except github.MergeError as e: - raise exceptions.MergeError(pr) from e - except exceptions.Mismatch as e: - def format_items(items): - """ Bit of a pain in the ass because difflib really wants - all lines to be newline-terminated, but not all values are - actual lines, and also needs to split multiline values. - """ - for name, value in items: - yield name + ':\n' - if not value.endswith('\n'): - value += '\n' - yield from value.splitlines(keepends=True) - yield '\n' - - old = list(format_items((n, str(v)) for n, v, _ in e.args[1])) - new = list(format_items((n, str(v)) for n, _, v in e.args[1])) - diff = ''.join(Differ().compare(old, new)) - _logger.info("data mismatch on %s:\n%s", pr.display_name, diff) - self.env.ref('runbot_merge.pr.staging.mismatch')._send( - repository=pr.repository, - pull_request=pr.number, - format_args={ - 'pr': pr, - 'mismatch': ', '.join(pr_fields[f].string for f in e.args[0]), - 'diff': diff, - 'unchecked': ', '.join(pr_fields[f].string for f in UNCHECKABLE) - } - ) - return self.env['runbot_merge.batch'] - - # update meta to new heads - for pr, head in new_heads.items(): - meta[pr.repository]['head'] = head - return self.create({ - 'target': prs[0].target.id, - 'prs': [(4, pr.id, 0) for pr in prs], - }) - -UNCHECKABLE = ['merge_method', 'overrides', 'draft'] class FetchJob(models.Model): _name = _description = 'runbot_merge.fetch_job' @@ -2372,134 +1856,4 @@ def to_status(v): return v return {'state': v, 'target_url': None, 'description': None} -refline = re.compile(rb'([\da-f]{40}) ([^\0\n]+)(\0.*)?\n?$') -ZERO_REF = b'0'*40 -def parse_refs_smart(read): - """ yields pkt-line data (bytes), or None for flush lines """ - def read_line(): - length = int(read(4), 16) - if length == 0: - return None - return read(length - 4) - - header = read_line() - assert header.rstrip() == b'# service=git-upload-pack', header - assert read_line() is None, "failed to find first flush line" - # read lines until second delimiter - for line in iter(read_line, None): - if line.startswith(ZERO_REF): - break # empty list (no refs) - m = refline.match(line) - yield m[1].decode(), m[2].decode() - -BREAK = re.compile(r''' - ^ - [ 
]{0,3} # 0-3 spaces of indentation - # followed by a sequence of three or more matching -, _, or * characters, - # each followed optionally by any number of spaces or tabs - # so needs to start with a _, - or *, then have at least 2 more such - # interspersed with any number of spaces or tabs - ([*_-]) - ([ \t]*\1){2,} - [ \t]* - $ -''', flags=re.VERBOSE) -SETEX_UNDERLINE = re.compile(r''' - ^ - [ ]{0,3} # no more than 3 spaces indentation - [-=]+ # a sequence of = characters or a sequence of - characters - [ ]* # any number of trailing spaces - $ - # we don't care about "a line containing a single -" because we want to - # disambiguate SETEX headings from thematic breaks, and thematic breaks have - # 3+ -. Doesn't look like GH interprets `- - -` as a line so yay... -''', flags=re.VERBOSE) -HEADER = re.compile('^([A-Za-z-]+): (.*)$') -class Message: - @classmethod - def from_message(cls, msg): - in_headers = True - maybe_setex = None - # creating from PR message -> remove content following break - msg, handle_break = (msg, False) if isinstance(msg, str) else (msg.message, True) - headers = [] - body = [] - # don't process the title (first line) of the commit message - msg = msg.splitlines() - for line in reversed(msg[1:]): - if maybe_setex: - # NOTE: actually slightly more complicated: it's a SETEX heading - # only if preceding line(s) can be interpreted as a - # paragraph so e.g. a title followed by a line of dashes - # would indeed be a break, but this should be good enough - # for now, if we need more we'll need a full-blown - # markdown parser probably - if line: # actually a SETEX title -> add underline to body then process current - body.append(maybe_setex) - else: # actually break, remove body then process current - body = [] - maybe_setex = None - - if not line: - if not in_headers and body and body[-1]: - body.append(line) - continue - - if handle_break and BREAK.match(line): - if SETEX_UNDERLINE.match(line): - maybe_setex = line - else: - body = [] - continue - - h = HEADER.match(line) - if h: - # c-a-b = special case from an existing test, not sure if actually useful? 
- if in_headers or h.group(1).lower() == 'co-authored-by': - headers.append(h.groups()) - continue - - body.append(line) - in_headers = False - - # if there are non-title body lines, add a separation after the title - if body and body[-1]: - body.append('') - body.append(msg[0]) - return cls('\n'.join(reversed(body)), Headers(reversed(headers))) - - def __init__(self, body, headers=None): - self.body = body - self.headers = headers or Headers() - - def __setattr__(self, name, value): - # make sure stored body is always stripped - if name == 'body': - value = value and value.strip() - super().__setattr__(name, value) - - def __str__(self): - if not self.headers: - return self.body + '\n' - - with io.StringIO(self.body) as msg: - msg.write(self.body) - msg.write('\n\n') - # https://git.wiki.kernel.org/index.php/CommitMessageConventions - # seems to mostly use capitalised names (rather than title-cased) - keys = list(OrderedSet(k.capitalize() for k in self.headers.keys())) - # c-a-b must be at the very end otherwise github doesn't see it - keys.sort(key=lambda k: k == 'Co-authored-by') - for k in keys: - for v in self.headers.getlist(k): - msg.write(k) - msg.write(': ') - msg.write(v) - msg.write('\n') - - return msg.getvalue() - - def sub(self, pattern, repl, *, flags): - """ Performs in-place replacements on the body - """ - self.body = re.sub(pattern, repl, self.body, flags=flags) +from .stagings_create import is_mentioned, Message diff --git a/runbot_merge/models/stagings_create.py b/runbot_merge/models/stagings_create.py new file mode 100644 index 00000000..b00342dc --- /dev/null +++ b/runbot_merge/models/stagings_create.py @@ -0,0 +1,727 @@ +import base64 +import contextlib +import dataclasses +import io +import json +import logging +import os +import re +import tempfile +from difflib import Differ +from itertools import count, takewhile +from pathlib import Path +from typing import Dict, Union, Optional, Literal, Callable, Iterator, Tuple, List, TypeAlias + +import requests +from werkzeug.datastructures import Headers + +from odoo import api, models, fields +from odoo.tools import OrderedSet +from odoo.tools.appdirs import user_cache_dir +from .pull_requests import Branch, Stagings, PullRequests, Repository, Batch +from .. import exceptions, utils, github, git + +WAIT_FOR_VISIBILITY = [10, 10, 10, 10] +_logger = logging.getLogger(__name__) + + +class Project(models.Model): + _inherit = 'runbot_merge.project' + + +@dataclasses.dataclass(slots=True) +class StagingSlice: + """Staging state for a single repository: + + - gh is a cache for the github proxy object (contains a session for reusing + connection) + - head is the current staging head for the branch of that repo + - working_copy is the local working copy for the staging for that repo + """ + gh: github.GH + head: str + working_copy: git.Repo + + +StagingState: TypeAlias = Dict[Repository, StagingSlice] + +def try_staging(branch: Branch) -> Optional[Stagings]: + """ Tries to create a staging if the current branch does not already + have one. Returns None if the branch already has a staging or there + is nothing to stage, the newly created staging otherwise. + """ + _logger.info( + "Checking %s (%s) for staging: %s, skip? 
%s", + branch, branch.name, + branch.active_staging_id, + bool(branch.active_staging_id) + ) + if branch.active_staging_id: + return None + + rows = [ + (p, prs) + for p, prs in ready_prs(for_branch=branch) + if not any(prs.mapped('blocked')) + ] + if not rows: + return + + priority = rows[0][0] + if priority == 0 or priority == 1: + # p=0 take precedence over all else + # p=1 allows merging a fix inside / ahead of a split (e.g. branch + # is broken or widespread false positive) without having to cancel + # the existing staging + batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] + elif branch.split_ids: + split_ids = branch.split_ids[0] + _logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs')) + batched_prs = [batch.prs for batch in split_ids.batch_ids] + split_ids.unlink() + else: # p=2 + batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] + + with contextlib.ExitStack() as cleanup: + return stage_into(branch, batched_prs, cleanup) + + +def ready_prs(for_branch: Branch) -> List[Tuple[int, PullRequests]]: + env = for_branch.env + env.cr.execute(""" + SELECT + min(pr.priority) as priority, + array_agg(pr.id) AS match + FROM runbot_merge_pull_requests pr + WHERE pr.target = any(%s) + -- exclude terminal states (so there's no issue when + -- deleting branches & reusing labels) + AND pr.state != 'merged' + AND pr.state != 'closed' + GROUP BY + pr.target, + CASE + WHEN pr.label SIMILAR TO '%%:patch-[[:digit:]]+' + THEN pr.id::text + ELSE pr.label + END + HAVING + bool_or(pr.state = 'ready') or bool_or(pr.priority = 0) + ORDER BY min(pr.priority), min(pr.id) + """, [for_branch.ids]) + browse = env['runbot_merge.pull_requests'].browse + return [(p, browse(ids)) for p, ids in env.cr.fetchall()] + + +def stage_into( + branch: Branch, + batched_prs: List[PullRequests], + cleanup: contextlib.ExitStack, +) -> Optional[Stagings]: + original_heads, staging_state = staging_setup(branch, batched_prs, cleanup) + + staged = stage_batches(branch, batched_prs, staging_state) + + if not staged: + return None + + env = branch.env + heads = [] + commits = [] + for repo, it in staging_state.items(): + if it.head != original_heads[repo]: + # if we staged something for that repo, just create a record for + # that commit, or flag existing one as to-recheck in case there are + # already statuses we want to propagate to the staging or something + env.cr.execute( + "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " + "VALUES (%s, true, '{}') " + "ON CONFLICT (sha) DO UPDATE SET to_check=true " + "RETURNING id", + [it.head] + ) + [commit] = [head] = env.cr.fetchone() + else: + # if we didn't stage anything for that repo, create a dummy commit + # (with a uniquifier to ensure we don't hit a previous version of + # the same) to ensure the staging head is new and we're building + # everything + tree = it.gh.commit(it.head)['tree'] + uniquifier = base64.b64encode(os.urandom(12)).decode('ascii') + dummy_head = it.gh('post', 'git/commits', json={ + 'tree': tree['sha'], + 'parents': [it.head], + 'message': f'''\ +force rebuild + +uniquifier: {uniquifier} +For-Commit-Id: {it.head} +''', + }).json()['sha'] + # see above, ideally we don't need to mark the real head as + # `to_check` because it's an old commit but `DO UPDATE` is necessary + # for `RETURNING` to work, and it doesn't really hurt (maybe) + env.cr.execute( + "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " + "VALUES (%s, false, '{}'), (%s, true, '{}') " 
+ "ON CONFLICT (sha) DO UPDATE SET to_check=true " + "RETURNING id", + [it.head, dummy_head] + ) + ([commit], [head]) = env.cr.fetchall() + it.head = dummy_head + + heads.append(fields.Command.create({ + 'repository_id': repo.id, + 'commit_id': head, + })) + commits.append(fields.Command.create({ + 'repository_id': repo.id, + 'commit_id': commit, + })) + + # create actual staging object + st: Stagings = env['runbot_merge.stagings'].create({ + 'target': branch.id, + 'batch_ids': [(4, batch.id, 0) for batch in staged], + 'heads': heads, + 'commits': commits, + }) + # create staging branch from tmp + token = branch.project_id.github_token + for repo in branch.project_id.repo_ids.having_branch(branch): + it = staging_state[repo] + _logger.info( + "%s: create staging for %s:%s at %s", + branch.project_id.name, repo.name, branch.name, + it.head + ) + refname = 'staging.{}'.format(branch.name) + it.gh.set_ref(refname, it.head) + + i = count() + @utils.backoff(delays=WAIT_FOR_VISIBILITY, exc=TimeoutError) + def wait_for_visibility(): + if check_visibility(repo, refname, it.head, token): + _logger.info( + "[repo] updated %s:%s to %s: ok (at %d/%d)", + repo.name, refname, it.head, + next(i), len(WAIT_FOR_VISIBILITY) + ) + return + _logger.warning( + "[repo] updated %s:%s to %s: failed (at %d/%d)", + repo.name, refname, it.head, + next(i), len(WAIT_FOR_VISIBILITY) + ) + raise TimeoutError("Staged head not updated after %d seconds" % sum(WAIT_FOR_VISIBILITY)) + + _logger.info("Created staging %s (%s) to %s", st, ', '.join( + '%s[%s]' % (batch, batch.prs) + for batch in staged + ), st.target.name) + return st + + +def staging_setup( + target: Branch, + batched_prs: List[PullRequests], + cleanup: contextlib.ExitStack +) -> Tuple[Dict[Repository, str], StagingState]: + """Sets up the staging: + + - stores baseline info + - creates tmp branch via gh API (to remove) + - generates working copy for each repository with the target branch + """ + all_prs: PullRequests = target.env['runbot_merge.pull_requests'].concat(*batched_prs) + cache_dir = user_cache_dir('mergebot') + staging_state = {} + original_heads = {} + for repo in target.project_id.repo_ids.having_branch(target): + gh = repo.github() + head = gh.head(target.name) + # create tmp staging branch + gh.set_ref('tmp.{}'.format(target.name), head) + + source = git.get_local(repo, 'github') + source.fetch( + git.source_url(repo, 'github'), + # a full refspec is necessary to ensure we actually fetch the ref + # (not just the commit it points to) and update it. 
+            # `git fetch $remote $branch` seems to work locally, but it might
+            # be hooked only to "proper" remote-tracking branches
+            # (in `refs/remotes`), it doesn't seem to work here
+            f'+refs/heads/{target.name}:refs/heads/{target.name}',
+            *(pr.head for pr in all_prs if pr.repository == repo)
+        )
+        Path(cache_dir, repo.name).parent.mkdir(parents=True, exist_ok=True)
+        d = cleanup.enter_context(tempfile.TemporaryDirectory(
+            prefix=f'{repo.name}-{target.name}-staging',
+            dir=cache_dir,
+        ))
+        working_copy = source.clone(d, branch=target.name)
+        original_heads[repo] = head
+        staging_state[repo] = StagingSlice(gh=gh, head=head, working_copy=working_copy)
+
+    return original_heads, staging_state
+
+
+def stage_batches(branch: Branch, batched_prs: List[PullRequests], staging_state: StagingState) -> Batch:
+    batch_limit = branch.project_id.batch_limit
+    env = branch.env
+    staged = env['runbot_merge.batch']
+    for batch in batched_prs:
+        if len(staged) >= batch_limit:
+            break
+
+        try:
+            staged |= stage_batch(env, batch, staging_state)
+        except exceptions.MergeError as e:
+            pr = e.args[0]
+            _logger.info("Failed to stage %s into %s", pr.display_name, branch.name, exc_info=True)
+            if not staged or isinstance(e, exceptions.Unmergeable):
+                if len(e.args) > 1 and e.args[1]:
+                    reason = e.args[1]
+                else:
+                    reason = e.__cause__ or e.__context__
+                # if the reason is a json document, assume it's a github error
+                # and try to extract the error message to give it to the user
+                with contextlib.suppress(Exception):
+                    reason = json.loads(str(reason))['message'].lower()
+
+                pr.state = 'error'
+                env.ref('runbot_merge.pr.merge.failed')._send(
+                    repository=pr.repository,
+                    pull_request=pr.number,
+                    format_args={'pr': pr, 'reason': reason, 'exc': e},
+                )
+    return staged
+
+def check_visibility(repo: Repository, branch_name: str, expected_head: str, token: str):
+    """ Checks the actual repository to see if the new / expected head is
+    now visible
+    """
+    # v1 protocol provides URL for ref discovery: https://github.com/git/git/blob/6e0cc6776106079ed4efa0cc9abace4107657abf/Documentation/technical/http-protocol.txt#L187
+    # for more complete client this is also the capabilities discovery and
+    # the "entry point" for the service
+    url = 'https://github.com/{}.git/info/refs?service=git-upload-pack'.format(repo.name)
+    with requests.get(url, stream=True, auth=(token, '')) as resp:
+        if not resp.ok:
+            return False
+        for head, ref in parse_refs_smart(resp.raw.read):
+            if ref != ('refs/heads/' + branch_name):
+                continue
+            return head == expected_head
+        return False
+
+
+refline = re.compile(rb'([\da-f]{40}) ([^\0\n]+)(\0.*)?\n?')
+ZERO_REF = b'0'*40
+
+def parse_refs_smart(read: Callable[[int], bytes]) -> Iterator[Tuple[str, str]]:
+    """ yields (sha, refname) pairs parsed from the smart protocol's ref advertisement """
+    def read_line() -> Optional[bytes]:
+        length = int(read(4), 16)
+        if length == 0:
+            return None
+        return read(length - 4)
+
+    header = read_line()
+    assert header and header.rstrip() == b'# service=git-upload-pack', header
+    assert read_line() is None, "failed to find first flush line"
+    # read lines until second delimiter
+    for line in iter(read_line, None):
+        if line.startswith(ZERO_REF):
+            break # empty list (no refs)
+        m = refline.fullmatch(line)
+        assert m
+        yield m[1].decode(), m[2].decode()
+
+
+UNCHECKABLE = ['merge_method', 'overrides', 'draft']
+
+
+def stage_batch(env: api.Environment, prs: PullRequests, staging: StagingState) -> Batch:
+    """
+    Updates staging[*].head on success
+    """
+    new_heads: Dict[PullRequests, str] = {}
+    
+ pr_fields = env['runbot_merge.pull_requests']._fields + for pr in prs: + gh = staging[pr.repository].gh + + _logger.info( + "Staging pr %s for target %s; method=%s", + pr.display_name, pr.target.name, + pr.merge_method or (pr.squash and 'single') or None + ) + + target = 'tmp.{}'.format(pr.target.name) + original_head = gh.head(target) + try: + try: + method, new_heads[pr] = stage(pr, gh, target, related_prs=(prs - pr)) + _logger.info( + "Staged pr %s to %s by %s: %s -> %s", + pr.display_name, pr.target.name, method, + original_head, new_heads[pr] + ) + except Exception: + # reset the head which failed, as rebase() may have partially + # updated it (despite later steps failing) + gh.set_ref(target, original_head) + # then reset every previous update + for to_revert in new_heads.keys(): + it = staging[to_revert.repository] + it.gh.set_ref('tmp.{}'.format(to_revert.target.name), it.head) + raise + except github.MergeError as e: + raise exceptions.MergeError(pr) from e + except exceptions.Mismatch as e: + diff = ''.join(Differ().compare( + list(format_for_difflib((n, v) for n, v, _ in e.args[1])), + list(format_for_difflib((n, v) for n, _, v in e.args[1])), + )) + _logger.info("data mismatch on %s:\n%s", pr.display_name, diff) + env.ref('runbot_merge.pr.staging.mismatch')._send( + repository=pr.repository, + pull_request=pr.number, + format_args={ + 'pr': pr, + 'mismatch': ', '.join(pr_fields[f].string for f in e.args[0]), + 'diff': diff, + 'unchecked': ', '.join(pr_fields[f].string for f in UNCHECKABLE) + } + ) + return env['runbot_merge.batch'] + + # update meta to new heads + for pr, head in new_heads.items(): + staging[pr.repository].head = head + return env['runbot_merge.batch'].create({ + 'target': prs[0].target.id, + 'prs': [(4, pr.id, 0) for pr in prs], + }) + +def format_for_difflib(items: Iterator[Tuple[str, object]]) -> Iterator[str]: + """ Bit of a pain in the ass because difflib really wants + all lines to be newline-terminated, but not all values are + actual lines, and also needs to split multiline values. + """ + for name, value in items: + yield name + ':\n' + value = str(value) + if not value.endswith('\n'): + value += '\n' + yield from value.splitlines(keepends=True) + yield '\n' + + +Method = Literal['merge', 'rebase-merge', 'rebase-ff', 'squash'] +def stage(pr: PullRequests, gh: github.GH, target: str, related_prs: PullRequests) -> Tuple[Method, str]: + # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1] + _, prdict = gh.pr(pr.number) + commits = prdict['commits'] + method: Method = pr.merge_method or ('rebase-ff' if commits == 1 else None) + if commits > 50 and method.startswith('rebase'): + raise exceptions.Unmergeable(pr, "Rebasing 50 commits is too much.") + if commits > 250: + raise exceptions.Unmergeable( + pr, "Merging PRs of 250 or more commits is not supported " + "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" + ) + pr_commits = gh.commits(pr.number) + for c in pr_commits: + if not (c['commit']['author']['email'] and c['commit']['committer']['email']): + raise exceptions.Unmergeable( + pr, + f"All commits must have author and committer email, " + f"missing email on {c['sha']} indicates the authorship is " + f"most likely incorrect." 
+ ) + + # sync and signal possibly missed updates + invalid = {} + diff = [] + pr_head = pr_commits[-1]['sha'] + if pr.head != pr_head: + invalid['head'] = pr_head + diff.append(('Head', pr.head, pr_head)) + + if pr.target.name != prdict['base']['ref']: + branch = pr.env['runbot_merge.branch'].with_context(active_test=False).search([ + ('name', '=', prdict['base']['ref']), + ('project_id', '=', pr.repository.project_id.id), + ]) + if not branch: + pr.unlink() + raise exceptions.Unmergeable(pr, "While staging, found this PR had been retargeted to an un-managed branch.") + invalid['target'] = branch.id + diff.append(('Target branch', pr.target.name, branch.name)) + + if pr.squash != commits == 1: + invalid['squash'] = commits == 1 + diff.append(('Single commit', pr.squash, commits == 1)) + + msg = utils.make_message(prdict) + if pr.message != msg: + invalid['message'] = msg + diff.append(('Message', pr.message, msg)) + + if invalid: + pr.write({**invalid, 'state': 'opened', 'head': pr_head}) + raise exceptions.Mismatch(invalid, diff) + + if pr.reviewed_by and pr.reviewed_by.name == pr.reviewed_by.github_login: + # XXX: find other trigger(s) to sync github name? + gh_name = gh.user(pr.reviewed_by.github_login)['name'] + if gh_name: + pr.reviewed_by.name = gh_name + + match method: + case 'merge': + fn = stage_merge + case 'rebase-merge': + fn = stage_rebase_merge + case 'rebase-ff': + fn = stage_rebase_ff + case 'squash': + fn = stage_squash + return method, fn(pr, gh, target, pr_commits, related_prs=related_prs) + +def stage_squash(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str: + msg = pr._build_merge_message(pr, related_prs=related_prs) + authorship = {} + + authors = { + (c['commit']['author']['name'], c['commit']['author']['email']) + for c in commits + } + if len(authors) == 1: + name, email = authors.pop() + authorship['author'] = {'name': name, 'email': email} + else: + msg.headers.extend(sorted( + ('Co-Authored-By', "%s <%s>" % author) + for author in authors + )) + + committers = { + (c['commit']['committer']['name'], c['commit']['committer']['email']) + for c in commits + } + if len(committers) == 1: + name, email = committers.pop() + authorship['committer'] = {'name': name, 'email': email} + # should committers also be added to co-authors? 

+
+    original_head = gh.head(target)
+    merge_tree = gh.merge(pr.head, target, 'temp merge')['tree']['sha']
+    head = gh('post', 'git/commits', json={
+        **authorship,
+        'message': str(msg),
+        'tree': merge_tree,
+        'parents': [original_head],
+    }).json()['sha']
+    gh.set_ref(target, head)
+
+    commits_map = {c['sha']: head for c in commits}
+    commits_map[''] = head
+    pr.commits_map = json.dumps(commits_map)
+
+    return head
+
+def stage_rebase_ff(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    # updates head commit with PR number (if necessary) then rebases
+    # on top of target
+    msg = pr._build_merge_message(commits[-1]['commit']['message'], related_prs=related_prs)
+    commits[-1]['commit']['message'] = str(msg)
+    add_self_references(pr, commits[:-1])
+    head, mapping = gh.rebase(pr.number, target, commits=commits)
+    pr.commits_map = json.dumps({**mapping, '': head})
+    return head
+
+def stage_rebase_merge(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    add_self_references(pr, commits)
+    h, mapping = gh.rebase(pr.number, target, reset=True, commits=commits)
+    msg = pr._build_merge_message(pr, related_prs=related_prs)
+    merge_head = gh.merge(h, target, str(msg))['sha']
+    pr.commits_map = json.dumps({**mapping, '': merge_head})
+    return merge_head
+
+def stage_merge(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    pr_head = commits[-1] # oldest to newest
+    base_commit = None
+    head_parents = {p['sha'] for p in pr_head['parents']}
+    if len(head_parents) > 1:
+        # look for parent(s?) of pr_head not in PR, means it's
+        # from target (so we merged target in pr)
+        merge = head_parents - {c['sha'] for c in commits}
+        external_parents = len(merge)
+        if external_parents > 1:
+            raise exceptions.Unmergeable(
+                "The PR head can only have one parent from the base branch "
+                "(not part of the PR itself), found %d: %s" % (
+                external_parents,
+                ', '.join(merge)
+            ))
+        if external_parents == 1:
+            [base_commit] = merge
+
+    commits_map = {c['sha']: c['sha'] for c in commits}
+    if base_commit:
+        # replicate pr_head with base_commit replaced by
+        # the current head
+        original_head = gh.head(target)
+        merge_tree = gh.merge(pr_head['sha'], target, 'temp merge')['tree']['sha']
+        new_parents = [original_head] + list(head_parents - {base_commit})
+        msg = pr._build_merge_message(pr_head['commit']['message'], related_prs=related_prs)
+        copy = gh('post', 'git/commits', json={
+            'message': str(msg),
+            'tree': merge_tree,
+            'author': pr_head['commit']['author'],
+            'committer': pr_head['commit']['committer'],
+            'parents': new_parents,
+        }).json()
+        gh.set_ref(target, copy['sha'])
+        # merge commit *and old PR head* map to the pr head replica
+        commits_map[''] = commits_map[pr_head['sha']] = copy['sha']
+        pr.commits_map = json.dumps(commits_map)
+        return copy['sha']
+    else:
+        # otherwise do a regular merge
+        msg = pr._build_merge_message(pr)
+        merge_head = gh.merge(pr.head, target, str(msg))['sha']
+        # and the merge commit is the normal merge head
+        commits_map[''] = merge_head
+        pr.commits_map = json.dumps(commits_map)
+        return merge_head
+
+def is_mentioned(message: Union[PullRequests, str], pr: PullRequests, *, full_reference: bool = False) -> bool:
+    """Returns whether ``pr`` is mentioned in ``message``
+    """
+    if full_reference:
+        pattern = fr'\b{re.escape(pr.display_name)}\b'
+    else:
+        repository = pr.repository.name  # 
.replace('/', '\\/')
+        pattern = fr'( |\b{repository})#{pr.number}\b'
+    return bool(re.search(pattern, message if isinstance(message, str) else message.message))
+
+def add_self_references(pr: PullRequests, commits: List[github.PrCommit]):
+    """Adds a footer reference to ``pr`` to all ``commits`` if they don't
+    already refer to the PR.
+    """
+    for c in (c['commit'] for c in commits):
+        if not is_mentioned(c['message'], pr):
+            message = c['message']
+            m = Message.from_message(message)
+            m.headers.pop('Part-Of', None)
+            m.headers.add('Part-Of', pr.display_name)
+            c['message'] = str(m)
+
+BREAK = re.compile(r'''
+    [ ]{0,3} # 0-3 spaces of indentation
+    # followed by a sequence of three or more matching -, _, or * characters,
+    # each followed optionally by any number of spaces or tabs
+    # so needs to start with a _, - or *, then have at least 2 more such
+    # interspersed with any number of spaces or tabs
+    ([*_-])
+    ([ \t]*\1){2,}
+    [ \t]*
+''', flags=re.VERBOSE)
+SETEX_UNDERLINE = re.compile(r'''
+    [ ]{0,3} # no more than 3 spaces indentation
+    [-=]+ # a sequence of = characters or a sequence of - characters
+    [ ]* # any number of trailing spaces
+    # we don't care about "a line containing a single -" because we want to
+    # disambiguate SETEX headings from thematic breaks, and thematic breaks have
+    # 3+ -. Doesn't look like GH interprets `- - -` as a line so yay...
+''', flags=re.VERBOSE)
+HEADER = re.compile('([A-Za-z-]+): (.*)')
+class Message:
+    @classmethod
+    def from_message(cls, msg: Union[PullRequests, str]) -> 'Message':
+        in_headers = True
+        maybe_setex = None
+        # creating from PR message -> remove content following break
+        if isinstance(msg, str):
+            message, handle_break = (msg, False)
+        else:
+            message, handle_break = (msg.message, True)
+        headers = []
+        body: List[str] = []
+        # don't process the title (first line) of the commit message
+        lines = message.splitlines()
+        for line in reversed(lines[1:]):
+            if maybe_setex:
+                # NOTE: actually slightly more complicated: it's a SETEX heading
+                #       only if preceding line(s) can be interpreted as a
+                #       paragraph so e.g. a title followed by a line of dashes
+                #       would indeed be a break, but this should be good enough
+                #       for now, if we need more we'll need a full-blown
+                #       markdown parser probably
+                if line: # actually a SETEX title -> add underline to body then process current
+                    body.append(maybe_setex)
+                else: # actually break, remove body then process current
+                    body = []
+                maybe_setex = None
+
+            if not line:
+                if not in_headers and body and body[-1]:
+                    body.append(line)
+                continue
+
+            if handle_break and BREAK.fullmatch(line):
+                if SETEX_UNDERLINE.fullmatch(line):
+                    maybe_setex = line
+                else:
+                    body = []
+                continue
+
+            h = HEADER.fullmatch(line)
+            if h:
+                # c-a-b = special case from an existing test, not sure if actually useful? 
+ if in_headers or h[1].lower() == 'co-authored-by': + headers.append(h.groups()) + continue + + body.append(line) + in_headers = False + + # if there are non-title body lines, add a separation after the title + if body and body[-1]: + body.append('') + body.append(lines[0]) + return cls('\n'.join(reversed(body)), Headers(reversed(headers))) + + def __init__(self, body: str, headers: Optional[Headers] = None): + self.body = body + self.headers = headers or Headers() + + def __setattr__(self, name, value): + # make sure stored body is always stripped + if name == 'body': + value = value and value.strip() + super().__setattr__(name, value) + + def __str__(self): + if not self.headers: + return self.body + '\n' + + with io.StringIO(self.body) as msg: + msg.write(self.body) + msg.write('\n\n') + # https://git.wiki.kernel.org/index.php/CommitMessageConventions + # seems to mostly use capitalised names (rather than title-cased) + keys = list(OrderedSet(k.capitalize() for k in self.headers.keys())) + # c-a-b must be at the very end otherwise github doesn't see it + keys.sort(key=lambda k: k == 'Co-authored-by') + for k in keys: + for v in self.headers.getlist(k): + msg.write(k) + msg.write(': ') + msg.write(v) + msg.write('\n') + + return msg.getvalue() diff --git a/runbot_merge/tests/test_multirepo.py b/runbot_merge/tests/test_multirepo.py index 58c642fa..57698426 100644 --- a/runbot_merge/tests/test_multirepo.py +++ b/runbot_merge/tests/test_multirepo.py @@ -182,7 +182,6 @@ def test_stage_match(env, project, repo_a, repo_b, config, page): assert 'Related: {}'.format(pr_b.display_name) in repo_a.commit('master').message assert 'Related: {}'.format(pr_a.display_name) in repo_b.commit('master').message - print(pr_a.batch_ids.read(['staging_id', 'prs'])) # check that related PRs *still* link to one another after merge assert get_related_pr_labels(pr_page(page, prx_a)) == [pr_b.display_name] assert get_related_pr_labels(pr_page(page, prx_b)) == [pr_a.display_name] @@ -1093,13 +1092,6 @@ def test_multi_project(env, make_repo, setreviewers, users, config, pr1_id = to_pr(env, pr1) pr2_id = to_pr(env, pr2) - print( - pr1.repo.name, pr1.number, pr1_id.display_name, pr1_id.label, - '\n', - pr2.repo.name, pr2.number, pr2_id.display_name, pr2_id.label, - flush=True, - ) - assert pr1_id.state == 'ready' and not pr1_id.blocked assert pr2_id.state == 'validated'
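
Illustration (not part of the patch): the `Message` helper that this commit
moves into `runbot_merge/models/stagings_create.py` is now shared by both
`runbot_merge` and `forwardport`. A minimal sketch of how it round-trips a
commit message's pseudo-headers, with made-up message contents:

    # illustrative only: the message contents and PR reference are made up
    from odoo.addons.runbot_merge.models.stagings_create import Message

    msg = Message.from_message(
        "[FIX] stock: off-by-one in forecast\n"
        "\n"
        "Some explanatory body.\n"
        "\n"
        "Signed-Off-By: Jane Doe <jane@example.com>"
    )
    # pseudo-headers are split out of the body...
    assert msg.body == "[FIX] stock: off-by-one in forecast\n\nSome explanatory body."
    # ...and can be manipulated before re-serialising
    msg.headers.add('Part-Of', 'odoo/odoo#12345')  # hypothetical PR reference
    print(str(msg), end='')
    # [FIX] stock: off-by-one in forecast
    #
    # Some explanatory body.
    #
    # Signed-off-by: Jane Doe <jane@example.com>
    # Part-of: odoo/odoo#12345

Note that `__str__` re-emits header names capitalised (hence `Part-of`) and
sorts `Co-authored-by` last so GitHub still picks it up, which keeps the
serialised form stable regardless of input order.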