[ADD] runbot_merge: support staging ready PRs over splits

Not sure it's going to be useful, but it's hard to know when we can't
test it. The intent is mostly the ability to prioritise throughput (or
attempt to) during high-load events: if we can favour staging N new
batches over a split's N/2, we might be able to merge more crap.

But maybe not, we'll see; either way it's here now and seems to more
or less work.

Fixes #798
Xavier Morel 2023-11-30 12:27:09 +01:00
parent 9f54e6f209
commit c140701975
5 changed files with 155 additions and 12 deletions
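
As a rough illustration of the trade-off the commit message describes (toy numbers, not from the commit): when a staging of N batches fails, bisection re-stages a split of N/2 batches, whereas the new modes can stage everything currently ready instead.

# toy comparison, all numbers assumed for illustration
failed = 4                 # N: batches in the staging that just failed
split_next = failed // 2   # 'default' re-stages one half of the split next
newly_ready = 4            # batches that became ready in the meantime
# 'ready' (and, here, 'largest') would stage 4 batches in the next round
# instead of 2, trading bisection progress for batches-per-round
assert max(split_next, newly_ready) == newly_ready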


@@ -0,0 +1,4 @@
+ADD: projects now know how to prioritise new PRs over splits
+
+While this likely has relatively low utility, we'll look at how it performs
+during periods of high throughput.


@@ -25,6 +25,11 @@ class Project(models.Model):
         "target branches of PR this project handles."
     )
     staging_enabled = fields.Boolean(default=True)
+    staging_priority = fields.Selection([
+        ('default', "Splits over ready PRs"),
+        ('largest', "Largest of split and ready PRs"),
+        ('ready', "Ready PRs over split"),
+    ], default="default", required=True)
     ci_timeout = fields.Integer(
         default=60, required=True, group_operator=None,
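
For context, the new selection sits next to the other staging knobs and is set per project; a hypothetical Odoo shell session (the record id is assumed) might look like:

# hypothetical odoo-bin shell usage; the project id (1) is assumed
project = env['runbot_merge.project'].browse(1)
project.staging_priority = 'largest'  # or 'default' / 'ready'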


@@ -61,21 +61,67 @@ def try_staging(branch: Branch) -> Optional[Stagings]:
         for p, prs in ready_prs(for_branch=branch)
         if not any(prs.mapped('blocked'))
     ]
-    if not rows:
-        return
-
-    priority = rows[0][0]
+
+    def log(label, batches):
+        _logger.info(label, ', '.join(
+            p.display_name
+            for batch in batches
+            for p in batch
+        ))
+
+    priority = rows[0][0] if rows else None
     if priority == 'alone':
         batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)]
-    elif branch.split_ids:
-        split_ids = branch.split_ids[0]
-        _logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs'))
-        batched_prs = [batch.prs for batch in split_ids.batch_ids]
-        split_ids.unlink()
+        log("staging high-priority PRs %s", batched_prs)
+    elif branch.project_id.staging_priority == 'default':
+        if branch.split_ids:
+            split_ids = branch.split_ids[0]
+            batched_prs = [batch.prs for batch in split_ids.batch_ids]
+            split_ids.unlink()
+            log("staging split PRs %s (prioritising splits)", batched_prs)
+        else:
+            # priority, normal; priority = sorted ahead of normal, so always picked
+            # first as long as there's room
+            batched_prs = [pr_ids for _, pr_ids in rows]
+            log("staging ready PRs %s (prioritising splits)", batched_prs)
+    elif branch.project_id.staging_priority == 'ready':
+        # splits are ready by definition, we need to exclude them from the
+        # ready rows otherwise we'll re-stage the splits so if an error is legit
+        # we cycle forever
+        # FIXME: once the batches are less shit, store this durably on the
+        #        batches and filter out when fetching readies (?)
+        split_batches = {batch.prs for batch in branch.split_ids.batch_ids}
+        ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches]
+
+        if ready:
+            batched_prs = ready
+            log("staging ready PRs %s (prioritising ready)", batched_prs)
+        else:
+            split_ids = branch.split_ids[:1]
+            batched_prs = [batch.prs for batch in split_ids.batch_ids]
+            split_ids.unlink()
+            log("staging split PRs %s (prioritising ready)", batched_prs)
     else:
-        # priority, normal; priority = sorted ahead of normal, so always picked
-        # first as long as there's room
-        batched_prs = [pr_ids for _, pr_ids in rows]
+        assert branch.project_id.staging_priority == 'largest'
+        # splits are ready by definition, we need to exclude them from the
+        # ready rows otherwise ready always wins but we re-stage the splits, so
+        # if an error is legit we'll cycle forever
+        split_batches = {batch.prs for batch in branch.split_ids.batch_ids}
+        ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches]
+
+        maxsplit = max(branch.split_ids, key=lambda s: len(s.batch_ids), default=branch.env['runbot_merge.split'])
+        _logger.info("largest split = %d, ready = %d", len(maxsplit.batch_ids), len(ready))
+        # bias towards splits if len(ready) = len(batch_ids)
+        if len(maxsplit.batch_ids) >= len(ready):
+            batched_prs = [batch.prs for batch in maxsplit.batch_ids]
+            maxsplit.unlink()
+            log("staging split PRs %s (prioritising largest)", batched_prs)
+        else:
+            batched_prs = ready
+            log("staging ready PRs %s (prioritising largest)", batched_prs)
+
+    if not batched_prs:
+        return
 
     original_heads, staging_state = staging_setup(branch, batched_prs)
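
Stripped of the ORM plumbing and logging above, the selection policy reduces to roughly the following (a simplified sketch, not the actual code: `splits` is the branch's splits as lists of batches, oldest first, and `ready` is the ready batches with split batches already filtered out; the 'alone' fast path is omitted):

def pick_batches(priority, splits, ready):
    # simplified restatement of the branching in try_staging above
    if priority == 'default':
        # re-stage the oldest split if there is one, else stage ready batches
        return splits[0] if splits else ready
    if priority == 'ready':
        # stage ready batches first, falling back to the oldest split
        return ready or (splits[0] if splits else [])
    assert priority == 'largest'
    biggest = max(splits, key=len, default=[])
    # ties go to the split, matching the `>=` comparison above
    return biggest if len(biggest) >= len(ready) else ready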


@@ -1,6 +1,10 @@
+import functools
+from itertools import repeat
+
 import pytest
-from utils import Commit, to_pr
+
+from utils import Commit, to_pr, ensure_one
 
 
 @pytest.fixture
 def repo(env, project, make_repo, users, setreviewers):
@@ -40,3 +44,86 @@ def test_disable_staging(env, project, repo, config):
     assert staging_1.state == "cancelled"
     assert not pr_id.staging_id.active,\
         "should not be re-staged, because staging has been disabled"
+
+@pytest.mark.parametrize('mode,cutoff,second', [
+    # default mode, the second staging is the first half of the first staging
+    ('default', 2, [0]),
+    # splits are right-biased (the midpoint is rounded down), so for odd
+    # staging sizes the first split is the smaller one
+    ('default', 3, [0]),
+    # if the split results in ((1, 2), 1), largest stages the second
+    ('largest', 3, [1, 2]),
+    # if the split results in ((1, 1), 2), largest stages the ready PRs
+    ('largest', 2, [2, 3]),
+    # even if it's a small minority, ready selects the ready PR(s)
+    ('ready', 3, [3]),
+    ('ready', 2, [2, 3]),
+])
+def test_staging_priority(env, project, repo, config, mode, cutoff, second):
+    """By default, unless a PR is prioritised as "alone", splits take priority
+    over new stagings.
+
+    *However*, to try and maximise throughput in trying times, it's possible to
+    configure the project to prioritise either the largest staging (between
+    split and ready batches), or to just prioritise new stagings.
+    """
+    def select(prs, indices):
+        zero = env['runbot_merge.pull_requests']
+        filtered = (p for i, p in enumerate(prs) if i in indices)
+        return functools.reduce(lambda a, b: a | b, filtered, zero)
+
+    project.staging_priority = mode
+    # we need at least 3 PRs, two that we can split out, and one leftover
+    with repo:
+        [m] = repo.make_commits(None, Commit("m", tree={"ble": "1"}), ref="heads/master")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"1": "1"}), ref="heads/pr1")
+        pr1 = repo.make_pr(title="whatever", target="master", head="pr1")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"2": "2"}), ref="heads/pr2")
+        pr2 = repo.make_pr(title="whatever", target="master", head="pr2")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"3": "3"}), ref="heads/pr3")
+        pr3 = repo.make_pr(title="whatever", target="master", head="pr3")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"4": "4"}), ref="heads/pr4")
+        pr4 = repo.make_pr(title="whatever", target="master", head="pr4")
+
+    prs = [pr1, pr2, pr3, pr4]
+    pr_ids = functools.reduce(
+        lambda a, b: a | b,
+        map(to_pr, repeat(env), prs)
+    )
+
+    # ready the PRs for the initial staging (to split)
+    pre_cutoff = pr_ids[:cutoff]
+    with repo:
+        for pr, pr_id in zip(prs[:cutoff], pre_cutoff):
+            pr.post_comment('hansen r+', config['role_reviewer']['token'])
+            repo.post_status(pr_id.head, 'success')
+    env.run_crons()
+    # check they staged as expected
+    assert all(p.staging_id for p in pre_cutoff)
+    staging = ensure_one(env['runbot_merge.stagings'].search([]))
+    ensure_one(pre_cutoff.staging_id)
+
+    # ready the rest
+    with repo:
+        for pr, pr_id in zip(prs[cutoff:], pr_ids[cutoff:]):
+            pr.post_comment('hansen r+', config['role_reviewer']['token'])
+            repo.post_status(pr_id.head, 'success')
+    env.run_crons('runbot_merge.process_updated_commits')
+    assert not pr_ids.filtered(lambda p: p.blocked)
+
+    # trigger a split
+    with repo:
+        repo.post_status('staging.master', 'failure')
+    env.run_crons('runbot_merge.process_updated_commits', 'runbot_merge.merge_cron')
+    assert not staging.active
+    assert not env['runbot_merge.stagings'].search([]).active
+    assert env['runbot_merge.split'].search_count([]) == 2
+
+    env.run_crons()
+
+    # check that st.pr_ids are the PRs we expect
+    st = env['runbot_merge.stagings'].search([])
+    assert st.pr_ids == select(pr_ids, second)
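
To make one of the parametrize cases concrete (an illustrative trace with assumed names, relying on the right-biased split described in the comments above): with mode='largest' and cutoff=3, the staging of pr1..pr3 fails and splits into a one-batch and a two-batch half, while pr4 is readied afterwards.

# illustrative trace of the ('largest', 3, [1, 2]) case; names assumed
splits = [["pr1"], ["pr2", "pr3"]]  # midpoint rounded down: first half smaller
ready = ["pr4"]                     # readied after the failure, not in a split
biggest = max(splits, key=len)
assert len(biggest) >= len(ready)   # 2 >= 1, so 'largest' re-stages the split
assert biggest == ["pr2", "pr3"]    # PR indices [1, 2], matching `second`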


@@ -33,6 +33,7 @@
                 </group>
                 <group>
                     <field name="staging_enabled" widget="boolean_toggle"/>
+                    <field name="staging_priority"/>
                     <field name="uniquifier"/>
                     <field name="ci_timeout"/>
                     <field name="batch_limit"/>