[ADD] runbot_merge: support staging ready PRs over splits

Not sure it's going to be useful, but it's hard to know when we can't
test it. The intent is mostly the ability to prioritise throughput (or
attempt to) during high-load events: if we can favour staging N new
batches over a split's N/2, we might be able to merge more crap.

But maybe not, we'll see; either way it's here now and seems to more
or less work.

Fixes #798
Xavier Morel 2023-11-30 12:27:09 +01:00
parent 9f54e6f209
commit c140701975
5 changed files with 155 additions and 12 deletions
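
As a rough illustration of the trade-off the commit message describes (toy numbers, not from the commit): when a staging of N batches fails, bisection re-stages a split of N/2 batches, whereas the new modes can stage everything currently ready instead.

# toy comparison, all numbers assumed for illustration
failed = 4                 # N: batches in the staging that just failed
split_next = failed // 2   # 'default' re-stages one half of the split next
newly_ready = 4            # batches that became ready in the meantime
# 'ready' (and, here, 'largest') would stage 4 batches in the next round
# instead of 2, trading bisection progress for batches-per-round
assert max(split_next, newly_ready) == newly_ready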


@@ -0,0 +1,4 @@
+ADD: projects now know how to prioritise new PRs over splits
+
+While this likely has relatively low utility, we'll look at how it performs
+during periods of high throughput.


@@ -25,6 +25,11 @@ class Project(models.Model):
         "target branches of PR this project handles."
     )
     staging_enabled = fields.Boolean(default=True)
+    staging_priority = fields.Selection([
+        ('default', "Splits over ready PRs"),
+        ('largest', "Largest of split and ready PRs"),
+        ('ready', "Ready PRs over split"),
+    ], default="default", required=True)
     ci_timeout = fields.Integer(
         default=60, required=True, group_operator=None,
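
For context, the new selection sits next to the other staging knobs and is set per project; a hypothetical Odoo shell session (the record id is assumed) might look like:

# hypothetical odoo-bin shell usage; the project id (1) is assumed
project = env['runbot_merge.project'].browse(1)
project.staging_priority = 'largest'  # or 'default' / 'ready'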


@@ -61,21 +61,67 @@ def try_staging(branch: Branch) -> Optional[Stagings]:
         for p, prs in ready_prs(for_branch=branch)
         if not any(prs.mapped('blocked'))
     ]
-    if not rows:
-        return
-
-    priority = rows[0][0]
+
+    def log(label, batches):
+        _logger.info(label, ', '.join(
+            p.display_name
+            for batch in batches
+            for p in batch
+        ))
+
+    priority = rows[0][0] if rows else None
     if priority == 'alone':
         batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)]
-    elif branch.split_ids:
-        split_ids = branch.split_ids[0]
-        _logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs'))
-        batched_prs = [batch.prs for batch in split_ids.batch_ids]
-        split_ids.unlink()
+        log("staging high-priority PRs %s", batched_prs)
+    elif branch.project_id.staging_priority == 'default':
+        if branch.split_ids:
+            split_ids = branch.split_ids[0]
+            batched_prs = [batch.prs for batch in split_ids.batch_ids]
+            split_ids.unlink()
+            log("staging split PRs %s (prioritising splits)", batched_prs)
+        else:
+            # priority, normal; priority = sorted ahead of normal, so always picked
+            # first as long as there's room
+            batched_prs = [pr_ids for _, pr_ids in rows]
+            log("staging ready PRs %s (prioritising splits)", batched_prs)
+    elif branch.project_id.staging_priority == 'ready':
+        # splits are ready by definition, we need to exclude them from the
+        # ready rows otherwise we'll re-stage the splits so if an error is legit
+        # we cycle forever
+        # FIXME: once the batches are less shit, store this durably on the
+        #        batches and filter out when fetching readies (?)
+        split_batches = {batch.prs for batch in branch.split_ids.batch_ids}
+        ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches]
+
+        if ready:
+            batched_prs = ready
+            log("staging ready PRs %s (prioritising ready)", batched_prs)
+        else:
+            split_ids = branch.split_ids[:1]
+            batched_prs = [batch.prs for batch in split_ids.batch_ids]
+            split_ids.unlink()
+            log("staging split PRs %s (prioritising ready)", batched_prs)
     else:
-        # priority, normal; priority = sorted ahead of normal, so always picked
-        # first as long as there's room
-        batched_prs = [pr_ids for _, pr_ids in rows]
+        assert branch.project_id.staging_priority == 'largest'
+        # splits are ready by definition, we need to exclude them from the
+        # ready rows otherwise ready always wins but we re-stage the splits, so
+        # if an error is legit we'll cycle forever
+        split_batches = {batch.prs for batch in branch.split_ids.batch_ids}
+        ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches]
+
+        maxsplit = max(branch.split_ids, key=lambda s: len(s.batch_ids), default=branch.env['runbot_merge.split'])
+        _logger.info("largest split = %d, ready = %d", len(maxsplit.batch_ids), len(ready))
+        # bias towards splits if len(ready) = len(batch_ids)
+        if len(maxsplit.batch_ids) >= len(ready):
+            batched_prs = [batch.prs for batch in maxsplit.batch_ids]
+            maxsplit.unlink()
+            log("staging split PRs %s (prioritising largest)", batched_prs)
+        else:
+            batched_prs = ready
+            log("staging ready PRs %s (prioritising largest)", batched_prs)
+
+    if not batched_prs:
+        return
 
     original_heads, staging_state = staging_setup(branch, batched_prs)
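
Stripped of the ORM plumbing and logging above, the selection policy reduces to roughly the following (a simplified sketch, not the actual code: `splits` is the branch's splits as lists of batches, oldest first, and `ready` is the ready batches with split batches already filtered out; the 'alone' fast path is omitted):

def pick_batches(priority, splits, ready):
    # simplified restatement of the branching in try_staging above
    if priority == 'default':
        # re-stage the oldest split if there is one, else stage ready batches
        return splits[0] if splits else ready
    if priority == 'ready':
        # stage ready batches first, falling back to the oldest split
        return ready or (splits[0] if splits else [])
    assert priority == 'largest'
    biggest = max(splits, key=len, default=[])
    # ties go to the split, matching the `>=` comparison above
    return biggest if len(biggest) >= len(ready) else ready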


@@ -1,6 +1,10 @@
+import functools
+from itertools import repeat
+
 import pytest
-from utils import Commit, to_pr
+
+from utils import Commit, to_pr, ensure_one
 
 
 @pytest.fixture
 def repo(env, project, make_repo, users, setreviewers):
@@ -40,3 +44,86 @@ def test_disable_staging(env, project, repo, config):
     assert staging_1.state == "cancelled"
     assert not pr_id.staging_id.active,\
         "should not be re-staged, because staging has been disabled"
+
+@pytest.mark.parametrize('mode,cutoff,second', [
+    # default mode, the second staging is the first half of the first staging
+    ('default', 2, [0]),
+    # splits are right-biased (the midpoint is rounded down), so for odd
+    # staging sizes the first split is the smaller one
+    ('default', 3, [0]),
+    # if the split results in ((1, 2), 1), largest stages the second
+    ('largest', 3, [1, 2]),
+    # if the split results in ((1, 1), 2), largest stages the ready PRs
+    ('largest', 2, [2, 3]),
+    # even if it's a small minority, ready selects the ready PR(s)
+    ('ready', 3, [3]),
+    ('ready', 2, [2, 3]),
+])
+def test_staging_priority(env, project, repo, config, mode, cutoff, second):
+    """By default, unless a PR is prioritised as "alone", splits take priority
+    over new stagings.
+
+    *However*, to try and maximise throughput in trying times, it's possible to
+    configure the project to prioritise either the largest staging (between
+    split and ready batches), or to just prioritise new stagings.
+    """
+    def select(prs, indices):
+        zero = env['runbot_merge.pull_requests']
+        filtered = (p for i, p in enumerate(prs) if i in indices)
+        return functools.reduce(lambda a, b: a | b, filtered, zero)
+
+    project.staging_priority = mode
+    # we need at least 3 PRs, two that we can split out, and one leftover
+    with repo:
+        [m] = repo.make_commits(None, Commit("m", tree={"ble": "1"}), ref="heads/master")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"1": "1"}), ref="heads/pr1")
+        pr1 = repo.make_pr(title="whatever", target="master", head="pr1")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"2": "2"}), ref="heads/pr2")
+        pr2 = repo.make_pr(title="whatever", target="master", head="pr2")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"3": "3"}), ref="heads/pr3")
+        pr3 = repo.make_pr(title="whatever", target="master", head="pr3")
+
+        [c] = repo.make_commits(m, Commit("c", tree={"4": "4"}), ref="heads/pr4")
+        pr4 = repo.make_pr(title="whatever", target="master", head="pr4")
+
+    prs = [pr1, pr2, pr3, pr4]
+    pr_ids = functools.reduce(
+        lambda a, b: a | b,
+        map(to_pr, repeat(env), prs)
+    )
+
+    # ready the PRs for the initial staging (to split)
+    pre_cutoff = pr_ids[:cutoff]
+    with repo:
+        for pr, pr_id in zip(prs[:cutoff], pre_cutoff):
+            pr.post_comment('hansen r+', config['role_reviewer']['token'])
+            repo.post_status(pr_id.head, 'success')
+    env.run_crons()
+    # check they staged as expected
+    assert all(p.staging_id for p in pre_cutoff)
+    staging = ensure_one(env['runbot_merge.stagings'].search([]))
+    ensure_one(pre_cutoff.staging_id)
+
+    # ready the rest
+    with repo:
+        for pr, pr_id in zip(prs[cutoff:], pr_ids[cutoff:]):
+            pr.post_comment('hansen r+', config['role_reviewer']['token'])
+            repo.post_status(pr_id.head, 'success')
+    env.run_crons('runbot_merge.process_updated_commits')
+    assert not pr_ids.filtered(lambda p: p.blocked)
+
+    # trigger a split
+    with repo:
+        repo.post_status('staging.master', 'failure')
+    env.run_crons('runbot_merge.process_updated_commits', 'runbot_merge.merge_cron')
+    assert not staging.active
+    assert not env['runbot_merge.stagings'].search([]).active
+    assert env['runbot_merge.split'].search_count([]) == 2
+
+    env.run_crons()
+
+    # check that st.pr_ids are the PRs we expect
+    st = env['runbot_merge.stagings'].search([])
+    assert st.pr_ids == select(pr_ids, second)
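
To make one of the parametrize cases concrete (an illustrative trace with assumed names, relying on the right-biased split described in the comments above): with mode='largest' and cutoff=3, the staging of pr1..pr3 fails and splits into a one-batch and a two-batch half, while pr4 is readied afterwards.

# illustrative trace of the ('largest', 3, [1, 2]) case; names assumed
splits = [["pr1"], ["pr2", "pr3"]]  # midpoint rounded down: first half smaller
ready = ["pr4"]                     # readied after the failure, not in a split
biggest = max(splits, key=len)
assert len(biggest) >= len(ready)   # 2 >= 1, so 'largest' re-stages the split
assert biggest == ["pr2", "pr3"]    # PR indices [1, 2], matching `second`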


@@ -33,6 +33,7 @@
                 </group>
                 <group>
                     <field name="staging_enabled" widget="boolean_toggle"/>
+                    <field name="staging_priority"/>
                     <field name="uniquifier"/>
                     <field name="ci_timeout"/>
                     <field name="batch_limit"/>