From ef6a002ea7b387c75f00759858ea16e61287030e Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Mon, 5 Feb 2024 16:10:15 +0100 Subject: [PATCH] [CHG] runbot_merge: move staging readiness to batch Staging readiness is a batch-level concern, and many of the markers are already there though a few need to be aggregated from the PRs. As such, staging has no reason to be performed in terms of PRs anymore, it should be performed via batches directly. There is a bit of a mess in order not to completely fuck up when retargeting PRs (implicitly via freeze wizard, or explicitly) as for now we're moving PRs between batches in order to keep the batches *mostly* target-bound. Some of the side-effects in managing the coherence of the targeting and moving PRs between batches is... not great. This might need to be revisited and cleaned up with those scenarios better considered. --- runbot_merge/controllers/__init__.py | 2 +- runbot_merge/models/batch.py | 28 ++++- .../models/project_freeze/__init__.py | 13 +- runbot_merge/models/pull_requests.py | 53 ++++---- runbot_merge/models/stagings_create.py | 114 ++++++++---------- runbot_merge/tests/test_basic.py | 1 + 6 files changed, 118 insertions(+), 93 deletions(-) diff --git a/runbot_merge/controllers/__init__.py b/runbot_merge/controllers/__init__.py index 006f441e..5485f1fa 100644 --- a/runbot_merge/controllers/__init__.py +++ b/runbot_merge/controllers/__init__.py @@ -176,7 +176,7 @@ def handle_pr(env, event): return env['runbot_merge.pull_requests'].search([ ('repository', '=', repo.id), ('number', '=', pr['number']), - ('target', '=', target.id), + # ('target', '=', target.id), ]) # edition difficulty: pr['base']['ref] is the *new* target, the old one # is at event['change']['base']['ref'] (if the target changed), so edition diff --git a/runbot_merge/models/batch.py b/runbot_merge/models/batch.py index 8e9a3674..bb49c857 100644 --- a/runbot_merge/models/batch.py +++ b/runbot_merge/models/batch.py @@ -1,6 +1,6 @@ from __future__
import annotations -from odoo import models, fields +from odoo import models, fields, api class Batch(models.Model): @@ -36,3 +36,29 @@ class Batch(models.Model): default=False, tracking=True, help="Cancels current staging on target branch when becoming ready" ) + + blocked = fields.Char(store=True, compute="_compute_stageable") + + @api.depends( + "merge_date", + "prs.error", "prs.draft", "prs.squash", "prs.merge_method", + "skipchecks", "prs.status", "prs.reviewed_by", + ) + def _compute_stageable(self): + for batch in self: + if batch.merge_date: + batch.blocked = "Merged." + elif blocking := batch.prs.filtered( + lambda p: p.error or p.draft or not (p.squash or p.merge_method) + ): + batch.blocked = "Pull request(s) %s blocked." % ( + p.display_name for p in blocking + ) + elif not batch.skipchecks and (unready := batch.prs.filtered( + lambda p: not (p.reviewed_by and p.status == "success") + )): + batch.blocked = "Pull request(s) %s waiting for CI." % ', '.join( + p.display_name for p in unready + ) + else: + batch.blocked = False diff --git a/runbot_merge/models/project_freeze/__init__.py b/runbot_merge/models/project_freeze/__init__.py index 7215d762..4114c944 100644 --- a/runbot_merge/models/project_freeze/__init__.py +++ b/runbot_merge/models/project_freeze/__init__.py @@ -218,7 +218,8 @@ class FreezeWizard(models.Model): } for repo, copy in repos.items(): copy.fetch(git.source_url(repo, 'github'), '+refs/heads/*:refs/heads/*') - for pr in self.release_pr_ids.pr_id | self.bump_pr_ids.pr_id: + all_prs = self.release_pr_ids.pr_id | self.bump_pr_ids.pr_id + for pr in all_prs: repos[pr.repository].fetch( git.source_url(pr.repository, 'github'), pr.head, @@ -338,7 +339,15 @@ class FreezeWizard(models.Model): f"Unable to {reason} branch {repo}:{branch}.{addendum}" ) - all_prs = self.release_pr_ids.pr_id | self.bump_pr_ids.pr_id + b = self.env['runbot_merge.branch'].search([('name', '=', self.branch_name)]) + self.env.cr.execute( + "UPDATE runbot_merge_batch SET 
target=%s WHERE id = %s;" + "UPDATE runbot_merge_pull_requests SET target=%s WHERE id = any(%s)", + [ + b.id, self.release_pr_ids.pr_id.batch_id.id, + b.id, self.release_pr_ids.pr_id.ids, + ] + ) all_prs.batch_id.merge_date = fields.Datetime.now() all_prs.reviewed_by = self.env.user.partner_id.id self.env['runbot_merge.pull_requests.feedback'].create([{ diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index 74f7dbe7..931ade1c 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -1094,15 +1094,12 @@ class PullRequests(models.Model): if canceler: fields.append('state') fields.append('skipchecks') - if 'target' in vals: - fields.append('target') - if 'message' in vals: - fields.append('message') prev = {pr.id: {field: pr[field] for field in fields} for pr in self} + # when explicitly marking a PR as ready if vals.get('state') == 'ready': # skip checks anyway - vals['skipchecks'] = True + self.batch_id.skipchecks = True # if the state is forced to ready, set current user as reviewer # and override all statuses vals.setdefault('reviewed_by', self.env.user.partner_id.id) @@ -1117,6 +1114,20 @@ class PullRequests(models.Model): ('prs', '=', True), ]) })) + + for pr in self: + if (t := vals.get('target')) is not None and pr.target.id != t: + pr.unstage( + "target (base) branch was changed from %r to %r", + pr.target.display_name, + self.env['runbot_merge.branch'].browse(t).display_name, + ) + + if 'message' in vals: + merge_method = vals['merge_method'] if 'merge_method' in vals else pr.merge_method + if merge_method not in (False, 'rebase-ff') and pr.message != vals['message']: + pr.unstage("merge message updated") + match vals.get('closed'): case True if not self.closed: vals['reviewed_by'] = False @@ -1128,6 +1139,13 @@ class PullRequests(models.Model): target=vals.get('target') or self.target.id, label=vals.get('label') or self.label, ) + case None if (new_target := vals.get('target')) 
and new_target != self.target.id: + vals['batch_id'] = self._get_batch( + target=new_target, + label=vals.get('label') or self.label, + ) + if len(self.batch_id.prs) == 1: + self.env.cr.precommit.add(self.batch_id.unlink) w = super().write(vals) newhead = vals.get('head') @@ -1135,29 +1153,20 @@ class PullRequests(models.Model): c = self.env['runbot_merge.commit'].search([('sha', '=', newhead)]) self._validate(c.statuses or '{}') - for pr in self: - old = prev[pr.id] - if canceler: + if canceler: + for pr in self: + if not pr.cancel_staging: + continue + + old = prev[pr.id] def ready(pr): return pr['state'] == 'ready'\ or (pr['skipchecks'] and pr['state'] != 'error') - if pr.cancel_staging and not ready(old) and ready(pr): - if old['state'] == 'error': # error -> ready gets a bespok message + if ready(pr) and not ready(old): + if old['state'] == 'error': # error -> ready gets a bespoke message pr.target.active_staging_id.cancel(f"retrying {pr.display_name}") else: pr.target.active_staging_id.cancel(f"{pr.display_name} became ready") - - if 'target' in vals: - old_target = old['target'] - if pr.target != old_target: - pr.unstage( - "target (base) branch was changed from %r to %r", - old_target.display_name, pr.target.display_name, - ) - - if 'message' in vals: - if pr.merge_method not in (False, 'rebase-ff') and pr.message != old['message']: - pr.unstage("merge message updated") return w def _check_linked_prs_statuses(self, commit=False): diff --git a/runbot_merge/models/stagings_create.py b/runbot_merge/models/stagings_create.py index f7f6f971..fc893937 100644 --- a/runbot_merge/models/stagings_create.py +++ b/runbot_merge/models/stagings_create.py @@ -6,6 +6,7 @@ import json import logging import os import re +from collections.abc import Mapping from difflib import Differ from itertools import takewhile from operator import itemgetter @@ -14,7 +15,7 @@ from typing import Dict, Union, Optional, Literal, Callable, Iterator, Tuple, Li from werkzeug.datastructures 
import Headers from odoo import api, models, fields -from odoo.tools import OrderedSet +from odoo.tools import OrderedSet, groupby from .pull_requests import Branch, Stagings, PullRequests, Repository from .batch import Batch from .. import exceptions, utils, github, git @@ -57,76 +58,63 @@ def try_staging(branch: Branch) -> Optional[Stagings]: if branch.active_staging_id: return None - rows = [ - (p, prs) - for p, prs in ready_prs(for_branch=branch) - if not any(prs.mapped('blocked')) - ] - - def log(label, batches): - _logger.info(label, ', '.join( - p.display_name - for batch in batches - for p in batch - )) + def log(label: str, batches: Batch) -> None: + _logger.info(label, ', '.join(batches.mapped('prs.display_name'))) + rows = ready_prs(for_branch=branch) priority = rows[0][0] if rows else None + concat = branch.env['runbot_merge.batch'].concat if priority == 'alone': - batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] - log("staging high-priority PRs %s", batched_prs) + batches: Batch = concat(*(batch for _, batch in takewhile(lambda r: r[0] == priority, rows))) + log("staging high-priority PRs %s", batches) elif branch.project_id.staging_priority == 'default': - if branch.split_ids: - split_ids = branch.split_ids[0] - batched_prs = [batch.prs for batch in split_ids.batch_ids] - split_ids.unlink() - log("staging split PRs %s (prioritising splits)", batched_prs) + if split := branch.split_ids[:1]: + batches = split.batch_ids + split.unlink() + log("staging split PRs %s (prioritising splits)", batches) else: # priority, normal; priority = sorted ahead of normal, so always picked # first as long as there's room - batched_prs = [pr_ids for _, pr_ids in rows] - log("staging ready PRs %s (prioritising splits)", batched_prs) + batches = concat(*(batch for _, batch in rows)) + log("staging ready PRs %s (prioritising splits)", batches) elif branch.project_id.staging_priority == 'ready': # splits are ready by definition, we need to 
exclude them from the # ready rows otherwise we'll re-stage the splits so if an error is legit # we cycle forever # FIXME: once the batches are less shit, store this durably on the # batches and filter out when fetching readies (?) - split_batches = {batch.prs for batch in branch.split_ids.batch_ids} - ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches] + batches = concat(*(batch for _, batch in rows)) - branch.split_ids.batch_ids - if ready: - batched_prs = ready - log("staging ready PRs %s (prioritising ready)", batched_prs) + if batches: + log("staging ready PRs %s (prioritising ready)", batches) else: - split_ids = branch.split_ids[:1] - batched_prs = [batch.prs for batch in split_ids.batch_ids] - split_ids.unlink() - log("staging split PRs %s (prioritising ready)", batched_prs) + split = branch.split_ids[:1] + batches = split.batch_ids + split.unlink() + log("staging split PRs %s (prioritising ready)", batches) else: assert branch.project_id.staging_priority == 'largest' # splits are ready by definition, we need to exclude them from the # ready rows otherwise ready always wins but we re-stage the splits, so # if an error is legit we'll cycle forever - split_batches = {batch.prs for batch in branch.split_ids.batch_ids} - ready = [pr_ids for _, pr_ids in rows if pr_ids not in split_batches] + batches = concat(*(batch for _, batch in rows)) - branch.split_ids.batch_ids maxsplit = max(branch.split_ids, key=lambda s: len(s.batch_ids), default=branch.env['runbot_merge.split']) - _logger.info("largest split = %d, ready = %d", len(maxsplit.batch_ids), len(ready)) + _logger.info("largest split = %d, ready = %d", len(maxsplit.batch_ids), len(batches)) # bias towards splits if len(ready) = len(batch_ids) - if len(maxsplit.batch_ids) >= len(ready): - batched_prs = [batch.prs for batch in maxsplit.batch_ids] + if len(maxsplit.batch_ids) >= len(batches): + batches = maxsplit.batch_ids maxsplit.unlink() - log("staging split PRs %s (prioritising largest)", 
batched_prs) + log("staging split PRs %s (prioritising largest)", batches) else: - batched_prs = ready - log("staging ready PRs %s (prioritising largest)", batched_prs) + log("staging ready PRs %s (prioritising largest)", batches) - if not batched_prs: + if not batches: return - original_heads, staging_state = staging_setup(branch, batched_prs) + original_heads, staging_state = staging_setup(branch, batches) - staged = stage_batches(branch, batched_prs, staging_state) + staged = stage_batches(branch, batches, staging_state) if not staged: return None @@ -216,32 +204,25 @@ For-Commit-Id: {it.head} return st -def ready_prs(for_branch: Branch) -> List[Tuple[int, PullRequests]]: +def ready_prs(for_branch: Branch) -> List[Tuple[int, Batch]]: env = for_branch.env env.cr.execute(""" SELECT max(pr.priority) as priority, - array_agg(pr.id) AS match - FROM runbot_merge_pull_requests pr - JOIN runbot_merge_batch b ON (b.id = pr.batch_id) - WHERE pr.target = any(%s) - -- exclude terminal states (so there's no issue when - -- deleting branches & reusing labels) - AND pr.state != 'merged' - AND pr.state != 'closed' + b.id as batch + FROM runbot_merge_batch b + JOIN runbot_merge_pull_requests pr ON (b.id = pr.batch_id) + WHERE b.target = %s AND b.blocked IS NULL GROUP BY b.id - HAVING - bool_and(pr.state = 'ready') - OR (bool_or(b.skipchecks) AND bool_and(pr.state != 'error')) - ORDER BY max(pr.priority) DESC, min(pr.id) - """, [for_branch.ids]) - browse = env['runbot_merge.pull_requests'].browse - return [(p, browse(ids)) for p, ids in env.cr.fetchall()] + ORDER BY max(pr.priority) DESC, min(b.id) + """, [for_branch.id]) + browse = env['runbot_merge.batch'].browse + return [(p, browse(id)) for p, id in env.cr.fetchall()] def staging_setup( target: Branch, - batched_prs: List[PullRequests], + batches: Batch, ) -> Tuple[Dict[Repository, str], StagingState]: """Sets up the staging: @@ -249,7 +230,9 @@ def staging_setup( - creates tmp branch via gh API (to remove) - generates 
working copy for each repository with the target branch """ - all_prs: PullRequests = target.env['runbot_merge.pull_requests'].concat(*batched_prs) + by_repo: Mapping[Repository, List[PullRequests]] = \ + dict(groupby(batches.prs, lambda p: p.repository)) + staging_state = {} original_heads = {} for repo in target.project_id.repo_ids.having_branch(target): @@ -265,7 +248,7 @@ def staging_setup( # be hooked only to "proper" remote-tracking branches # (in `refs/remotes`), it doesn't seem to work here f'+refs/heads/{target.name}:refs/heads/{target.name}', - *(pr.head for pr in all_prs if pr.repository == repo) + *(pr.head for pr in by_repo.get(repo, [])) ) original_heads[repo] = head staging_state[repo] = StagingSlice(gh=gh, head=head, repo=source.stdout().with_config(text=True, check=False)) @@ -273,18 +256,15 @@ def staging_setup( return original_heads, staging_state -def stage_batches(branch: Branch, batched_prs: List[PullRequests], staging_state: StagingState) -> Stagings: +def stage_batches(branch: Branch, batches: Batch, staging_state: StagingState) -> Stagings: batch_limit = branch.project_id.batch_limit env = branch.env staged = env['runbot_merge.batch'] - for batch in batched_prs: + for batch in batches: if len(staged) >= batch_limit: break - - assert len(bs := {p.batch_id for p in batch}) == 1,\ - f"expected all PRs to have the same batch, got {bs}" try: - staged |= stage_batch(env, batch[0].batch_id, staging_state) + staged |= stage_batch(env, batch, staging_state) except exceptions.MergeError as e: pr = e.args[0] _logger.info("Failed to stage %s into %s", pr.display_name, branch.name, exc_info=True) diff --git a/runbot_merge/tests/test_basic.py b/runbot_merge/tests/test_basic.py index 2d0bc440..4ca95c60 100644 --- a/runbot_merge/tests/test_basic.py +++ b/runbot_merge/tests/test_basic.py @@ -773,6 +773,7 @@ class TestPREdition: with repo: prx.base = '1.0' assert pr.target == branch_1 assert not pr.staging_id, "updated the base of a staged PR should have 
unstaged it" + assert st.state == 'cancelled', f"expected cancellation, got {st.state}" assert st.reason == f"{pr.display_name} target (base) branch was changed from 'master' to '1.0'" with repo: prx.base = '2.0'