From 86a1b5523e91f1c2a1e08d781f73b8906fefaa38 Mon Sep 17 00:00:00 2001
From: Xavier Morel
Date: Wed, 16 Aug 2023 15:31:42 +0200
Subject: [PATCH] [MOV] runbot_merge: all the staging creation code to a
 separate module

Move *almost* all of the staging code to free functions in a separate
module, and type it extensively.

The only bits which didn't move are:

- the entry point (the cron hook), because it has to be a model method
  in order to be called
- the `_build_merge_message` method, because it needs to be overridable

There's also a bit of an import mess: the cron and
`_build_merge_message` need to call into the new module, but the new
module wants the types they belong to, so the dependency is somewhat
circular.
---
 forwardport/models/project.py          |   3 +-
 runbot_merge/github.py                 |  45 +-
 runbot_merge/models/__init__.py        |   1 +
 runbot_merge/models/project.py         |   7 +-
 runbot_merge/models/pull_requests.py   | 676 +----------------------
 runbot_merge/models/stagings_create.py | 727 ++++++++++++++++++++++++
 runbot_merge/tests/test_multirepo.py   |   8 -
 7 files changed, 791 insertions(+), 676 deletions(-)
 create mode 100644 runbot_merge/models/stagings_create.py

diff --git a/forwardport/models/project.py b/forwardport/models/project.py
index 573d0086..b71a08f5 100644
--- a/forwardport/models/project.py
+++ b/forwardport/models/project.py
@@ -37,6 +37,7 @@ from odoo.tools.sql import reverse_order
 from odoo.tools.appdirs import user_cache_dir
 from odoo.addons.runbot_merge import git, utils
 from odoo.addons.runbot_merge.models.pull_requests import RPLUS
+from odoo.addons.runbot_merge.models.stagings_create import Message
 
 footer = '\nMore info at https://github.com/odoo/odoo/wiki/Mergebot#forward-port\n'
 
@@ -1064,7 +1065,7 @@ stderr:
 
     def _make_fp_message(self, commit):
         cmap = json.loads(self.commits_map)
-        msg = self._parse_commit_message(commit['commit']['message'])
+        msg = Message.from_message(commit['commit']['message'])
         # write the *merged* commit as "original", not the PR's
         msg.headers['x-original-commit'] = cmap.get(commit['sha'], commit['sha'])
         # don't stringify so caller can still perform alterations
diff --git a/runbot_merge/github.py b/runbot_merge/github.py
index ed3950b0..3fdec209 100644
--- a/runbot_merge/github.py
+++ b/runbot_merge/github.py
@@ -8,6 +8,7 @@ import pathlib
 import pprint
 import time
 import unicodedata
+from typing import Iterable, List, TypedDict, Literal
 
 import requests
 import werkzeug.urls
@@ -47,6 +48,42 @@ def _init_gh_logger():
 if odoo.netsvc._logger_init:
     _init_gh_logger()
 
+SimpleUser = TypedDict('SimpleUser', {
+    'login': str,
+    'url': str,
+    'type': Literal['User', 'Organization'],
+})
+Authorship = TypedDict('Authorship', {
+    'name': str,
+    'email': str,
+})
+Commit = TypedDict('Commit', {
+    'tree': str,
+    'url': str,
+    'message': str,
+    # optional when creating a commit
+    'author': Authorship,
+    'committer': Authorship,
+    'comments_count': int,
+})
+CommitLink = TypedDict('CommitLink', {
+    'html_url': str,
+    'sha': str,
+    'url': str,
+})
+PrCommit = TypedDict('PrCommit', {
+    'url': str,
+    'sha': str,
+    'commit': Commit,
+    # optional when creating a commit (in which case it uses the current user)
+    'author': SimpleUser,
+    'committer': SimpleUser,
+    'parents': List[CommitLink],
+    # not actually true but we're smuggling stuff via that key
+    'new_tree': str,
+})
+
+
 GH_LOG_PATTERN = """=> {method} {path}{qs}{body}
 
 <= {r.status_code} {r.reason}
@@ -137,7 +174,7 @@ class GH(object):
             r.raise_for_status()
             return r.json()
 
-    def head(self, branch):
+    def head(self, branch: str) -> str:
         d = 
utils.backoff( lambda: self('get', 'git/refs/heads/{}'.format(branch)).json(), exc=requests.HTTPError @@ -383,14 +420,14 @@ class GH(object): if not r.links.get('next'): return - def commits_lazy(self, pr): + def commits_lazy(self, pr: int) -> Iterable[PrCommit]: for page in itertools.count(1): - r = self('get', 'pulls/{}/commits'.format(pr), params={'page': page}) + r = self('get', f'pulls/{pr}/commits', params={'page': page}) yield from r.json() if not r.links.get('next'): return - def commits(self, pr): + def commits(self, pr: int) -> List[PrCommit]: """ Returns a PR's commits oldest first (that's what GH does & is what we want) """ diff --git a/runbot_merge/models/__init__.py b/runbot_merge/models/__init__.py index 9ca405e4..6cbd92cf 100644 --- a/runbot_merge/models/__init__.py +++ b/runbot_merge/models/__init__.py @@ -3,5 +3,6 @@ from . import res_partner from . import project from . import pull_requests from . import project_freeze +from . import stagings_create from . import staging_cancel from . import crons diff --git a/runbot_merge/models/project.py b/runbot_merge/models/project.py index 8578aeb5..67222ea8 100644 --- a/runbot_merge/models/project.py +++ b/runbot_merge/models/project.py @@ -32,8 +32,9 @@ class Project(models.Model): required=True, default="hanson", # mergebot du bot du bot du~ help="Prefix (~bot name) used when sending commands from PR " - "comments e.g. [hanson retry] or [hanson r+ p=1]" + "comments e.g. [hanson retry] or [hanson r+ p=1]", ) + github_name = fields.Char(related='github_prefix') batch_limit = fields.Integer( default=8, group_operator=None, help="Maximum number of PRs staged together") @@ -64,6 +65,8 @@ class Project(models.Model): self.env.cr.commit() def _create_stagings(self, commit=False): + from .stagings_create import try_staging + # look up branches which can be staged on and have no active staging for branch in self.env['runbot_merge.branch'].search([ ('active_staging_id', '=', False), @@ -74,7 +77,7 @@ class Project(models.Model): with self.env.cr.savepoint(), \ sentry_sdk.start_span(description=f'create staging {branch.name}') as span: span.set_tag('branch', branch.name) - branch.try_staging() + try_staging(branch) except Exception: _logger.exception("Failed to create staging for branch %r", branch.name) else: diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index 0005704b..a392b880 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -1,37 +1,24 @@ -# coding: utf-8 - import ast -import base64 import collections import contextlib import datetime -import io import itertools import json import logging -import os import pprint import re import time +from typing import Optional, Union -from difflib import Differ -from itertools import takewhile -from typing import Optional - -import requests import sentry_sdk import werkzeug -from werkzeug.datastructures import Headers from odoo import api, fields, models, tools from odoo.exceptions import ValidationError from odoo.osv import expression -from odoo.tools import OrderedSet from .. 
import github, exceptions, controllers, utils -WAIT_FOR_VISIBILITY = [10, 10, 10, 10] - _logger = logging.getLogger(__name__) @@ -67,6 +54,8 @@ class Repository(models.Model): _name = _description = 'runbot_merge.repository' _order = 'sequence, id' + id: int + sequence = fields.Integer(default=50, group_operator=None) name = fields.Char(required=True) project_id = fields.Many2one('runbot_merge.project', required=True, index=True) @@ -98,7 +87,7 @@ All substitutions are tentatively applied sequentially to the input. vals['status_ids'] = [(5, 0, {})] + [(0, 0, {'context': c}) for c in st.split(',')] return super().write(vals) - def github(self, token_field='github_token'): + def github(self, token_field='github_token') -> github.GH: return github.GH(self.project_id[token_field], self.name) def _auto_init(self): @@ -245,6 +234,8 @@ class Branch(models.Model): _name = _description = 'runbot_merge.branch' _order = 'sequence, name' + id: int + name = fields.Char(required=True) project_id = fields.Many2one('runbot_merge.project', required=True, index=True) @@ -298,235 +289,6 @@ class Branch(models.Model): for b in self: b.active_staging_id = b.with_context(active_test=True).staging_ids - def _ready(self): - self.env.cr.execute(""" - SELECT - min(pr.priority) as priority, - array_agg(pr.id) AS match - FROM runbot_merge_pull_requests pr - WHERE pr.target = any(%s) - -- exclude terminal states (so there's no issue when - -- deleting branches & reusing labels) - AND pr.state != 'merged' - AND pr.state != 'closed' - GROUP BY - pr.target, - CASE - WHEN pr.label SIMILAR TO '%%:patch-[[:digit:]]+' - THEN pr.id::text - ELSE pr.label - END - HAVING - bool_or(pr.state = 'ready') or bool_or(pr.priority = 0) - ORDER BY min(pr.priority), min(pr.id) - """, [self.ids]) - browse = self.env['runbot_merge.pull_requests'].browse - return [(p, browse(ids)) for p, ids in self.env.cr.fetchall()] - - def _stageable(self): - return [ - (p, prs) - for p, prs in self._ready() - if not any(prs.mapped('blocked')) - ] - - def try_staging(self): - """ Tries to create a staging if the current branch does not already - have one. Returns None if the branch already has a staging or there - is nothing to stage, the newly created staging otherwise. - """ - logger = _logger.getChild('cron') - - logger.info( - "Checking %s (%s) for staging: %s, skip? %s", - self, self.name, - self.active_staging_id, - bool(self.active_staging_id) - ) - if self.active_staging_id: - return - - rows = self._stageable() - priority = rows[0][0] if rows else -1 - if priority == 0 or priority == 1: - # p=0 take precedence over all else - # p=1 allows merging a fix inside / ahead of a split (e.g. 
branch - # is broken or widespread false positive) without having to cancel - # the existing staging - batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] - elif self.split_ids: - split_ids = self.split_ids[0] - logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs')) - batched_prs = [batch.prs for batch in split_ids.batch_ids] - split_ids.unlink() - else: # p=2 - batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] - - if not batched_prs: - return - - Batch = self.env['runbot_merge.batch'] - staged = Batch - original_heads = {} - meta = {repo: {} for repo in self.project_id.repo_ids.having_branch(self)} - for repo, it in meta.items(): - gh = it['gh'] = repo.github() - it['head'] = original_heads[repo] = gh.head(self.name) - # create tmp staging branch - gh.set_ref('tmp.{}'.format(self.name), it['head']) - - batch_limit = self.project_id.batch_limit - first = True - for batch in batched_prs: - if len(staged) >= batch_limit: - break - try: - staged |= Batch.stage(meta, batch) - except exceptions.MergeError as e: - pr = e.args[0] - _logger.exception("Failed to merge %s into staging branch", pr.display_name) - if first or isinstance(e, exceptions.Unmergeable): - if len(e.args) > 1 and e.args[1]: - reason = e.args[1] - else: - reason = e.__cause__ or e.__context__ - # if the reason is a json document, assume it's a github - # error and try to extract the error message to give it to - # the user - with contextlib.suppress(Exception): - reason = json.loads(str(reason))['message'].lower() - - pr.state = 'error' - self.env.ref('runbot_merge.pr.merge.failed')._send( - repository=pr.repository, - pull_request=pr.number, - format_args= {'pr': pr, 'reason': reason, 'exc': e}, - ) - else: - first = False - - if not staged: - return - - heads = [] - heads_map = {} - commits = [] - for repo, it in meta.items(): - tree = it['gh'].commit(it['head'])['tree'] - # ensures staging branches are unique and always - # rebuilt - r = base64.b64encode(os.urandom(12)).decode('ascii') - trailer = '' - if heads_map: - trailer = '\n'.join( - 'Runbot-dependency: %s:%s' % (repo, h) - for repo, h in heads_map.items() - ) - dummy_head = {'sha': it['head']} - if it['head'] == original_heads[repo]: - # if the repo has not been updated by the staging, create a - # dummy commit to force rebuild - dummy_head = it['gh']('post', 'git/commits', json={ - 'message': '''force rebuild - -uniquifier: %s -For-Commit-Id: %s -%s''' % (r, it['head'], trailer), - 'tree': tree['sha'], - 'parents': [it['head']], - }).json() - - # special case if the two commits are identical because otherwise - # postgres raises error "ensure that no rows proposed for insertion - # within the same command have duplicate constained values" - if it['head'] == dummy_head['sha']: - self.env.cr.execute( - "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " - "VALUES (%s, true, '{}') " - "ON CONFLICT (sha) DO UPDATE SET to_check=true " - "RETURNING id", - [it['head']] - ) - [commit] = [head] = self.env.cr.fetchone() - else: - self.env.cr.execute( - "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " - "VALUES (%s, false, '{}'), (%s, true, '{}') " - "ON CONFLICT (sha) DO UPDATE SET to_check=true " - "RETURNING id", - [it['head'], dummy_head['sha']] - ) - ([commit], [head]) = self.env.cr.fetchall() - - heads_map[repo.name] = dummy_head['sha'] - heads.append(fields.Command.create({ - 'repository_id': repo.id, - 'commit_id': head, - })) - 
commits.append(fields.Command.create({ - 'repository_id': repo.id, - 'commit_id': commit, - })) - - # create actual staging object - st = self.env['runbot_merge.stagings'].create({ - 'target': self.id, - 'batch_ids': [(4, batch.id, 0) for batch in staged], - 'heads': heads, - 'commits': commits, - }) - # create staging branch from tmp - token = self.project_id.github_token - for r in self.project_id.repo_ids.having_branch(self): - it = meta[r] - staging_head = heads_map[r.name] - _logger.info( - "%s: create staging for %s:%s at %s", - self.project_id.name, r.name, self.name, - staging_head - ) - refname = 'staging.{}'.format(self.name) - it['gh'].set_ref(refname, staging_head) - - i = itertools.count() - @utils.backoff(delays=WAIT_FOR_VISIBILITY, exc=TimeoutError) - def wait_for_visibility(): - if self._check_visibility(r, refname, staging_head, token): - _logger.info( - "[repo] updated %s:%s to %s: ok (at %d/%d)", - r.name, refname, staging_head, - next(i), len(WAIT_FOR_VISIBILITY) - ) - return - _logger.warning( - "[repo] updated %s:%s to %s: failed (at %d/%d)", - r.name, refname, staging_head, - next(i), len(WAIT_FOR_VISIBILITY) - ) - raise TimeoutError("Staged head not updated after %d seconds" % sum(WAIT_FOR_VISIBILITY)) - - logger.info("Created staging %s (%s) to %s", st, ', '.join( - '%s[%s]' % (batch, batch.prs) - for batch in staged - ), st.target.name) - return st - - def _check_visibility(self, repo, branch_name, expected_head, token): - """ Checks the repository actual to see if the new / expected head is - now visible - """ - # v1 protocol provides URL for ref discovery: https://github.com/git/git/blob/6e0cc6776106079ed4efa0cc9abace4107657abf/Documentation/technical/http-protocol.txt#L187 - # for more complete client this is also the capabilities discovery and - # the "entry point" for the service - url = 'https://github.com/{}.git/info/refs?service=git-upload-pack'.format(repo.name) - with requests.get(url, stream=True, auth=(token, '')) as resp: - if not resp.ok: - return False - for head, ref in parse_refs_smart(resp.raw.read): - if ref != ('refs/heads/' + branch_name): - continue - return head == expected_head - return False ACL = collections.namedtuple('ACL', 'is_admin is_reviewer is_author') class PullRequests(models.Model): @@ -534,6 +296,9 @@ class PullRequests(models.Model): _order = 'number desc' _rec_name = 'number' + id: int + display_name: str + target = fields.Many2one('runbot_merge.branch', required=True, index=True) repository = fields.Many2one('runbot_merge.repository', required=True) # NB: check that target & repo have same project & provide project related? @@ -1265,35 +1030,14 @@ class PullRequests(models.Model): if commit: self.env.cr.commit() - def _parse_commit_message(self, message): - """ Parses a commit message to split out the pseudo-headers (which - should be at the end) from the body, and serialises back with a - predefined pseudo-headers ordering. 
- """ - return Message.from_message(message) - - def _is_mentioned(self, message, *, full_reference=False): - """Returns whether ``self`` is mentioned in ``message``` - - :param str | PullRequest message: - :param bool full_reference: whether the repository name must be present - :rtype: bool - """ - if full_reference: - pattern = fr'\b{re.escape(self.display_name)}\b' - else: - repository = self.repository.name # .replace('/', '\\/') - pattern = fr'( |\b{repository})#{self.number}\b' - return bool(re.search(pattern, message if isinstance(message, str) else message.message)) - - def _build_merge_message(self, message, related_prs=()): + def _build_merge_message(self, message: Union['PullRequests', str], related_prs=()) -> 'Message': # handle co-authored commits (https://help.github.com/articles/creating-a-commit-with-multiple-authors/) - m = self._parse_commit_message(message) - if not self._is_mentioned(message): - m.body += '\n\ncloses {pr.display_name}'.format(pr=self) + m = Message.from_message(message) + if not is_mentioned(message, self): + m.body += f'\n\ncloses {self.display_name}' for r in related_prs: - if not r._is_mentioned(message, full_reference=True): + if not is_mentioned(message, r, full_reference=True): m.headers.add('Related', r.display_name) if self.reviewed_by: @@ -1301,190 +1045,6 @@ class PullRequests(models.Model): return m - def _add_self_references(self, commits): - """Adds a footer reference to ``self`` to all ``commits`` if they don't - already refer to the PR. - """ - for c in (c['commit'] for c in commits): - if not self._is_mentioned(c['message']): - m = self._parse_commit_message(c['message']) - m.headers.pop('Part-Of', None) - m.headers.add('Part-Of', self.display_name) - c['message'] = str(m) - - def _stage(self, gh, target, related_prs=()): - # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1] - _, prdict = gh.pr(self.number) - commits = prdict['commits'] - method = self.merge_method or ('rebase-ff' if commits == 1 else None) - if commits > 50 and method.startswith('rebase'): - raise exceptions.Unmergeable(self, "Rebasing 50 commits is too much.") - if commits > 250: - raise exceptions.Unmergeable( - self, "Merging PRs of 250 or more commits is not supported " - "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" - ) - pr_commits = gh.commits(self.number) - for c in pr_commits: - if not (c['commit']['author']['email'] and c['commit']['committer']['email']): - raise exceptions.Unmergeable( - self, - f"All commits must have author and committer email, " - f"missing email on {c['sha']} indicates the authorship is " - f"most likely incorrect." 
- ) - - # sync and signal possibly missed updates - invalid = {} - diff = [] - pr_head = pr_commits[-1]['sha'] - if self.head != pr_head: - invalid['head'] = pr_head - diff.append(('Head', self.head, pr_head)) - - if self.target.name != prdict['base']['ref']: - branch = self.env['runbot_merge.branch'].with_context(active_test=False).search([ - ('name', '=', prdict['base']['ref']), - ('project_id', '=', self.repository.project_id.id), - ]) - if not branch: - self.unlink() - raise exceptions.Unmergeable(self, "While staging, found this PR had been retargeted to an un-managed branch.") - invalid['target'] = branch.id - diff.append(('Target branch', self.target.name, branch.name)) - - if self.squash != commits == 1: - invalid['squash'] = commits == 1 - diff.append(('Single commit', self.squash, commits == 1)) - - msg = utils.make_message(prdict) - if self.message != msg: - invalid['message'] = msg - diff.append(('Message', self.message, msg)) - - if invalid: - self.write({**invalid, 'state': 'opened', 'head': pr_head}) - raise exceptions.Mismatch(invalid, diff) - - if self.reviewed_by and self.reviewed_by.name == self.reviewed_by.github_login: - # XXX: find other trigger(s) to sync github name? - gh_name = gh.user(self.reviewed_by.github_login)['name'] - if gh_name: - self.reviewed_by.name = gh_name - - # NOTE: lost merge v merge/copy distinction (head being - # a merge commit reused instead of being re-merged) - return method, getattr(self, '_stage_' + method.replace('-', '_'))( - gh, target, pr_commits, related_prs=related_prs) - - def _stage_squash(self, gh, target, commits, related_prs=()): - msg = self._build_merge_message(self, related_prs=related_prs) - authorship = {} - - authors = { - (c['commit']['author']['name'], c['commit']['author']['email']) - for c in commits - } - if len(authors) == 1: - name, email = authors.pop() - authorship['author'] = {'name': name, 'email': email} - else: - msg.headers.extend(sorted( - ('Co-Authored-By', "%s <%s>" % author) - for author in authors - )) - - committers = { - (c['commit']['committer']['name'], c['commit']['committer']['email']) - for c in commits - } - if len(committers) == 1: - name, email = committers.pop() - authorship['committer'] = {'name': name, 'email': email} - # should committers also be added to co-authors? 
- - original_head = gh.head(target) - merge_tree = gh.merge(self.head, target, 'temp merge')['tree']['sha'] - head = gh('post', 'git/commits', json={ - **authorship, - 'message': str(msg), - 'tree': merge_tree, - 'parents': [original_head], - }).json()['sha'] - gh.set_ref(target, head) - - commits_map = {c['sha']: head for c in commits} - commits_map[''] = head - self.commits_map = json.dumps(commits_map) - - return head - - def _stage_rebase_ff(self, gh, target, commits, related_prs=()): - # updates head commit with PR number (if necessary) then rebases - # on top of target - msg = self._build_merge_message(commits[-1]['commit']['message'], related_prs=related_prs) - commits[-1]['commit']['message'] = str(msg) - self._add_self_references(commits[:-1]) - head, mapping = gh.rebase(self.number, target, commits=commits) - self.commits_map = json.dumps({**mapping, '': head}) - return head - - def _stage_rebase_merge(self, gh, target, commits, related_prs=()): - self._add_self_references(commits) - h, mapping = gh.rebase(self.number, target, reset=True, commits=commits) - msg = self._build_merge_message(self, related_prs=related_prs) - merge_head = gh.merge(h, target, str(msg))['sha'] - self.commits_map = json.dumps({**mapping, '': merge_head}) - return merge_head - - def _stage_merge(self, gh, target, commits, related_prs=()): - pr_head = commits[-1] # oldest to newest - base_commit = None - head_parents = {p['sha'] for p in pr_head['parents']} - if len(head_parents) > 1: - # look for parent(s?) of pr_head not in PR, means it's - # from target (so we merged target in pr) - merge = head_parents - {c['sha'] for c in commits} - external_parents = len(merge) - if external_parents > 1: - raise exceptions.Unmergeable( - "The PR head can only have one parent from the base branch " - "(not part of the PR itself), found %d: %s" % ( - external_parents, - ', '.join(merge) - )) - if external_parents == 1: - [base_commit] = merge - - commits_map = {c['sha']: c['sha'] for c in commits} - if base_commit: - # replicate pr_head with base_commit replaced by - # the current head - original_head = gh.head(target) - merge_tree = gh.merge(pr_head['sha'], target, 'temp merge')['tree']['sha'] - new_parents = [original_head] + list(head_parents - {base_commit}) - msg = self._build_merge_message(pr_head['commit']['message'], related_prs=related_prs) - copy = gh('post', 'git/commits', json={ - 'message': str(msg), - 'tree': merge_tree, - 'author': pr_head['commit']['author'], - 'committer': pr_head['commit']['committer'], - 'parents': new_parents, - }).json() - gh.set_ref(target, copy['sha']) - # merge commit *and old PR head* map to the pr head replica - commits_map[''] = commits_map[pr_head['sha']] = copy['sha'] - self.commits_map = json.dumps(commits_map) - return copy['sha'] - else: - # otherwise do a regular merge - msg = self._build_merge_message(self) - merge_head = gh.merge(self.head, target, str(msg))['sha'] - # and the merge commit is the normal merge head - commits_map[''] = merge_head - self.commits_map = json.dumps(commits_map) - return merge_head - def unstage(self, reason, *args): """ If the PR is staged, cancel the staging. 
If the PR is split and waiting, remove it from the split (possibly delete the split entirely) @@ -2241,82 +1801,6 @@ class Batch(models.Model): raise ValidationError("All prs of a batch must have different target repositories, got a duplicate %s on %s" % (pr.repository, pr)) repos |= pr.repository - def stage(self, meta, prs): - """ - Updates meta[*][head] on success - - :return: () or Batch object (if all prs successfully staged) - """ - new_heads = {} - pr_fields = self.env['runbot_merge.pull_requests']._fields - for pr in prs: - gh = meta[pr.repository]['gh'] - - _logger.info( - "Staging pr %s for target %s; method=%s", - pr.display_name, pr.target.name, - pr.merge_method or (pr.squash and 'single') or None - ) - - target = 'tmp.{}'.format(pr.target.name) - original_head = gh.head(target) - try: - try: - method, new_heads[pr] = pr._stage(gh, target, related_prs=(prs - pr)) - _logger.info( - "Staged pr %s to %s by %s: %s -> %s", - pr.display_name, pr.target.name, method, - original_head, new_heads[pr] - ) - except Exception: - # reset the head which failed, as rebase() may have partially - # updated it (despite later steps failing) - gh.set_ref(target, original_head) - # then reset every previous update - for to_revert in new_heads.keys(): - it = meta[to_revert.repository] - it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head']) - raise - except github.MergeError as e: - raise exceptions.MergeError(pr) from e - except exceptions.Mismatch as e: - def format_items(items): - """ Bit of a pain in the ass because difflib really wants - all lines to be newline-terminated, but not all values are - actual lines, and also needs to split multiline values. - """ - for name, value in items: - yield name + ':\n' - if not value.endswith('\n'): - value += '\n' - yield from value.splitlines(keepends=True) - yield '\n' - - old = list(format_items((n, str(v)) for n, v, _ in e.args[1])) - new = list(format_items((n, str(v)) for n, _, v in e.args[1])) - diff = ''.join(Differ().compare(old, new)) - _logger.info("data mismatch on %s:\n%s", pr.display_name, diff) - self.env.ref('runbot_merge.pr.staging.mismatch')._send( - repository=pr.repository, - pull_request=pr.number, - format_args={ - 'pr': pr, - 'mismatch': ', '.join(pr_fields[f].string for f in e.args[0]), - 'diff': diff, - 'unchecked': ', '.join(pr_fields[f].string for f in UNCHECKABLE) - } - ) - return self.env['runbot_merge.batch'] - - # update meta to new heads - for pr, head in new_heads.items(): - meta[pr.repository]['head'] = head - return self.create({ - 'target': prs[0].target.id, - 'prs': [(4, pr.id, 0) for pr in prs], - }) - -UNCHECKABLE = ['merge_method', 'overrides', 'draft'] class FetchJob(models.Model): _name = _description = 'runbot_merge.fetch_job' @@ -2372,134 +1856,4 @@ def to_status(v): return v return {'state': v, 'target_url': None, 'description': None} -refline = re.compile(rb'([\da-f]{40}) ([^\0\n]+)(\0.*)?\n?$') -ZERO_REF = b'0'*40 -def parse_refs_smart(read): - """ yields pkt-line data (bytes), or None for flush lines """ - def read_line(): - length = int(read(4), 16) - if length == 0: - return None - return read(length - 4) - - header = read_line() - assert header.rstrip() == b'# service=git-upload-pack', header - assert read_line() is None, "failed to find first flush line" - # read lines until second delimiter - for line in iter(read_line, None): - if line.startswith(ZERO_REF): - break # empty list (no refs) - m = refline.match(line) - yield m[1].decode(), m[2].decode() - -BREAK = re.compile(r''' - ^ - [ 
]{0,3} # 0-3 spaces of indentation - # followed by a sequence of three or more matching -, _, or * characters, - # each followed optionally by any number of spaces or tabs - # so needs to start with a _, - or *, then have at least 2 more such - # interspersed with any number of spaces or tabs - ([*_-]) - ([ \t]*\1){2,} - [ \t]* - $ -''', flags=re.VERBOSE) -SETEX_UNDERLINE = re.compile(r''' - ^ - [ ]{0,3} # no more than 3 spaces indentation - [-=]+ # a sequence of = characters or a sequence of - characters - [ ]* # any number of trailing spaces - $ - # we don't care about "a line containing a single -" because we want to - # disambiguate SETEX headings from thematic breaks, and thematic breaks have - # 3+ -. Doesn't look like GH interprets `- - -` as a line so yay... -''', flags=re.VERBOSE) -HEADER = re.compile('^([A-Za-z-]+): (.*)$') -class Message: - @classmethod - def from_message(cls, msg): - in_headers = True - maybe_setex = None - # creating from PR message -> remove content following break - msg, handle_break = (msg, False) if isinstance(msg, str) else (msg.message, True) - headers = [] - body = [] - # don't process the title (first line) of the commit message - msg = msg.splitlines() - for line in reversed(msg[1:]): - if maybe_setex: - # NOTE: actually slightly more complicated: it's a SETEX heading - # only if preceding line(s) can be interpreted as a - # paragraph so e.g. a title followed by a line of dashes - # would indeed be a break, but this should be good enough - # for now, if we need more we'll need a full-blown - # markdown parser probably - if line: # actually a SETEX title -> add underline to body then process current - body.append(maybe_setex) - else: # actually break, remove body then process current - body = [] - maybe_setex = None - - if not line: - if not in_headers and body and body[-1]: - body.append(line) - continue - - if handle_break and BREAK.match(line): - if SETEX_UNDERLINE.match(line): - maybe_setex = line - else: - body = [] - continue - - h = HEADER.match(line) - if h: - # c-a-b = special case from an existing test, not sure if actually useful? 
- if in_headers or h.group(1).lower() == 'co-authored-by': - headers.append(h.groups()) - continue - - body.append(line) - in_headers = False - - # if there are non-title body lines, add a separation after the title - if body and body[-1]: - body.append('') - body.append(msg[0]) - return cls('\n'.join(reversed(body)), Headers(reversed(headers))) - - def __init__(self, body, headers=None): - self.body = body - self.headers = headers or Headers() - - def __setattr__(self, name, value): - # make sure stored body is always stripped - if name == 'body': - value = value and value.strip() - super().__setattr__(name, value) - - def __str__(self): - if not self.headers: - return self.body + '\n' - - with io.StringIO(self.body) as msg: - msg.write(self.body) - msg.write('\n\n') - # https://git.wiki.kernel.org/index.php/CommitMessageConventions - # seems to mostly use capitalised names (rather than title-cased) - keys = list(OrderedSet(k.capitalize() for k in self.headers.keys())) - # c-a-b must be at the very end otherwise github doesn't see it - keys.sort(key=lambda k: k == 'Co-authored-by') - for k in keys: - for v in self.headers.getlist(k): - msg.write(k) - msg.write(': ') - msg.write(v) - msg.write('\n') - - return msg.getvalue() - - def sub(self, pattern, repl, *, flags): - """ Performs in-place replacements on the body - """ - self.body = re.sub(pattern, repl, self.body, flags=flags) +from .stagings_create import is_mentioned, Message diff --git a/runbot_merge/models/stagings_create.py b/runbot_merge/models/stagings_create.py new file mode 100644 index 00000000..b00342dc --- /dev/null +++ b/runbot_merge/models/stagings_create.py @@ -0,0 +1,727 @@ +import base64 +import contextlib +import dataclasses +import io +import json +import logging +import os +import re +import tempfile +from difflib import Differ +from itertools import count, takewhile +from pathlib import Path +from typing import Dict, Union, Optional, Literal, Callable, Iterator, Tuple, List, TypeAlias + +import requests +from werkzeug.datastructures import Headers + +from odoo import api, models, fields +from odoo.tools import OrderedSet +from odoo.tools.appdirs import user_cache_dir +from .pull_requests import Branch, Stagings, PullRequests, Repository, Batch +from .. import exceptions, utils, github, git + +WAIT_FOR_VISIBILITY = [10, 10, 10, 10] +_logger = logging.getLogger(__name__) + + +class Project(models.Model): + _inherit = 'runbot_merge.project' + + +@dataclasses.dataclass(slots=True) +class StagingSlice: + """Staging state for a single repository: + + - gh is a cache for the github proxy object (contains a session for reusing + connection) + - head is the current staging head for the branch of that repo + - working_copy is the local working copy for the staging for that repo + """ + gh: github.GH + head: str + working_copy: git.Repo + + +StagingState: TypeAlias = Dict[Repository, StagingSlice] + +def try_staging(branch: Branch) -> Optional[Stagings]: + """ Tries to create a staging if the current branch does not already + have one. Returns None if the branch already has a staging or there + is nothing to stage, the newly created staging otherwise. + """ + _logger.info( + "Checking %s (%s) for staging: %s, skip? 
%s", + branch, branch.name, + branch.active_staging_id, + bool(branch.active_staging_id) + ) + if branch.active_staging_id: + return None + + rows = [ + (p, prs) + for p, prs in ready_prs(for_branch=branch) + if not any(prs.mapped('blocked')) + ] + if not rows: + return + + priority = rows[0][0] + if priority == 0 or priority == 1: + # p=0 take precedence over all else + # p=1 allows merging a fix inside / ahead of a split (e.g. branch + # is broken or widespread false positive) without having to cancel + # the existing staging + batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] + elif branch.split_ids: + split_ids = branch.split_ids[0] + _logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs')) + batched_prs = [batch.prs for batch in split_ids.batch_ids] + split_ids.unlink() + else: # p=2 + batched_prs = [pr_ids for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] + + with contextlib.ExitStack() as cleanup: + return stage_into(branch, batched_prs, cleanup) + + +def ready_prs(for_branch: Branch) -> List[Tuple[int, PullRequests]]: + env = for_branch.env + env.cr.execute(""" + SELECT + min(pr.priority) as priority, + array_agg(pr.id) AS match + FROM runbot_merge_pull_requests pr + WHERE pr.target = any(%s) + -- exclude terminal states (so there's no issue when + -- deleting branches & reusing labels) + AND pr.state != 'merged' + AND pr.state != 'closed' + GROUP BY + pr.target, + CASE + WHEN pr.label SIMILAR TO '%%:patch-[[:digit:]]+' + THEN pr.id::text + ELSE pr.label + END + HAVING + bool_or(pr.state = 'ready') or bool_or(pr.priority = 0) + ORDER BY min(pr.priority), min(pr.id) + """, [for_branch.ids]) + browse = env['runbot_merge.pull_requests'].browse + return [(p, browse(ids)) for p, ids in env.cr.fetchall()] + + +def stage_into( + branch: Branch, + batched_prs: List[PullRequests], + cleanup: contextlib.ExitStack, +) -> Optional[Stagings]: + original_heads, staging_state = staging_setup(branch, batched_prs, cleanup) + + staged = stage_batches(branch, batched_prs, staging_state) + + if not staged: + return None + + env = branch.env + heads = [] + commits = [] + for repo, it in staging_state.items(): + if it.head != original_heads[repo]: + # if we staged something for that repo, just create a record for + # that commit, or flag existing one as to-recheck in case there are + # already statuses we want to propagate to the staging or something + env.cr.execute( + "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " + "VALUES (%s, true, '{}') " + "ON CONFLICT (sha) DO UPDATE SET to_check=true " + "RETURNING id", + [it.head] + ) + [commit] = [head] = env.cr.fetchone() + else: + # if we didn't stage anything for that repo, create a dummy commit + # (with a uniquifier to ensure we don't hit a previous version of + # the same) to ensure the staging head is new and we're building + # everything + tree = it.gh.commit(it.head)['tree'] + uniquifier = base64.b64encode(os.urandom(12)).decode('ascii') + dummy_head = it.gh('post', 'git/commits', json={ + 'tree': tree['sha'], + 'parents': [it.head], + 'message': f'''\ +force rebuild + +uniquifier: {uniquifier} +For-Commit-Id: {it.head} +''', + }).json()['sha'] + # see above, ideally we don't need to mark the real head as + # `to_check` because it's an old commit but `DO UPDATE` is necessary + # for `RETURNING` to work, and it doesn't really hurt (maybe) + env.cr.execute( + "INSERT INTO runbot_merge_commit (sha, to_check, statuses) " + "VALUES (%s, false, '{}'), (%s, true, '{}') " 
+ "ON CONFLICT (sha) DO UPDATE SET to_check=true " + "RETURNING id", + [it.head, dummy_head] + ) + ([commit], [head]) = env.cr.fetchall() + it.head = dummy_head + + heads.append(fields.Command.create({ + 'repository_id': repo.id, + 'commit_id': head, + })) + commits.append(fields.Command.create({ + 'repository_id': repo.id, + 'commit_id': commit, + })) + + # create actual staging object + st: Stagings = env['runbot_merge.stagings'].create({ + 'target': branch.id, + 'batch_ids': [(4, batch.id, 0) for batch in staged], + 'heads': heads, + 'commits': commits, + }) + # create staging branch from tmp + token = branch.project_id.github_token + for repo in branch.project_id.repo_ids.having_branch(branch): + it = staging_state[repo] + _logger.info( + "%s: create staging for %s:%s at %s", + branch.project_id.name, repo.name, branch.name, + it.head + ) + refname = 'staging.{}'.format(branch.name) + it.gh.set_ref(refname, it.head) + + i = count() + @utils.backoff(delays=WAIT_FOR_VISIBILITY, exc=TimeoutError) + def wait_for_visibility(): + if check_visibility(repo, refname, it.head, token): + _logger.info( + "[repo] updated %s:%s to %s: ok (at %d/%d)", + repo.name, refname, it.head, + next(i), len(WAIT_FOR_VISIBILITY) + ) + return + _logger.warning( + "[repo] updated %s:%s to %s: failed (at %d/%d)", + repo.name, refname, it.head, + next(i), len(WAIT_FOR_VISIBILITY) + ) + raise TimeoutError("Staged head not updated after %d seconds" % sum(WAIT_FOR_VISIBILITY)) + + _logger.info("Created staging %s (%s) to %s", st, ', '.join( + '%s[%s]' % (batch, batch.prs) + for batch in staged + ), st.target.name) + return st + + +def staging_setup( + target: Branch, + batched_prs: List[PullRequests], + cleanup: contextlib.ExitStack +) -> Tuple[Dict[Repository, str], StagingState]: + """Sets up the staging: + + - stores baseline info + - creates tmp branch via gh API (to remove) + - generates working copy for each repository with the target branch + """ + all_prs: PullRequests = target.env['runbot_merge.pull_requests'].concat(*batched_prs) + cache_dir = user_cache_dir('mergebot') + staging_state = {} + original_heads = {} + for repo in target.project_id.repo_ids.having_branch(target): + gh = repo.github() + head = gh.head(target.name) + # create tmp staging branch + gh.set_ref('tmp.{}'.format(target.name), head) + + source = git.get_local(repo, 'github') + source.fetch( + git.source_url(repo, 'github'), + # a full refspec is necessary to ensure we actually fetch the ref + # (not just the commit it points to) and update it. 
+            # `git fetch $remote $branch` seems to work locally, but it might
+            # be hooked only to "proper" remote-tracking branches
+            # (in `refs/remotes`), it doesn't seem to work here
+            f'+refs/heads/{target.name}:refs/heads/{target.name}',
+            *(pr.head for pr in all_prs if pr.repository == repo)
+        )
+        Path(cache_dir, repo.name).parent.mkdir(parents=True, exist_ok=True)
+        d = cleanup.enter_context(tempfile.TemporaryDirectory(
+            prefix=f'{repo.name}-{target.name}-staging',
+            dir=cache_dir,
+        ))
+        working_copy = source.clone(d, branch=target.name)
+        original_heads[repo] = head
+        staging_state[repo] = StagingSlice(gh=gh, head=head, working_copy=working_copy)
+
+    return original_heads, staging_state
+
+
+def stage_batches(branch: Branch, batched_prs: List[PullRequests], staging_state: StagingState) -> Batch:
+    batch_limit = branch.project_id.batch_limit
+    env = branch.env
+    staged = env['runbot_merge.batch']
+    for batch in batched_prs:
+        if len(staged) >= batch_limit:
+            break
+
+        try:
+            staged |= stage_batch(env, batch, staging_state)
+        except exceptions.MergeError as e:
+            pr = e.args[0]
+            _logger.info("Failed to stage %s into %s", pr.display_name, branch.name, exc_info=True)
+            if not staged or isinstance(e, exceptions.Unmergeable):
+                if len(e.args) > 1 and e.args[1]:
+                    reason = e.args[1]
+                else:
+                    reason = e.__cause__ or e.__context__
+                # if the reason is a json document, assume it's a github error
+                # and try to extract the error message to give it to the user
+                with contextlib.suppress(Exception):
+                    reason = json.loads(str(reason))['message'].lower()
+
+                pr.state = 'error'
+                env.ref('runbot_merge.pr.merge.failed')._send(
+                    repository=pr.repository,
+                    pull_request=pr.number,
+                    format_args={'pr': pr, 'reason': reason, 'exc': e},
+                )
+    return staged
+
+def check_visibility(repo: Repository, branch_name: str, expected_head: str, token: str):
+    """ Checks the actual repository to see if the new / expected head is
+    now visible
+    """
+    # v1 protocol provides URL for ref discovery: https://github.com/git/git/blob/6e0cc6776106079ed4efa0cc9abace4107657abf/Documentation/technical/http-protocol.txt#L187
+    # for more complete client this is also the capabilities discovery and
+    # the "entry point" for the service
+    url = 'https://github.com/{}.git/info/refs?service=git-upload-pack'.format(repo.name)
+    with requests.get(url, stream=True, auth=(token, '')) as resp:
+        if not resp.ok:
+            return False
+        for head, ref in parse_refs_smart(resp.raw.read):
+            if ref != ('refs/heads/' + branch_name):
+                continue
+            return head == expected_head
+        return False
+
+
+refline = re.compile(rb'([\da-f]{40}) ([^\0\n]+)(\0.*)?\n?')
+ZERO_REF = b'0'*40
+
+def parse_refs_smart(read: Callable[[int], bytes]) -> Iterator[Tuple[str, str]]:
+    """ yields (sha, refname) pairs parsed from the smart protocol's ref advertisement """
+    def read_line() -> Optional[bytes]:
+        length = int(read(4), 16)
+        if length == 0:
+            return None
+        return read(length - 4)
+
+    header = read_line()
+    assert header and header.rstrip() == b'# service=git-upload-pack', header
+    assert read_line() is None, "failed to find first flush line"
+    # read lines until second delimiter
+    for line in iter(read_line, None):
+        if line.startswith(ZERO_REF):
+            break # empty list (no refs)
+        m = refline.fullmatch(line)
+        assert m
+        yield m[1].decode(), m[2].decode()
+
+
+UNCHECKABLE = ['merge_method', 'overrides', 'draft']
+
+
+def stage_batch(env: api.Environment, prs: PullRequests, staging: StagingState) -> Batch:
+    """
+    Updates staging[*].head on success
+    """
+    new_heads: Dict[PullRequests, str] = {}
+    
+ pr_fields = env['runbot_merge.pull_requests']._fields + for pr in prs: + gh = staging[pr.repository].gh + + _logger.info( + "Staging pr %s for target %s; method=%s", + pr.display_name, pr.target.name, + pr.merge_method or (pr.squash and 'single') or None + ) + + target = 'tmp.{}'.format(pr.target.name) + original_head = gh.head(target) + try: + try: + method, new_heads[pr] = stage(pr, gh, target, related_prs=(prs - pr)) + _logger.info( + "Staged pr %s to %s by %s: %s -> %s", + pr.display_name, pr.target.name, method, + original_head, new_heads[pr] + ) + except Exception: + # reset the head which failed, as rebase() may have partially + # updated it (despite later steps failing) + gh.set_ref(target, original_head) + # then reset every previous update + for to_revert in new_heads.keys(): + it = staging[to_revert.repository] + it.gh.set_ref('tmp.{}'.format(to_revert.target.name), it.head) + raise + except github.MergeError as e: + raise exceptions.MergeError(pr) from e + except exceptions.Mismatch as e: + diff = ''.join(Differ().compare( + list(format_for_difflib((n, v) for n, v, _ in e.args[1])), + list(format_for_difflib((n, v) for n, _, v in e.args[1])), + )) + _logger.info("data mismatch on %s:\n%s", pr.display_name, diff) + env.ref('runbot_merge.pr.staging.mismatch')._send( + repository=pr.repository, + pull_request=pr.number, + format_args={ + 'pr': pr, + 'mismatch': ', '.join(pr_fields[f].string for f in e.args[0]), + 'diff': diff, + 'unchecked': ', '.join(pr_fields[f].string for f in UNCHECKABLE) + } + ) + return env['runbot_merge.batch'] + + # update meta to new heads + for pr, head in new_heads.items(): + staging[pr.repository].head = head + return env['runbot_merge.batch'].create({ + 'target': prs[0].target.id, + 'prs': [(4, pr.id, 0) for pr in prs], + }) + +def format_for_difflib(items: Iterator[Tuple[str, object]]) -> Iterator[str]: + """ Bit of a pain in the ass because difflib really wants + all lines to be newline-terminated, but not all values are + actual lines, and also needs to split multiline values. + """ + for name, value in items: + yield name + ':\n' + value = str(value) + if not value.endswith('\n'): + value += '\n' + yield from value.splitlines(keepends=True) + yield '\n' + + +Method = Literal['merge', 'rebase-merge', 'rebase-ff', 'squash'] +def stage(pr: PullRequests, gh: github.GH, target: str, related_prs: PullRequests) -> Tuple[Method, str]: + # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1] + _, prdict = gh.pr(pr.number) + commits = prdict['commits'] + method: Method = pr.merge_method or ('rebase-ff' if commits == 1 else None) + if commits > 50 and method.startswith('rebase'): + raise exceptions.Unmergeable(pr, "Rebasing 50 commits is too much.") + if commits > 250: + raise exceptions.Unmergeable( + pr, "Merging PRs of 250 or more commits is not supported " + "(https://developer.github.com/v3/pulls/#list-commits-on-a-pull-request)" + ) + pr_commits = gh.commits(pr.number) + for c in pr_commits: + if not (c['commit']['author']['email'] and c['commit']['committer']['email']): + raise exceptions.Unmergeable( + pr, + f"All commits must have author and committer email, " + f"missing email on {c['sha']} indicates the authorship is " + f"most likely incorrect." 
+ ) + + # sync and signal possibly missed updates + invalid = {} + diff = [] + pr_head = pr_commits[-1]['sha'] + if pr.head != pr_head: + invalid['head'] = pr_head + diff.append(('Head', pr.head, pr_head)) + + if pr.target.name != prdict['base']['ref']: + branch = pr.env['runbot_merge.branch'].with_context(active_test=False).search([ + ('name', '=', prdict['base']['ref']), + ('project_id', '=', pr.repository.project_id.id), + ]) + if not branch: + pr.unlink() + raise exceptions.Unmergeable(pr, "While staging, found this PR had been retargeted to an un-managed branch.") + invalid['target'] = branch.id + diff.append(('Target branch', pr.target.name, branch.name)) + + if pr.squash != commits == 1: + invalid['squash'] = commits == 1 + diff.append(('Single commit', pr.squash, commits == 1)) + + msg = utils.make_message(prdict) + if pr.message != msg: + invalid['message'] = msg + diff.append(('Message', pr.message, msg)) + + if invalid: + pr.write({**invalid, 'state': 'opened', 'head': pr_head}) + raise exceptions.Mismatch(invalid, diff) + + if pr.reviewed_by and pr.reviewed_by.name == pr.reviewed_by.github_login: + # XXX: find other trigger(s) to sync github name? + gh_name = gh.user(pr.reviewed_by.github_login)['name'] + if gh_name: + pr.reviewed_by.name = gh_name + + match method: + case 'merge': + fn = stage_merge + case 'rebase-merge': + fn = stage_rebase_merge + case 'rebase-ff': + fn = stage_rebase_ff + case 'squash': + fn = stage_squash + return method, fn(pr, gh, target, pr_commits, related_prs=related_prs) + +def stage_squash(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str: + msg = pr._build_merge_message(pr, related_prs=related_prs) + authorship = {} + + authors = { + (c['commit']['author']['name'], c['commit']['author']['email']) + for c in commits + } + if len(authors) == 1: + name, email = authors.pop() + authorship['author'] = {'name': name, 'email': email} + else: + msg.headers.extend(sorted( + ('Co-Authored-By', "%s <%s>" % author) + for author in authors + )) + + committers = { + (c['commit']['committer']['name'], c['commit']['committer']['email']) + for c in commits + } + if len(committers) == 1: + name, email = committers.pop() + authorship['committer'] = {'name': name, 'email': email} + # should committers also be added to co-authors? 

+
+    original_head = gh.head(target)
+    merge_tree = gh.merge(pr.head, target, 'temp merge')['tree']['sha']
+    head = gh('post', 'git/commits', json={
+        **authorship,
+        'message': str(msg),
+        'tree': merge_tree,
+        'parents': [original_head],
+    }).json()['sha']
+    gh.set_ref(target, head)
+
+    commits_map = {c['sha']: head for c in commits}
+    commits_map[''] = head
+    pr.commits_map = json.dumps(commits_map)
+
+    return head
+
+def stage_rebase_ff(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    # updates head commit with PR number (if necessary) then rebases
+    # on top of target
+    msg = pr._build_merge_message(commits[-1]['commit']['message'], related_prs=related_prs)
+    commits[-1]['commit']['message'] = str(msg)
+    add_self_references(pr, commits[:-1])
+    head, mapping = gh.rebase(pr.number, target, commits=commits)
+    pr.commits_map = json.dumps({**mapping, '': head})
+    return head
+
+def stage_rebase_merge(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    add_self_references(pr, commits)
+    h, mapping = gh.rebase(pr.number, target, reset=True, commits=commits)
+    msg = pr._build_merge_message(pr, related_prs=related_prs)
+    merge_head = gh.merge(h, target, str(msg))['sha']
+    pr.commits_map = json.dumps({**mapping, '': merge_head})
+    return merge_head
+
+def stage_merge(pr: PullRequests, gh: github.GH, target: str, commits: List[github.PrCommit], related_prs: PullRequests) -> str:
+    pr_head = commits[-1] # oldest to newest
+    base_commit = None
+    head_parents = {p['sha'] for p in pr_head['parents']}
+    if len(head_parents) > 1:
+        # look for parent(s?) of pr_head not in PR, means it's
+        # from target (so we merged target in pr)
+        merge = head_parents - {c['sha'] for c in commits}
+        external_parents = len(merge)
+        if external_parents > 1:
+            raise exceptions.Unmergeable(
+                "The PR head can only have one parent from the base branch "
+                "(not part of the PR itself), found %d: %s" % (
+                external_parents,
+                ', '.join(merge)
+            ))
+        if external_parents == 1:
+            [base_commit] = merge
+
+    commits_map = {c['sha']: c['sha'] for c in commits}
+    if base_commit:
+        # replicate pr_head with base_commit replaced by
+        # the current head
+        original_head = gh.head(target)
+        merge_tree = gh.merge(pr_head['sha'], target, 'temp merge')['tree']['sha']
+        new_parents = [original_head] + list(head_parents - {base_commit})
+        msg = pr._build_merge_message(pr_head['commit']['message'], related_prs=related_prs)
+        copy = gh('post', 'git/commits', json={
+            'message': str(msg),
+            'tree': merge_tree,
+            'author': pr_head['commit']['author'],
+            'committer': pr_head['commit']['committer'],
+            'parents': new_parents,
+        }).json()
+        gh.set_ref(target, copy['sha'])
+        # merge commit *and old PR head* map to the pr head replica
+        commits_map[''] = commits_map[pr_head['sha']] = copy['sha']
+        pr.commits_map = json.dumps(commits_map)
+        return copy['sha']
+    else:
+        # otherwise do a regular merge
+        msg = pr._build_merge_message(pr)
+        merge_head = gh.merge(pr.head, target, str(msg))['sha']
+        # and the merge commit is the normal merge head
+        commits_map[''] = merge_head
+        pr.commits_map = json.dumps(commits_map)
+        return merge_head
+
+def is_mentioned(message: Union[PullRequests, str], pr: PullRequests, *, full_reference: bool = False) -> bool:
+    """Returns whether ``pr`` is mentioned in ``message``
+    """
+    if full_reference:
+        pattern = fr'\b{re.escape(pr.display_name)}\b'
+    else:
+        repository = pr.repository.name  # 
.replace('/', '\\/')
+        pattern = fr'( |\b{repository})#{pr.number}\b'
+    return bool(re.search(pattern, message if isinstance(message, str) else message.message))
+
+def add_self_references(pr: PullRequests, commits: List[github.PrCommit]):
+    """Adds a footer reference to ``pr`` to all ``commits`` if they don't
+    already refer to the PR.
+    """
+    for c in (c['commit'] for c in commits):
+        if not is_mentioned(c['message'], pr):
+            message = c['message']
+            m = Message.from_message(message)
+            m.headers.pop('Part-Of', None)
+            m.headers.add('Part-Of', pr.display_name)
+            c['message'] = str(m)
+
+BREAK = re.compile(r'''
+    [ ]{0,3} # 0-3 spaces of indentation
+    # followed by a sequence of three or more matching -, _, or * characters,
+    # each followed optionally by any number of spaces or tabs
+    # so needs to start with a _, - or *, then have at least 2 more such
+    # interspersed with any number of spaces or tabs
+    ([*_-])
+    ([ \t]*\1){2,}
+    [ \t]*
+''', flags=re.VERBOSE)
+SETEX_UNDERLINE = re.compile(r'''
+    [ ]{0,3} # no more than 3 spaces indentation
+    [-=]+ # a sequence of = characters or a sequence of - characters
+    [ ]* # any number of trailing spaces
+    # we don't care about "a line containing a single -" because we want to
+    # disambiguate SETEX headings from thematic breaks, and thematic breaks have
+    # 3+ -. Doesn't look like GH interprets `- - -` as a line so yay...
+''', flags=re.VERBOSE)
+HEADER = re.compile('([A-Za-z-]+): (.*)')
+class Message:
+    @classmethod
+    def from_message(cls, msg: Union[PullRequests, str]) -> 'Message':
+        in_headers = True
+        maybe_setex = None
+        # creating from PR message -> remove content following break
+        if isinstance(msg, str):
+            message, handle_break = (msg, False)
+        else:
+            message, handle_break = (msg.message, True)
+        headers = []
+        body: List[str] = []
+        # don't process the title (first line) of the commit message
+        lines = message.splitlines()
+        for line in reversed(lines[1:]):
+            if maybe_setex:
+                # NOTE: actually slightly more complicated: it's a SETEX heading
+                #       only if preceding line(s) can be interpreted as a
+                #       paragraph so e.g. a title followed by a line of dashes
+                #       would indeed be a break, but this should be good enough
+                #       for now, if we need more we'll need a full-blown
+                #       markdown parser probably
+                if line: # actually a SETEX title -> add underline to body then process current
+                    body.append(maybe_setex)
+                else: # actually break, remove body then process current
+                    body = []
+                maybe_setex = None
+
+            if not line:
+                if not in_headers and body and body[-1]:
+                    body.append(line)
+                continue
+
+            if handle_break and BREAK.fullmatch(line):
+                if SETEX_UNDERLINE.fullmatch(line):
+                    maybe_setex = line
+                else:
+                    body = []
+                continue
+
+            h = HEADER.fullmatch(line)
+            if h:
+                # c-a-b = special case from an existing test, not sure if actually useful? 
+ if in_headers or h[1].lower() == 'co-authored-by': + headers.append(h.groups()) + continue + + body.append(line) + in_headers = False + + # if there are non-title body lines, add a separation after the title + if body and body[-1]: + body.append('') + body.append(lines[0]) + return cls('\n'.join(reversed(body)), Headers(reversed(headers))) + + def __init__(self, body: str, headers: Optional[Headers] = None): + self.body = body + self.headers = headers or Headers() + + def __setattr__(self, name, value): + # make sure stored body is always stripped + if name == 'body': + value = value and value.strip() + super().__setattr__(name, value) + + def __str__(self): + if not self.headers: + return self.body + '\n' + + with io.StringIO(self.body) as msg: + msg.write(self.body) + msg.write('\n\n') + # https://git.wiki.kernel.org/index.php/CommitMessageConventions + # seems to mostly use capitalised names (rather than title-cased) + keys = list(OrderedSet(k.capitalize() for k in self.headers.keys())) + # c-a-b must be at the very end otherwise github doesn't see it + keys.sort(key=lambda k: k == 'Co-authored-by') + for k in keys: + for v in self.headers.getlist(k): + msg.write(k) + msg.write(': ') + msg.write(v) + msg.write('\n') + + return msg.getvalue() diff --git a/runbot_merge/tests/test_multirepo.py b/runbot_merge/tests/test_multirepo.py index 58c642fa..57698426 100644 --- a/runbot_merge/tests/test_multirepo.py +++ b/runbot_merge/tests/test_multirepo.py @@ -182,7 +182,6 @@ def test_stage_match(env, project, repo_a, repo_b, config, page): assert 'Related: {}'.format(pr_b.display_name) in repo_a.commit('master').message assert 'Related: {}'.format(pr_a.display_name) in repo_b.commit('master').message - print(pr_a.batch_ids.read(['staging_id', 'prs'])) # check that related PRs *still* link to one another after merge assert get_related_pr_labels(pr_page(page, prx_a)) == [pr_b.display_name] assert get_related_pr_labels(pr_page(page, prx_b)) == [pr_a.display_name] @@ -1093,13 +1092,6 @@ def test_multi_project(env, make_repo, setreviewers, users, config, pr1_id = to_pr(env, pr1) pr2_id = to_pr(env, pr2) - print( - pr1.repo.name, pr1.number, pr1_id.display_name, pr1_id.label, - '\n', - pr2.repo.name, pr2.number, pr2_id.display_name, pr2_id.label, - flush=True, - ) - assert pr1_id.state == 'ready' and not pr1_id.blocked assert pr2_id.state == 'validated'
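
Illustration (not part of the patch): the `Message` helper that this commit
moves into `runbot_merge/models/stagings_create.py` is now shared by both
`runbot_merge` and `forwardport`. A minimal sketch of how it round-trips a
commit message's pseudo-headers, with made-up message contents:

    # illustrative only: the message contents and PR reference are made up
    from odoo.addons.runbot_merge.models.stagings_create import Message

    msg = Message.from_message(
        "[FIX] stock: off-by-one in forecast\n"
        "\n"
        "Some explanatory body.\n"
        "\n"
        "Signed-Off-By: Jane Doe <jane@example.com>"
    )
    # pseudo-headers are split out of the body...
    assert msg.body == "[FIX] stock: off-by-one in forecast\n\nSome explanatory body."
    # ...and can be manipulated before re-serialising
    msg.headers.add('Part-Of', 'odoo/odoo#12345')  # hypothetical PR reference
    print(str(msg), end='')
    # [FIX] stock: off-by-one in forecast
    #
    # Some explanatory body.
    #
    # Signed-off-by: Jane Doe <jane@example.com>
    # Part-of: odoo/odoo#12345

Note that `__str__` re-emits header names capitalised (hence `Part-of`) and
sorts `Co-authored-by` last so GitHub still picks it up, which keeps the
serialised form stable regardless of input order.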