diff --git a/runbot_merge/README.rst b/runbot_merge/README.rst new file mode 100644 index 00000000..2aa2068c --- /dev/null +++ b/runbot_merge/README.rst @@ -0,0 +1,175 @@ +Merge Bot +========= + +Setup +----- + +* Set up a project with relevant repositories and branches the bot + should manage (e.g. odoo/odoo and 10.0). +* Set up reviewers (github_login + boolean flag on partners). +* Add "Issue comments", "Pull request reviews", "Pull requests" and + "Statuses" webhooks to managed repositories. +* If applicable, add "Statuses" webhook to the *source* repositories. + + Github does not seem to send statuses cross-repository when commits + get transmigrated so if a user creates a branch in odoo-dev/odoo, + waits for CI to run then creates a PR targeted to odoo/odoo the PR + will never get status-checked (unless we modify runbot to re-send + statuses on pull_request webhook). + +Working Principles +------------------ + +Useful information (new PRs, CI, comments, ...) is pushed to the MB +via webhooks. Most of the staging work is performed via a cron job: + +1. for each active staging, check if it is done + + 1. if successful + + * ``push --ff`` to target branches + * close PRs + + 2. if only one batch, mark as failed + + for batches of multiple PRs, the MB attempts to infer which + specific PR failed + + 3. otherwise split staging in 2 (bisection search of problematic + batch) + +2. for each branch with no active staging + + * if there are inactive stagings, stage one of them + * otherwise look for batches targeted to that branch (PRs grouped by + label with branch as target) + * attempt staging + + 1. reset temp branches (one per repo) to corresponding targets + 2. merge each batch's PR into the relevant temp branch + + * on merge failure, mark PRs as failed + + 3. once no more batch or limit reached, reset staging branches to + tmp + 4. mark staging as active + +Commands +-------- + +A command string is a line starting with the mergebot's name and +followed by various commands. Self-reviewers count as reviewers for +the purpose of their own PRs, but delegate reviewers don't. + +retry + resets a PR in error mode to ready for staging + + can be used by a reviewer or the PR author to re-stage the PR after + it's been updated or the target has been updated & fixed. + +r(eview)+ + approves a PR, can be used by a reviewer or delegate reviewer + + submitting an "approve" review implicitly r+'s the PR + +r(eview)- + removes approval from a PR, allows un-reviewing a PR in error (staging + failed) so it can be updated and re-submitted + +.. squash+/squash- +.. marks the PR as squash or merge, can override squash inference or a +.. previous squash command, can only be used by reviewers + +delegate+/delegate= + adds either PR author or the specified (github) users as authorised + reviewers for this PR. ``<users>`` is a comma-separated list of + github usernames (no @), can be used by reviewers + +p(riority)=2|1|0 + sets the priority to normal (2), pressing (1) or urgent (0), + PRs with lower priority values are selected first and batched + together, can be used by reviewers + +rebase- + the default merge mode is to rebase and merge the PR into the + target, however for some situations this is not suitable and + a regular merge is necessary; this command switches rebasing + off (and thus back to a regular merge) + +Structure +--------- + +A *project* is used to manage multiple *repositories* across many +*branches*. + +Each *PR* targets a specific branch in a specific repository.
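The "split staging in 2" step under Working Principles above is a simple
midpoint split of the failed staging's batches. A minimal sketch of the idea
(illustrative only, not the module's actual implementation):

.. code:: python

    def split_failed_staging(batches):
        """Split a failed multi-batch staging into two halves to be
        re-staged, narrowing down the problematic batch over successive
        stagings; a single-batch staging is simply marked as failed.
        """
        assert len(batches) > 1
        midpoint = len(batches) // 2
        return batches[:midpoint], batches[midpoint:]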
+ +A *batch* is a number of co-dependent PRs, PRs which are assumed to +depend on one another (the exact relationship is irrelevant) and thus +always need to be batched together. Batches are normally created on +the fly during staging. + +A *staging* is a number of batches (up to 8 by default) which will be +tested together, and split if CI fails. Each staging applies to a +single *branch* (the target) across all managed repositories. Stagings +can be active (currently live on the various staging branches) or +inactive (to be staged later, generally as a result of splitting a +failed staging). + +Notes +----- + +* When looking for stageable batches, priority is taken into account and is + isolating, e.g. if there's a single high-priority PR, low-priority + PRs are ignored completely and only that one will be staged on its own +* Reviewers are set up on partners so we can e.g. have author-tracking + & delegate reviewers without needing to create proper users for + every contributor. +* MB collates statuses on commits independently from other objects, so + a commit getting CI'd in odoo-dev/odoo then made into a PR on + odoo/odoo should be correctly interpreted assuming odoo-dev/odoo + sent its statuses to the MB. +* Github does not support transactional sequences of API calls, so + it's possible that "intermediate" staging states are visible & have + to be rolled back, e.g. a staging succeeds in a 2-repo scenario, + A.{target} is ff-d to A.{staging}, then B.{target}'s ff to + B.{staging} fails, we have to roll back A.{target}. +* Co-dependence is currently inferred through *labels*, which take the + form ``{repo}:{branchname}``, e.g. odoo-dev:11.0-pr-flanker-jke. + If this label is present in a PR to A and a PR to B, these two + PRs will be collected into a single batch to ensure they always + get batched (and failed) together. + +Previous Work +------------- + +bors-ng +~~~~~~~ + +* r+: accept (only for trusted reviewers) +* r-: unaccept +* r=users...: accept on behalf of users +* delegate+: allows author to self-review +* delegate=users...: allow non-reviewer users to review +* try: stage build (to separate branch) but don't merge on success + +Why not bors-ng +############### + +* no concurrent staging (can only stage one target at a time) +* can't do co-dependent repositories/multi-repo staging +* cancels/forgets r+'d branches on FF failure (emergency pushes) + instead of re-staging + +homu +~~~~ + +In addition to bors-ng's: + +* SHA option on r+/r=, guards +* p=NUMBER: set priority (unclear if best = low/high) +* rollup/rollup-: should be default +* retry: re-attempt PR (flaky?) +* delegate-: remove delegate+/delegate= +* force: ??? +* clean: ??? diff --git a/runbot_merge/__init__.py b/runbot_merge/__init__.py new file mode 100644 index 00000000..76a74f98 --- /dev/null +++ b/runbot_merge/__init__.py @@ -0,0 +1 @@ +from . 
import models, controllers diff --git a/runbot_merge/__manifest__.py b/runbot_merge/__manifest__.py new file mode 100644 index 00000000..0f81d5df --- /dev/null +++ b/runbot_merge/__manifest__.py @@ -0,0 +1,13 @@ +{ + 'name': 'merge bot', + 'depends': ['contacts', 'website'], + 'data': [ + 'security/security.xml', + 'security/ir.model.access.csv', + + 'data/merge_cron.xml', + 'views/res_partner.xml', + 'views/mergebot.xml', + 'views/templates.xml', + ] +} diff --git a/runbot_merge/controllers/__init__.py b/runbot_merge/controllers/__init__.py new file mode 100644 index 00000000..4950da1a --- /dev/null +++ b/runbot_merge/controllers/__init__.py @@ -0,0 +1,262 @@ +import hashlib +import hmac +import logging +import json + +import werkzeug.exceptions + +from odoo.http import Controller, request, route + +from . import dashboard + +_logger = logging.getLogger(__name__) + +class MergebotController(Controller): + @route('/runbot_merge/hooks', auth='none', type='json', csrf=False, methods=['POST']) + def index(self): + req = request.httprequest + event = req.headers['X-Github-Event'] + + c = EVENTS.get(event) + if not c: + _logger.warn('Unknown event %s', event) + return 'Unknown event {}'.format(event) + + repo = request.jsonrequest['repository']['full_name'] + env = request.env(user=1) + + secret = env['runbot_merge.repository'].search([ + ('name', '=', repo), + ]).project_id.secret + if secret: + signature = 'sha1=' + hmac.new(secret.encode('ascii'), req.get_data(), hashlib.sha1).hexdigest() + if not hmac.compare_digest(signature, req.headers.get('X-Hub-Signature', '')): + _logger.warn("Ignored hook with incorrect signature %s", + req.headers.get('X-Hub-Signature')) + return werkzeug.exceptions.Forbidden() + + return c(env, request.jsonrequest) + +def handle_pr(env, event): + if event['action'] in [ + 'assigned', 'unassigned', 'review_requested', 'review_request_removed', + 'labeled', 'unlabeled' + ]: + _logger.debug( + 'Ignoring pull_request[%s] on %s:%s', + event['action'], + event['pull_request']['base']['repo']['full_name'], + event['pull_request']['number'], + ) + return 'Ignoring' + + pr = event['pull_request'] + r = pr['base']['repo']['full_name'] + b = pr['base']['ref'] + + repo = env['runbot_merge.repository'].search([('name', '=', r)]) + if not repo: + _logger.warning("Received a PR for %s but not configured to handle that repo", r) + # sadly shit's retarded so odoo json endpoints really mean + # jsonrpc and it's LITERALLY NOT POSSIBLE TO REPLY WITH + # ACTUAL RAW HTTP RESPONSES and thus not possible to + # report actual errors to the webhooks listing thing on + # github (not that we'd be looking at them but it'd be + # useful for tests) + return "Not configured to handle {}".format(r) + + # PRs to unmanaged branches are not necessarily abnormal and + # we don't care + branch = env['runbot_merge.branch'].search([ + ('name', '=', b), + ('project_id', '=', repo.project_id.id), + ]) + + def find(target): + return env['runbot_merge.pull_requests'].search([ + ('repository', '=', repo.id), + ('number', '=', pr['number']), + ('target', '=', target.id), + ]) + # edition difficulty: pr['base']['ref] is the *new* target, the old one + # is at event['change']['base']['ref'] (if the target changed), so edition + # handling must occur before the rest of the steps + if event['action'] == 'edited': + source = event['changes'].get('base', {'ref': {'from': b}})['ref']['from'] + source_branch = env['runbot_merge.branch'].search([ + ('name', '=', source), + ('project_id', '=', repo.project_id.id), + ]) 
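        # Illustrative note (shape implied by the lookups in this branch, not a
        # full description of GitHub's payload): a retargeting "edited" event
        # carries the previous target branch as e.g.
        #   event['changes'] == {'base': {'ref': {'from': '10.0'}}}
        # while pr['base']['ref'] already holds the *new* target.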
+ # retargeting to un-managed => delete + if not branch: + pr = find(source_branch) + pr.unlink() + return 'Retargeted {} to un-managed branch {}, deleted'.format(pr.id, b) + + # retargeting from un-managed => create + if not source_branch: + return handle_pr(env, dict(event, action='opened')) + + updates = {} + if source_branch != branch: + updates['target'] = branch.id + if event['changes'].keys() & {'title', 'body'}: + updates['message'] = "{}\n\n{}".format(pr['title'].strip(), pr['body'].strip()) + if updates: + pr_obj = find(source_branch) + pr_obj.write(updates) + return 'Updated {}'.format(pr_obj.id) + return "Nothing to update ({})".format(event['changes'].keys()) + + if not branch: + _logger.info("Ignoring PR for un-managed branch %s:%s", r, b) + return "Not set up to care about {}:{}".format(r, b) + + author_name = pr['user']['login'] + author = env['res.partner'].search([('github_login', '=', author_name)], limit=1) + if not author: + author = env['res.partner'].create({ + 'name': author_name, + 'github_login': author_name, + }) + + _logger.info("%s: %s:%s (%s) (%s)", event['action'], repo.name, pr['number'], pr['title'].strip(), author.github_login) + if event['action'] == 'opened': + # some PRs have leading/trailing newlines in body/title (resp) + title = pr['title'].strip() + body = pr['body'].strip() + pr_obj = env['runbot_merge.pull_requests'].create({ + 'number': pr['number'], + 'label': pr['head']['label'], + 'author': author.id, + 'target': branch.id, + 'repository': repo.id, + 'head': pr['head']['sha'], + 'squash': pr['commits'] == 1, + 'message': '{}\n\n{}'.format(title, body), + }) + return "Tracking PR as {}".format(pr_obj.id) + + pr_obj = env['runbot_merge.pull_requests']._get_or_schedule(r, pr['number']) + if not pr_obj: + _logger.warn("webhook %s on unknown PR %s:%s, scheduled fetch", event['action'], repo.name, pr['number']) + return "Unknown PR {}:{}, scheduling fetch".format(repo.name, pr['number']) + if event['action'] == 'synchronize': + if pr_obj.head == pr['head']['sha']: + return 'No update to pr head' + + if pr_obj.state in ('closed', 'merged'): + _logger.error("Tentative sync to closed PR %s:%s", repo.name, pr['number']) + return "It's my understanding that closed/merged PRs don't get sync'd" + + if pr_obj.state == 'validated': + pr_obj.state = 'opened' + elif pr_obj.state == 'ready': + pr_obj.state = 'approved' + pr_obj.staging_id.cancel( + "Updated PR %s:%s, removing staging %s", + pr_obj.repository.name, pr_obj.number, + pr_obj.staging_id, + ) + + pr_obj.head = pr['head']['sha'] + pr_obj.squash = pr['commits'] == 1 + return 'Updated {} to {}'.format(pr_obj.id, pr_obj.head) + + # don't marked merged PRs as closed (!!!) 
+ if event['action'] == 'closed' and pr_obj.state != 'merged': + pr_obj.state = 'closed' + pr_obj.staging_id.cancel( + "Closed PR %s:%s, removing staging %s", + pr_obj.repository.name, pr_obj.number, + pr_obj.staging_id + ) + return 'Closed {}'.format(pr_obj.id) + + if event['action'] == 'reopened' and pr_obj.state == 'closed': + pr_obj.state = 'opened' + return 'Reopened {}'.format(pr_obj.id) + + _logger.info("Ignoring event %s on PR %s", event['action'], pr['number']) + return "Not handling {} yet".format(event['action']) + +def handle_status(env, event): + _logger.info( + 'status %s:%s on commit %s', + event['context'], event['state'], + event['sha'], + ) + Commits = env['runbot_merge.commit'] + c = Commits.search([('sha', '=', event['sha'])]) + if c: + c.statuses = json.dumps({ + **json.loads(c.statuses), + event['context']: event['state'] + }) + else: + Commits.create({ + 'sha': event['sha'], + 'statuses': json.dumps({event['context']: event['state']}) + }) + + return 'ok' + +def handle_comment(env, event): + if 'pull_request' not in event['issue']: + return "issue comment, ignoring" + + repo = event['repository']['full_name'] + issue = event['issue']['number'] + author = event['sender']['login'] + comment = event['comment']['body'] + _logger.info('comment: %s %s:%s "%s"', author, repo, issue, comment) + + partner = env['res.partner'].search([('github_login', '=', author), ]) + if not partner: + _logger.info("ignoring comment from %s: not in system", author) + return 'ignored' + + repository = env['runbot_merge.repository'].search([('name', '=', repo)]) + if not repository.project_id._find_commands(comment): + return "No commands, ignoring" + + pr = env['runbot_merge.pull_requests']._get_or_schedule(repo, issue) + if not pr: + return "Unknown PR, scheduling fetch" + + return pr._parse_commands(partner, comment) + +def handle_review(env, event): + partner = env['res.partner'].search([('github_login', '=', event['review']['user']['login'])]) + if not partner: + _logger.info('ignoring comment from %s: not in system', event['review']['user']['login']) + return 'ignored' + + pr = env['runbot_merge.pull_requests']._get_or_schedule( + event['repository']['full_name'], + event['pull_request']['number'], + event['pull_request']['base']['ref'] + ) + if not pr: + return "Unknown PR, scheduling fetch" + + firstline = '' + state = event['review']['state'].lower() + if state == 'approved': + firstline = pr.repository.project_id.github_prefix + ' r+\n' + elif state == 'request_changes': + firstline = pr.repository.project_id.github_prefix + ' r-\n' + + return pr._parse_commands(partner, firstline + event['review']['body']) + +def handle_ping(env, event): + print("Got ping! 
{}".format(event['zen'])) + return "pong" + +EVENTS = { + 'pull_request': handle_pr, + 'status': handle_status, + 'issue_comment': handle_comment, + 'pull_request_review': handle_review, + 'ping': handle_ping, +} diff --git a/runbot_merge/controllers/dashboard.py b/runbot_merge/controllers/dashboard.py new file mode 100644 index 00000000..4d5f0d2e --- /dev/null +++ b/runbot_merge/controllers/dashboard.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from odoo.http import Controller, route, request + + +class MergebotDashboard(Controller): + @route('/runbot_merge', auth="public", type="http", website=True) + def dashboard(self): + return request.render('runbot_merge.dashboard', { + 'projects': request.env['runbot_merge.project'].sudo().search([]), + }) diff --git a/runbot_merge/data/merge_cron.xml b/runbot_merge/data/merge_cron.xml new file mode 100644 index 00000000..cd001c2d --- /dev/null +++ b/runbot_merge/data/merge_cron.xml @@ -0,0 +1,22 @@ + + + Check for progress of PRs & Stagings + + code + model._check_progress() + 1 + minutes + -1 + + + + Check for PRs to fetch + + code + model._check_fetch(True) + 1 + minutes + -1 + + + diff --git a/runbot_merge/exceptions.py b/runbot_merge/exceptions.py new file mode 100644 index 00000000..81d48533 --- /dev/null +++ b/runbot_merge/exceptions.py @@ -0,0 +1,4 @@ +class MergeError(Exception): + pass +class FastForwardError(Exception): + pass diff --git a/runbot_merge/github.py b/runbot_merge/github.py new file mode 100644 index 00000000..4f93123f --- /dev/null +++ b/runbot_merge/github.py @@ -0,0 +1,173 @@ +import collections +import functools +import itertools +import logging + +import requests + +from . import exceptions + +_logger = logging.getLogger(__name__) +class GH(object): + def __init__(self, token, repo): + self._url = 'https://api.github.com' + self._repo = repo + session = self._session = requests.Session() + session.headers['Authorization'] = 'token {}'.format(token) + + def __call__(self, method, path, params=None, json=None, check=True): + """ + :type check: bool | dict[int:Exception] + """ + r = self._session.request( + method, + '{}/repos/{}/{}'.format(self._url, self._repo, path), + params=params, + json=json + ) + if check: + if isinstance(check, collections.Mapping): + exc = check.get(r.status_code) + if exc: + raise exc(r.content) + r.raise_for_status() + return r + + def head(self, branch): + d = self('get', 'git/refs/heads/{}'.format(branch)).json() + + assert d['ref'] == 'refs/heads/{}'.format(branch) + assert d['object']['type'] == 'commit' + return d['object']['sha'] + + def commit(self, sha): + return self('GET', 'git/commits/{}'.format(sha)).json() + + def comment(self, pr, message): + self('POST', 'issues/{}/comments'.format(pr), json={'body': message}) + + def close(self, pr, message): + self.comment(pr, message) + self('PATCH', 'pulls/{}'.format(pr), json={'state': 'closed'}) + + def change_tags(self, pr, from_, to_): + to_add, to_remove = to_ - from_, from_ - to_ + for t in to_remove: + r = self('DELETE', 'issues/{}/labels/{}'.format(pr, t), check=False) + r.raise_for_status() + # successful deletion or attempt to delete a tag which isn't there + # is fine, otherwise trigger an error + if r.status_code not in (200, 404): + r.raise_for_status() + + if to_add: + self('POST', 'issues/{}/labels'.format(pr), json=list(to_add)) + + def fast_forward(self, branch, sha): + try: + self('patch', 'git/refs/heads/{}'.format(branch), json={'sha': sha}) + except requests.HTTPError: + raise exceptions.FastForwardError() + + def 
set_ref(self, branch, sha): + # force-update ref + r = self('patch', 'git/refs/heads/{}'.format(branch), json={ + 'sha': sha, + 'force': True, + }, check=False) + if r.status_code == 200: + return + + # 422 makes no sense but that's what github returns, leaving 404 just + # in case + if r.status_code in (404, 422): + # fallback: create ref + r = self('post', 'git/refs', json={ + 'ref': 'refs/heads/{}'.format(branch), + 'sha': sha, + }, check=False) + if r.status_code == 201: + return + raise AssertionError("{}: {}".format(r.status_code, r.json())) + + def merge(self, sha, dest, message): + r = self('post', 'merges', json={ + 'base': dest, + 'head': sha, + 'commit_message': message, + }, check={409: exceptions.MergeError}) + r = r.json() + return dict(r['commit'], sha=r['sha']) + + def rebase(self, pr, dest, reset=False, commits=None): + """ Rebase pr's commits on top of dest, updates dest unless ``reset`` + is set. + + Returns the hash of the rebased head. + """ + original_head = self.head(dest) + if commits is None: + commits = self.commits(pr) + + assert commits, "can't rebase a PR with no commits" + for c in commits: + assert len(c['parents']) == 1, "can't rebase commits with more than one parent" + tmp_msg = 'temp rebasing PR %s (%s)' % (pr, c['sha']) + c['new_tree'] = self.merge(c['sha'], dest, tmp_msg)['tree']['sha'] + self.set_ref(dest, original_head) + + prev = original_head + for c in commits: + copy = self('post', 'git/commits', json={ + 'message': c['commit']['message'], + 'tree': c['new_tree'], + 'parents': [prev], + 'author': c['commit']['author'], + 'committer': c['commit']['committer'], + }, check={409: exceptions.MergeError}).json() + prev = copy['sha'] + + if reset: + self.set_ref(dest, original_head) + + # prev is updated after each copy so it's the rebased PR head + return prev + + # fetch various bits of issues / prs to load them + def pr(self, number): + return ( + self('get', 'issues/{}'.format(number)).json(), + self('get', 'pulls/{}'.format(number)).json() + ) + + def comments(self, number): + for page in itertools.count(1): + r = self('get', 'issues/{}/comments'.format(number), params={'page': page}) + yield from r.json() + if not r.links.get('next'): + return + + def reviews(self, number): + for page in itertools.count(1): + r = self('get', 'pulls/{}/reviews'.format(number), params={'page': page}) + yield from r.json() + if not r.links.get('next'): + return + + def commits(self, pr): + """ Returns a PR's commits oldest first (that's what GH does & + is what we want) + """ + r = self('get', 'pulls/{}/commits'.format(pr), params={'per_page': PR_COMMITS_MAX}) + assert not r.links.get('next'), "more than {} commits".format(PR_COMMITS_MAX) + return r.json() + + def statuses(self, h): + r = self('get', 'commits/{}/status'.format(h)).json() + return [{ + 'sha': r['sha'], + 'context': s['context'], + 'state': s['state'], + } for s in r['statuses']] + +PR_COMMITS_MAX = 50 diff --git a/runbot_merge/models/__init__.py b/runbot_merge/models/__init__.py new file mode 100644 index 00000000..c9cf1cb9 --- /dev/null +++ b/runbot_merge/models/__init__.py @@ -0,0 +1,2 @@ +from . import res_partner +from . 
import pull_requests diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py new file mode 100644 index 00000000..a8f10cd4 --- /dev/null +++ b/runbot_merge/models/pull_requests.py @@ -0,0 +1,974 @@ +import collections +import datetime +import json +import logging +import pprint +import re + +from itertools import takewhile + +from odoo import api, fields, models, tools +from odoo.exceptions import ValidationError + +from .. import github, exceptions, controllers + +_logger = logging.getLogger(__name__) +class Project(models.Model): + _name = 'runbot_merge.project' + + name = fields.Char(required=True, index=True) + repo_ids = fields.One2many( + 'runbot_merge.repository', 'project_id', + help="Repos included in that project, they'll be staged together. "\ + "*Not* to be used for cross-repo dependencies (that is to be handled by the CI)" + ) + branch_ids = fields.One2many( + 'runbot_merge.branch', 'project_id', + help="Branches of all project's repos which are managed by the merge bot. Also "\ + "target branches of PR this project handles." + ) + + required_statuses = fields.Char( + help="Comma-separated list of status contexts which must be "\ + "`success` for a PR or staging to be valid", + default='legal/cla,ci/runbot' + ) + ci_timeout = fields.Integer( + default=60, required=True, + help="Delay (in minutes) before a staging is considered timed out and failed" + ) + + github_token = fields.Char("Github Token", required=True) + github_prefix = fields.Char( + required=True, + default="hanson", # mergebot du bot du bot du~ + help="Prefix (~bot name) used when sending commands from PR " + "comments e.g. [hanson retry] or [hanson r+ p=1]" + ) + + batch_limit = fields.Integer( + default=8, help="Maximum number of PRs staged together") + + secret = fields.Char( + help="Webhook secret. If set, will be checked against the signature " + "of (valid) incoming webhook signatures, failing signatures " + "will lead to webhook rejection. Should only use ASCII." 
+ ) + + def _check_progress(self): + logger = _logger.getChild('cron') + Batch = self.env['runbot_merge.batch'] + PRs = self.env['runbot_merge.pull_requests'] + for project in self.search([]): + gh = {repo.name: repo.github() for repo in project.repo_ids} + # check status of staged PRs + for staging in project.mapped('branch_ids.active_staging_id'): + logger.info( + "Checking active staging %s (state=%s)", + staging, staging.state + ) + if staging.state == 'success': + old_heads = { + n: g.head(staging.target.name) + for n, g in gh.items() + } + repo_name = None + staging_heads = json.loads(staging.heads) + updated = [] + try: + for repo_name, head in staging_heads.items(): + gh[repo_name].fast_forward( + staging.target.name, + head + ) + updated.append(repo_name) + except exceptions.FastForwardError: + logger.warning( + "Could not fast-forward successful staging on %s:%s, reverting updated repos %s and re-staging", + repo_name, staging.target.name, + ', '.join(updated), + exc_info=True + ) + for name in reversed(updated): + gh[name].set_ref(staging.target.name, old_heads[name]) + else: + prs = staging.mapped('batch_ids.prs') + logger.info( + "%s FF successful, marking %s as merged", + staging, prs + ) + prs.write({'state': 'merged'}) + for pr in prs: + # FIXME: this is the staging head rather than the actual merge commit for the PR + gh[pr.repository.name].close(pr.number, 'Merged in {}'.format(staging_heads[pr.repository.name])) + finally: + staging.batch_ids.write({'active': False}) + staging.write({'active': False}) + elif staging.state == 'failure' or project.is_timed_out(staging): + staging.try_splitting() + # else let flow + + # check for stageable branches/prs + for branch in project.branch_ids: + logger.info( + "Checking %s (%s) for staging: %s, skip? 
%s", + branch, branch.name, + branch.active_staging_id, + bool(branch.active_staging_id) + ) + if branch.active_staging_id: + continue + + # noinspection SqlResolve + self.env.cr.execute(""" + SELECT + min(pr.priority) as priority, + array_agg(pr.id) AS match + FROM runbot_merge_pull_requests pr + LEFT JOIN runbot_merge_batch batch ON pr.batch_id = batch.id AND batch.active + WHERE pr.target = %s + -- exclude terminal states (so there's no issue when + -- deleting branches & reusing labels) + AND pr.state != 'merged' + AND pr.state != 'closed' + GROUP BY pr.label + HAVING (bool_or(pr.priority = 0) AND NOT bool_or(pr.state = 'error')) + OR bool_and(pr.state = 'ready') + ORDER BY min(pr.priority), min(pr.id) + """, [branch.id]) + # result: [(priority, [(repo_id, pr_id) for repo in repos] + rows = self.env.cr.fetchall() + priority = rows[0][0] if rows else -1 + if priority == 0: + # p=0 take precedence over all else + batched_prs = [ + PRs.browse(pr_ids) + for _, pr_ids in takewhile(lambda r: r[0] == priority, rows) + ] + elif branch.split_ids: + split_ids = branch.split_ids[0] + logger.info("Found split of PRs %s, re-staging", split_ids.mapped('batch_ids.prs')) + batched_prs = [batch.prs for batch in split_ids.batch_ids] + split_ids.unlink() + elif rows: + # p=1 or p=2 + batched_prs = [PRs.browse(pr_ids) for _, pr_ids in takewhile(lambda r: r[0] == priority, rows)] + else: + continue + + staged = Batch + meta = {repo: {} for repo in project.repo_ids} + for repo, it in meta.items(): + gh = it['gh'] = repo.github() + it['head'] = gh.head(branch.name) + # create tmp staging branch + gh.set_ref('tmp.{}'.format(branch.name), it['head']) + + batch_limit = project.batch_limit + for batch in batched_prs: + if len(staged) >= batch_limit: + break + staged |= Batch.stage(meta, batch) + + if staged: + # create actual staging object + st = self.env['runbot_merge.stagings'].create({ + 'target': branch.id, + 'batch_ids': [(4, batch.id, 0) for batch in staged], + 'heads': json.dumps({ + repo.name: it['head'] + for repo, it in meta.items() + }) + }) + # create staging branch from tmp + for r, it in meta.items(): + it['gh'].set_ref('staging.{}'.format(branch.name), it['head']) + + # creating the staging doesn't trigger a write on the prs + # and thus the ->staging taggings, so do that by hand + Tagging = self.env['runbot_merge.pull_requests.tagging'] + for pr in st.mapped('batch_ids.prs'): + Tagging.create({ + 'pull_request': pr.number, + 'repository': pr.repository.id, + 'state_from': pr._tagstate, + 'state_to': 'staged', + }) + + logger.info("Created staging %s (%s)", st, staged) + + Repos = self.env['runbot_merge.repository'] + ghs = {} + # noinspection SqlResolve + self.env.cr.execute(""" + SELECT + t.repository as repo_id, + t.pull_request as pr_number, + array_agg(t.id) as ids, + (array_agg(t.state_from ORDER BY t.id))[1] as state_from, + (array_agg(t.state_to ORDER BY t.id DESC))[1] as state_to + FROM runbot_merge_pull_requests_tagging t + GROUP BY t.repository, t.pull_request + """) + to_remove = [] + for repo_id, pr, ids, from_, to_ in self.env.cr.fetchall(): + repo = Repos.browse(repo_id) + from_tags = _TAGS[from_ or False] + to_tags = _TAGS[to_ or False] + + gh = ghs.get(repo) + if not gh: + gh = ghs[repo] = repo.github() + + try: + gh.change_tags(pr, from_tags, to_tags) + except Exception: + _logger.exception( + "Error while trying to change the tags of %s:%s from %s to %s", + repo.name, pr, from_tags, to_tags, + ) + else: + to_remove.extend(ids) + 
self.env['runbot_merge.pull_requests.tagging'].browse(to_remove).unlink() + + def is_timed_out(self, staging): + return fields.Datetime.from_string(staging.staged_at) + datetime.timedelta(minutes=self.ci_timeout) < datetime.datetime.now() + + def _check_fetch(self, commit=False): + """ + :param bool commit: commit after each fetch has been executed + """ + while True: + f = self.env['runbot_merge.fetch_job'].search([], limit=1) + if not f: + return + + f.repository._load_pr(f.number) + + # commit after each fetched PR + f.active = False + if commit: + self.env.cr.commit() + + def _find_commands(self, comment): + return re.findall( + '^{}:? (.*)$'.format(self.github_prefix), + comment, re.MULTILINE) + + def _has_branch(self, name): + self.env.cr.execute(""" + SELECT 1 FROM runbot_merge_branch + WHERE project_id = %s AND name = %s + LIMIT 1 + """, (self.id, name)) + return bool(self.env.cr.rowcount) + +class Repository(models.Model): + _name = 'runbot_merge.repository' + + name = fields.Char(required=True) + project_id = fields.Many2one('runbot_merge.project', required=True) + + def github(self): + return github.GH(self.project_id.github_token, self.name) + + def _auto_init(self): + res = super(Repository, self)._auto_init() + tools.create_unique_index( + self._cr, 'runbot_merge_unique_repo', self._table, ['name']) + return res + + def _load_pr(self, number): + gh = self.github() + + # fetch PR object and handle as *opened* + issue, pr = gh.pr(number) + + if not self.project_id._has_branch(pr['base']['ref']): + _logger.info("Tasked with loading PR %d for un-managed branch %s, ignoring", pr['number'], pr['base']['ref']) + return + + controllers.handle_pr(self.env, { + 'action': 'opened', + 'pull_request': pr, + }) + for st in gh.statuses(pr['head']['sha']): + controllers.handle_status(self.env, st) + # get and handle all comments + for comment in gh.comments(number): + controllers.handle_comment(self.env, { + 'issue': issue, + 'sender': comment['user'], + 'comment': comment, + 'repository': {'full_name': self.name}, + }) + # get and handle all reviews + for review in gh.reviews(number): + controllers.handle_review(self.env, { + 'review': review, + 'pull_request': pr, + 'repository': {'full_name': self.name}, + }) + +class Branch(models.Model): + _name = 'runbot_merge.branch' + + name = fields.Char(required=True) + project_id = fields.Many2one('runbot_merge.project', required=True) + + active_staging_id = fields.Many2one( + 'runbot_merge.stagings', compute='_compute_active_staging', store=True, + help="Currently running staging for the branch." + ) + staging_ids = fields.One2many('runbot_merge.stagings', 'target') + split_ids = fields.One2many('runbot_merge.split', 'target') + + prs = fields.One2many('runbot_merge.pull_requests', 'target', domain=[ + ('state', '!=', 'closed'), + ('state', '!=', 'merged'), + ]) + + def _auto_init(self): + res = super(Branch, self)._auto_init() + tools.create_unique_index( + self._cr, 'runbot_merge_unique_branch_per_repo', + self._table, ['name', 'project_id']) + return res + + @api.depends('staging_ids.active') + def _compute_active_staging(self): + for b in self: + b.active_staging_id = b.staging_ids + +class PullRequests(models.Model): + _name = 'runbot_merge.pull_requests' + _order = 'number desc' + + target = fields.Many2one('runbot_merge.branch', required=True) + repository = fields.Many2one('runbot_merge.repository', required=True) + # NB: check that target & repo have same project & provide project related? 
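    # Illustrative sketch only (not implemented in this module): a constraint
    # along the lines of the NB above, ensuring a PR's target branch and its
    # repository belong to the same project, could look like:
    #
    #     @api.constrains('target', 'repository')
    #     def _check_same_project(self):
    #         for pr in self:
    #             if pr.target.project_id != pr.repository.project_id:
    #                 raise ValidationError(
    #                     "A PR's target branch and its repository must "
    #                     "belong to the same project")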
+ + state = fields.Selection([ + ('opened', 'Opened'), + ('closed', 'Closed'), + ('validated', 'Validated'), + ('approved', 'Approved'), + ('ready', 'Ready'), + # staged? + ('merged', 'Merged'), + ('error', 'Error'), + ], default='opened') + + number = fields.Integer(required=True, index=True) + author = fields.Many2one('res.partner') + head = fields.Char(required=True) + label = fields.Char( + required=True, index=True, + help="Label of the source branch (owner:branchname), used for " + "cross-repository branch-matching" + ) + message = fields.Text(required=True) + squash = fields.Boolean(default=False) + rebase = fields.Boolean(default=True) + + delegates = fields.Many2many('res.partner', help="Delegate reviewers, not intrinsically reviewers but can review this PR") + priority = fields.Selection([ + (0, 'Urgent'), + (1, 'Pressing'), + (2, 'Normal'), + ], default=2, index=True) + + statuses = fields.Text(compute='_compute_statuses') + + batch_id = fields.Many2one('runbot_merge.batch',compute='_compute_active_batch', store=True) + batch_ids = fields.Many2many('runbot_merge.batch') + staging_id = fields.Many2one(related='batch_id.staging_id', store=True) + + @api.depends('head') + def _compute_statuses(self): + Commits = self.env['runbot_merge.commit'] + for s in self: + c = Commits.search([('sha', '=', s.head)]) + if c and c.statuses: + s.statuses = pprint.pformat(json.loads(c.statuses)) + + @api.depends('batch_ids.active') + def _compute_active_batch(self): + for r in self: + r.batch_id = r.batch_ids.filtered(lambda b: b.active)[:1] + + def _get_or_schedule(self, repo_name, number, target=None): + repo = self.env['runbot_merge.repository'].search([('name', '=', repo_name)]) + if not repo: + return + + if target and not repo.project_id._has_branch(target): + return + + pr = self.search([ + ('repository', '=', repo.id), + ('number', '=', number,) + ]) + if pr: + return pr + + Fetch = self.env['runbot_merge.fetch_job'] + if Fetch.search([('repository', '=', repo.id), ('number', '=', number)]): + return + Fetch.create({ + 'repository': repo.id, + 'number': number, + }) + + def _parse_command(self, commandstring): + m = re.match(r'(\w+)(?:([+-])|=(.*))?', commandstring) + if not m: + return None + + name, flag, param = m.groups() + if name == 'retry': + return ('retry', True) + elif name in ('r', 'review'): + if flag == '+': + return ('review', True) + elif flag == '-': + return ('review', False) + elif name == 'delegate': + if flag == '+': + return ('delegate', True) + elif param: + return ('delegate', param.split(',')) + elif name in ('p', 'priority'): + if param in ('0', '1', '2'): + return ('priority', int(param)) + elif name == 'rebase': + return ('rebase', flag != '-') + + return None + + def _parse_commands(self, author, comment): + """Parses a command string prefixed by Project::github_prefix. + + A command string can contain any number of space-separated commands: + + retry + resets a PR in error mode to ready for staging + r(eview)+/- + approves or disapproves a PR (disapproving just cancels an approval) + delegate+/delegate= + adds either PR author or the specified (github) users as + authorised reviewers for this PR. ```` is a + comma-separated list of github usernames (no @) + p(riority)=2|1|0 + sets the priority to normal (2), pressing (1) or urgent (0). + Lower-priority PRs are selected first and batched together. + rebase+/- + Whether the PR should be rebased-and-merged (the default) or just + merged normally. 
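        Example, using the default ``hanson`` prefix (the actual prefix is
        the project's ``github_prefix``)::

            hanson r+ p=1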
+ """ + assert self, "parsing commands must be executed in an actual PR" + + is_admin = (author.reviewer and self.author != author) or (author.self_reviewer and self.author == author) + is_reviewer = is_admin or self in author.delegate_reviewer + # TODO: should delegate reviewers be able to retry PRs? + is_author = is_reviewer or self.author == author + + if not is_author: + # no point even parsing commands + _logger.info("ignoring comment of %s (%s): no ACL to %s:%s", + author.github_login, author.display_name, + self.repository.name, self.number) + return 'ignored' + + commands = dict( + ps + for m in self.repository.project_id._find_commands(comment) + for c in m.strip().split() + for ps in [self._parse_command(c)] + if ps is not None + ) + + if not commands: + _logger.info("found no commands in comment of %s (%s) (%s%s)", author.github_login, author.display_name, + comment[:50], '...' if len(comment) > 50 else '' + ) + return 'ok' + + applied, ignored = [], [] + for command, param in commands.items(): + ok = False + if command == 'retry': + if is_author and self.state == 'error': + ok = True + self.state = 'ready' + elif command == 'review': + if param and is_reviewer: + if self.state == 'opened': + ok = True + self.state = 'approved' + elif self.state == 'validated': + ok = True + self.state = 'ready' + elif not param and is_author and self.state == 'error': + # TODO: r- on something which isn't in error? + ok = True + self.state = 'validated' + elif command == 'delegate': + if is_reviewer: + ok = True + Partners = delegates = self.env['res.partner'] + if param is True: + delegates |= self.author + else: + for login in param: + delegates |= Partners.search([('github_login', '=', login)]) or Partners.create({ + 'name': login, + 'github_login': login, + }) + delegates.write({'delegate_reviewer': [(4, self.id, 0)]}) + elif command == 'priority': + if is_admin: + ok = True + self.priority = param + if param == 0: + self.target.active_staging_id.cancel( + "P=0 on %s:%s by %s, unstaging %s", + self.repository.name, self.number, + author.github_login, self.target.name, + ) + elif command == 'rebase': + # anyone can rebase- their PR I guess? + self.rebase = param + + _logger.info( + "%s %s(%s) on %s:%s by %s (%s)", + "applied" if ok else "ignored", + command, param, + self.repository.name, self.number, + author.github_login, author.display_name, + ) + if ok: + applied.append('{}({})'.format(command, param)) + else: + ignored.append('{}({})'.format(command, param)) + msg = [] + if applied: + msg.append('applied ' + ' '.join(applied)) + if ignored: + msg.append('ignored ' + ' '.join(ignored)) + return '\n'.join(msg) + + def _validate(self, statuses): + # could have two PRs (e.g. 
one open and one closed) at least + # temporarily on the same head, or on the same head with different + # targets + for pr in self: + required = pr.repository.project_id.required_statuses.split(',') + if all(statuses.get(r.strip()) == 'success' for r in required): + oldstate = pr.state + if oldstate == 'opened': + pr.state = 'validated' + elif oldstate == 'approved': + pr.state = 'ready' + + # _logger.info("CI+ (%s) for PR %s:%s: %s -> %s", + # statuses, pr.repository.name, pr.number, oldstate, pr.state) + # else: + # _logger.info("CI- (%s) for PR %s:%s", statuses, pr.repository.name, pr.number) + + def _auto_init(self): + res = super(PullRequests, self)._auto_init() + tools.create_unique_index( + self._cr, 'runbot_merge_unique_pr_per_target', self._table, ['number', 'target', 'repository']) + self._cr.execute("CREATE INDEX IF NOT EXISTS runbot_merge_pr_head " + "ON runbot_merge_pull_requests " + "USING hash (head)") + return res + + @property + def _tagstate(self): + if self.state == 'ready' and self.staging_id.heads: + return 'staged' + return self.state + + @api.model + def create(self, vals): + pr = super().create(vals) + c = self.env['runbot_merge.commit'].search([('sha', '=', pr.head)]) + if c and c.statuses: + pr._validate(json.loads(c.statuses)) + + if pr.state not in ('closed', 'merged'): + self.env['runbot_merge.pull_requests.tagging'].create({ + 'pull_request': pr.number, + 'repository': pr.repository.id, + 'state_from': False, + 'state_to': pr._tagstate, + }) + return pr + + @api.multi + def write(self, vals): + oldstate = { pr: pr._tagstate for pr in self } + w = super().write(vals) + for pr in self: + before, after = oldstate[pr], pr._tagstate + if after != before: + self.env['runbot_merge.pull_requests.tagging'].create({ + 'pull_request': pr.number, + 'repository': pr.repository.id, + 'state_from': oldstate[pr], + 'state_to': pr._tagstate, + }) + return w + + @api.multi + def unlink(self): + for pr in self: + self.env['runbot_merge.pull_requests.tagging'].create({ + 'pull_request': pr.number, + 'repository': pr.repository.id, + 'state_from': pr._tagstate, + 'state_to': False, + }) + return super().unlink() + + +_TAGS = { + False: set(), + 'opened': {'seen 🙂'}, +} +_TAGS['validated'] = _TAGS['opened'] | {'CI 🤖'} +_TAGS['approved'] = _TAGS['opened'] | {'r+ 👌'} +_TAGS['ready'] = _TAGS['validated'] | _TAGS['approved'] +_TAGS['staged'] = _TAGS['ready'] | {'merging 👷'} +_TAGS['merged'] = _TAGS['ready'] | {'merged 🎉'} +_TAGS['error'] = _TAGS['opened'] | {'error 🙅'} +_TAGS['closed'] = _TAGS['opened'] | {'closed 💔'} + +class Tagging(models.Model): + """ + Queue of tag changes to make on PRs. + + Several PR state changes are driven by webhooks, webhooks should return + quickly, performing calls to the Github API would *probably* get in the + way of that. Instead, queue tagging changes into this table whose + execution can be cron-driven. 
+ """ + _name = 'runbot_merge.pull_requests.tagging' + + repository = fields.Many2one('runbot_merge.repository', required=True) + # store the PR number (not id) as we need a Tagging for PR objects + # being deleted (retargeted to non-managed branches) + pull_request = fields.Integer() + + state_from = fields.Selection([ + ('opened', 'Opened'), + ('closed', 'Closed'), + ('validated', 'Validated'), + ('approved', 'Approved'), + ('ready', 'Ready'), + ('staged', 'Staged'), + ('merged', 'Merged'), + ('error', 'Error'), + ]) + state_to = fields.Selection([ + ('opened', 'Opened'), + ('closed', 'Closed'), + ('validated', 'Validated'), + ('approved', 'Approved'), + ('ready', 'Ready'), + ('staged', 'Staged'), + ('merged', 'Merged'), + ('error', 'Error'), + ]) + +class Commit(models.Model): + """Represents a commit onto which statuses might be posted, + independent of everything else as commits can be created by + statuses only, by PR pushes, by branch updates, ... + """ + _name = 'runbot_merge.commit' + + sha = fields.Char(required=True) + statuses = fields.Char(help="json-encoded mapping of status contexts to states", default="{}") + + def create(self, values): + r = super(Commit, self).create(values) + r._notify() + return r + + def write(self, values): + r = super(Commit, self).write(values) + self._notify() + return r + + # NB: GH recommends doing heavy work asynchronously, may be a good + # idea to defer this to a cron or something + def _notify(self): + Stagings = self.env['runbot_merge.stagings'] + PRs = self.env['runbot_merge.pull_requests'] + # chances are low that we'll have more than one commit + for c in self: + st = json.loads(c.statuses) + pr = PRs.search([('head', '=', c.sha)]) + if pr: + pr._validate(st) + # heads is a json-encoded mapping of reponame:head, so chances + # are if a sha matches a heads it's matching one of the shas + stagings = Stagings.search([('heads', 'ilike', c.sha)]) + if stagings: + stagings._validate() + + _sql_constraints = [ + ('unique_sha', 'unique (sha)', 'no duplicated commit'), + ] + + def _auto_init(self): + res = super(Commit, self)._auto_init() + self._cr.execute(""" + CREATE INDEX IF NOT EXISTS runbot_merge_unique_statuses + ON runbot_merge_commit + USING hash (sha) + """) + return res + +class Stagings(models.Model): + _name = 'runbot_merge.stagings' + + target = fields.Many2one('runbot_merge.branch', required=True) + + batch_ids = fields.One2many( + 'runbot_merge.batch', 'staging_id', + ) + state = fields.Selection([ + ('success', 'Success'), + ('failure', 'Failure'), + ('pending', 'Pending'), + ]) + active = fields.Boolean(default=True) + + staged_at = fields.Datetime(default=fields.Datetime.now) + restaged = fields.Integer(default=0) + + # seems simpler than adding yet another indirection through a model + heads = fields.Char(required=True, help="JSON-encoded map of heads, one per repo in the project") + + def _validate(self): + Commits = self.env['runbot_merge.commit'] + for s in self: + heads = list(json.loads(s.heads).values()) + commits = Commits.search([ + ('sha', 'in', heads) + ]) + if len(commits) < len(heads): + s.state = 'pending' + continue + + reqs = [r.strip() for r in s.target.project_id.required_statuses.split(',')] + st = 'success' + for c in commits: + statuses = json.loads(c.statuses) + for v in map(statuses.get, reqs): + if st == 'failure' or v in ('error', 'failure'): + st = 'failure' + elif v in (None, 'pending'): + st = 'pending' + else: + assert v == 'success' + s.state = st + + def cancel(self, reason, *args): + if not self: 
+ return + + _logger.info(reason, *args) + self.batch_ids.write({'active': False}) + self.active = False + + def fail(self, message, prs=None): + _logger.error("Staging %s failed: %s", self, message) + prs = prs or self.batch_ids.prs + prs.write({'state': 'error'}) + for pr in prs: + pr.repository.github().comment( + pr.number, "Staging failed: %s" % message) + + self.batch_ids.write({'active': False}) + self.active = False + + def try_splitting(self): + batches = len(self.batch_ids) + if batches > 1: + midpoint = batches // 2 + h, t = self.batch_ids[:midpoint], self.batch_ids[midpoint:] + # NB: batches remain attached to their original staging + sh = self.env['runbot_merge.split'].create({ + 'target': self.target.id, + 'batch_ids': [(4, batch.id, 0) for batch in h], + }) + st = self.env['runbot_merge.split'].create({ + 'target': self.target.id, + 'batch_ids': [(4, batch.id, 0) for batch in t], + }) + _logger.info("Split %s to %s (%s) and %s (%s)", + self, h, sh, t, st) + self.batch_ids.write({'active': False}) + self.active = False + return True + + # single batch => the staging is an unredeemable failure + if self.state != 'failure': + # timed out, just mark all PRs (wheee) + self.fail('timed out (>{} minutes)'.format(self.target.project_id.ci_timeout)) + return False + + # try inferring which PR failed and only mark that one + for repo, head in json.loads(self.heads).items(): + commit = self.env['runbot_merge.commit'].search([ + ('sha', '=', head) + ]) + reason = next(( + ctx for ctx, result in json.loads(commit.statuses).items() + if result in ('error', 'failure') + ), None) + if not reason: + continue + + pr = next(( + pr for pr in self.batch_ids.prs + if pr.repository.name == repo + ), None) + if pr: + self.fail(reason, pr) + return False + + # the staging failed but we don't have a specific culprit, fail + # everything + self.fail("unknown reason") + + return False + +class Split(models.Model): + _name = 'runbot_merge.split' + + target = fields.Many2one('runbot_merge.branch', required=True) + batch_ids = fields.One2many('runbot_merge.batch', 'split_id', context={'active_test': False}) + +class Batch(models.Model): + """ A batch is a "horizontal" grouping of *codependent* PRs: PRs with + the same label & target but for different repositories. These are + assumed to be part of the same "change" smeared over multiple + repositories e.g. change an API in repo1, this breaks use of that API + in repo2 which now needs to be updated. 
+ """ + _name = 'runbot_merge.batch' + + target = fields.Many2one('runbot_merge.branch', required=True) + staging_id = fields.Many2one('runbot_merge.stagings') + split_id = fields.Many2one('runbot_merge.split') + + prs = fields.Many2many('runbot_merge.pull_requests') + + active = fields.Boolean(default=True) + + @api.constrains('target', 'prs') + def _check_prs(self): + for batch in self: + repos = self.env['runbot_merge.repository'] + for pr in batch.prs: + if pr.target != batch.target: + raise ValidationError("A batch and its PRs must have the same branch, got %s and %s" % (batch.target, pr.target)) + if pr.repository in repos: + raise ValidationError("All prs of a batch must have different target repositories, got a duplicate %s on %s" % (pr.repository, pr)) + repos |= pr.repository + + def stage(self, meta, prs): + """ + Updates meta[*][head] on success + + :return: () or Batch object (if all prs successfully staged) + """ + new_heads = {} + for pr in prs: + gh = meta[pr.repository]['gh'] + + _logger.info( + "Staging pr %s:%s for target %s; squash=%s", + pr.repository.name, pr.number, pr.target.name, pr.squash + ) + + target = 'tmp.{}'.format(pr.target.name) + suffix = '\n\ncloses {pr.repository.name}#{pr.number}'.format(pr=pr) + try: + # nb: pr_commits is oldest to newest so pr.head is pr_commits[-1] + pr_commits = gh.commits(pr.number) + rebase_and_merge = pr.rebase + squash = rebase_and_merge and len(pr_commits) == 1 + if squash: + pr_commits[0]['commit']['message'] += suffix + new_heads[pr] = gh.rebase(pr.number, target, commits=pr_commits) + elif rebase_and_merge: + msg = pr.message + suffix + h = gh.rebase(pr.number, target, reset=True, commits=pr_commits) + new_heads[pr] = gh.merge(h, target, msg)['sha'] + else: + pr_head = pr_commits[-1] # pr_commits is oldest to newest + base_commit = None + head_parents = {p['sha'] for p in pr_head['parents']} + if len(head_parents) > 1: + # look for parent(s?) 
of pr_head not in PR, means it's + # from target (so we merged target in pr) + merge = head_parents - {c['sha'] for c in pr_commits} + assert len(merge) <= 1, \ + ">1 parent from base in PR's head is not supported" + if len(merge) == 1: + [base_commit] = merge + + if base_commit: + # replicate pr_head with base_commit replaced by + # the current head + original_head = gh.head(target) + merge_tree = gh.merge(pr_head['sha'], target, 'temp merge')['tree']['sha'] + new_parents = [original_head] + list(head_parents - {base_commit}) + copy = gh('post', 'git/commits', json={ + 'message': pr_head['commit']['message'] + suffix, + 'tree': merge_tree, + 'author': pr_head['commit']['author'], + 'committer': pr_head['commit']['committer'], + 'parents': new_parents, + }).json() + gh.set_ref(target, copy['sha']) + new_heads[pr] = copy['sha'] + else: + # otherwise do a regular merge + msg = pr.message + suffix + new_heads[pr] = gh.merge(pr.head, target, msg)['sha'] + except (exceptions.MergeError, AssertionError) as e: + _logger.exception("Failed to merge %s:%s into staging branch (error: %s)", pr.repository.name, pr.number, e) + pr.state = 'error' + gh.comment(pr.number, "Unable to stage PR (merge conflict)") + + # reset other PRs + for to_revert in new_heads.keys(): + it = meta[to_revert.repository] + it['gh'].set_ref('tmp.{}'.format(to_revert.target.name), it['head']) + + return self.env['runbot_merge.batch'] + + # update meta to new heads + for pr, head in new_heads.items(): + meta[pr.repository]['head'] = head + if not self.env['runbot_merge.commit'].search([('sha', '=', head)]): + self.env['runbot_merge.commit'].create({'sha': head}) + return self.create({ + 'target': prs[0].target.id, + 'prs': [(4, pr.id, 0) for pr in prs], + }) + +class FetchJob(models.Model): + _name = 'runbot_merge.fetch_job' + + active = fields.Boolean(default=True) + repository = fields.Many2one('runbot_merge.repository', index=True, required=True) + number = fields.Integer(index=True, required=True) diff --git a/runbot_merge/models/res_partner.py b/runbot_merge/models/res_partner.py new file mode 100644 index 00000000..f3aeed2c --- /dev/null +++ b/runbot_merge/models/res_partner.py @@ -0,0 +1,15 @@ +from odoo import fields, models, tools + +class Partner(models.Model): + _inherit = 'res.partner' + + github_login = fields.Char() + reviewer = fields.Boolean(default=False, help="Can review PRs (maybe m2m to repos/branches?)") + self_reviewer = fields.Boolean(default=False, help="Can review own PRs (independent from reviewer)") + delegate_reviewer = fields.Many2many('runbot_merge.pull_requests') + + def _auto_init(self): + res = super(Partner, self)._auto_init() + tools.create_unique_index( + self._cr, 'runbot_merge_unique_gh_login', self._table, ['github_login']) + return res diff --git a/runbot_merge/security/ir.model.access.csv b/runbot_merge/security/ir.model.access.csv new file mode 100644 index 00000000..7618b609 --- /dev/null +++ b/runbot_merge/security/ir.model.access.csv @@ -0,0 +1,15 @@ +id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink +access_runbot_merge_project_admin,Admin access to project,model_runbot_merge_project,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_repository_admin,Admin access to repo,model_runbot_merge_repository,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_branch_admin,Admin access to branches,model_runbot_merge_branch,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_pull_requests_admin,Admin access to 
PR,model_runbot_merge_pull_requests,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_pull_requests_tagging_admin,Admin access to tagging,model_runbot_merge_pull_requests_tagging,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_commit_admin,Admin access to commits,model_runbot_merge_commit,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_stagings_admin,Admin access to stagings,model_runbot_merge_stagings,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_split_admin,Admin access to splits,model_runbot_merge_split,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_batch_admin,Admin access to batches,model_runbot_merge_batch,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_fetch_job_admin,Admin access to fetch jobs,model_runbot_merge_fetch_job,runbot_merge.group_admin,1,1,1,1 +access_runbot_merge_project,User access to project,model_runbot_merge_project,base.group_user,1,0,0,0 +access_runbot_merge_repository,User access to repo,model_runbot_merge_repository,base.group_user,1,0,0,0 +access_runbot_merge_branch,User access to branches,model_runbot_merge_branch,base.group_user,1,0,0,0 +access_runbot_merge_pull_requests,User access to PR,model_runbot_merge_pull_requests,base.group_user,1,0,0,0 diff --git a/runbot_merge/security/security.xml b/runbot_merge/security/security.xml new file mode 100644 index 00000000..62e1f323 --- /dev/null +++ b/runbot_merge/security/security.xml @@ -0,0 +1,8 @@ + + + Mergebot Administrator + + + + + diff --git a/runbot_merge/tests/README.rst b/runbot_merge/tests/README.rst new file mode 100644 index 00000000..0671b9be --- /dev/null +++ b/runbot_merge/tests/README.rst @@ -0,0 +1,47 @@ +Execute this test suite using pytest. + +The default mode is to run tests locally using a mock github.com. + +See the docstring of remote.py for instructions to run against github "actual" +(including remote-specific options) and the end of this file for a sample. + +Shared properties running tests, regardless of the github implementation: + +* test should be run from the root of the runbot repository providing the + name of this module aka ``pytest runbot_merge`` or + ``python -mpytest runbot_merge`` +* a database name to use must be provided using ``--db``, the database should + not exist beforehand +* the addons path must be specified using ``--addons-path``, both "runbot" and + the standard addons (odoo/addons) must be provided explicitly + +See pytest's documentation for other options, I would recommend ``-rXs``, +``-v`` and ``--showlocals``. + +When running "remote" tests as they take a very long time (hours) ``-x`` +(aka ``--maxfail=1``) and ``--ff`` (run previously failed first) is also +recommended unless e.g. you run the tests overnight. + +``pytest.ini`` sample +--------------------- + +.. 
code:: ini + + [github] + owner = test-org + token = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + [role_reviewer] + name = Dick Bong + user = loginb + token = bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + + [role_self_reviewer] + name = Fanny Chmelar + user = loginc + token = cccccccccccccccccccccccccccccccccccccccc + + [role_other] + name = Harry Baals + user = logind + token = dddddddddddddddddddddddddddddddddddddddd diff --git a/runbot_merge/tests/conftest.py b/runbot_merge/tests/conftest.py new file mode 100644 index 00000000..feee06cc --- /dev/null +++ b/runbot_merge/tests/conftest.py @@ -0,0 +1,5 @@ +pytest_plugins = ["local"] + +def pytest_addoption(parser): + parser.addoption("--db", action="store", help="Odoo DB to run tests with") + parser.addoption('--addons-path', action='store', help="Odoo's addons path") diff --git a/runbot_merge/tests/fake_github/__init__.py b/runbot_merge/tests/fake_github/__init__.py new file mode 100644 index 00000000..2faf54cf --- /dev/null +++ b/runbot_merge/tests/fake_github/__init__.py @@ -0,0 +1,748 @@ +import collections +import hashlib +import hmac +import io +import json +import logging +import re + +import responses +import werkzeug.urls +import werkzeug.test +import werkzeug.wrappers + +from . import git + +API_PATTERN = re.compile( + r'https://api.github.com/repos/(?P\w+/\w+)/(?P.+)' +) +class APIResponse(responses.BaseResponse): + def __init__(self, sim): + super(APIResponse, self).__init__( + method=None, + url=API_PATTERN + ) + self.sim = sim + self.content_type = 'application/json' + self.stream = False + + def matches(self, request): + return self._url_matches(self.url, request.url, self.match_querystring) + + def get_response(self, request): + m = self.url.match(request.url) + + (status, r) = self.sim.repos[m.group('repo')].api(m.group('path'), request) + + headers = self.get_headers() + body = io.BytesIO(b'') + if r is not None: + body = io.BytesIO(json.dumps(r).encode('utf-8')) + + return responses.HTTPResponse( + status=status, + reason=r.get('message') if isinstance(r, dict) else "bollocks", + body=body, + headers=headers, + preload_content=False, ) + +class Github(object): + """ Github simulator + + When enabled (by context-managing): + + * intercepts all ``requests`` calls & replies to api.github.com + * sends relevant hooks (registered per-repo as pairs of WSGI app and URL) + * stores repo content + """ + def __init__(self): + # {repo: {name, issues, objects, refs, hooks}} + self.repos = {} + + def repo(self, name, hooks=()): + r = self.repos[name] = Repo(name) + for hook, events in hooks: + r.hook(hook, events) + return self.repos[name] + + def __enter__(self): + # otherwise swallows errors from within the test + self._requests = responses.RequestsMock(assert_all_requests_are_fired=False).__enter__() + self._requests.add(APIResponse(self)) + return self + + def __exit__(self, *args): + return self._requests.__exit__(*args) + +class Repo(object): + def __init__(self, name): + self.name = name + self.issues = {} + #: we're cheating, instead of storing serialised in-memory + #: objects we're storing the Python stuff directly, Commit + #: objects for commits, {str: hash} for trees and bytes for + #: blobs. 
+class Repo(object):
+    def __init__(self, name):
+        self.name = name
+        self.issues = {}
+        #: we're cheating, instead of storing serialised in-memory
+        #: objects we're storing the Python stuff directly, Commit
+        #: objects for commits, {str: hash} for trees and bytes for
+        #: blobs. We're still indirecting via hashes and storing a
+        #: h:o map because going through the API probably requires it
+        self.objects = {}
+        # branches: refs/heads/*
+        # PRs: refs/pull/*
+        self.refs = {}
+        # {event: (wsgi_app, url)}
+        self.hooks = collections.defaultdict(list)
+
+    def hook(self, hook, events):
+        for event in events:
+            self.hooks[event].append(Client(*hook))
+
+    def notify(self, event_type, *payload):
+        for client in self.hooks.get(event_type, []):
+            getattr(client, event_type)(*payload)
+
+    def set_secret(self, secret):
+        for clients in self.hooks.values():
+            for client in clients:
+                client.secret = secret
+
+    def issue(self, number):
+        return self.issues[number]
+
+    def make_issue(self, title, body):
+        return Issue(self, title, body)
+
+    def make_pr(self, title, body, target, ctid, user, label=None):
+        assert 'heads/%s' % target in self.refs
+        return PR(self, title, body, target, ctid, user=user, label='{}:{}'.format(user, label or target))
+
+    def make_ref(self, name, commit, force=False):
+        assert isinstance(self.objects[commit], Commit)
+        if not force and name in self.refs:
+            raise ValueError("ref %s already exists" % name)
+        self.refs[name] = commit
+
+    def commit(self, ref):
+        sha = self.refs.get(ref) or ref
+        commit = self.objects[sha]
+        assert isinstance(commit, Commit)
+        return commit
+
+    def log(self, ref):
+        commits = [self.commit(ref)]
+        while commits:
+            c = commits.pop(0)
+            commits.extend(self.commit(r) for r in c.parents)
+            yield c.to_json()
+
+    def post_status(self, ref, status, context='default', description=""):
+        assert status in ('error', 'failure', 'pending', 'success')
+        c = self.commit(ref)
+        c.statuses.append((status, context, description))
+        self.notify('status', self.name, context, status, c.id)
+
+    def make_commit(self, ref, message, author, committer=None, tree=None):
+        assert tree, "a commit must provide a tree"
+
+        refs = ref or []
+        if not isinstance(refs, list):
+            refs = [ref]
+
+        pids = [
+            ref if re.match(r'[0-9a-f]{40}', ref) else self.refs[ref]
+            for ref in refs
+        ]
+
+        if type(tree) is type(u''):
+            assert isinstance(self.objects.get(tree), dict)
+            tid = tree
+        else:
+            tid = self._save_tree(tree)
+
+        c = Commit(tid, message, author, committer or author, parents=pids)
+        self.objects[c.id] = c
+        if refs and refs[0] != pids[0]:
+            self.refs[refs[0]] = c.id
+        return c.id
+
+    def _save_tree(self, t):
+        """ t: Dict String (String | Tree)
+        """
+        t = {name: self._make_obj(obj) for name, obj in t.items()}
+        h, _ = git.make_tree(
+            self.objects,
+            t
+        )
+        self.objects[h] = t
+        return h
+
+    def _make_obj(self, o):
+        if type(o) is type(u''):
+            o = o.encode('utf-8')
+
+        if type(o) is bytes:
+            h, b = git.make_blob(o)
+            self.objects[h] = o
+            return h
+        return self._save_tree(o)
+
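+    # For illustration only (file names and contents are made up):
+    # ``make_commit``/``_save_tree`` accept nested dicts mapping names to file
+    # contents (str/bytes) or sub-trees, e.g.
+    #
+    #     {'README.md': "hello", 'src': {'main.py': "print('hi')"}}
+    #
+    # strings are stored as blobs, dicts as sub-trees, and the resulting
+    # top-level tree hash is what the commit records as its tree.
+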
+    def api(self, path, request):
+        # a better version would be some sort of longest-match?
+        for method, pattern, handler in sorted(self._handlers, key=lambda t: -len(t[1])):
+            if method and request.method != method:
+                continue
+            # FIXME: remove qs from path & ensure path is entirely matched, maybe finally use proper routing?
+            m = re.match(pattern, path)
+            if m:
+                return handler(self, request, **m.groupdict())
+        return (404, {'message': "No match for {} {}".format(request.method, path)})
+
+    def read_tree(self, commit):
+        return git.read_object(self.objects, commit.tree)
+
+    def is_ancestor(self, sha, of):
+        assert not git.is_ancestor(self.objects, sha, of=of)
+
+    def _read_ref(self, _, ref):
+        obj = self.refs.get(ref)
+        if obj is None:
+            return (404, None)
+        return (200, {
+            "ref": "refs/%s" % ref,
+            "object": {
+                "type": "commit",
+                "sha": obj,
+            }
+        })
+    def _create_ref(self, r):
+        body = json.loads(r.body)
+        ref = body['ref']
+        # ref must start with refs/ and contain at least two slashes
+        if not (ref.startswith('refs/') and ref.count('/') >= 2):
+            return (400, None)
+        ref = ref[5:]
+        # if ref already exists conflict?
+        if ref in self.refs:
+            return (409, None)
+
+        sha = body['sha']
+        obj = self.objects.get(sha)
+        # if sha is not in the repo or not a commit, 404
+        if not isinstance(obj, Commit):
+            return (404, None)
+
+        self.make_ref(ref, sha)
+
+        return (201, {
+            "ref": "refs/%s" % ref,
+            "object": {
+                "type": "commit",
+                "sha": sha,
+            }
+        })
+
+    def _write_ref(self, r, ref):
+        current = self.refs.get(ref)
+        if current is None:
+            return (404, None)
+        body = json.loads(r.body)
+        sha = body['sha']
+        if sha not in self.objects:
+            return (404, None)
+
+        if not body.get('force'):
+            if not git.is_ancestor(self.objects, current, sha):
+                return (400, None)
+
+        self.make_ref(ref, sha, force=True)
+        return (200, {
+            "ref": "refs/%s" % ref,
+            "object": {
+                "type": "commit",
+                "sha": sha,
+            }
+        })
+
+    def _create_commit(self, r):
+        body = json.loads(r.body)
+        author = body.get('author') or {'name': 'default', 'email': 'default', 'date': 'Z'}
+        try:
+            sha = self.make_commit(
+                ref=(body.get('parents')),
+                message=body['message'],
+                author=author,
+                committer=body.get('committer') or author,
+                tree=body['tree']
+            )
+        except (KeyError, AssertionError):
+            # either couldn't find the parent or couldn't find the tree
+            return (404, None)
+
+        return (201, {
+            "sha": sha,
+            "author": author,
+            "committer": body.get('committer') or author,
+            "message": body['message'],
+            "tree": {"sha": body['tree']},
+            "parents": [{"sha": sha}],
+        })
+    def _read_commit(self, _, sha):
+        c = self.objects.get(sha)
+        if not isinstance(c, Commit):
+            return (404, None)
+        return (200, {
+            "sha": sha,
+            "author": c.author,
+            "committer": c.committer,
+            "message": c.message,
+            "tree": {"sha": c.tree},
+            "parents": [{"sha": p} for p in c.parents],
+        })
+
+    def _read_statuses(self, _, ref):
+        try:
+            c = self.commit(ref)
+        except KeyError:
+            return (404, None)
+
+        return (200, {
+            'sha': c.id,
+            'total_count': len(c.statuses),
+            # TODO: combined?
+            'statuses': [
+                {'context': context, 'state': state}
+                for state, context, _ in reversed(c.statuses)
+            ]
+        })
+
+    def _read_issue(self, r, number):
+        try:
+            issue = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+        attr = {'pull_request': True} if isinstance(issue, PR) else {}
+        return (200, {'number': issue.number, **attr})
+
+    def _read_issue_comments(self, r, number):
+        try:
+            issue = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+        return (200, [{
+            'user': {'login': author},
+            'body': body,
+        } for author, body in issue.comments
+          if not body.startswith('REVIEW')
+        ])
+
+    def _create_issue_comment(self, r, number):
+        try:
+            issue = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+        try:
+            body = json.loads(r.body)['body']
+        except KeyError:
+            return (400, None)
+
+        issue.post_comment(body, "user")
+        return (201, {
+            'id': 0,
+            'body': body,
+            'user': { 'login': "user" },
+        })
+
+    def _read_pr(self, r, number):
+        try:
+            pr = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+        # FIXME: dedup with Client
+        return (200, {
+            'number': pr.number,
+            'head': {
+                'sha': pr.head,
+                'label': pr.label,
+            },
+            'base': {
+                'ref': pr.base,
+                'repo': {
+                    'name': self.name.split('/')[1],
+                    'full_name': self.name,
+                },
+            },
+            'title': pr.title,
+            'body': pr.body,
+            'commits': len(pr.commits),
+            'user': {'login': pr.user},
+        })
+
+    def _edit_pr(self, r, number):
+        try:
+            pr = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+
+        body = json.loads(r.body)
+        if not body.keys() & {'title', 'body', 'state', 'base'}:
+            # FIXME: return PR content
+            return (200, {})
+        assert body.get('state') in ('open', 'closed', None)
+
+        pr.state = body.get('state') or pr.state
+        if body.get('title'):
+            pr.title = body.get('title')
+        if body.get('body'):
+            pr.body = body.get('body')
+        if body.get('base'):
+            pr.base = body.get('base')
+
+        if body.get('state') == 'open':
+            self.notify('pull_request', 'reopened', pr)
+        elif body.get('state') == 'closed':
+            self.notify('pull_request', 'closed', pr)
+
+        return (200, {})
+
+    def _read_pr_reviews(self, _, number):
+        pr = self.issues.get(int(number))
+        if not isinstance(pr, PR):
+            return (404, None)
+
+        return (200, [{
+            'user': {'login': author},
+            'state': r.group(1),
+            'body': r.group(2),
+        }
+            for author, body in pr.comments
+            for r in [re.match(r'REVIEW (\w+)\n\n(.*)', body)]
+            if r
+        ])
+
+    def _read_pr_commits(self, r, number):
+        pr = self.issues.get(int(number))
+        if not isinstance(pr, PR):
+            return (404, None)
+
+        return (200, [c.to_json() for c in pr.commits])
+
+
+    def _add_labels(self, r, number):
+        try:
+            pr = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+
+        pr.labels.update(json.loads(r.body))
+
+        return (200, {})
+
+    def _remove_label(self, _, number, label):
+        try:
+            pr = self.issues[int(number)]
+        except KeyError:
+            return (404, None)
+
+        try:
+            pr.labels.remove(werkzeug.urls.url_unquote(label))
+        except KeyError:
+            return (404, None)
+        else:
+            return (200, {})
+
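+    # Illustrative payload only (branch names and message are made up): the
+    # merge endpoint expects the same body shape as GitHub's "merge a branch"
+    # API, e.g.
+    #
+    #     {"base": "master", "head": "feature-branch", "commit_message": "merge it"}
+    #
+    # and resolves it as a three-way merge of the two trees via their merge base.
+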
+    def _do_merge(self, r):
+        body = json.loads(r.body)  # {base, head, commit_message}
+        if not body.get('commit_message'):
+            return (400, {'message': "Merges require a commit message"})
+        base = 'heads/%s' % body['base']
+        target = self.refs.get(base)
+        if not target:
+            return (404, {'message': "Base does not exist"})
+        # head can be either a branch or a sha
+        sha = self.refs.get('heads/%s' % body['head']) or body['head']
+        if sha not in self.objects:
+            return (404, {'message': "Head does not exist"})
+
+        if git.is_ancestor(self.objects, sha, of=target):
+            return (204, None)
+
+        # merging according to read-tree:
+        # get common ancestor (base) of commits
+        try:
+            base = git.merge_base(self.objects, target, sha)
+        except Exception:
+            return (400, {'message': "No common ancestor between %(base)s and %(head)s" % body})
+        try:
+            tid = git.merge_objects(
+                self.objects,
+                self.objects[base].tree,
+                self.objects[target].tree,
+                self.objects[sha].tree,
+            )
+        except Exception as e:
+            logging.exception("Merge Conflict")
+            return (409, {'message': 'Merge Conflict %r' % e})
+
+        c = Commit(tid, body['commit_message'], author=None, committer=None, parents=[target, sha])
+        self.objects[c.id] = c
+
+        return (201, c.to_json())
+
+    _handlers = [
+        ('POST', r'git/refs', _create_ref),
+        ('GET', r'git/refs/(?P<ref>.*)', _read_ref),
+        ('PATCH', r'git/refs/(?P<ref>.*)', _write_ref),
+
+        # nb: there's a different "commits" endpoint at /commits with repo-level metadata
+        ('GET', r'git/commits/(?P<sha>[0-9A-Fa-f]{40})', _read_commit),
+        ('POST', r'git/commits', _create_commit),
+        ('GET', r'commits/(?P<ref>[^/]+)/status', _read_statuses),
+
+        ('GET', r'issues/(?P<number>\d+)', _read_issue),
+        ('GET', r'issues/(?P<number>\d+)/comments', _read_issue_comments),
+        ('POST', r'issues/(?P<number>\d+)/comments', _create_issue_comment),
+
+        ('POST', r'merges', _do_merge),
+
+        ('GET', r'pulls/(?P<number>\d+)', _read_pr),
+        ('PATCH', r'pulls/(?P<number>\d+)', _edit_pr),
+        ('GET', r'pulls/(?P<number>\d+)/reviews', _read_pr_reviews),
+        ('GET', r'pulls/(?P<number>\d+)/commits', _read_pr_commits),
+
+        ('POST', r'issues/(?P<number>\d+)/labels', _add_labels),
+        ('DELETE', r'issues/(?P<number>\d+)/labels/(?P