From 9de18de4546591a6943fd2ec11a6f91188e190cb Mon Sep 17 00:00:00 2001
From: Xavier Morel
Date: Wed, 16 Aug 2023 14:37:19 +0200
Subject: [PATCH] [CHG] *: move repo cache from forwardbot to mergebot

If the stagings are going to be created locally (via a git working copy
rather than the github API), the mergebot part needs to have access to
the cache, so move the cache over. Also move the maintenance cron.

In an extremely minor way, this prefigures the (hopeful) eventual
merging of the ~~planes~~ modules.
---
 forwardport/__manifest__.py                   |   2 +-
 forwardport/data/crons.xml                    |  13 --
 forwardport/data/security.xml                 |   9 --
 .../migrations/15.0.1.3/pre-migration.py      |   9 ++
 forwardport/models/forwardport.py             |  54 +------
 forwardport/models/project.py                 | 133 ++----------------
 runbot_merge/__manifest__.py                  |   1 +
 runbot_merge/git.py                           | 124 ++++++++++++++++
 runbot_merge/models/__init__.py               |   1 +
 runbot_merge/models/crons/__init__.py         |   1 +
 runbot_merge/models/crons/git_maintenance.py  |  37 +++++
 runbot_merge/models/crons/git_maintenance.xml |  26 ++++
 12 files changed, 214 insertions(+), 196 deletions(-)
 create mode 100644 forwardport/migrations/15.0.1.3/pre-migration.py
 create mode 100644 runbot_merge/git.py
 create mode 100644 runbot_merge/models/crons/__init__.py
 create mode 100644 runbot_merge/models/crons/git_maintenance.py
 create mode 100644 runbot_merge/models/crons/git_maintenance.xml

diff --git a/forwardport/__manifest__.py b/forwardport/__manifest__.py
index 3ece49af..1f59983e 100644
--- a/forwardport/__manifest__.py
+++ b/forwardport/__manifest__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 {
     'name': 'forward port bot',
-    'version': '1.2',
+    'version': '1.3',
     'summary': "A port which forward ports successful PRs.",
     'depends': ['runbot_merge'],
     'data': [
diff --git a/forwardport/data/crons.xml b/forwardport/data/crons.xml
index 1360914c..02d2be1e 100644
--- a/forwardport/data/crons.xml
+++ b/forwardport/data/crons.xml
@@ -42,17 +42,4 @@
         <field name="numbercall">-1</field>
     </record>
-
-    <record model="ir.cron" id="maintenance">
-        <field name="name">Maintenance of repo cache</field>
-        <field name="model_id" ref="model_forwardport_maintenance"/>
-        <field name="state">code</field>
-        <field name="code">model._run()</field>
-        <field name="interval_number">1</field>
-        <field name="interval_type">weeks</field>
-        <field name="numbercall">-1</field>
-    </record>
-
 </odoo>
diff --git a/forwardport/data/security.xml b/forwardport/data/security.xml
index 424e7991..99548e0c 100644
--- a/forwardport/data/security.xml
+++ b/forwardport/data/security.xml
@@ -43,13 +43,4 @@
         <field name="perm_write">0</field>
         <field name="perm_unlink">0</field>
     </record>
-
-    <record model="ir.model.access" id="access_forwardport_maintenance">
-        <field name="name">Access to maintenance is useless</field>
-        <field name="model_id" ref="model_forwardport_maintenance"/>
-        <field name="perm_read">0</field>
-        <field name="perm_create">0</field>
-        <field name="perm_write">0</field>
-        <field name="perm_unlink">0</field>
-    </record>
 </odoo>
diff --git a/forwardport/migrations/15.0.1.3/pre-migration.py b/forwardport/migrations/15.0.1.3/pre-migration.py
new file mode 100644
index 00000000..ab2fbdd4
--- /dev/null
+++ b/forwardport/migrations/15.0.1.3/pre-migration.py
@@ -0,0 +1,9 @@
+import pathlib
+
+from odoo.tools.appdirs import user_cache_dir
+
+
+def migrate(_cr, _version):
+    # avoid needing to re-clone our repo unnecessarily
+    pathlib.Path(user_cache_dir('forwardport')).rename(
+        pathlib.Path(user_cache_dir('mergebot')))
diff --git a/forwardport/models/forwardport.py b/forwardport/models/forwardport.py
index 6e3e0c24..3bddf74d 100644
--- a/forwardport/models/forwardport.py
+++ b/forwardport/models/forwardport.py
@@ -1,20 +1,15 @@
 # -*- coding: utf-8 -*-
 import logging
-import pathlib
-
-import sentry_sdk
-
-import resource
-import subprocess
 import uuid
 from contextlib import ExitStack
 from datetime import datetime, timedelta
 
+import sentry_sdk
 from dateutil import relativedelta
 
 from odoo import fields, models
+from odoo.addons.runbot_merge import git
 from odoo.addons.runbot_merge.github import GH
-from odoo.tools.appdirs import user_cache_dir
 
 # how long a merged PR survives
 MERGE_AGE = relativedelta.relativedelta(weeks=2)
@@ -177,7 +172,7 @@ class UpdateQueue(models.Model, Queue):
             # doesn't propagate revisions fast enough so on the next loop we
             # can't find the revision we just pushed
             dummy_branch = str(uuid.uuid4())
-            ref = previous._get_local_directory()
+            ref = git.get_local(previous.repository, 'fp_github')
             working_copy.push(ref._directory, f'{new_head}:refs/heads/{dummy_branch}')
             ref.branch('--delete', '--force', dummy_branch)
             # then update the child's branch to the new head
@@ -261,46 +256,3 @@ class DeleteBranches(models.Model, Queue):
             r.json()
         )
         _deleter.info('✔ deleted branch %s of PR %s', self.pr_id.label, self.pr_id.display_name)
-
-_gc = _logger.getChild('maintenance')
-def _bypass_limits():
-    """Allow git to go beyond the limits set for Odoo.
-
-    On large repositories, git gc can take a *lot* of memory (especially with
-    `--aggressive`), if the Odoo limits are too low this can prevent the gc
-    from running, leading to a lack of packing and a massive amount of cruft
-    accumulating in the working copy.
-    """
-    resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
-
-class GC(models.TransientModel):
-    _name = 'forwardport.maintenance'
-    _description = "Weekly maintenance of... cache repos?"
-
-    def _run(self):
-        # lock out the forward port cron to avoid concurrency issues while we're
-        # GC-ing it: wait until it's available, then SELECT FOR UPDATE it,
-        # which should prevent cron workers from running it
-        fp_cron = self.env.ref('forwardport.port_forward')
-        self.env.cr.execute("""
-            SELECT 1 FROM ir_cron
-            WHERE id = %s
-            FOR UPDATE
-        """, [fp_cron.id])
-
-        repos_dir = pathlib.Path(user_cache_dir('forwardport'))
-        # run on all repos with a forwardport target (~ forwardport enabled)
-        for repo in self.env['runbot_merge.repository'].search([('fp_remote_target', '!=', False)]):
-            repo_dir = repos_dir / repo.name
-            if not repo_dir.is_dir():
-                continue
-
-            _gc.info('Running maintenance on %s', repo.name)
-            r = subprocess.run(
-                ['git', '--git-dir', repo_dir, 'gc', '--aggressive', '--prune=now'],
-                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                encoding='utf-8',
-                preexec_fn = _bypass_limits,
-            )
-            if r.returncode:
-                _gc.warning("Maintenance failure (status=%d):\n%s", r.returncode, r.stdout)
diff --git a/forwardport/models/project.py b/forwardport/models/project.py
index cf60f68a..573d0086 100644
--- a/forwardport/models/project.py
+++ b/forwardport/models/project.py
@@ -20,23 +20,22 @@ import json
 import logging
 import operator
 import os
-import pathlib
 import re
 import subprocess
 import tempfile
 import typing
+from pathlib import Path
 
 import dateutil.relativedelta
 import requests
-import resource
 
 from odoo import _, models, fields, api
 from odoo.osv import expression
 from odoo.exceptions import UserError
 from odoo.tools.misc import topological_sort, groupby
 from odoo.tools.sql import reverse_order
 from odoo.tools.appdirs import user_cache_dir
-from odoo.addons.runbot_merge import utils
+from odoo.addons.runbot_merge import git, utils
 from odoo.addons.runbot_merge.models.pull_requests import RPLUS
 
 footer = '\nMore info at https://github.com/odoo/odoo/wiki/Mergebot#forward-port\n'
@@ -838,14 +837,6 @@ class PullRequests(models.Model):
             b.prs[0]._schedule_fp_followup()
         return b
 
-    @property
-    def _source_url(self):
-        return 'https://{}:{}@github.com/{}'.format(
-            self.repository.project_id.fp_github_name or '',
-            self.repository.project_id.fp_github_token,
-            self.repository.name,
-        )
-
     def _create_fp_branch(self, target_branch, fp_branch_name, cleanup):
         """ Creates a forward-port for the current PR to ``target_branch``
         under ``fp_branch_name``.
@@ -865,25 +856,26 @@ class PullRequests(models.Model):
             "Forward-porting %s (%s) to %s",
             self.display_name, root.display_name, target_branch.name
         )
-        source = self._get_local_directory()
+        source = git.get_local(self.repository, 'fp_github')
         r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT).fetch()
         logger.info("Updated cache repo %s:\n%s", source._directory, r.stdout.decode())
 
         logger.info("Create working copy...")
+        cache_dir = user_cache_dir('forwardport')
+        # PullRequest.display_name is `owner/repo#number`, so `owner` becomes a
+        # directory, `TemporaryDirectory` only creates the leaf, so we need to
+        # make sure `owner` exists in `cache_dir`.
+        Path(cache_dir, root.repository.name).parent.mkdir(parents=True, exist_ok=True)
         working_copy = source.clone(
             cleanup.enter_context(
                 tempfile.TemporaryDirectory(
-                    prefix='%s-to-%s-' % (
-                        root.display_name,
-                        target_branch.name
-                    ),
-                    dir=user_cache_dir('forwardport')
-                )),
+                    prefix=f'{root.display_name}-to-{target_branch.name}',
+                    dir=cache_dir)),
             branch=target_branch.name
         )
 
         r = working_copy.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT) \
-            .fetch(self._source_url, root.head)
+            .fetch(git.source_url(self.repository, 'fp_github'), root.head)
         logger.info(
             "Fetched head of %s into %s:\n%s",
             root.display_name,
@@ -1078,26 +1070,6 @@
         # don't stringify so caller can still perform alterations
         return msg
 
-    def _get_local_directory(self):
-        repos_dir = pathlib.Path(user_cache_dir('forwardport'))
-        repos_dir.mkdir(parents=True, exist_ok=True)
-        repo_dir = repos_dir / self.repository.name
-
-        if repo_dir.is_dir():
-            return git(repo_dir)
-        else:
-            _logger.info("Cloning out %s to %s", self.repository.name, repo_dir)
-            subprocess.run(['git', 'clone', '--bare', self._source_url, str(repo_dir)], check=True)
-            # bare repos don't have fetch specs by default, and fetching *into*
-            # them is a pain in the ass, configure fetch specs so `git fetch`
-            # works properly
-            repo = git(repo_dir)
-            repo.config('--add', 'remote.origin.fetch', '+refs/heads/*:refs/heads/*')
-            # negative refspecs require git 2.29
-            repo.config('--add', 'remote.origin.fetch', '^refs/heads/tmp.*')
-            repo.config('--add', 'remote.origin.fetch', '^refs/heads/staging.*')
-            return repo
-
     def _outstanding(self, cutoff):
         """ Returns "outstanding" (unmerged and unclosed) forward-ports
         whose source was merged before ``cutoff`` (all of them if not provided).
@@ -1161,89 +1133,6 @@ class Feedback(models.Model):
 
     token_field = fields.Selection(selection_add=[('fp_github_token', 'Forwardport Bot')])
 
-ALWAYS = ('gc.auto=0', 'maintenance.auto=0')
-
-def _bypass_limits():
-    resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
-
-def git(directory): return Repo(directory, check=True)
-class Repo:
-    def __init__(self, directory, **config):
-        self._directory = str(directory)
-        config.setdefault('stderr', subprocess.PIPE)
-        self._config = config
-        self._params = ()
-        self._opener = subprocess.run
-
-    def __getattr__(self, name):
-        return GitCommand(self, name.replace('_', '-'))
-
-    def _run(self, *args, **kwargs):
-        opts = {**self._config, **kwargs}
-        args = ('git', '-C', self._directory)\
-            + tuple(itertools.chain.from_iterable(('-c', p) for p in self._params + ALWAYS))\
-            + args
-        try:
-            return self._opener(args, preexec_fn=_bypass_limits, **opts)
-        except subprocess.CalledProcessError as e:
-            stream = e.stderr if e.stderr else e.stdout
-            if stream:
-                _logger.error("git call error: %s", stream)
-            raise
-
-    def stdout(self, flag=True):
-        if flag is True:
-            return self.with_config(stdout=subprocess.PIPE)
-        elif flag is False:
-            return self.with_config(stdout=None)
-        return self.with_config(stdout=flag)
-
-    def lazy(self):
-        r = self.with_config()
-        r._config.pop('check', None)
-        r._opener = subprocess.Popen
-        return r
-
-    def check(self, flag):
-        return self.with_config(check=flag)
-
-    def with_config(self, **kw):
-        opts = {**self._config, **kw}
-        r = Repo(self._directory, **opts)
-        r._opener = self._opener
-        r._params = self._params
-        return r
-
-    def with_params(self, *args):
-        r = self.with_config()
-        r._params = args
-        return r
-
-    def clone(self, to, branch=None):
-        self._run(
-            'clone',
-            *([] if branch is None else ['-b', branch]),
-            self._directory, to,
-        )
-        return Repo(to)
-
-class GitCommand:
-    def __init__(self, repo, name):
-        self._name = name
-        self._repo = repo
-
-    def __call__(self, *args, **kwargs):
-        return self._repo._run(self._name, *args, *self._to_options(kwargs))
-
-    def _to_options(self, d):
-        for k, v in d.items():
-            if len(k) == 1:
-                yield '-' + k
-            else:
-                yield '--' + k.replace('_', '-')
-            if v not in (None, True):
-                assert v is not False
-                yield str(v)
 
 class CherrypickError(Exception):
     ...
diff --git a/runbot_merge/__manifest__.py b/runbot_merge/__manifest__.py
index 04c23c7c..1f6ae7f4 100644
--- a/runbot_merge/__manifest__.py
+++ b/runbot_merge/__manifest__.py
@@ -7,6 +7,7 @@
         'security/ir.model.access.csv',
 
         'data/merge_cron.xml',
+        'models/crons/git_maintenance.xml',
         'data/runbot_merge.pull_requests.feedback.template.csv',
         'views/res_partner.xml',
         'views/runbot_merge_project.xml',
diff --git a/runbot_merge/git.py b/runbot_merge/git.py
new file mode 100644
index 00000000..91297f9e
--- /dev/null
+++ b/runbot_merge/git.py
@@ -0,0 +1,124 @@
+import dataclasses
+import itertools
+import logging
+import pathlib
+import resource
+import subprocess
+from typing import Optional, TypeVar
+
+from odoo.tools.appdirs import user_cache_dir
+
+_logger = logging.getLogger(__name__)
+
+
+def source_url(repository, prefix: str) -> str:
+    return 'https://{}@github.com/{}'.format(
+        repository.project_id[f'{prefix}_token'],
+        repository.name,
+    )
+
+
+def get_local(repository, prefix: Optional[str]) -> 'Optional[Repo]':
+    repos_dir = pathlib.Path(user_cache_dir('mergebot'))
+    repos_dir.mkdir(parents=True, exist_ok=True)
+    # NB: `repository.name` is `$org/$name` so this will be a subdirectory, probably
+    repo_dir = repos_dir / repository.name
+
+    if repo_dir.is_dir():
+        return git(repo_dir)
+    elif prefix:
+        _logger.info("Cloning out %s to %s", repository.name, repo_dir)
+        subprocess.run(['git', 'clone', '--bare', source_url(repository, prefix), str(repo_dir)], check=True)
+        # bare repos don't have fetch specs by default, and fetching *into*
+        # them is a pain in the ass, configure fetch specs so `git fetch`
+        # works properly
+        repo = git(repo_dir)
+        repo.config('--add', 'remote.origin.fetch', '+refs/heads/*:refs/heads/*')
+        # negative refspecs require git 2.29
+        repo.config('--add', 'remote.origin.fetch', '^refs/heads/tmp.*')
+        repo.config('--add', 'remote.origin.fetch', '^refs/heads/staging.*')
+        return repo
+
+
+ALWAYS = ('gc.auto=0', 'maintenance.auto=0')
+
+
+def _bypass_limits():
+    resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
+
+
+def git(directory: str) -> 'Repo':
+    return Repo(directory, check=True)
+
+
+Self = TypeVar("Self", bound="Repo")
+class Repo:
+    def __init__(self, directory, **config) -> None:
+        self._directory = str(directory)
+        config.setdefault('stderr', subprocess.PIPE)
+        self._config = config
+        self._params = ()
+
+    def __getattr__(self, name: str) -> 'GitCommand':
+        return GitCommand(self, name.replace('_', '-'))
+
+    def _run(self, *args, **kwargs) -> subprocess.CompletedProcess:
+        opts = {**self._config, **kwargs}
+        args = ('git', '-C', self._directory)\
+            + tuple(itertools.chain.from_iterable(('-c', p) for p in self._params + ALWAYS))\
+            + args
+        try:
+            return subprocess.run(args, preexec_fn=_bypass_limits, **opts)
+        except subprocess.CalledProcessError as e:
+            stream = e.stderr or e.stdout
+            if stream:
+                _logger.error("git call error: %s", stream)
+            raise
+
+    def stdout(self, flag: bool = True) -> Self:
+        if flag is True:
+            return self.with_config(stdout=subprocess.PIPE)
+        elif flag is False:
+            return self.with_config(stdout=None)
+        return self.with_config(stdout=flag)
+
+    def check(self, flag: bool) -> Self:
+        return self.with_config(check=flag)
+
+    def with_config(self, **kw) -> Self:
+        opts = {**self._config, **kw}
+        r = Repo(self._directory, **opts)
+        r._params = self._params
+        return r
+
+    def with_params(self, *args) -> Self:
+        r = self.with_config()
+        r._params = args
+        return r
+
+    def clone(self, to: str, branch: Optional[str] = None) -> Self:
+        self._run(
+            'clone',
+            *([] if branch is None else ['-b', branch]),
+            self._directory, to,
+        )
+        return Repo(to)
+
+
+@dataclasses.dataclass
+class GitCommand:
+    repo: Repo
+    name: str
+
+    def __call__(self, *args, **kwargs) -> subprocess.CompletedProcess:
+        return self.repo._run(self.name, *args, *self._to_options(kwargs))
+
+    def _to_options(self, d):
+        for k, v in d.items():
+            if len(k) == 1:
+                yield '-' + k
+            else:
+                yield '--' + k.replace('_', '-')
+            if v not in (None, True):
+                assert v is not False
+                yield str(v)
diff --git a/runbot_merge/models/__init__.py b/runbot_merge/models/__init__.py
index 5cf276c8..9ca405e4 100644
--- a/runbot_merge/models/__init__.py
+++ b/runbot_merge/models/__init__.py
@@ -4,3 +4,4 @@ from . import project
 from . import pull_requests
 from . import project_freeze
 from . import staging_cancel
+from . import crons
diff --git a/runbot_merge/models/crons/__init__.py b/runbot_merge/models/crons/__init__.py
new file mode 100644
index 00000000..939a44cc
--- /dev/null
+++ b/runbot_merge/models/crons/__init__.py
@@ -0,0 +1 @@
+from . import git_maintenance
diff --git a/runbot_merge/models/crons/git_maintenance.py b/runbot_merge/models/crons/git_maintenance.py
new file mode 100644
index 00000000..33262d7e
--- /dev/null
+++ b/runbot_merge/models/crons/git_maintenance.py
@@ -0,0 +1,37 @@
+import logging
+import subprocess
+
+from odoo import models
+from ...git import get_local
+
+
+_gc = logging.getLogger(__name__)
+class GC(models.TransientModel):
+    _name = 'runbot_merge.maintenance'
+    _description = "Weekly maintenance of... cache repos?"
+
+    def _run(self):
+        # lock out crons which use the local repo cache to avoid concurrency
+        # issues while we're GC-ing it
+        Stagings = self.env['runbot_merge.stagings']
+        crons = self.env.ref('runbot_merge.staging_cron', Stagings) | self.env.ref('forwardport.port_forward', Stagings)
+        if crons:
+            self.env.cr.execute("""
+                SELECT 1 FROM ir_cron
+                WHERE id = any(%s)
+                FOR UPDATE
+            """, [crons.ids])
+
+        # run on all repos which have a local cache
+        for repo in self.env['runbot_merge.repository'].search([]):
+            repo_git = get_local(repo, prefix=None)
+            if not repo_git:
+                continue
+
+            _gc.info('Running maintenance on %s', repo.name)
+            r = repo_git\
+                .stdout(True)\
+                .with_config(stderr=subprocess.STDOUT, text=True, check=False)\
+                .gc(aggressive=True, prune='now')
+            if r.returncode:
+                _gc.warning("Maintenance failure (status=%d):\n%s", r.returncode, r.stdout)
diff --git a/runbot_merge/models/crons/git_maintenance.xml b/runbot_merge/models/crons/git_maintenance.xml
new file mode 100644
index 00000000..6190834a
--- /dev/null
+++ b/runbot_merge/models/crons/git_maintenance.xml
@@ -0,0 +1,26 @@
+<odoo>
+    <record model="ir.model.access" id="access_runbot_merge_maintenance">
+        <field name="name">Access to maintenance is useless</field>
+        <field name="model_id" ref="model_runbot_merge_maintenance"/>
+        <field name="perm_read">0</field>
+        <field name="perm_create">0</field>
+        <field name="perm_write">0</field>
+        <field name="perm_unlink">0</field>
+    </record>
+
+    <record model="ir.cron" id="maintenance">
+        <field name="name">Maintenance of repo cache</field>
+        <field name="model_id" ref="model_runbot_merge_maintenance"/>
+        <field name="state">code</field>
+        <field name="code">model._run()</field>
+        <field name="interval_number">1</field>
+        <field name="interval_type">weeks</field>
+        <field name="numbercall">-1</field>
+    </record>
+</odoo>
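
Note on the new runbot_merge/git.py wrapper introduced above: `Repo.__getattr__`
turns any attribute access into a `GitCommand`, and `GitCommand._to_options`
maps Python keyword arguments onto CLI flags, so the maintenance cron's chained
call is just a regular `git gc` invocation with `-c gc.auto=0 -c
maintenance.auto=0` pinned on the command line. A minimal sketch of that
expansion, assuming an environment where the odoo addons are importable; the
cache path and repository name are illustrative, not taken from the patch
(get_local() would normally derive them from user_cache_dir('mergebot') and
repository.name):

    import subprocess

    from odoo.addons.runbot_merge.git import git

    # open an existing bare cache repo (illustrative path)
    repo = git('/tmp/mergebot-cache/odoo/odoo')

    # stdout(True) captures output, with_config() merges extra subprocess.run()
    # options, and gc(aggressive=True, prune='now') ultimately executes:
    #   git -C /tmp/mergebot-cache/odoo/odoo \
    #       -c gc.auto=0 -c maintenance.auto=0 gc --aggressive --prune now
    r = repo.stdout(True) \
        .with_config(stderr=subprocess.STDOUT, text=True, check=False) \
        .gc(aggressive=True, prune='now')
    if r.returncode:
        print(r.stdout)

The `ALWAYS` config pair keeps git from kicking off its own auto-gc or
background maintenance, so the weekly cron is the only thing repacking the
shared cache repositories.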