mirror of
https://github.com/odoo/runbot.git
synced 2025-03-16 16:05:42 +07:00

The current system makes / lets GC run during fetching. This has a few issues:

- the autogc consumes resources during the forward-porting process (not that it's hugely urgent but it seems unnecessary)
- the autogc commonly fails due to the combination of a large repository (odoo/odoo) and low memory limits (hardmem for odoo, which gets translated into soft ulimits)

As a result, the garbage collection of the repository sometimes stops entirely, leading to an increase in repository size and a decrease in performance.

To mitigate this issue, disable the automagic gc and maintenance during normal operation, and instead add a weekly cron which runs an aggressive GC with memory limits disabled (as far as they can get, if the limits are imposed externally there's nothing to be done).

The maintenance is implemented using a full lockout of the forward-port cron and an in-place GC rather than a copy/gc/swap, as doing this maintenance at the small hours of the week-end (sat-sun night) seems like a non-issue: currently an aggressive GC of odoo/odoo (using the default aggressive options) takes a total of 2:30 wallclock (5h user) on a fairly elderly machine (it's closer to 20mn wallclock and 2h user on my local machine; it also turns out the cache repos are kinda badly configured, leading to ~30% more objects than necessary, which doesn't help).

For the record, a fresh checkout of odoo/odoo right now yields:

| Overall repository size      |          |
| * Commits                    |          |
|   * Count                    | 199 k    |
|   * Total size               | 102 MiB  |
| * Trees                      |          |
|   * Count                    | 1.60 M   |
|   * Total size               | 2.67 GiB |
|   * Total tree entries       | 74.1 M   |
| * Blobs                      |          |
|   * Count                    | 1.69 M   |
|   * Total size               | 72.4 GiB |

If this still proves insufficient, a further option would be to deploy a "generational repacking" strategy: https://gitlab.com/gitlab-org/gitaly/-/issues/2861 (though apparently it's not yet been implemented / deployed on gitlab so...). But for now we'll see how it shakes out.

Close #489
316 lines
12 KiB
Python
316 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
|
import logging
|
|
import pathlib
|
|
import resource
|
|
import subprocess
|
|
import uuid
|
|
from contextlib import ExitStack
|
|
from datetime import datetime, timedelta
|
|
|
|
from dateutil import relativedelta
|
|
|
|
from odoo import fields, models
|
|
from odoo.addons.runbot_merge.github import GH
|
|
from odoo.tools.appdirs import user_cache_dir
|
|
|
|
# how long a merged PR survives
|
|
# By default, a queued branch deletion only becomes eligible once the PR's
# merge date is older than this (see DeleteBranches._search_domain).
MERGE_AGE = relativedelta.relativedelta(weeks=2)
|
|
|
|
# Module-level logger; the 'deleter' and 'maintenance' loggers used further
# down in this file are children of it.
_logger = logging.getLogger(__name__)
|
|
|
|
class Queue:
    """Mixin implementing a simple record-backed work queue.

    It relies on ``search``, ``unlink`` and ``env`` being provided by the
    class it is mixed into. Every record found by ``_process`` is one pending
    task: it is handled through ``_process_item`` and removed on success.
    """
    __slots__ = ()
    # maximum number of queued records handled by a single ``_process`` call
    limit = 100

    def _process_item(self):
        """Handle one queued record; concrete queues must override this."""
        raise NotImplementedError

    def _process(self):
        """Process up to ``limit`` pending records, oldest first.

        Each record is processed, deleted and committed individually. When
        anything in that sequence raises, the error is logged, the
        transaction is rolled back, the record's ``_on_failure`` hook runs
        and its effects are committed; processing then moves on to the next
        record.
        """
        cr = self.env.cr
        pending = self.search(
            self._search_domain(),
            order='create_date, id',
            limit=self.limit,
        )
        for task in pending:
            try:
                task._process_item()
                task.unlink()
                cr.commit()
            except Exception:
                _logger.exception("Error while processing %s, skipping", task)
                cr.rollback()
                task._on_failure()
                cr.commit()

    def _on_failure(self):
        """Hook called on a record whose processing failed; no-op by default."""
        pass

    def _search_domain(self):
        """Return the search domain selecting pending records (default: all)."""
        return []
|
|
|
|
class ForwardPortTasks(models.Model, Queue):
    """Queue of merged batches waiting to be forward-ported."""
    _name = 'forwardport.batches'
    _description = 'batches which got merged and are candidates for forward-porting'

    # forward-porting is handled in small runs
    limit = 10

    # the batch whose PRs are to be ported forward
    batch_id = fields.Many2one('runbot_merge.batch', required=True)
    # why the task was queued
    source = fields.Selection([
        ('merge', 'Merge'),
        ('fp', 'Forward Port Followup'),
        ('insert', 'New branch port')
    ], required=True)
    # earliest moment at which the task may be (re)tried; the default lies far
    # in the past so that fresh tasks are immediately eligible
    retry_after = fields.Datetime(required=True, default='1900-01-01 01:01:01')

    def _search_domain(self):
        """Restrict the pending tasks to those whose ``retry_after`` has passed."""
        now = fields.Datetime.to_string(fields.Datetime.now())
        return super()._search_domain() + [
            ('retry_after', '<=', now),
        ]

    def _on_failure(self):
        """Postpone the failed task: it becomes eligible again in 30 minutes."""
        super()._on_failure()
        next_attempt = fields.Datetime.now() + timedelta(minutes=30)
        self.retry_after = fields.Datetime.to_string(next_attempt)

    def _process_item(self):
        """Forward-port the PRs of the task's batch, then deactivate the batch.

        When the port yields a new batch and the task's source is ``insert``,
        the new PRs are spliced into the existing ancestry: any other PR
        sharing a new PR's parent is re-parented onto the new PR. The splice
        stops at the first new PR without a parent.
        """
        batch = self.batch_id
        ported = batch.prs._port_forward()

        if not ported:  # reached end of seq (or batch is empty)
            # FIXME: or the configuration is broken in a way which prevents
            #        forward-porting (maybe should error and retry?)
            _logger.info(
                "Processing %s (from %s): %s (%s) -> end of the sequence",
                self.id, self.source,
                batch, batch.prs
            )
        else:
            _logger.info(
                "Processing %s (from %s): %s (%s) -> %s (%s)",
                self.id, self.source,
                batch, batch.prs,
                ported, ported.prs,
            )
            # insert new batch in ancestry sequence unless conflict (= no parent)
            if self.source == 'insert':
                for new_pr in ported.prs:
                    if not new_pr.parent_id:
                        break
                    siblings = new_pr.search([
                        ('parent_id', '=', new_pr.parent_id.id),
                        ('id', '!=', new_pr.id),
                    ])
                    if siblings:
                        siblings.parent_id = new_pr.id

        # whatever the outcome, the source batch is done with
        batch.active = False
|
|
|
|
|
|
# Feedback posted on the updated PR when re-porting its change to the next
# forward-port conflicts. Formatted with ``ping``, ``previous`` and ``next``.
CONFLICT_TEMPLATE = (
    "{ping}WARNING: the latest change ({previous.head}) triggered "
    "a conflict when updating the next forward-port "
    "({next.display_name}), and has been ignored.\n\n"
    "You will need to update this pull request differently, "
    "or fix the issue by hand on {next.display_name}."
)
# Feedback posted on the descendant PR in which the conflict occurred.
# Formatted with ``ping`` and ``previous``.
CHILD_CONFLICT = (
    "{ping}WARNING: the update of {previous.display_name} to "
    "{previous.head} has caused a conflict in this pull request, "
    "data may have been lost."
)
|
|
class UpdateQueue(models.Model, Queue):
    """Queue of updated forward-port PRs whose followups must be re-ported."""
    _name = 'forwardport.updates'
    _description = 'if a forward-port PR gets updated & has followups (cherrypick succeeded) the followups need to be updated as well'

    limit = 10

    # root PR as recorded when the task was queued / root PR to re-port from
    original_root = fields.Many2one('runbot_merge.pull_requests')
    new_root = fields.Many2one('runbot_merge.pull_requests')

    def _process_item(self):
        """Re-port ``new_root`` onto each of its descendants, in sequence.

        Every descendant is re-created from the PR preceding it (starting
        with ``new_root``), its head is updated in database, then pushed to
        the local cache and to the PR's branch, and the transaction is
        committed before moving on to the next descendant.

        The walk stops at the first closed or merged descendant, on which a
        feedback message is left. Conflicts are reported on both PRs involved
        but do not stop the update.
        """
        feedback = self.env['runbot_merge.pull_requests.feedback']
        parent = self.new_root
        with ExitStack() as stack:
            for child in self.new_root._iter_descendants():
                # lock the descendant's row, failing immediately (rather than
                # waiting) if something else already holds it
                self.env.cr.execute("""
                    SELECT id
                    FROM runbot_merge_pull_requests
                    WHERE id = %s
                    FOR UPDATE NOWAIT
                """, [child.id])
                _logger.info(
                    "Re-port %s from %s (changed root %s -> %s)",
                    child.display_name,
                    parent.display_name,
                    self.original_root.display_name,
                    self.new_root.display_name
                )
                if child.state in ('closed', 'merged'):
                    # can't update this one, nor anything after it
                    dead_end = (
                        "%sancestor PR %s has been updated but this PR"
                        " is %s and can't be updated to match."
                        "\n\n"
                        "You may want or need to manually update any"
                        " followup PR." % (
                            child.ping(),
                            self.new_root.display_name,
                            child.state,
                        )
                    )
                    feedback.create({
                        'repository': child.repository.id,
                        'pull_request': child.number,
                        'message': dead_end,
                    })
                    return

                conflicts, working_copy = parent._create_fp_branch(
                    child.target, child.refname, stack)
                if conflicts:
                    _, out, err, _ = conflicts
                    stdout, stderr = out.strip(), err.strip()
                    # warn on the PR which got updated...
                    feedback.create({
                        'repository': parent.repository.id,
                        'pull_request': parent.number,
                        'message': CONFLICT_TEMPLATE.format(
                            ping=parent.ping(),
                            previous=parent,
                            next=child
                        )
                    })
                    # ... and on the one in which the conflict occurred,
                    # appending whatever output is not empty
                    child_message = CHILD_CONFLICT.format(
                        ping=child.ping(), previous=parent, next=child)
                    if stdout:
                        child_message += f'\n\nstdout:\n```\n{stdout}\n```'
                    if stderr:
                        child_message += f'\n\nstderr:\n```\n{stderr}\n```'
                    feedback.create({
                        'repository': child.repository.id,
                        'pull_request': child.number,
                        'message': child_message,
                    })

                quiet = working_copy.stdout()
                new_head = quiet.rev_parse(child.refname).stdout.decode().strip()
                counted = working_copy.stdout().rev_list(
                    f'{child.target.name}..{child.refname}',
                    count=True
                )
                commits_count = int(counted.stdout.decode().strip())
                old_head = child.head
                # update child's head to the head we're going to push
                child.with_context(ignore_head_update=True).write({
                    'head': new_head,
                    # 'state': 'opened',
                    'squash': commits_count == 1,
                })
                # push the new head to the local cache: in some cases github
                # doesn't propagate revisions fast enough so on the next loop we
                # can't find the revision we just pushed
                dummy_branch = str(uuid.uuid4())
                cache = parent._get_local_directory()
                working_copy.push(cache._directory, f'{new_head}:refs/heads/{dummy_branch}')
                cache.branch('--delete', '--force', dummy_branch)
                # then update the child's branch to the new head
                working_copy.push(
                    f'--force-with-lease={child.refname}:{old_head}',
                    'target', child.refname)

                # committing here means github could technically trigger its
                # webhook before sending a response, but committing before
                # would mean we can update the PR in database but fail to
                # update on github, which is probably worse?
                # alternatively we can commit, push, and rollback if the push
                # fails
                # FIXME: handle failures (especially on non-first update)
                self.env.cr.commit()

                # the next descendant is re-ported from this one
                parent = child
|
|
|
|
_deleter = _logger.getChild('deleter')


class DeleteBranches(models.Model, Queue):
    """Queue of merged PRs whose source branch should be deleted."""
    _name = 'forwardport.branch_remover'
    _description = "Removes branches of merged PRs"

    # the PR whose branch is to be removed
    pr_id = fields.Many2one('runbot_merge.pull_requests')

    def _search_domain(self):
        """Select the tasks whose PR was merged before the cutoff.

        The cutoff is the ``forwardport_merged_before`` context key if set,
        otherwise the current time minus ``MERGE_AGE``.
        """
        cutoff = self.env.context.get('forwardport_merged_before') \
            or fields.Datetime.to_string(datetime.now() - MERGE_AGE)
        return [('pr_id.merge_date', '<', cutoff)]

    def _process_item(self):
        """Delete the PR's branch from the forward-port remote, if safe.

        Nothing is deleted (and the method just returns) when:

        * the PR is not in the ``merged`` state
        * the repository has no forward-port target
        * the owner in the PR's label is not the owner of the forward-port
          target
        * the branch can not be retrieved, or only fuzzy matches come back
        * the branch's head is not the PR's head

        :raises AssertionError: if github does not answer the deletion
                                request with a 204
        """
        _deleter.info(
            "PR %s: checking deletion of linked branch %s",
            self.pr_id.display_name,
            self.pr_id.label
        )

        if self.pr_id.state != 'merged':
            _deleter.info('✘ PR is not "merged" (got %s)', self.pr_id.state)
            return

        repository = self.pr_id.repository
        fp_remote = repository.fp_remote_target
        if not fp_remote:
            _deleter.info('✘ no forward-port target')
            return

        repo_owner, repo_name = fp_remote.split('/')
        owner, branch = self.pr_id.label.split(':')
        if repo_owner != owner:
            _deleter.info('✘ PR owner != FP target owner (%s)', repo_owner)
            return # probably don't have access to arbitrary repos

        github = GH(token=repository.project_id.fp_github_token, repo=fp_remote)
        refurl = 'git/refs/heads/' + branch
        ref = github('get', refurl, check=False)
        if ref.status_code != 200:
            _deleter.info("✘ branch already deleted (%s)", ref.json())
            return

        ref = ref.json()
        # a list (instead of a single ref object) is treated as a set of
        # partial matches, not as the branch itself
        if isinstance(ref, list):
            _deleter.info(
                "✘ got a fuzzy match (%s), branch probably deleted",
                ', '.join(r['ref'] for r in ref)
            )
            return

        # only delete the branch if it still is at the head the PR knows about
        if ref['object']['sha'] != self.pr_id.head:
            _deleter.info(
                "✘ branch %s head mismatch, expected %s, got %s",
                self.pr_id.label,
                self.pr_id.head,
                ref['object']['sha']
            )
            return

        r = github('delete', refurl, check=False)
        # explicit check rather than an `assert` statement: asserts are
        # stripped under `python -O`, in which case a failed deletion would
        # be reported as a success and the task dropped from the queue
        if r.status_code != 204:
            raise AssertionError(
                "Tried to delete branch %s of %s, got %s" % (
                    branch, self.pr_id.display_name,
                    r.json()
                )
            )
        _deleter.info('✔ deleted branch %s of PR %s', self.pr_id.label, self.pr_id.display_name)
|
|
|
|
# Child logger used by the repository maintenance (GC) model below.
_gc = _logger.getChild('maintenance')
|
|
def _bypass_limits():
|
|
"""Allow git to go beyond the limits set for Odoo.
|
|
|
|
On large repositories, git gc can take a *lot* of memory (especially with
|
|
`--aggressive`), if the Odoo limits are too low this can prevent the gc
|
|
from running, leading to a lack of packing and a massive amount of cruft
|
|
accumulating in the working copy.
|
|
"""
|
|
resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
|
|
|
|
class GC(models.TransientModel):
    """Maintenance (aggressive garbage collection) of the cache repositories."""
    _name = 'forwardport.maintenance'
    _description = "Weekly maintenance of... cache repos?"

    def _run(self):
        """Run ``git gc --aggressive --prune=now`` on every cache repository.

        Only repositories with a forward-port target and an existing
        directory under the ``forwardport`` user cache dir are handled. A
        failing gc is logged as a warning (with the combined output of the
        command) and does not prevent the following repositories from being
        processed.
        """
        # lock out the forward port cron to avoid concurrency issues while we're
        # GC-ing it: wait until it's available, then SELECT FOR UPDATE it,
        # which should prevent cron workers from running it
        cron = self.env.ref('forwardport.port_forward')
        self.env.cr.execute("""
            SELECT 1 FROM ir_cron
            WHERE id = %s
            FOR UPDATE
        """, [cron.id])

        cache_root = pathlib.Path(user_cache_dir('forwardport'))
        # run on all repos with a forwardport target (~ forwardport enabled)
        fp_repos = self.env['runbot_merge.repository'].search([
            ('fp_remote_target', '!=', False),
        ])
        for repo in fp_repos:
            git_dir = cache_root / repo.name
            if not git_dir.is_dir():
                continue

            _gc.info('Running maintenance on %s', repo.name)
            command = [
                'git', '--git-dir', git_dir,
                'gc', '--aggressive', '--prune=now',
            ]
            outcome = subprocess.run(
                command,
                # stderr is folded into stdout so a single stream gets logged
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                encoding='utf-8',
                preexec_fn=_bypass_limits,
            )
            if outcome.returncode:
                _gc.warning(
                    "Maintenance failure (status=%d):\n%s",
                    outcome.returncode, outcome.stdout)
|