[IMP] runbot: schedule git gc on repositories

On the current runbot deployments, the `git gc` command is handled by a
unix cron. From time to time, some repositories get corrupted, and we
suspect that concurrent operations may be involved, as stated in the
documentation [0].

For those reasons, with this commit, the `git gc` will be run by the
runbot clients themselves in order to avoid concurrent operations.

By default, the first gc occurs 5 to 15 minutes after the client starts,
and each subsequent gc is scheduled between two and three hours after the
previous one.

Also, this commit ensures that the git config is written regularly in
case of change.

[0] https://git-scm.com/docs/git-gc
This commit is contained in:
Christophe Monniez 2022-06-15 16:29:33 +02:00 committed by xdo
parent cdaae9b3ed
commit 250d48e266
4 changed files with 30 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import shutil
from contextlib import contextmanager
from requests.exceptions import HTTPError
from subprocess import CalledProcessError
from ..common import fqdn, dest_reg, os
from ..container import docker_ps, docker_stop
@ -367,6 +368,17 @@ class Runbot(models.AbstractModel):
if ignored:
_logger.info('docker (%s) not deleted because not dest format', list(ignored))
def _git_gc(self, host):
    """Clean up and optimize the git repositories on the host.

    Runs ``git gc --prune=all --quiet`` on every ``runbot.repo`` record. A
    failure on one repository is reported through ``self.warning`` and does
    not prevent the remaining repositories from being processed.

    :param host: the host running the gc; only used in the warning message
    """
    # An explicit empty domain selects every repository.
    for repo in self.env['runbot.repo'].search([]):
        try:
            repo._git(['gc', '--prune=all', '--quiet'])
        except CalledProcessError as e:
            # `output` is None when the failed command's output was not captured.
            output = e.output or ''
            message = f'git gc failed on host {host} with exit status {e.returncode} and message "{output[:30]} ..."'
            # `warning` is a method of this model, so it is called directly.
            self.warning(message)
def warning(self, message, *args):
if args:
message = message % args

View File

@ -17,6 +17,8 @@ class BuilderClient(RunbotClient):
self.env['runbot.runbot']._docker_cleanup()
self.host.set_psql_conn_count()
self.host._docker_build()
self.env['runbot.repo']._update_git_config()
self.git_gc()
return self.env['runbot.runbot']._scheduler_loop_turn(self.host)

View File

@ -10,7 +10,13 @@ class LeaderClient(RunbotClient): # Conductor, Director, Main, Maestro, Lead
self.pull_info_failures = {}
super().__init__(env)
def on_start(self):
    """Force an update of every repository whose mode is not 'disabled'."""
    enabled_repos = self.env['runbot.repo'].search([('mode', '!=', 'disabled')])
    enabled_repos._update(force=True)
def loop_turn(self):
# One iteration of the leader loop: refresh the git config, run the
# scheduled git gc, then perform the fetch turn.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether only the git config update or also the git_gc() call is guarded
# by the `count == 0` check — confirm against the real file.
if self.count == 0:
self.env['runbot.repo']._update_git_config()
self.git_gc()
# The result of the fetch turn is returned as-is; the pull_info_failures
# dict kept on the client is handed over on every call.
return self.env['runbot.runbot']._fetch_loop_turn(self.host, self.pull_info_failures)

View File

@ -4,8 +4,10 @@ import logging
import os
import sys
import threading
import random
import signal
from datetime import datetime, timedelta
from logging.handlers import WatchedFileHandler
LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'
@ -24,6 +26,7 @@ class RunbotClient():
self.host = None
self.count = 0
self.max_count = 60
self.next_git_gc_date = datetime.now() + timedelta(minutes=random.randint(5, 15))
def on_start(self):
    """Do nothing by default; subclasses may override this."""
    return None
@ -80,6 +83,13 @@ class RunbotClient():
def sleep(self, t):
    """Wait on ``self.ask_interrupt``, passing ``t`` to its ``wait`` call.

    NOTE(review): ``ask_interrupt`` is presumably a ``threading.Event`` and
    ``t`` a timeout in seconds — confirm against the constructor.
    """
    interrupt = self.ask_interrupt
    interrupt.wait(t)
def git_gc(self):
    """Run git gc on the repositories once the scheduled date has passed.

    Does nothing while ``next_git_gc_date`` is not yet in the past.
    Otherwise it delegates to the ``_git_gc`` method of ``runbot.runbot``
    for this client's host, then schedules the next run two hours plus a
    random 0 to 59 minutes from now.
    """
    if datetime.now() <= self.next_git_gc_date:
        return
    _logger.info('Starting git gc on repositories')
    runbot = self.env['runbot.runbot']
    runbot._git_gc(self.host)
    now = datetime.now()
    self.next_git_gc_date = now + timedelta(hours=2, minutes=random.randint(0, 59))
    _logger.info('Next git gc scheduled on %s', self.next_git_gc_date)
def run(client_class):
# parse args