[IMP] runbot: schedule git gc on repositories

On the current runbot deployments, the `git gc` command is run by a
unix cron. From time to time, some repositories get corrupted, and we
suspect that concurrent operations may be involved, as stated in the
documentation [0].

For those reasons, with this commit, the `git gc` will be run by the
runbot clients themselves in order to avoid concurrent operations.

By default, the first gc occurs a few minutes after the client starts,
and each subsequent gc is scheduled two hours plus a random number of
minutes (less than an hour) after the previous one.

Also, this commit ensures that the git config is written regularly in
case of change.

[0] https://git-scm.com/docs/git-gc
This commit is contained in:
Christophe Monniez 2022-06-15 16:29:33 +02:00 committed by xdo
parent cdaae9b3ed
commit 250d48e266
4 changed files with 30 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import shutil
from contextlib import contextmanager from contextlib import contextmanager
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from subprocess import CalledProcessError
from ..common import fqdn, dest_reg, os from ..common import fqdn, dest_reg, os
from ..container import docker_ps, docker_stop from ..container import docker_ps, docker_stop
@ -367,6 +368,17 @@ class Runbot(models.AbstractModel):
if ignored: if ignored:
_logger.info('docker (%s) not deleted because not dest format', list(ignored)) _logger.info('docker (%s) not deleted because not dest format', list(ignored))
def _git_gc(self, host):
    """Clean up and optimize the git repositories on the host.

    Every ``runbot.repo`` record is processed in turn with
    ``git gc --prune=all --quiet``. A ``CalledProcessError`` raised for one
    repository is reported through ``self.warning`` and does not stop the
    remaining repositories from being processed.

    :param host: host on which the gc runs; it is only interpolated into
        the warning message.
    """
    # The ORM's search() requires an explicit domain; the empty domain
    # matches every repository.
    for repo in self.env['runbot.repo'].search([]):
        try:
            repo._git(['gc', '--prune=all', '--quiet'])
        except CalledProcessError as e:
            # e.output is None when the command output was not captured.
            output = (e.output or '')[:30]
            message = f'git gc failed on host {host} with exit status {e.returncode} and message "{output} ..."'
            # warning is a method of this class, not a model: call it
            # directly instead of trying to ``create`` on it.
            self.warning(message)
def warning(self, message, *args): def warning(self, message, *args):
if args: if args:
message = message % args message = message % args

View File

@ -17,6 +17,8 @@ class BuilderClient(RunbotClient):
self.env['runbot.runbot']._docker_cleanup() self.env['runbot.runbot']._docker_cleanup()
self.host.set_psql_conn_count() self.host.set_psql_conn_count()
self.host._docker_build() self.host._docker_build()
self.env['runbot.repo']._update_git_config()
self.git_gc()
return self.env['runbot.runbot']._scheduler_loop_turn(self.host) return self.env['runbot.runbot']._scheduler_loop_turn(self.host)

View File

@ -10,7 +10,13 @@ class LeaderClient(RunbotClient): # Conductor, Director, Main, Maestro, Lead
self.pull_info_failures = {} self.pull_info_failures = {}
super().__init__(env) super().__init__(env)
def on_start(self):
    """Force an update of every repository whose mode is not 'disabled'."""
    repo_model = self.env['runbot.repo']
    repos = repo_model.search([('mode', '!=', 'disabled')])
    repos._update(force=True)
def loop_turn(self):
    """Run one turn of the leader loop.

    When ``self.count`` is 0, the git config of the repositories is
    rewritten and ``self.git_gc()`` is called before fetching.
    NOTE(review): ``count`` presumably wraps back to 0 periodically;
    confirm against the client main loop.

    :return: whatever ``runbot.runbot``'s ``_fetch_loop_turn`` returns.
    """
    is_first_turn = self.count == 0
    if is_first_turn:
        repo_model = self.env['runbot.repo']
        repo_model._update_git_config()
        self.git_gc()
    runbot = self.env['runbot.runbot']
    return runbot._fetch_loop_turn(self.host, self.pull_info_failures)

View File

@ -4,8 +4,10 @@ import logging
import os import os
import sys import sys
import threading import threading
import random
import signal import signal
from datetime import datetime, timedelta
from logging.handlers import WatchedFileHandler from logging.handlers import WatchedFileHandler
LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s' LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'
@ -24,6 +26,7 @@ class RunbotClient():
self.host = None self.host = None
self.count = 0 self.count = 0
self.max_count = 60 self.max_count = 60
self.next_git_gc_date = datetime.now() + timedelta(minutes=random.randint(5, 15))
def on_start(self):
    """Do nothing by default; subclasses may override this hook."""
@ -80,6 +83,13 @@ class RunbotClient():
def sleep(self, t):
    """Wait on ``self.ask_interrupt`` with a timeout of ``t``.

    NOTE(review): ``ask_interrupt`` is presumably a ``threading.Event``,
    so the wait ends early when it is set; confirm where it is created.
    The result of the wait is discarded.
    """
    interrupt = self.ask_interrupt
    interrupt.wait(t)
def git_gc(self):
    """Run git gc on the repositories once the scheduled date has passed.

    Nothing happens while ``self.next_git_gc_date`` is still in the
    future. Otherwise the work is delegated to ``runbot.runbot``'s
    ``_git_gc`` for ``self.host``, and the next run is scheduled two hours
    plus a random 0-59 minutes later.
    """
    if datetime.now() <= self.next_git_gc_date:
        return
    _logger.info('Starting git gc on repositories')
    runbot = self.env['runbot.runbot']
    runbot._git_gc(self.host)
    finished_at = datetime.now()
    self.next_git_gc_date = finished_at + timedelta(hours=2, minutes=random.randint(0, 59))
    _logger.info('Next git gc scheduled on %s', self.next_git_gc_date)
def run(client_class): def run(client_class):
# parse args # parse args