From d722160247c3fbbe52ea806fe7eb4965095b69c4 Mon Sep 17 00:00:00 2001 From: Christophe Monniez Date: Thu, 13 Jul 2023 10:12:23 +0200 Subject: [PATCH] [IMP] drunbot: docker registry The current runbot infrastructure has 100+ machine that will each build all docker images. This is unpractical for multiple reasons: - impotant load on all machine when the build could be done once - possible differences for the same image depending on the moment the base was pulled on the host. An extreme example was the version of python (3.11 or 3.12) when building the noble docker image before it was stabilized. - increase the chance to have a docker build failure in case of network problem. (random) A centralized registry will help with that, and allow future devlopment to generate more docker images. All docker images will be exactly the same, the pull time is faster than build time, only one build per docker image, ... --- .gitignore | 1 + runbot/container.py | 90 +++++++++++++++++++++- runbot/models/host.py | 41 +++++++++- runbot/models/res_config_settings.py | 1 + runbot/models/runbot.py | 39 +++++++++- runbot/templates/nginx.xml | 17 ++++ runbot/tests/common.py | 1 + runbot/tests/test_cron.py | 6 +- runbot/views/host_views.xml | 2 + runbot/views/res_config_settings_views.xml | 6 +- runbot_builder/builder.py | 18 ++++- 11 files changed, 207 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index ef74c0e1..776409db 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ runbot/static/sources runbot/static/nginx runbot/static/databases runbot/static/docker +runbot/static/docker-registry diff --git a/runbot/container.py b/runbot/container.py index e9fa14bd..473b7546 100644 --- a/runbot/container.py +++ b/runbot/container.py @@ -124,6 +124,83 @@ def _docker_build(build_dir, image_tag): msg = f"{''.join(l.get('stream') or '' for l in e.build_log)}\nERROR:{e.msg}" return (False, msg) +def docker_push(image_tag): + return _docker_push(image_tag) + + +def _docker_push(image_tag): + """Push a Docker image to the localy hosted docker registry + :param image_tag: the image tag (or id) to push + :return: tuple(success, msg) where success is a boolean and msg is the error message or None + """ + docker_client = docker.from_env() + try: + image = docker_client.images.get(image_tag) + except docker.errors.ImageNotFound: + return (False, f"Docker image '{image_tag}' not found.") + + push_tag = f'127.0.0.1:5001/{image_tag}' + image.tag(push_tag) + error = None + try: + for push_progress in docker_client.images.push(push_tag, stream=True, decode=True): + # the push method is supposed to raise in cases of API errors but doesn't in other cases + # e.g. connection errors or the image tag does not exists locally ... + if 'error' in push_progress: + error = str(push_progress) # just stringify the whole as it might contains other keys like errorDetail ... + except docker.errors.APIError as e: + error = e + if error: + return (False, error) + return (True, None) + + +def docker_pull(image_tag): + return _docker_pull(image_tag) + + +def _docker_pull(image_tag): + """Pull a docker image from a registry. + :param image_tag: the full image tag, including the registry host + e.g.: `dockerhub.runbot102.odoo.com/odoo:PureNobleTest` + :return: tuple(success, image) where success is a boolean and image a Docker image object or None in case of failure + """ + docker_client = docker.from_env() + try: + image = docker_client.images.pull(image_tag) + except docker.errors.APIError: + message = f"failed Docker pull for {image_tag}" + _logger.warning(message) + return (False, None) + return (True, image) + + +def docker_remove(image_tag): + return _docker_remove(image_tag) + + +def _docker_remove(image_tag): + docker_client = docker.from_env() + try: + docker_client.images.remove(image_tag, force=1) + except docker.errors.APIError: + message = f"Docker remove failed for {image_tag}" + _logger.exception(message) + return False + return True + + +def docker_prune(): + return _docker_prune() + + +def _docker_prune(): + docker_client = docker.from_env() + try: + return docker_client.images.prune() + except docker.errors.APIError: + _logger.exception('Docker prune failed') + return {'ImagesDeleted': None, 'SpaceReclaimed': 0} def docker_run(*args, **kwargs): return _docker_run(*args, **kwargs) @@ -296,7 +373,18 @@ def docker_ps(): def _docker_ps(): """Return a list of running containers names""" docker_client = docker.client.from_env() - return [ c.name for c in docker_client.containers.list()] + return [c.name for c in docker_client.containers.list()] + + +def docker_images(): + return _docker_images() + + +def _docker_images(): + """Return a list of running existing images""" + docker_client = docker.client.from_env() + return [c for c in docker_client.images.list()] + def sanitize_container_name(name): """Returns a container name with unallowed characters removed""" diff --git a/runbot/models/host.py b/runbot/models/host.py index 953b220c..34af0adf 100644 --- a/runbot/models/host.py +++ b/runbot/models/host.py @@ -5,6 +5,7 @@ from collections import defaultdict from odoo import models, fields, api from odoo.tools import config, ormcache from ..common import fqdn, local_pgadmin_cursor, os, list_local_dbs, local_pg_cursor +from ..container import docker_push, docker_pull, docker_prune, docker_images, docker_remove _logger = logging.getLogger(__name__) @@ -44,6 +45,7 @@ class Host(models.Model): paused = fields.Boolean('Paused', help='Host will stop scheduling while paused') profile = fields.Boolean('Profile', help='Enable profiling on this host') + use_remote_docker_registry = fields.Boolean('Use remote Docker Registry', default=False, help="Use docker registry for pulling images") def _compute_nb(self): groups = self.env['runbot.build'].read_group( @@ -117,12 +119,43 @@ class Host(models.Model): self._bootstrap_db_template() self._bootstrap_local_logs_db() - def _docker_build(self): + def _docker_update_images(self): """ build docker images needed by locally pending builds""" - _logger.info('Building docker images...') self.ensure_one() - for dockerfile in self.env['runbot.dockerfile'].search([('to_build', '=', True)]): - dockerfile._build(self) + icp = self.env['ir.config_parameter'] + docker_registry_host = self.browse(int(icp.get_param('runbot.docker_registry_host_id', default=0))) + # pull all images from the runbot docker registry + is_registry = docker_registry_host == self + all_docker_files = self.env['runbot.dockerfile'].search([]) + all_tags = set(all_docker_files.mapped('image_tag')) + if docker_registry_host and self.use_remote_docker_registry and not is_registry: + _logger.info('Pulling docker images...') + for dockerfile in all_docker_files: + remote_tag = f'dockerhub.{docker_registry_host.name}/{dockerfile.image_tag}' + pull_result, image = docker_pull(remote_tag) + if pull_result: + image.tag(dockerfile.image_tag) + else: + _logger.info('Building docker images...') + for dockerfile in self.env['runbot.dockerfile'].search([('to_build', '=', True)]): + dockerfile._build(self) + if is_registry: + docker_push(dockerfile.image_tag) + + _logger.info('Cleaning docker images...') + for image in docker_images(): + for tag in image.tags: + if tag.startswith('odoo:') and tag not in all_tags: # what about odoo:latest + _logger.info(f"Removing tag '{tag}' since it doesn't exist anymore") + docker_remove(tag) + + result = docker_prune() + if result['ImagesDeleted']: + for r in result['ImagesDeleted']: + for operation, identifier in r.items(): + _logger.info(f"{operation}: {identifier}") + if result['SpaceReclaimed']: + _logger.info(f"Space reclaimed: {result['SpaceReclaimed']}") _logger.info('Done...') @ormcache() diff --git a/runbot/models/res_config_settings.py b/runbot/models/res_config_settings.py index 77f52797..90a60aab 100644 --- a/runbot/models/res_config_settings.py +++ b/runbot/models/res_config_settings.py @@ -51,6 +51,7 @@ class ResConfigSettings(models.TransientModel): runbot_pending_warning = fields.Integer('Pending warning limit', default=5, config_parameter='runbot.pending.warning') runbot_pending_critical = fields.Integer('Pending critical limit', default=5, config_parameter='runbot.pending.critical') + runbot_docker_registry_host_id = fields.Many2one('runbot.host', 'Docker registry', help='Runbot host which handles Docker registry.', config_parameter='runbot.docker_registry_host_id') # TODO other icp # runbot.runbot_maxlogs 100 # migration db diff --git a/runbot/models/runbot.py b/runbot/models/runbot.py index 3fa9d902..f3926fb1 100644 --- a/runbot/models/runbot.py +++ b/runbot/models/runbot.py @@ -1,3 +1,4 @@ +import docker import time import logging import glob @@ -223,7 +224,7 @@ class Runbot(models.AbstractModel): # Bootstrap host._bootstrap() if runbot_do_schedule: - host._docker_build() + host._docker_update_images() self._source_cleanup() self.env['runbot.build']._local_cleanup() self._docker_cleanup() @@ -366,7 +367,7 @@ class Runbot(models.AbstractModel): def _docker_cleanup(self): _logger.info('Docker cleaning') - docker_ps_result = docker_ps() + docker_ps_result = [container for container in docker_ps() if container != "runbot-registry"] containers = {} ignored = [] @@ -383,6 +384,40 @@ class Runbot(models.AbstractModel): if ignored: _logger.info('docker (%s) not deleted because not dest format', list(ignored)) + def _start_docker_registry(self, host): + """ + Start a docker registry if not already running. + The registry is in `always_restart` mode, meaning that it will restart properly after a reboot. + """ + docker_client = docker.from_env() + try: + registry_container = docker_client.containers.get('runbot-registry') + except docker.errors.NotFound: + registry_container = None + + if registry_container: + if registry_container.status in ('running', 'created', 'restarting'): + if registry_container.status != 'running': + _logger.info('Docker registry container already found with status %s, skipping start procedure.', registry_container.status) + return + + _logger.info('Docker registry container found with status %s, trying the start procedure.', registry_container.status) + + try: + registry_container = docker_client.containers.run( + 'registry:2', + name='runbot-registry', + volumes={f'{os.path.join(self._root(), "docker-registry")}':{'bind': '/var/lib/registry', 'mode': 'rw'}}, + ports={5000: ('127.0.0.1', 5001)}, + restart_policy= {"Name": "always"}, + detach=True + ) + _logger.info('Docker registry started') + # TODO push local images in registry here + except Exception as e: + message = f'Starting registry failed with exception: {e}' + self.warning(message) + _logger.error(message) def _warning(self, message, *args): if args: diff --git a/runbot/templates/nginx.xml b/runbot/templates/nginx.xml index 11bf9f6a..e9f2144f 100644 --- a/runbot/templates/nginx.xml +++ b/runbot/templates/nginx.xml @@ -51,6 +51,23 @@ server { } } +server { + listen 8080; + server_name ~^dockerhub\.$; + + location /v2/ { + limit_except GET HEAD OPTIONS { + deny all; + } + proxy_pass http://localhost:5001; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + } +} + diff --git a/runbot/tests/common.py b/runbot/tests/common.py index d695e951..fbb6ceea 100644 --- a/runbot/tests/common.py +++ b/runbot/tests/common.py @@ -180,6 +180,7 @@ class RunbotCase(TransactionCase): self.start_patcher('isfile', 'odoo.addons.runbot.common.os.path.isfile', True) self.start_patcher('docker_run', 'odoo.addons.runbot.container._docker_run') self.start_patcher('docker_build', 'odoo.addons.runbot.container._docker_build') + self.start_patcher('docker_push', 'odoo.addons.runbot.container._docker_push') self.start_patcher('docker_ps', 'odoo.addons.runbot.container._docker_ps', []) self.start_patcher('docker_stop', 'odoo.addons.runbot.container._docker_stop') self.start_patcher('docker_get_gateway_ip', 'odoo.addons.runbot.models.build_config.docker_get_gateway_ip', None) diff --git a/runbot/tests/test_cron.py b/runbot/tests/test_cron.py index 378ff737..d00d7763 100644 --- a/runbot/tests/test_cron.py +++ b/runbot/tests/test_cron.py @@ -32,10 +32,10 @@ class TestCron(RunbotCase): mock_update_batches.assert_called() @patch('time.sleep', side_effect=sleep) - @patch('odoo.addons.runbot.models.host.Host._docker_build') + @patch('odoo.addons.runbot.models.host.Host._docker_update_images') @patch('odoo.addons.runbot.models.host.Host._bootstrap') @patch('odoo.addons.runbot.models.runbot.Runbot._scheduler') - def test_cron_build(self, mock_scheduler, mock_host_bootstrap, mock_host_docker_build, *args): + def test_cron_build(self, mock_scheduler, mock_host_bootstrap, mock_host_docker_update_images, *args): """ test that cron_fetch_and_build do its work """ hostname = 'cronhost.runbot.com' self.patchers['hostname_patcher'].return_value = hostname @@ -49,7 +49,7 @@ class TestCron(RunbotCase): pass # sleep raises an exception to avoid to stay stuck in loop mock_scheduler.assert_called() mock_host_bootstrap.assert_called() - mock_host_docker_build.assert_called() + mock_host_docker_update_images.assert_called() host = self.env['runbot.host'].search([('name', '=', hostname)]) self.assertTrue(host, 'A new host should have been created') # self.assertGreater(host.psql_conn_count, 0, 'A least one connection should exist on the current psql batch') diff --git a/runbot/views/host_views.xml b/runbot/views/host_views.xml index 1a00dfb2..04eeabf4 100644 --- a/runbot/views/host_views.xml +++ b/runbot/views/host_views.xml @@ -11,6 +11,7 @@ + @@ -53,6 +54,7 @@ + diff --git a/runbot/views/res_config_settings_views.xml b/runbot/views/res_config_settings_views.xml index 65b9a483..745fdf5f 100644 --- a/runbot/views/res_config_settings_views.xml +++ b/runbot/views/res_config_settings_views.xml @@ -93,7 +93,11 @@ - + + + + + diff --git a/runbot_builder/builder.py b/runbot_builder/builder.py index b1f0a607..fd575dff 100755 --- a/runbot_builder/builder.py +++ b/runbot_builder/builder.py @@ -20,18 +20,28 @@ class BuilderClient(RunbotClient): for repo in self.env['runbot.repo'].search([('mode', '!=', 'disabled')]): repo._update(force=True) - self.last_docker_update = None + self.last_docker_updates = None def loop_turn(self): + icp = self.env['ir.config_parameter'] + docker_registry_host_id = icp.get_param('runbot.docker_registry_host_id', default=False) + is_registry = docker_registry_host_id == str(self.host.id) + if is_registry: + self.env['runbot.runbot']._start_docker_registry(self.host) last_docker_updates = self.env['runbot.dockerfile'].search([('to_build', '=', True)]).mapped('write_date') - if self.count == 1 or last_docker_updates and self.last_docker_update != max(last_docker_updates): - self.host._docker_build() - self.last_docker_update = max(last_docker_updates) + if self.count == 1 or self.last_docker_updates != last_docker_updates: + self.last_docker_updates = last_docker_updates + self.host._docker_update_images() + self.env.cr.commit() if self.count == 1: # cleanup at second iteration self.env['runbot.runbot']._source_cleanup() + self.env.cr.commit() self.env['runbot.build']._local_cleanup() + self.env.cr.commit() self.env['runbot.runbot']._docker_cleanup() + self.env.cr.commit() self.host._set_psql_conn_count() + self.env.cr.commit() self.env['runbot.repo']._update_git_config() self.env.cr.commit() self.git_gc()