[IMP] drunbot: docker registry

The current runbot infrastructure has 100+ machine that will each build
all docker images.

This is unpractical for multiple reasons:
- impotant load on all machine when the build could be done once
- possible differences for the same image depending on the moment the
base was pulled on the host. An extreme example was the version of
python (3.11 or 3.12) when building the noble docker image before it was
stabilized.
- increase the chance to have a docker build failure in case of network
problem. (random)

A centralized registry will help with that, and allow future devlopment
to generate more docker images. All docker images will be exactly the
same, the pull time is faster than build time, only one build per docker
image, ...
This commit is contained in:
Christophe Monniez 2023-07-13 10:12:23 +02:00
parent 4a2c9d7491
commit d722160247
11 changed files with 207 additions and 15 deletions

1
.gitignore vendored
View File

@ -11,3 +11,4 @@ runbot/static/sources
runbot/static/nginx
runbot/static/databases
runbot/static/docker
runbot/static/docker-registry

View File

@ -124,6 +124,83 @@ def _docker_build(build_dir, image_tag):
msg = f"{''.join(l.get('stream') or '' for l in e.build_log)}\nERROR:{e.msg}"
return (False, msg)
def docker_push(image_tag):
return _docker_push(image_tag)
def _docker_push(image_tag):
"""Push a Docker image to the localy hosted docker registry
:param image_tag: the image tag (or id) to push
:return: tuple(success, msg) where success is a boolean and msg is the error message or None
"""
docker_client = docker.from_env()
try:
image = docker_client.images.get(image_tag)
except docker.errors.ImageNotFound:
return (False, f"Docker image '{image_tag}' not found.")
push_tag = f'127.0.0.1:5001/{image_tag}'
image.tag(push_tag)
error = None
try:
for push_progress in docker_client.images.push(push_tag, stream=True, decode=True):
# the push method is supposed to raise in cases of API errors but doesn't in other cases
# e.g. connection errors or the image tag does not exists locally ...
if 'error' in push_progress:
error = str(push_progress) # just stringify the whole as it might contains other keys like errorDetail ...
except docker.errors.APIError as e:
error = e
if error:
return (False, error)
return (True, None)
def docker_pull(image_tag):
return _docker_pull(image_tag)
def _docker_pull(image_tag):
"""Pull a docker image from a registry.
:param image_tag: the full image tag, including the registry host
e.g.: `dockerhub.runbot102.odoo.com/odoo:PureNobleTest`
:return: tuple(success, image) where success is a boolean and image a Docker image object or None in case of failure
"""
docker_client = docker.from_env()
try:
image = docker_client.images.pull(image_tag)
except docker.errors.APIError:
message = f"failed Docker pull for {image_tag}"
_logger.warning(message)
return (False, None)
return (True, image)
def docker_remove(image_tag):
return _docker_remove(image_tag)
def _docker_remove(image_tag):
docker_client = docker.from_env()
try:
docker_client.images.remove(image_tag, force=1)
except docker.errors.APIError:
message = f"Docker remove failed for {image_tag}"
_logger.exception(message)
return False
return True
def docker_prune():
return _docker_prune()
def _docker_prune():
docker_client = docker.from_env()
try:
return docker_client.images.prune()
except docker.errors.APIError:
_logger.exception('Docker prune failed')
return {'ImagesDeleted': None, 'SpaceReclaimed': 0}
def docker_run(*args, **kwargs):
return _docker_run(*args, **kwargs)
@ -296,7 +373,18 @@ def docker_ps():
def _docker_ps():
"""Return a list of running containers names"""
docker_client = docker.client.from_env()
return [ c.name for c in docker_client.containers.list()]
return [c.name for c in docker_client.containers.list()]
def docker_images():
return _docker_images()
def _docker_images():
"""Return a list of running existing images"""
docker_client = docker.client.from_env()
return [c for c in docker_client.images.list()]
def sanitize_container_name(name):
"""Returns a container name with unallowed characters removed"""

View File

@ -5,6 +5,7 @@ from collections import defaultdict
from odoo import models, fields, api
from odoo.tools import config, ormcache
from ..common import fqdn, local_pgadmin_cursor, os, list_local_dbs, local_pg_cursor
from ..container import docker_push, docker_pull, docker_prune, docker_images, docker_remove
_logger = logging.getLogger(__name__)
@ -44,6 +45,7 @@ class Host(models.Model):
paused = fields.Boolean('Paused', help='Host will stop scheduling while paused')
profile = fields.Boolean('Profile', help='Enable profiling on this host')
use_remote_docker_registry = fields.Boolean('Use remote Docker Registry', default=False, help="Use docker registry for pulling images")
def _compute_nb(self):
groups = self.env['runbot.build'].read_group(
@ -117,12 +119,43 @@ class Host(models.Model):
self._bootstrap_db_template()
self._bootstrap_local_logs_db()
def _docker_build(self):
def _docker_update_images(self):
""" build docker images needed by locally pending builds"""
_logger.info('Building docker images...')
self.ensure_one()
icp = self.env['ir.config_parameter']
docker_registry_host = self.browse(int(icp.get_param('runbot.docker_registry_host_id', default=0)))
# pull all images from the runbot docker registry
is_registry = docker_registry_host == self
all_docker_files = self.env['runbot.dockerfile'].search([])
all_tags = set(all_docker_files.mapped('image_tag'))
if docker_registry_host and self.use_remote_docker_registry and not is_registry:
_logger.info('Pulling docker images...')
for dockerfile in all_docker_files:
remote_tag = f'dockerhub.{docker_registry_host.name}/{dockerfile.image_tag}'
pull_result, image = docker_pull(remote_tag)
if pull_result:
image.tag(dockerfile.image_tag)
else:
_logger.info('Building docker images...')
for dockerfile in self.env['runbot.dockerfile'].search([('to_build', '=', True)]):
dockerfile._build(self)
if is_registry:
docker_push(dockerfile.image_tag)
_logger.info('Cleaning docker images...')
for image in docker_images():
for tag in image.tags:
if tag.startswith('odoo:') and tag not in all_tags: # what about odoo:latest
_logger.info(f"Removing tag '{tag}' since it doesn't exist anymore")
docker_remove(tag)
result = docker_prune()
if result['ImagesDeleted']:
for r in result['ImagesDeleted']:
for operation, identifier in r.items():
_logger.info(f"{operation}: {identifier}")
if result['SpaceReclaimed']:
_logger.info(f"Space reclaimed: {result['SpaceReclaimed']}")
_logger.info('Done...')
@ormcache()

View File

@ -51,6 +51,7 @@ class ResConfigSettings(models.TransientModel):
runbot_pending_warning = fields.Integer('Pending warning limit', default=5, config_parameter='runbot.pending.warning')
runbot_pending_critical = fields.Integer('Pending critical limit', default=5, config_parameter='runbot.pending.critical')
runbot_docker_registry_host_id = fields.Many2one('runbot.host', 'Docker registry', help='Runbot host which handles Docker registry.', config_parameter='runbot.docker_registry_host_id')
# TODO other icp
# runbot.runbot_maxlogs 100
# migration db

View File

@ -1,3 +1,4 @@
import docker
import time
import logging
import glob
@ -223,7 +224,7 @@ class Runbot(models.AbstractModel):
# Bootstrap
host._bootstrap()
if runbot_do_schedule:
host._docker_build()
host._docker_update_images()
self._source_cleanup()
self.env['runbot.build']._local_cleanup()
self._docker_cleanup()
@ -366,7 +367,7 @@ class Runbot(models.AbstractModel):
def _docker_cleanup(self):
_logger.info('Docker cleaning')
docker_ps_result = docker_ps()
docker_ps_result = [container for container in docker_ps() if container != "runbot-registry"]
containers = {}
ignored = []
@ -383,6 +384,40 @@ class Runbot(models.AbstractModel):
if ignored:
_logger.info('docker (%s) not deleted because not dest format', list(ignored))
def _start_docker_registry(self, host):
"""
Start a docker registry if not already running.
The registry is in `always_restart` mode, meaning that it will restart properly after a reboot.
"""
docker_client = docker.from_env()
try:
registry_container = docker_client.containers.get('runbot-registry')
except docker.errors.NotFound:
registry_container = None
if registry_container:
if registry_container.status in ('running', 'created', 'restarting'):
if registry_container.status != 'running':
_logger.info('Docker registry container already found with status %s, skipping start procedure.', registry_container.status)
return
_logger.info('Docker registry container found with status %s, trying the start procedure.', registry_container.status)
try:
registry_container = docker_client.containers.run(
'registry:2',
name='runbot-registry',
volumes={f'{os.path.join(self._root(), "docker-registry")}':{'bind': '/var/lib/registry', 'mode': 'rw'}},
ports={5000: ('127.0.0.1', 5001)},
restart_policy= {"Name": "always"},
detach=True
)
_logger.info('Docker registry started')
# TODO push local images in registry here
except Exception as e:
message = f'Starting registry failed with exception: {e}'
self.warning(message)
_logger.error(message)
def _warning(self, message, *args):
if args:

View File

@ -51,6 +51,23 @@ server {
}
}
server {
listen 8080;
server_name ~^dockerhub\.<t t-esc="re_escape(host_name)"/>$;
location /v2/ {
limit_except GET HEAD OPTIONS {
deny all;
}
proxy_pass http://localhost:5001;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 900;
}
}
<t id="root_anchor"/>
<t t-foreach="builds" t-as="build">

View File

@ -180,6 +180,7 @@ class RunbotCase(TransactionCase):
self.start_patcher('isfile', 'odoo.addons.runbot.common.os.path.isfile', True)
self.start_patcher('docker_run', 'odoo.addons.runbot.container._docker_run')
self.start_patcher('docker_build', 'odoo.addons.runbot.container._docker_build')
self.start_patcher('docker_push', 'odoo.addons.runbot.container._docker_push')
self.start_patcher('docker_ps', 'odoo.addons.runbot.container._docker_ps', [])
self.start_patcher('docker_stop', 'odoo.addons.runbot.container._docker_stop')
self.start_patcher('docker_get_gateway_ip', 'odoo.addons.runbot.models.build_config.docker_get_gateway_ip', None)

View File

@ -32,10 +32,10 @@ class TestCron(RunbotCase):
mock_update_batches.assert_called()
@patch('time.sleep', side_effect=sleep)
@patch('odoo.addons.runbot.models.host.Host._docker_build')
@patch('odoo.addons.runbot.models.host.Host._docker_update_images')
@patch('odoo.addons.runbot.models.host.Host._bootstrap')
@patch('odoo.addons.runbot.models.runbot.Runbot._scheduler')
def test_cron_build(self, mock_scheduler, mock_host_bootstrap, mock_host_docker_build, *args):
def test_cron_build(self, mock_scheduler, mock_host_bootstrap, mock_host_docker_update_images, *args):
""" test that cron_fetch_and_build do its work """
hostname = 'cronhost.runbot.com'
self.patchers['hostname_patcher'].return_value = hostname
@ -49,7 +49,7 @@ class TestCron(RunbotCase):
pass # sleep raises an exception to avoid to stay stuck in loop
mock_scheduler.assert_called()
mock_host_bootstrap.assert_called()
mock_host_docker_build.assert_called()
mock_host_docker_update_images.assert_called()
host = self.env['runbot.host'].search([('name', '=', hostname)])
self.assertTrue(host, 'A new host should have been created')
# self.assertGreater(host.psql_conn_count, 0, 'A least one connection should exist on the current psql batch')

View File

@ -11,6 +11,7 @@
<field name="name" readonly='1'/>
<field name="disp_name"/>
<field name="active"/>
<field name="use_remote_docker_registry"/>
<field name="last_start_loop" readonly='1'/>
<field name="last_end_loop" readonly='1'/>
<field name="last_success" readonly='1'/>
@ -53,6 +54,7 @@
<field name="name"/>
<field name="disp_name"/>
<field name="assigned_only" widget="boolean_toggle"/>
<field name="use_remote_docker_registry" widget="boolean_toggle"/>
<field name="nb_worker"/>
<field name="nb_run_slot"/>
</tree>

View File

@ -93,7 +93,11 @@
<field name="runbot_upgrade_exception_message" class="w-100"/>
</setting>
</block>
<block title="Docker Registry Settings">
<setting>
<field name="runbot_docker_registry_host_id"/>
</setting>
</block>
</app>
</xpath>
</field>

View File

@ -20,18 +20,28 @@ class BuilderClient(RunbotClient):
for repo in self.env['runbot.repo'].search([('mode', '!=', 'disabled')]):
repo._update(force=True)
self.last_docker_update = None
self.last_docker_updates = None
def loop_turn(self):
icp = self.env['ir.config_parameter']
docker_registry_host_id = icp.get_param('runbot.docker_registry_host_id', default=False)
is_registry = docker_registry_host_id == str(self.host.id)
if is_registry:
self.env['runbot.runbot']._start_docker_registry(self.host)
last_docker_updates = self.env['runbot.dockerfile'].search([('to_build', '=', True)]).mapped('write_date')
if self.count == 1 or last_docker_updates and self.last_docker_update != max(last_docker_updates):
self.host._docker_build()
self.last_docker_update = max(last_docker_updates)
if self.count == 1 or self.last_docker_updates != last_docker_updates:
self.last_docker_updates = last_docker_updates
self.host._docker_update_images()
self.env.cr.commit()
if self.count == 1: # cleanup at second iteration
self.env['runbot.runbot']._source_cleanup()
self.env.cr.commit()
self.env['runbot.build']._local_cleanup()
self.env.cr.commit()
self.env['runbot.runbot']._docker_cleanup()
self.env.cr.commit()
self.host._set_psql_conn_count()
self.env.cr.commit()
self.env['runbot.repo']._update_git_config()
self.env.cr.commit()
self.git_gc()