From 8cb957803304c1383de1a1f09be77122e62809e7 Mon Sep 17 00:00:00 2001 From: Xavier-Do Date: Thu, 29 Aug 2024 14:57:37 +0200 Subject: [PATCH] [IMP] runbot: better docker logging The docker operation are called often and cannot be logged each time. If a docker operation is slow, we log it at the end but waiting for this output we have no idea what the process is doing. This pr proposed to go a lower level and get the stream from the docker operation to be able to log earlier if we started a potentially slow operation. --- runbot/container.py | 111 +++++++++++++++++--------------------- runbot/docker_manager.py | 69 ++++++++++++++++++++++++ runbot/models/docker.py | 15 +++--- runbot/models/host.py | 6 +-- runbot/models/runbot.py | 4 +- runbot_builder/builder.py | 2 +- 6 files changed, 131 insertions(+), 76 deletions(-) create mode 100644 runbot/docker_manager.py diff --git a/runbot/container.py b/runbot/container.py index 1064030e..86c51180 100644 --- a/runbot/container.py +++ b/runbot/container.py @@ -8,6 +8,7 @@ When testing this file: the first parameter should be a directory containing Odoo. The second parameter is the exposed port """ +from .docker_manager import DockerManager, docker import configparser import getpass import io @@ -16,19 +17,9 @@ import os import re import subprocess import time -import warnings from odoo.tools import file_path -# unsolved issue https://github.com/docker/docker-py/issues/2928 -with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message="The distutils package is deprecated.*", - category=DeprecationWarning - ) - import docker - USERNAME = getpass.getuser() _logger = logging.getLogger(__name__) @@ -54,6 +45,15 @@ class Command(): self.config_tuples = config_tuples or [] self.cmd_checker = cmd_checker + def set_pres(self, pres): + self.pres = pres + + def set_cmd(self, cmd): + self.cmd = cmd + + def set_post(self, posts): + self.posts = posts + def __getattr__(self, name): return getattr(self.cmd, name) @@ -110,31 +110,32 @@ def _docker_build(build_dir, image_tag): :param image_tag: name used to tag the resulting docker image :return: tuple(success, msg) where success is a boolean and msg is the error message or None """ - result = { - 'image': False, - 'msg': '', - } - docker_client = docker.from_env() - start = time.time() - try: - docker_image, result_stream = docker_client.images.build(path=build_dir, tag=image_tag, rm=True) - result_stream = list(result_stream) - msg = ''.join([r.get('stream', '') for r in result_stream]) - result['image'] = docker_image - result['msg'] = msg - except docker.errors.APIError as e: - _logger.error('Build of image %s failed', image_tag) - result['msg'] = e.explanation - except docker.errors.BuildError as e: - _logger.error('Build of image %s failed', image_tag) - msg = f"{''.join(l.get('stream') or '' for l in e.build_log)}\nERROR:{e.msg}" - result['msg'] = msg - duration = time.time() - start - if duration > 1: - _logger.info('Dockerfile %s finished build in %s', image_tag, duration) - result['duration'] = duration - return result + with DockerManager(image_tag) as dm: + last_step = None + dm.result['success'] = False # waiting for an image_id + for chunk in dm.consume(dm.docker_client.api.build(path=build_dir, tag=image_tag, rm=True)): + if 'stream' in chunk: + stream = chunk['stream'] + if stream.startswith('Step '): + last_step = stream[:120] + '...' + if dm.log_progress: + _logger.info(last_step) + if not dm.log_progress and ('running in' in stream or dm.duration > 1): + dm.log_progress = True + _logger.info('Building dockerfile %s', image_tag) + if last_step: + _logger.info(last_step) + match = re.search( + r'(^Successfully built |sha256:)([0-9a-f]+)$', + chunk['stream'], + ) + if match: + dm.result['image_id'] = match.group(2) + dm.result['success'] = True + break + return dm.result + def docker_push(image_tag): return _docker_push(image_tag) @@ -145,26 +146,16 @@ def _docker_push(image_tag): :param image_tag: the image tag (or id) to push :return: tuple(success, msg) where success is a boolean and msg is the error message or None """ - docker_client = docker.from_env() - try: - image = docker_client.images.get(image_tag) - except docker.errors.ImageNotFound: - return (False, f"Docker image '{image_tag}' not found.") - push_tag = f'127.0.0.1:5001/{image_tag}' - image.tag(push_tag) - error = None - try: - for push_progress in docker_client.images.push(push_tag, stream=True, decode=True): - # the push method is supposed to raise in cases of API errors but doesn't in other cases - # e.g. connection errors or the image tag does not exists locally ... - if 'error' in push_progress: - error = str(push_progress) # just stringify the whole as it might contains other keys like errorDetail ... - except docker.errors.APIError as e: - error = e - if error: - return (False, error) - return (True, None) + with DockerManager(image_tag) as dm: + image = dm.docker_client.images.get(image_tag) + push_tag = f'127.0.0.1:5001/{image_tag}' + image.tag(push_tag) + for chunk in dm.consume(dm.docker_client.api.push(push_tag, stream=True)): + if not dm.log_progress and 'Pushing' in chunk.get('status', ''): + dm.log_progress = True + _logger.info('Pushing %s', image_tag) + return dm.result def docker_pull(image_tag): @@ -177,14 +168,12 @@ def _docker_pull(image_tag): e.g.: `dockerhub.runbot102.odoo.com/odoo:PureNobleTest` :return: tuple(success, image) where success is a boolean and image a Docker image object or None in case of failure """ - docker_client = docker.from_env() - try: - image = docker_client.images.pull(image_tag) - except docker.errors.APIError: - message = f"failed Docker pull for {image_tag}" - _logger.warning(message) - return (False, None) - return (True, image) + with DockerManager(image_tag) as dm: + for chunk in dm.consume(dm.docker_client.api.pull(image_tag, stream=True)): + if not dm.log_progress and 'Downloading' in chunk['status']: + dm.log_progress = True + _logger.info('Pulling %s', image_tag) + return dm.result def docker_remove(image_tag): diff --git a/runbot/docker_manager.py b/runbot/docker_manager.py new file mode 100644 index 00000000..1c298bcb --- /dev/null +++ b/runbot/docker_manager.py @@ -0,0 +1,69 @@ + +import getpass +import logging +import time +import warnings + +# unsolved issue https://github.com/docker/docker-py/issues/2928 +with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="The distutils package is deprecated.*", + category=DeprecationWarning, + ) + import docker + +USERNAME = getpass.getuser() + +_logger = logging.getLogger(__name__) +docker_stop_failures = {} + + +class DockerManager: + def __init__(self, image_tag): + self.image_tag = image_tag + + def __enter__(self): + self.start = time.time() + self.duration = 0 + self.docker_client = docker.from_env() + self.result = { + 'msg': '', + 'image': False, + 'success': True, + } + self.log_progress = False + return self + + def consume(self, stream): + for chunk in docker.utils.json_stream.json_stream(stream): + self.duration = time.time() - self.start + if 'error' in chunk: + _logger.error(chunk['error']) + self.result['msg'] += chunk['error'] + # self.result['msg'] += str(chunk.get('errorDetail', '')) + self.result['msg'] += '\n' + self.result['success'] = False + break + if 'stream' in chunk: + self.result['msg'] += chunk['stream'] + if 'status' in chunk: + self.result['msg'] += chunk['status'] + if 'progress' in chunk: + self.result['msg'] += chunk['progress'] + self.result['msg'] += '\n' + yield chunk + + def __exit__(self, exception_type, exception_value, exception_traceback): + if self.log_progress: + _logger.info('Finished in %.2fs', self.duration) + if exception_value: + self.result['success'] = False + _logger.warning(exception_value) + self.result['msg'] += str(exception_value) + self.result['duration'] = self.duration + if self.result['success']: + self.result['image'] = self.docker_client.images.get(self.image_tag) + if 'image_id' in self.result and self.result['image_id'] not in self.result['image'].id: + _logger.warning('Image id does not match %s %s', self.result['image_id'], self.result['image'].id) + # if this never triggers, we could remove or simplify the success check from docker_build diff --git a/runbot/models/docker.py b/runbot/models/docker.py index 96468756..5d4f5b92 100644 --- a/runbot/models/docker.py +++ b/runbot/models/docker.py @@ -2,8 +2,6 @@ import getpass import logging import os import re -import time - from odoo import api, fields, models from odoo.addons.base.models.ir_qweb import QWebException @@ -296,20 +294,19 @@ class Dockerfile(models.Model): with open(self.env['runbot.runbot']._path('docker', self.image_tag, 'Dockerfile'), 'w') as Dockerfile: Dockerfile.write(content) - result = docker_build(docker_build_path, self.image_tag) - docker_build_identifier = result['image'] duration = result['duration'] msg = result['msg'] + success = image_id = result['image_id'] docker_build_result_values = {'dockerfile_id': self.id, 'output': msg, 'duration': duration, 'content': content, 'host_id': host and host.id} - if docker_build_identifier: + if success: docker_build_result_values['result'] = 'success' - docker_build_result_values['identifier'] = docker_build_identifier.id + docker_build_result_values['identifier'] = image_id else: docker_build_result_values['result'] = 'error' self.to_build = False - should_save_result = not docker_build_identifier # always save in case of failure + should_save_result = not success # always save in case of failure if not should_save_result: # check previous result anyway previous_result = self.env['runbot.docker_build_result'].search([ @@ -317,7 +314,7 @@ class Dockerfile(models.Model): ('host_id', '=', host and host.id), ], order='id desc', limit=1) # identifier changed - if docker_build_identifier.id != previous_result.identifier: + if image_id != previous_result.identifier: should_save_result = True if previous_result.output != docker_build_result_values['output']: # to discuss should_save_result = True @@ -326,7 +323,7 @@ class Dockerfile(models.Model): if should_save_result: result = self.env['runbot.docker_build_result'].create(docker_build_result_values) - if not docker_build_identifier: + if not success: message = f'Build failure, check results for more info ({result.summary})' self.message_post(body=message) _logger.error(message) diff --git a/runbot/models/host.py b/runbot/models/host.py index 34af0adf..a0dd7b89 100644 --- a/runbot/models/host.py +++ b/runbot/models/host.py @@ -132,9 +132,9 @@ class Host(models.Model): _logger.info('Pulling docker images...') for dockerfile in all_docker_files: remote_tag = f'dockerhub.{docker_registry_host.name}/{dockerfile.image_tag}' - pull_result, image = docker_pull(remote_tag) - if pull_result: - image.tag(dockerfile.image_tag) + result = docker_pull(remote_tag) + if result['success']: + result['image'].tag(dockerfile.image_tag) else: _logger.info('Building docker images...') for dockerfile in self.env['runbot.dockerfile'].search([('to_build', '=', True)]): diff --git a/runbot/models/runbot.py b/runbot/models/runbot.py index f3926fb1..057e88a7 100644 --- a/runbot/models/runbot.py +++ b/runbot/models/runbot.py @@ -384,7 +384,7 @@ class Runbot(models.AbstractModel): if ignored: _logger.info('docker (%s) not deleted because not dest format', list(ignored)) - def _start_docker_registry(self, host): + def _start_docker_registry(self): """ Start a docker registry if not already running. The registry is in `always_restart` mode, meaning that it will restart properly after a reboot. @@ -409,7 +409,7 @@ class Runbot(models.AbstractModel): name='runbot-registry', volumes={f'{os.path.join(self._root(), "docker-registry")}':{'bind': '/var/lib/registry', 'mode': 'rw'}}, ports={5000: ('127.0.0.1', 5001)}, - restart_policy= {"Name": "always"}, + restart_policy={"Name": "always"}, detach=True ) _logger.info('Docker registry started') diff --git a/runbot_builder/builder.py b/runbot_builder/builder.py index fd575dff..acf34e54 100755 --- a/runbot_builder/builder.py +++ b/runbot_builder/builder.py @@ -27,7 +27,7 @@ class BuilderClient(RunbotClient): docker_registry_host_id = icp.get_param('runbot.docker_registry_host_id', default=False) is_registry = docker_registry_host_id == str(self.host.id) if is_registry: - self.env['runbot.runbot']._start_docker_registry(self.host) + self.env['runbot.runbot']._start_docker_registry() last_docker_updates = self.env['runbot.dockerfile'].search([('to_build', '=', True)]).mapped('write_date') if self.count == 1 or self.last_docker_updates != last_docker_updates: self.last_docker_updates = last_docker_updates