[IMP] runbot: better docker logging

The docker operation are called often and cannot be logged each time.
If a docker operation is slow, we log it at the end but waiting for this
output we have no idea what the process is doing.

This pr proposed to go a lower level and get the stream from the docker
operation to be able to log earlier if we started a potentially slow
operation.
This commit is contained in:
Xavier-Do 2024-08-29 14:57:37 +02:00 committed by Christophe Monniez
parent 14bac37438
commit b96b32e8b9
6 changed files with 131 additions and 76 deletions

View File

@ -8,6 +8,7 @@ When testing this file:
the first parameter should be a directory containing Odoo.
The second parameter is the exposed port
"""
from .docker_manager import DockerManager, docker
import configparser
import getpass
import io
@ -16,19 +17,9 @@ import os
import re
import subprocess
import time
import warnings
from odoo.tools import file_path
# unsolved issue https://github.com/docker/docker-py/issues/2928
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="The distutils package is deprecated.*",
category=DeprecationWarning
)
import docker
USERNAME = getpass.getuser()
_logger = logging.getLogger(__name__)
@ -54,6 +45,15 @@ class Command():
self.config_tuples = config_tuples or []
self.cmd_checker = cmd_checker
def set_pres(self, pres):
self.pres = pres
def set_cmd(self, cmd):
self.cmd = cmd
def set_post(self, posts):
self.posts = posts
def __getattr__(self, name):
return getattr(self.cmd, name)
@ -110,31 +110,32 @@ def _docker_build(build_dir, image_tag):
:param image_tag: name used to tag the resulting docker image
:return: tuple(success, msg) where success is a boolean and msg is the error message or None
"""
result = {
'image': False,
'msg': '',
}
docker_client = docker.from_env()
start = time.time()
try:
docker_image, result_stream = docker_client.images.build(path=build_dir, tag=image_tag, rm=True)
result_stream = list(result_stream)
msg = ''.join([r.get('stream', '') for r in result_stream])
result['image'] = docker_image
result['msg'] = msg
except docker.errors.APIError as e:
_logger.error('Build of image %s failed', image_tag)
result['msg'] = e.explanation
except docker.errors.BuildError as e:
_logger.error('Build of image %s failed', image_tag)
msg = f"{''.join(l.get('stream') or '' for l in e.build_log)}\nERROR:{e.msg}"
result['msg'] = msg
duration = time.time() - start
if duration > 1:
_logger.info('Dockerfile %s finished build in %s', image_tag, duration)
result['duration'] = duration
return result
with DockerManager(image_tag) as dm:
last_step = None
dm.result['success'] = False # waiting for an image_id
for chunk in dm.consume(dm.docker_client.api.build(path=build_dir, tag=image_tag, rm=True)):
if 'stream' in chunk:
stream = chunk['stream']
if stream.startswith('Step '):
last_step = stream[:120] + '...'
if dm.log_progress:
_logger.info(last_step)
if not dm.log_progress and ('running in' in stream or dm.duration > 1):
dm.log_progress = True
_logger.info('Building dockerfile %s', image_tag)
if last_step:
_logger.info(last_step)
match = re.search(
r'(^Successfully built |sha256:)([0-9a-f]+)$',
chunk['stream'],
)
if match:
dm.result['image_id'] = match.group(2)
dm.result['success'] = True
break
return dm.result
def docker_push(image_tag):
return _docker_push(image_tag)
@ -145,26 +146,16 @@ def _docker_push(image_tag):
:param image_tag: the image tag (or id) to push
:return: tuple(success, msg) where success is a boolean and msg is the error message or None
"""
docker_client = docker.from_env()
try:
image = docker_client.images.get(image_tag)
except docker.errors.ImageNotFound:
return (False, f"Docker image '{image_tag}' not found.")
with DockerManager(image_tag) as dm:
image = dm.docker_client.images.get(image_tag)
push_tag = f'127.0.0.1:5001/{image_tag}'
image.tag(push_tag)
error = None
try:
for push_progress in docker_client.images.push(push_tag, stream=True, decode=True):
# the push method is supposed to raise in cases of API errors but doesn't in other cases
# e.g. connection errors or the image tag does not exists locally ...
if 'error' in push_progress:
error = str(push_progress) # just stringify the whole as it might contains other keys like errorDetail ...
except docker.errors.APIError as e:
error = e
if error:
return (False, error)
return (True, None)
for chunk in dm.consume(dm.docker_client.api.push(push_tag, stream=True)):
if not dm.log_progress and 'Pushing' in chunk.get('status', ''):
dm.log_progress = True
_logger.info('Pushing %s', image_tag)
return dm.result
def docker_pull(image_tag):
@ -177,14 +168,12 @@ def _docker_pull(image_tag):
e.g.: `dockerhub.runbot102.odoo.com/odoo:PureNobleTest`
:return: tuple(success, image) where success is a boolean and image a Docker image object or None in case of failure
"""
docker_client = docker.from_env()
try:
image = docker_client.images.pull(image_tag)
except docker.errors.APIError:
message = f"failed Docker pull for {image_tag}"
_logger.warning(message)
return (False, None)
return (True, image)
with DockerManager(image_tag) as dm:
for chunk in dm.consume(dm.docker_client.api.pull(image_tag, stream=True)):
if not dm.log_progress and 'Downloading' in chunk['status']:
dm.log_progress = True
_logger.info('Pulling %s', image_tag)
return dm.result
def docker_remove(image_tag):

69
runbot/docker_manager.py Normal file
View File

@ -0,0 +1,69 @@
import getpass
import logging
import time
import warnings
# unsolved issue https://github.com/docker/docker-py/issues/2928
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="The distutils package is deprecated.*",
category=DeprecationWarning,
)
import docker
USERNAME = getpass.getuser()
_logger = logging.getLogger(__name__)
docker_stop_failures = {}
class DockerManager:
def __init__(self, image_tag):
self.image_tag = image_tag
def __enter__(self):
self.start = time.time()
self.duration = 0
self.docker_client = docker.from_env()
self.result = {
'msg': '',
'image': False,
'success': True,
}
self.log_progress = False
return self
def consume(self, stream):
for chunk in docker.utils.json_stream.json_stream(stream):
self.duration = time.time() - self.start
if 'error' in chunk:
_logger.error(chunk['error'])
self.result['msg'] += chunk['error']
# self.result['msg'] += str(chunk.get('errorDetail', ''))
self.result['msg'] += '\n'
self.result['success'] = False
break
if 'stream' in chunk:
self.result['msg'] += chunk['stream']
if 'status' in chunk:
self.result['msg'] += chunk['status']
if 'progress' in chunk:
self.result['msg'] += chunk['progress']
self.result['msg'] += '\n'
yield chunk
def __exit__(self, exception_type, exception_value, exception_traceback):
if self.log_progress:
_logger.info('Finished in %.2fs', self.duration)
if exception_value:
self.result['success'] = False
_logger.warning(exception_value)
self.result['msg'] += str(exception_value)
self.result['duration'] = self.duration
if self.result['success']:
self.result['image'] = self.docker_client.images.get(self.image_tag)
if 'image_id' in self.result and self.result['image_id'] not in self.result['image'].id:
_logger.warning('Image id does not match %s %s', self.result['image_id'], self.result['image'].id)
# if this never triggers, we could remove or simplify the success check from docker_build

View File

@ -2,8 +2,6 @@ import getpass
import logging
import os
import re
import time
from odoo import api, fields, models
from odoo.addons.base.models.ir_qweb import QWebException
@ -296,20 +294,19 @@ class Dockerfile(models.Model):
with open(self.env['runbot.runbot']._path('docker', self.image_tag, 'Dockerfile'), 'w') as Dockerfile:
Dockerfile.write(content)
result = docker_build(docker_build_path, self.image_tag)
docker_build_identifier = result['image']
duration = result['duration']
msg = result['msg']
success = image_id = result['image_id']
docker_build_result_values = {'dockerfile_id': self.id, 'output': msg, 'duration': duration, 'content': content, 'host_id': host and host.id}
if docker_build_identifier:
if success:
docker_build_result_values['result'] = 'success'
docker_build_result_values['identifier'] = docker_build_identifier.id
docker_build_result_values['identifier'] = image_id
else:
docker_build_result_values['result'] = 'error'
self.to_build = False
should_save_result = not docker_build_identifier # always save in case of failure
should_save_result = not success # always save in case of failure
if not should_save_result:
# check previous result anyway
previous_result = self.env['runbot.docker_build_result'].search([
@ -317,7 +314,7 @@ class Dockerfile(models.Model):
('host_id', '=', host and host.id),
], order='id desc', limit=1)
# identifier changed
if docker_build_identifier.id != previous_result.identifier:
if image_id != previous_result.identifier:
should_save_result = True
if previous_result.output != docker_build_result_values['output']: # to discuss
should_save_result = True
@ -326,7 +323,7 @@ class Dockerfile(models.Model):
if should_save_result:
result = self.env['runbot.docker_build_result'].create(docker_build_result_values)
if not docker_build_identifier:
if not success:
message = f'Build failure, check results for more info ({result.summary})'
self.message_post(body=message)
_logger.error(message)

View File

@ -132,9 +132,9 @@ class Host(models.Model):
_logger.info('Pulling docker images...')
for dockerfile in all_docker_files:
remote_tag = f'dockerhub.{docker_registry_host.name}/{dockerfile.image_tag}'
pull_result, image = docker_pull(remote_tag)
if pull_result:
image.tag(dockerfile.image_tag)
result = docker_pull(remote_tag)
if result['success']:
result['image'].tag(dockerfile.image_tag)
else:
_logger.info('Building docker images...')
for dockerfile in self.env['runbot.dockerfile'].search([('to_build', '=', True)]):

View File

@ -384,7 +384,7 @@ class Runbot(models.AbstractModel):
if ignored:
_logger.info('docker (%s) not deleted because not dest format', list(ignored))
def _start_docker_registry(self, host):
def _start_docker_registry(self):
"""
Start a docker registry if not already running.
The registry is in `always_restart` mode, meaning that it will restart properly after a reboot.

View File

@ -27,7 +27,7 @@ class BuilderClient(RunbotClient):
docker_registry_host_id = icp.get_param('runbot.docker_registry_host_id', default=False)
is_registry = docker_registry_host_id == str(self.host.id)
if is_registry:
self.env['runbot.runbot']._start_docker_registry(self.host)
self.env['runbot.runbot']._start_docker_registry()
last_docker_updates = self.env['runbot.dockerfile'].search([('to_build', '=', True)]).mapped('write_date')
if self.count == 1 or self.last_docker_updates != last_docker_updates:
self.last_docker_updates = last_docker_updates