#!/usr/bin/python3
import argparse
import docker
import logging
import os
import psutil
import re
import sys
import threading
import time
import signal
import subprocess

from contextlib import nullcontext
from datetime import datetime, timedelta, timezone
from pathlib import Path
from logging.handlers import WatchedFileHandler

LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logging.getLogger('odoo.addons.runbot').setLevel(logging.DEBUG)
logging.addLevelName(25, "!NFO")

CPU_COUNT = os.cpu_count()

_logger = logging.getLogger(__name__)


class RunbotClient():

    def __init__(self, env):
        self.env = env
        self.ask_interrupt = threading.Event()
        self.host = None
        self.count = 0
        self.max_count = 60

    def on_start(self):
        pass

    def main_loop(self):
        from odoo import fields
        from odoo.tools.profiler import Profiler
        self.on_start()
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGQUIT, self.dump_stack)
        self.host = self.env['runbot.host']._get_current()
        self.update_next_git_gc_date()
        self.host._bootstrap()
        logging.info(
            'Host %s running with %s slots on pid %s%s',
            self.host.name,
            self.host.nb_worker,
            os.getpid(),
            ' (assigned only)' if self.host.assigned_only else ''
        )
        while True:
            context_manager = Profiler(db=self.env.cr.dbname) if self.host.profile else nullcontext()
            with context_manager:
                try:
                    self.host.last_start_loop = fields.Datetime.now()
                    self.env.cr.commit()
                    if self.env.registry != self.env.registry.check_signaling():
                        # the registry was reloaded (signaling detected a change): rebuild the environment
                        self.env.reset()
                        self.env = self.env()
                    self.count = self.count % self.max_count
                    if self.host.paused:
                        sleep_time = 5
                    else:
                        sleep_time = self.loop_turn()
                    self.count += 1
                    self.host.last_end_loop = fields.Datetime.now()
                    self.env.cr.commit()
                    self.env.clear()
                    self.sleep(sleep_time)
                except Exception as e:
                    _logger.exception('Builder main loop failed with: %s', e)
                    self.env.cr.rollback()
                    self.env.clear()
                    self.sleep(10)
                if self.ask_interrupt.is_set():
                    return

    def loop_turn(self):
        raise NotImplementedError()

    # A first interrupt asks the main loop to stop at the end of the current turn;
    # a second one forces an immediate exit.
    def signal_handler(self, _signal, _frame):
        if self.ask_interrupt.is_set():
            _logger.info("Second Interrupt detected, force exit")
            os._exit(1)

        _logger.info("Interrupt detected")
        self.ask_interrupt.set()

    def dump_stack(self, _signal, _frame):
        import odoo
        odoo.tools.misc.dumpstacks()

    def sleep(self, t):
        self.ask_interrupt.wait(t)

    def update_next_git_gc_date(self):
        now = datetime.now()
        gc_hour = int(self.env['ir.config_parameter'].sudo().get_param('runbot.git_gc_hour', '23'))
        gc_minutes = self.host.id % 60  # deterministic minutes (e.g. host id 125 -> minute 5)
        self.next_git_gc_date = datetime(now.year, now.month, now.day, gc_hour, gc_minutes)
        while self.next_git_gc_date <= now:
            self.next_git_gc_date += timedelta(days=1)
        _logger.info('Next git gc scheduled on %s', self.next_git_gc_date)

    def git_gc(self):
        """ git gc once a day """
        if self.next_git_gc_date < datetime.now():
            _logger.info('Starting git gc on repositories')
            commands = []
            host_name = self.host.name
            for repo in self.env['runbot.repo'].search([]):
                commands.append((repo.name, repo._get_git_command(['gc', '--prune=all', '--quiet'])))
            self.env.cr.rollback()
            # gc commands can be slow; roll back to avoid keeping a transaction idle for several minutes.
            messages = []
            for repo_name, command in commands:
                try:
                    start = time.time()
                    subprocess.check_output(command, stderr=subprocess.STDOUT).decode()
                    _logger.info('Git gc on %s took %ss', repo_name, time.time() - start)
                except subprocess.CalledProcessError as e:
                    message = f'git gc failed for {repo_name} on {host_name} with exit status {e.returncode} and message "{e.output[:60]} ..."'
                    messages.append(message)
            for message in messages:
                self.env['runbot.runbot']._warning(message)

            self.env.cr.commit()
            self.update_next_git_gc_date()


def run(client_class):
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument('--odoo-path', help='Odoo sources path')
    parser.add_argument('--db_host')
    parser.add_argument('--db_port')
    parser.add_argument('--db_user')
    parser.add_argument('--db_password')
    parser.add_argument('-d', '--database', default='runbot', help='name of runbot db')
    parser.add_argument('--addons-path', type=str, dest="addons_path")
    parser.add_argument('--logfile', default=False)
    parser.add_argument('--forced-host-name', default=False)

    args = parser.parse_args()
    if args.logfile:
        dirname = os.path.dirname(args.logfile)
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)

        handler = WatchedFileHandler(args.logfile)
        formatter = logging.Formatter(LOG_FORMAT)
        handler.setFormatter(formatter)
        logging.getLogger().addHandler(handler)

    # configure odoo
    sys.path.append(args.odoo_path)
    import odoo
    _logger.info("Starting scheduler on database %s", args.database)
    odoo.tools.config['db_host'] = args.db_host
    odoo.tools.config['db_port'] = args.db_port
    odoo.tools.config['db_user'] = args.db_user
    odoo.tools.config['db_password'] = args.db_password
    addon_path = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
    config_addons_path = args.addons_path or odoo.tools.config['addons_path']
    if addon_path not in config_addons_path:
        config_addons_path = ','.join([config_addons_path, addon_path])
    odoo.tools.config['addons_path'] = config_addons_path
    odoo.tools.config['forced_host_name'] = args.forced_host_name
    import odoo.modules

    # create environment
    registry = odoo.modules.registry.Registry(args.database)
    try:
        with registry.cursor() as cr:
            env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
            client = client_class(env)
            # run main loop
            client.main_loop()
    except Exception as e:
        _logger.exception(str(e))
        raise e
    _logger.info("Stopping gracefully")
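
# Illustrative sketch (not part of the runbot sources): a concrete builder is
# expected to subclass RunbotClient, implement loop_turn() so that it returns
# the number of seconds to sleep before the next turn, and be launched through
# run(), along the lines of:
#
#     class ExampleClient(RunbotClient):  # hypothetical subclass name
#         def loop_turn(self):
#             _logger.info('nothing to do on host %s', self.host.name)
#             return 10  # sleep 10 seconds before the next turn
#
#     if __name__ == '__main__':
#         run(ExampleClient)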

def human_size(nb):
    for i, u in enumerate(['', ' KiB', ' MiB', ' GiB']):
        if nb < 1000 or i == 3:  # 1000 to avoid things like 1001 MiB
            break
        nb = nb / 1024
    return f'{nb:.2f}{u}'
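
# Worked examples: human_size(512) == '512.00' (values below 1000 keep no suffix)
# and human_size(3 * 1024 ** 2) == '3.00 MiB'.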

def get_cpu_time():
    """Returns the sum of the system CPU times, in microseconds"""
    return sum([t for t in psutil.cpu_times()]) * 10 ** 6

def get_docker_stats(container_id):
    """
    Cgroup V1 path is /sys/fs/cgroup/memory/docker/{container_id}/memory.usage_in_bytes
    https://docs.docker.com/config/containers/runmetrics/#find-the-cgroup-for-a-given-container
    On our runbot, we use cgroup V2 + the systemd driver.
    We could add a method to find the right path instead of hard-coding it.

    Returns a tuple with:
    (time, current memory usage, cpu usage in microseconds, current system usage microseconds)
    """
    try:
        memory_current = int(Path(f'/sys/fs/cgroup/system.slice/docker-{container_id}.scope/memory.current').read_text().strip())
    except Exception:  # should we log exception in order to debug ... but will spam the logs ...
        memory_current = 0
    try:
        cpu_stats = Path(f'/sys/fs/cgroup/system.slice/docker-{container_id}.scope/cpu.stat').read_text()
        cpu_usage_line = cpu_stats.split('\n')[0]
        usage_usec = cpu_usage_line and int(cpu_usage_line.split()[1])
    except Exception:
        usage_usec = 0
    return (time.time(), memory_current, usage_usec, get_cpu_time())
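
# Note on the cpu.stat parsing above: on a cgroup v2 host the file starts with the
# total usage line, e.g. (illustrative values):
#     usage_usec 123456789
#     user_usec 100000000
#     system_usec 23456789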

def prepare_stats_log(dest, previous_stats, current_stats):
    current_time, current_mem, current_cpu, current_cpu_time = current_stats

    if not previous_stats:
        return (current_time, 0, 0, current_cpu, current_cpu_time), ''

    _, logged_mem, logged_cpu_percent, previous_cpu, previous_cpu_time = previous_stats

    mem_ratio = (current_mem - logged_mem) / (logged_mem or 1) * 100
    cpu_delta = current_cpu - previous_cpu
    # total system cpu time divided by the number of cpus approximates the elapsed wall-clock time
    system_delta = (current_cpu_time - previous_cpu_time) / CPU_COUNT
    cpu_percent = cpu_delta / system_delta * 100
    cpu_percent_delta = (cpu_percent - logged_cpu_percent)
    cpu_percent_ratio = cpu_percent_delta / (logged_cpu_percent or 0.000001) * 100

    log_lines = []
    date_time = datetime.fromtimestamp(current_time).astimezone(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]
    if abs(mem_ratio) > 10:  # hard-coded values. We should find a way to configure.
        humanized = f'({human_size(current_mem)})'
        log_lines.append(f'{date_time} Memory: {current_mem:>11} {humanized:>11} ({mem_ratio:>+5.1f}%)')
        logged_mem = current_mem
    if abs(cpu_percent_ratio) > 20 and abs(cpu_percent_delta) > 2:
        log_lines.append(f'{date_time} CPU: {cpu_percent:>11.2f} ({cpu_percent_ratio:>+5.1f}%)')
        logged_cpu_percent = cpu_percent
    previous_stats = current_time, logged_mem, logged_cpu_percent, current_cpu, current_cpu_time
    return previous_stats, '\n'.join(log_lines)
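
# Illustrative output (hypothetical numbers): an appended stats line looks roughly like
#     2024-01-01 12:00:00,000 Memory:    12345678 (11.77 MiB) (+12.3%)
# or
#     2024-01-01 12:00:00,000 CPU:       42.00 (+25.0%)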

def docker_monitoring_loop(builds_dir):
    builds_dir = Path(builds_dir)
    docker_client = docker.from_env()
    previous_stats_per_docker = {}
    _logger.info('Starting docker monitoring loop thread')
    while True:
        try:
            stats_per_docker = dict()
            for container in docker_client.containers.list(filters={'status': 'running'}):
                if re.match(r'^\d+-.+_.+', container.name):
                    dest, suffix = container.name.split('_', maxsplit=1)
                    container_log_dir = builds_dir / dest / 'logs'
                    if not container_log_dir.exists():
                        _logger.warning('Log dir not found: `%s`', container_log_dir)
                        continue
                    current_stats = get_docker_stats(container.id)
                    previous_stats = previous_stats_per_docker.get(container.name)
                    previous_stats, log_line = prepare_stats_log(dest, previous_stats, current_stats)
                    if log_line:
                        stat_log_file = container_log_dir / f'{suffix}-stats.txt'
                        with open(stat_log_file, mode='a') as f:
                            f.write(f'{log_line}\n')
                    stats_per_docker[container.name] = previous_stats
            previous_stats_per_docker = stats_per_docker
            time.sleep(1)
        except Exception as e:
            _logger.exception('Monitoring loop thread exception: %s', e)
            time.sleep(60)
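
# Illustrative usage (assumed, not defined here): docker_monitoring_loop() never
# returns, so it is meant to be run from a background thread, e.g.:
#
#     threading.Thread(
#         target=docker_monitoring_loop,
#         args=('/path/to/builds',),  # hypothetical builds directory
#         daemon=True,
#     ).start()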