[IMP] runbot: create a separate process for cron

As was already done for the builder, this gives the ability to run the
discovery of new commits and all related logic in a separate process.

This will mainly be useful to restart the frontend without waiting for
the cron, or to restart the "leader" process without stopping the
frontend. It will also be useful for optimisation purposes.
Xavier-Do 2021-11-25 13:22:19 +01:00 committed by Christophe Monniez
parent ede5384607
commit 0b30b9c104
8 changed files with 247 additions and 158 deletions
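
In concrete terms, the cron body below is split into two small per-turn entry points that separate processes can drive independently: the builder keeps docker images fresh and schedules builds on its host, while a single "leader" discovers new commits/PRs and processes batches. A rough orientation sketch, using the method names from the diff (the wiring here is illustrative only, not the actual tools.py code):

# Illustrative sketch of the split, not actual runbot code.
def builder_turn(env, host):
    # one builder process per host: refresh docker images, then schedule/run builds
    host.set_psql_conn_count()
    host._docker_build()
    return env['runbot.runbot']._scheduler_loop_turn(host)  # seconds to sleep

def leader_turn(env, host, pull_info_failures):
    # one "leader" process: fetch new commits/PRs and process pending batches
    return env['runbot.runbot']._fetch_loop_turn(host, pull_info_failures)  # seconds to sleep

Both entry points return the time to sleep before the next turn, which is what lets each daemon run its own independent loop.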


@@ -97,12 +97,17 @@ class Batch(models.Model):
                 batch._log('Cannot kill or skip build %s, build is used in another bundle: %s', build.id, bundles.mapped('name'))

     def _process(self):
+        processed = self.browse()
         for batch in self:
             if batch.state == 'preparing' and batch.last_update < fields.Datetime.now() - datetime.timedelta(seconds=60):
                 batch._prepare()
+                processed |= batch
             elif batch.state == 'ready' and all(slot.build_id.global_state in (False, 'running', 'done') for slot in batch.slot_ids):
+                _logger.info('Batch %s is done', self.id)
                 batch._log('Batch done')
                 batch.state = 'done'
+                processed |= batch
+        return processed

     def _create_build(self, params):
         """
@@ -130,12 +135,12 @@ class Batch(models.Model):
         return link_type, build

     def _prepare(self, auto_rebase=False):
+        _logger.info('Preparing batch %s', self.id)
         for level, message in self.bundle_id.consistency_warning():
             if level == "warning":
                 self.warning("Bundle warning: %s" % message)

         self.state = 'ready'
-        _logger.info('Preparing batch %s', self.id)

         bundle = self.bundle_id
         project = bundle.project_id


@@ -756,6 +756,11 @@ class BuildResult(models.Model):
                     self._log('_checkout', 'Multiple repo have same export path in build, some source may be missing for %s' % build_export_path, level='ERROR')
                     self._kill(result='ko')
                 exports[build_export_path] = commit.export()
+
+        checkout_time = time.time() - start
+        if checkout_time > 60:
+            self._log('checkout', 'Checkout took %s seconds' % int(checkout_time))
+
         return exports

     def _get_available_modules(self):


@@ -72,6 +72,7 @@ class Host(models.Model):

     def _docker_build(self):
         """ build docker images needed by locally pending builds"""
+        _logger.info('Building docker image...')
         self.ensure_one()
         static_path = self._get_work_path()
         self.clear_caches() # needed to ensure that content is updated on all hosts


@@ -7,6 +7,7 @@ import signal
 import subprocess
 import shutil

+from contextlib import contextmanager
 from requests.exceptions import HTTPError

 from ..common import fqdn, dest_reg, os
@@ -203,7 +204,7 @@ class Runbot(models.AbstractModel):
         This method is the default cron for new commit discovery and build sheduling.
         The cron runs for a long time to avoid spamming logs
         """
-        pull_info_failures = set()
+        pull_info_failures = {}
         start_time = time.time()
         timeout = self._get_cron_period()
         get_param = self.env['ir.config_parameter'].get_param
@@ -222,17 +223,32 @@ class Runbot(models.AbstractModel):
             self.env['runbot.build']._local_cleanup()
             self._docker_cleanup()
         _logger.info('Starting loop')
+        if runbot_do_schedule or runbot_do_fetch:
             while time.time() - start_time < timeout:
-            repos = self.env['runbot.repo'].search([('mode', '!=', 'disabled')])
+                if runbot_do_fetch:
+                    self._fetch_loop_turn(host, pull_info_failures)
+                if runbot_do_schedule:
+                    sleep_time = self._scheduler_loop_turn(host, update_frequency)
+                    self.sleep(sleep_time)
+                else:
+                    self.sleep(update_frequency)
+                self._commit()
+
+        host.last_end_loop = fields.Datetime.now()
+
+    def sleep(self, t):
+        time.sleep(t)
+
+    def _fetch_loop_turn(self, host, pull_info_failures, default_sleep=1):
+        with self.manage_host_exception(host) as manager:
+            repos = self.env['runbot.repo'].search([('mode', '!=', 'disabled')])
             processing_batch = self.env['runbot.batch'].search([('state', 'in', ('preparing', 'ready'))], order='id asc')
             preparing_batch = processing_batch.filtered(lambda b: b.state == 'preparing')
             self._commit()
-            if runbot_do_fetch:
                 for repo in repos:
                     try:
-                        repo._update_batches(bool(preparing_batch), ignore=pull_info_failures)
-                        self._commit()
+                    repo._update_batches(force=bool(preparing_batch), ignore=pull_info_failures)
+                    self._commit()  # commit is mainly here to avoid to lose progression in case of fetch failure or concurrent update
                 except HTTPError as e:
                     # Sometimes a pr pull info can fail.
                     # - Most of the time it is only temporary and it will be successfull on next try.
@@ -246,31 +262,35 @@ class Runbot(models.AbstractModel):
                     self.env.cr.rollback()
                     self.env.clear()
                     pull_number = e.response.url.split('/')[-1]
-                        pull_info_failures.add(pull_number)
+                    pull_info_failures[pull_number] = time.time()
                     self.warning('Pr pull info failed for %s', pull_number)
                     self._commit()

             if processing_batch:
                 _logger.info('starting processing of %s batches', len(processing_batch))
                 for batch in processing_batch:
-                    batch._process()
+                    if batch._process():
+                        self._commit()
                 _logger.info('end processing')
                 self._commit()
-            if runbot_do_schedule:
-                sleep_time = self._scheduler_loop_turn(host, update_frequency)
-                self.sleep(sleep_time)
-            else:
-                self.sleep(update_frequency)
-            self._commit()
-        host.last_end_loop = fields.Datetime.now()
+            # cleanup old pull_info_failures
+            for pr_number, t in pull_info_failures.items():
+                if t + 15*60 < time.time():
+                    _logger.warning('Removing %s from pull_info_failures', pr_number)
+                    del self.pull_info_failures[pr_number]

-    def sleep(self, t):
-        time.sleep(t)
+        return manager.get('sleep', default_sleep)

     def _scheduler_loop_turn(self, host, default_sleep=1):
-        try:
+        _logger.info('Scheduling...')
+        with self.manage_host_exception(host) as manager:
             self._scheduler(host)
+        return manager.get('sleep', default_sleep)
+
+    @contextmanager
+    def manage_host_exception(self, host):
+        res = {}
+        try:
+            yield res
             host.last_success = fields.Datetime.now()
             self._commit()
         except Exception as e:
@@ -284,12 +304,11 @@ class Runbot(models.AbstractModel):
                 host.last_exception = str(e)
                 host.exception_count = 1
             self._commit()
-            return random.uniform(0, 3)
+            res['sleep'] = random.uniform(0, 3)
         else:
             if host.last_exception:
                 host.last_exception = ""
                 host.exception_count = 0
-            return default_sleep

     def _source_cleanup(self):
         try:
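
The manage_host_exception helper introduced above reports its outcome through the object it yields: the with-body may raise, the context manager swallows the exception, records it on the host and suggests a back-off via the yielded dict. A minimal standalone sketch of that pattern (illustrative only, not runbot code; do_work stands in for the wrapped call such as self._scheduler(host)):

import random
from contextlib import contextmanager

@contextmanager
def manage_exception():
    res = {}  # yielded so the caller can read values written on failure
    try:
        yield res
    except Exception:
        # swallow the error and suggest a small random back-off
        res['sleep'] = random.uniform(0, 3)

def do_work():
    pass  # stand-in for the real work, which may raise

def loop_turn(default_sleep=1):
    with manage_exception() as manager:
        do_work()
    return manager.get('sleep', default_sleep)

print(loop_turn())  # prints 1 while do_work() succeeds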


@@ -1,122 +1,23 @@
 #!/usr/bin/python3
-import argparse
+from tools import RunbotClient, run
 import logging
-import os
-import sys
-import threading
-import signal
-from logging.handlers import WatchedFileHandler
-
-LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'
-logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
-logging.getLogger('odoo.addons.runbot').setLevel(logging.DEBUG)
-logging.addLevelName(25, "!NFO")
 _logger = logging.getLogger(__name__)


+class BuilderClient(RunbotClient):

-class RunbotClient():

+    def on_start(self):
+        self.env['runbot.repo'].search([('mode', '!=', 'disabled')])._update()

-    def __init__(self, env):
-        self.env = env
-        self.ask_interrupt = threading.Event()
-
-    def main_loop(self):
-        from odoo import fields
-        signal.signal(signal.SIGINT, self.signal_handler)
-        signal.signal(signal.SIGTERM, self.signal_handler)
-        signal.signal(signal.SIGQUIT, self.dump_stack)
-        host = self.env['runbot.host']._get_current()
-        host._bootstrap()
-        count = 0
-        while True:
-            try:
-                host.last_start_loop = fields.Datetime.now()
-                count = count % 60
-                if count == 0:
-                    logging.info('Host %s running with %s slots on pid %s%s', host.name, host.nb_worker, os.getpid(), ' (assigned only)' if host.assigned_only else '')
+    def loop_turn(self):
+        if self.count == 1: # cleanup at second iteration
             self.env['runbot.runbot']._source_cleanup()
             self.env['runbot.build']._local_cleanup()
             self.env['runbot.runbot']._docker_cleanup()
-                host.set_psql_conn_count()
-                _logger.info('Building docker image...')
-                host._docker_build()
-                _logger.info('Scheduling...')
-                count += 1
-                sleep_time = self.env['runbot.runbot']._scheduler_loop_turn(host)
-                host.last_end_loop = fields.Datetime.now()
-                self.env.cr.commit()
-                self.env.clear()
-                self.sleep(sleep_time)
-            except Exception as e:
-                _logger.exception('Builder main loop failed with: %s', e)
-                self.env.cr.rollback()
-                self.env.clear()
-                self.sleep(10)
-            if self.ask_interrupt.is_set():
-                return
-
-    def signal_handler(self, signal, frame):
-        if self.ask_interrupt.is_set():
-            _logger.info("Second Interrupt detected, force exit")
-            os._exit(1)
-        _logger.info("Interrupt detected")
-        self.ask_interrupt.set()
-
-    def dump_stack(self, signal, frame):
-        import odoo
-        odoo.tools.misc.dumpstacks()
-
-    def sleep(self, t):
-        self.ask_interrupt.wait(t)
-
-
-def run():
-    # parse args
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--odoo-path', help='Odoo sources path')
-    parser.add_argument('--db_host', default='127.0.0.1')
-    parser.add_argument('--db_port', default='5432')
-    parser.add_argument('--db_user')
-    parser.add_argument('--db_password')
-    parser.add_argument('-d', '--database', default='runbot', help='name of runbot db')
-    parser.add_argument('--logfile', default=False)
-    args = parser.parse_args()
-    if args.logfile:
-        dirname = os.path.dirname(args.logfile)
-        if dirname and not os.path.isdir(dirname):
-            os.makedirs(dirname)
-        handler = WatchedFileHandler(args.logfile)
-        formatter = logging.Formatter(LOG_FORMAT)
-        handler.setFormatter(formatter)
-        logging.getLogger().addHandler(handler)
-
-    # configure odoo
-    sys.path.append(args.odoo_path)
-    import odoo
-    _logger.info("Starting scheduler on database %s", args.database)
-    odoo.tools.config['db_host'] = args.db_host
-    odoo.tools.config['db_port'] = args.db_port
-    odoo.tools.config['db_user'] = args.db_user
-    odoo.tools.config['db_password'] = args.db_password
-    addon_path = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
-    config_addons_path = odoo.tools.config['addons_path']
-    odoo.tools.config['addons_path'] = ','.join([config_addons_path, addon_path])
-
-    # create environment
-    registry = odoo.registry(args.database)
-    with odoo.api.Environment.manage():
-        with registry.cursor() as cr:
-            env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
-            runbot_client = RunbotClient(env)
-            # run main loop
-            runbot_client.main_loop()
+        self.host.set_psql_conn_count()
+        self.host._docker_build()
+        return self.env['runbot.runbot']._scheduler_loop_turn(self.host)

 if __name__ == '__main__':
-    run()
-    _logger.info("Stopping gracefully")
+    run(BuilderClient)

runbot_builder/leader.py Executable file

@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+from tools import RunbotClient, run
+import logging
+import time
+
+_logger = logging.getLogger(__name__)
+
+
+class LeaderClient(RunbotClient): # Conductor, Director, Main, Maestro, Lead
+
+    def __init__(self, env):
+        self.pull_info_failures = {}
+        super().__init__(env)
+
+    def loop_turn(self):
+        return self.env['runbot.runbot']._fetch_loop_turn(self.host, self.pull_info_failures)
+
+if __name__ == '__main__':
+    run(LeaderClient)

runbot_builder/tester.py Executable file

@@ -0,0 +1,16 @@
+#!/usr/bin/python3
+from tools import RunbotClient, run
+import logging
+
+_logger = logging.getLogger(__name__)
+
+
+class TesterClient(RunbotClient):
+
+    def loop_turn(self):
+        _logger.info('='*50)
+        _logger.info('Testing: %s', self.env['runbot.build'].search_count([('local_state', '=', 'testing')]))
+        _logger.info('Pending: %s', self.env['runbot.build'].search_count([('local_state', '=', 'pending')]))
+        return 10
+
+if __name__ == '__main__':
+    run(TesterClient)

runbot_builder/tools.py Normal file

@@ -0,0 +1,124 @@
+#!/usr/bin/python3
+import argparse
+import logging
+import os
+import sys
+import threading
+import signal
+
+from logging.handlers import WatchedFileHandler
+
+LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
+logging.getLogger('odoo.addons.runbot').setLevel(logging.DEBUG)
+logging.addLevelName(25, "!NFO")
+_logger = logging.getLogger(__name__)
+
+
+class RunbotClient():
+
+    def __init__(self, env):
+        self.env = env
+        self.ask_interrupt = threading.Event()
+        self.host = None
+        self.count = 0
+        self.max_count = 60
+
+    def on_start(self):
+        pass
+
+    def main_loop(self):
+        from odoo import fields
+        self.on_start()
+        signal.signal(signal.SIGINT, self.signal_handler)
+        signal.signal(signal.SIGTERM, self.signal_handler)
+        signal.signal(signal.SIGQUIT, self.dump_stack)
+        self.host = self.env['runbot.host']._get_current()
+        self.host._bootstrap()
+        logging.info(
+            'Host %s running with %s slots on pid %s%s',
+            self.host.name,
+            self.host.nb_worker,
+            os.getpid(),
+            ' (assigned only)' if self.host.assigned_only else ''
+        )
+        while True:
+            try:
+                self.host.last_start_loop = fields.Datetime.now()
+                self.count = self.count % self.max_count
+                sleep_time = self.loop_turn()
+                self.count += 1
+                self.host.last_end_loop = fields.Datetime.now()
+                self.env.cr.commit()
+                self.env.clear()
+                self.sleep(sleep_time)
+            except Exception as e:
+                _logger.exception('Builder main loop failed with: %s', e)
+                self.env.cr.rollback()
+                self.env.clear()
+                self.sleep(10)
+            if self.ask_interrupt.is_set():
+                return
+
+    def loop_turn(self):
+        raise NotImplementedError()
+
+    def signal_handler(self, _signal, _frame):
+        if self.ask_interrupt.is_set():
+            _logger.info("Second Interrupt detected, force exit")
+            os._exit(1)
+        _logger.info("Interrupt detected")
+        self.ask_interrupt.set()
+
+    def dump_stack(self, _signal, _frame):
+        import odoo
+        odoo.tools.misc.dumpstacks()
+
+    def sleep(self, t):
+        self.ask_interrupt.wait(t)
+
+
+def run(client_class):
+    # parse args
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--odoo-path', help='Odoo sources path')
+    parser.add_argument('--db_host')
+    parser.add_argument('--db_port')
+    parser.add_argument('--db_user')
+    parser.add_argument('--db_password')
+    parser.add_argument('-d', '--database', default='runbot', help='name of runbot db')
+    parser.add_argument('--logfile', default=False)
+    args = parser.parse_args()
+    if args.logfile:
+        dirname = os.path.dirname(args.logfile)
+        if dirname and not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        handler = WatchedFileHandler(args.logfile)
+        formatter = logging.Formatter(LOG_FORMAT)
+        handler.setFormatter(formatter)
+        logging.getLogger().addHandler(handler)
+
+    # configure odoo
+    sys.path.append(args.odoo_path)
+    import odoo
+    _logger.info("Starting scheduler on database %s", args.database)
+    odoo.tools.config['db_host'] = args.db_host
+    odoo.tools.config['db_port'] = args.db_port
+    odoo.tools.config['db_user'] = args.db_user
+    odoo.tools.config['db_password'] = args.db_password
+    addon_path = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
+    config_addons_path = odoo.tools.config['addons_path']
+    odoo.tools.config['addons_path'] = ','.join([config_addons_path, addon_path])
+
+    # create environment
+    registry = odoo.registry(args.database)
+    with odoo.api.Environment.manage():
+        with registry.cursor() as cr:
+            env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
+            client = client_class(env)
+            # run main loop
+            client.main_loop()
+    _logger.info("Stopping gracefully")
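
With tools.py in place, each daemon is a small RunbotClient subclass whose loop_turn() returns the number of seconds to sleep before the next turn; builder.py, leader.py and tester.py above all follow that shape. As a hedged illustration of the extension point, a hypothetical extra client could look like the following (MonitorClient, its query and its 30-second period are made up for this example; RunbotClient, run, self.env and self.host come from the code above):

#!/usr/bin/python3
from tools import RunbotClient, run
import logging

_logger = logging.getLogger(__name__)


class MonitorClient(RunbotClient):  # hypothetical example, not part of this commit

    def loop_turn(self):
        # self.env and self.host are set up by RunbotClient.main_loop()
        running = self.env['runbot.build'].search_count([('local_state', '=', 'running')])
        _logger.info('Host %s sees %s running builds', self.host.name, running)
        return 30  # sleep 30 seconds before the next turn

if __name__ == '__main__':
    run(MonitorClient)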