[IMP] runbot: try to fetch multiple times before disabling

Sometimes, it happens that a `git fetch` fails with an error code 128
for example. When this happens, the runbot host is immediately disabled.

During investigations of such cases, we found that simply retrying the
fetch command works.

With this commit, the fetch command is tried 5 times with an increasing
delay before deciding to disable the runbot host.
This commit is contained in:
Christophe Monniez 2020-04-20 11:39:06 +02:00 committed by XavierDo
parent c005561546
commit b517df4ff0
2 changed files with 62 additions and 9 deletions

View File

@ -175,7 +175,7 @@ class runbot_repo(models.Model):
self.ensure_one()
_logger.debug("git command: git (dir %s) %s", self.short_name, ' '.join(cmd))
cmd = ['git', '--git-dir=%s' % self.path] + cmd
return subprocess.check_output(cmd).decode('utf-8')
return subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode()
def _git_rev_parse(self, branch_name):
return self._git(['rev-parse', branch_name]).strip()
@ -425,13 +425,23 @@ class runbot_repo(models.Model):
# Extracted from update_git to be easily overriden in external module
self.ensure_one()
repo = self
try:
repo._git(['fetch', '-p', 'origin', '+refs/heads/*:refs/heads/*', '+refs/pull/*/head:refs/pull/*'])
except subprocess.CalledProcessError as e:
message = 'Failed to fetch repo %s with return code %s. Original command was %s' % (repo.name, e.returncode, e.cmd)
_logger.exception(message)
host = self.env['runbot.host'].search([('name', '=', fqdn())])
host.disable()
try_count = 0
failure = True
delay = 0
while failure and try_count < 5:
time.sleep(delay)
try:
repo._git(['fetch', '-p', 'origin', '+refs/heads/*:refs/heads/*', '+refs/pull/*/head:refs/pull/*'])
failure = False
except subprocess.CalledProcessError as e:
try_count += 1
delay = delay * 1.5 if delay else 0.5
if try_count > 4:
message = 'Failed to fetch repo %s: %s' % (repo.name, e.output.decode())
_logger.exception(message)
host = self.env['runbot.host']._get_current()
host.disable()
def _update(self, force=True):
""" Update the physical git reposotories on FS"""

View File

@ -2,7 +2,9 @@
import datetime
from unittest import skip
from unittest.mock import patch, Mock
from subprocess import CalledProcessError
from odoo.tests import common, TransactionCase
from odoo.tools import mute_logger
import logging
import odoo
import time
@ -211,7 +213,6 @@ class Test_Repo(RunbotCase):
_test_times('runbot.repo.reftime', 'set_ref_time', 'get_ref_time')
class Test_Github(TransactionCase):
def test_github(self):
""" Test different github responses or failures"""
@ -239,6 +240,48 @@ class Test_Github(TransactionCase):
self.assertEqual(2, mock_session.return_value.post.call_count, "_github method should try two times by default")
class TestFetch(RunbotCase):
def setUp(self):
super(TestFetch, self).setUp()
self.mock_root = self.patchers['repo_root_patcher']
def test_update_fetch_cmd(self):
""" Test that git fetch is tried multiple times before disabling host """
fetch_count = 0
force_failure = False
def git_side_effect(cmd):
nonlocal fetch_count
fetch_count += 1
if fetch_count < 3 or force_failure:
raise CalledProcessError(128, cmd, 'Dummy Error'.encode('utf-8'))
else:
return True
git_patcher = self.patchers['git_patcher']
git_patcher.side_effect = git_side_effect
repo = self.Repo.create({'name': 'bla@example.com:foo/bar'})
host = self.env['runbot.host']._get_current()
self.assertFalse(host.assigned_only)
# Ensure that Host is not disabled if fetch succeeds after 3 tries
with mute_logger("odoo.addons.runbot.models.repo"):
repo._update_fetch_cmd()
self.assertFalse(host.assigned_only, "Host should not be disabled when fetch succeeds")
self.assertEqual(fetch_count, 3)
# Now ensure that host is disabled after 5 unsuccesful tries
force_failure = True
fetch_count = 0
with mute_logger("odoo.addons.runbot.models.repo"):
repo._update_fetch_cmd()
self.assertTrue(host.assigned_only)
self.assertEqual(fetch_count, 5)
class Test_Repo_Scheduler(RunbotCase):
def setUp(self):