[IMP] runbot_merge: update staging timeout on 'pending' CI

If the CI is greatly backed up (either insufficient capacity or jobs
spike) a timeout which is normally perfectly fine might be
insufficient e.g. given a 2h timeout, if a job normally takes 80mn but
the staging's job starts 40mn after the staging was actually created
we're sunk. And cancelling the staging once the job has finally gotten
started is not going to improve load on the CI, it just wastes a CI
slot.

Therefore assume a `pending` event denotes the actual start of the job
on the CI, and reset the timeout to start from that moment so
ci_timeout is the timeout of the CI job itself, not of the staging
having been created.

Closes #202
This commit is contained in:
Xavier Morel 2019-09-23 15:42:18 +02:00
parent 78ad4b4e4b
commit 7659293a2b
2 changed files with 41 additions and 5 deletions

View File

@ -143,7 +143,7 @@ class Project(models.Model):
self.env['runbot_merge.pull_requests.feedback'].browse(to_remove).unlink()
def is_timed_out(self, staging):
return fields.Datetime.from_string(staging.staged_at) + datetime.timedelta(minutes=self.ci_timeout) < datetime.datetime.now()
return fields.Datetime.from_string(staging.timeout_limit) < datetime.datetime.now()
def _check_fetch(self, commit=False):
"""
@ -1311,6 +1311,7 @@ class Stagings(models.Model):
active = fields.Boolean(default=True)
staged_at = fields.Datetime(default=fields.Datetime.now)
timeout_limit = fields.Datetime(store=True, compute='_compute_timeout_limit')
reason = fields.Text("Reason for final state (if any)")
# seems simpler than adding yet another indirection through a model
@ -1343,6 +1344,17 @@ class Stagings(models.Model):
for status in [to_status(st)]
]
# only depend on staged_at as it should not get modified, but we might
# update the CI timeout after the staging have been created and we
# *do not* want to update the staging timeouts in that case
@api.depends('staged_at')
def _compute_timeout_limit(self):
for st in self:
st.timeout_limit = fields.Datetime.to_string(
fields.Datetime.from_string(st.staged_at)
+ datetime.timedelta(minutes=st.target.project_id.ci_timeout)
)
def _validate(self):
Commits = self.env['runbot_merge.commit']
for s in self:
@ -1357,6 +1369,7 @@ class Stagings(models.Model):
('sha', 'in', heads)
])
update_timeout_limit = False
reqs = [r.strip() for r in s.target.project_id.required_statuses.split(',')]
st = 'success'
for c in commits:
@ -1364,17 +1377,22 @@ class Stagings(models.Model):
for v in map(lambda n: state_(statuses, n), reqs):
if st == 'failure' or v in ('error', 'failure'):
st = 'failure'
elif v in (None, 'pending'):
elif v is None:
st = 'pending'
elif v == 'pending':
st = 'pending'
update_timeout_limit = True
else:
assert v == 'success'
# mark failure as soon as we find a failed status, but wait until
# all commits are known & not pending to mark a success
if st == 'success' and len(commits) < len(heads):
s.state = 'pending'
continue
st = 'pending'
s.state = st
vals = {'state': st}
if update_timeout_limit:
vals['timeout_limit'] = fields.Datetime.to_string(datetime.datetime.now() + datetime.timedelta(minutes=s.target.project_id.ci_timeout))
s.write(vals)
@api.multi
def action_cancel(self):

View File

@ -440,6 +440,24 @@ def test_staging_ci_timeout(env, repo, users):
env['runbot_merge.project']._check_progress()
assert pr1.state == 'error', "timeout should fail the PR"
def test_timeout_bump_on_pending(env, repo):
m = repo.make_commit(None, 'initial', None, tree={'f': '0'})
repo.make_ref('heads/master', m)
c = repo.make_commit(m, 'c', None, tree={'f': '1'})
prx = repo.make_pr('title', 'body', target='master', ctid=c, user='user')
repo.post_status(prx.head, 'success', 'ci/runbot')
repo.post_status(prx.head, 'success', 'legal/cla')
prx.post_comment('hansen r+', 'reviewer')
run_crons(env)
st = env['runbot_merge.stagings'].search([])
old_timeout = odoo.fields.Datetime.to_string(datetime.datetime.now() - datetime.timedelta(days=15))
st.timeout_limit = old_timeout
repo.post_status(repo.commit('heads/staging.master').id, 'pending', 'ci/runbot')
env['runbot_merge.commit']._notify()
assert st.timeout_limit > old_timeout
def test_staging_ci_failure_single(env, repo, users):
""" on failure of single-PR staging, mark & notify failure
"""