From 7659293a2bd01cfab34c39d8e0aec7226e1fb033 Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Mon, 23 Sep 2019 15:42:18 +0200 Subject: [PATCH] [IMP] runbot_merge: update staging timeout on 'pending' CI If the CI is greatly backed up (either insufficient capacity or jobs spike) a timeout which is normally perfectly fine might be insufficient e.g. given a 2h timeout, if a job normally takes 80mn but the staging's job starts 40mn after the staging was actually created we're sunk. And cancelling the staging once the job has finally gotten started is not going to improve load on the CI, it just wastes a CI slot. Therefore assume a `pending` event denotes the actual start of the job on the CI, and reset the timeout to start from that moment so ci_timeout is the timeout of the CI job itself, not of the staging having been created. Closes #202 --- runbot_merge/models/pull_requests.py | 28 +++++++++++++++++++++++----- runbot_merge/tests/test_basic.py | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/runbot_merge/models/pull_requests.py b/runbot_merge/models/pull_requests.py index e659188f..64b54d99 100644 --- a/runbot_merge/models/pull_requests.py +++ b/runbot_merge/models/pull_requests.py @@ -143,7 +143,7 @@ class Project(models.Model): self.env['runbot_merge.pull_requests.feedback'].browse(to_remove).unlink() def is_timed_out(self, staging): - return fields.Datetime.from_string(staging.staged_at) + datetime.timedelta(minutes=self.ci_timeout) < datetime.datetime.now() + return fields.Datetime.from_string(staging.timeout_limit) < datetime.datetime.now() def _check_fetch(self, commit=False): """ @@ -1311,6 +1311,7 @@ class Stagings(models.Model): active = fields.Boolean(default=True) staged_at = fields.Datetime(default=fields.Datetime.now) + timeout_limit = fields.Datetime(store=True, compute='_compute_timeout_limit') reason = fields.Text("Reason for final state (if any)") # seems simpler than adding yet another indirection through a model @@ -1343,6 +1344,17 @@ class Stagings(models.Model): for status in [to_status(st)] ] + # only depend on staged_at as it should not get modified, but we might + # update the CI timeout after the staging have been created and we + # *do not* want to update the staging timeouts in that case + @api.depends('staged_at') + def _compute_timeout_limit(self): + for st in self: + st.timeout_limit = fields.Datetime.to_string( + fields.Datetime.from_string(st.staged_at) + + datetime.timedelta(minutes=st.target.project_id.ci_timeout) + ) + def _validate(self): Commits = self.env['runbot_merge.commit'] for s in self: @@ -1357,6 +1369,7 @@ class Stagings(models.Model): ('sha', 'in', heads) ]) + update_timeout_limit = False reqs = [r.strip() for r in s.target.project_id.required_statuses.split(',')] st = 'success' for c in commits: @@ -1364,17 +1377,22 @@ class Stagings(models.Model): for v in map(lambda n: state_(statuses, n), reqs): if st == 'failure' or v in ('error', 'failure'): st = 'failure' - elif v in (None, 'pending'): + elif v is None: st = 'pending' + elif v == 'pending': + st = 'pending' + update_timeout_limit = True else: assert v == 'success' # mark failure as soon as we find a failed status, but wait until # all commits are known & not pending to mark a success if st == 'success' and len(commits) < len(heads): - s.state = 'pending' - continue + st = 'pending' - s.state = st + vals = {'state': st} + if update_timeout_limit: + vals['timeout_limit'] = fields.Datetime.to_string(datetime.datetime.now() + datetime.timedelta(minutes=s.target.project_id.ci_timeout)) + s.write(vals) @api.multi def action_cancel(self): diff --git a/runbot_merge/tests/test_basic.py b/runbot_merge/tests/test_basic.py index 6de13bb3..4c9bf19d 100644 --- a/runbot_merge/tests/test_basic.py +++ b/runbot_merge/tests/test_basic.py @@ -440,6 +440,24 @@ def test_staging_ci_timeout(env, repo, users): env['runbot_merge.project']._check_progress() assert pr1.state == 'error', "timeout should fail the PR" +def test_timeout_bump_on_pending(env, repo): + m = repo.make_commit(None, 'initial', None, tree={'f': '0'}) + repo.make_ref('heads/master', m) + + c = repo.make_commit(m, 'c', None, tree={'f': '1'}) + prx = repo.make_pr('title', 'body', target='master', ctid=c, user='user') + repo.post_status(prx.head, 'success', 'ci/runbot') + repo.post_status(prx.head, 'success', 'legal/cla') + prx.post_comment('hansen r+', 'reviewer') + run_crons(env) + + st = env['runbot_merge.stagings'].search([]) + old_timeout = odoo.fields.Datetime.to_string(datetime.datetime.now() - datetime.timedelta(days=15)) + st.timeout_limit = old_timeout + repo.post_status(repo.commit('heads/staging.master').id, 'pending', 'ci/runbot') + env['runbot_merge.commit']._notify() + assert st.timeout_limit > old_timeout + def test_staging_ci_failure_single(env, repo, users): """ on failure of single-PR staging, mark & notify failure """