[FIX] runbot_merge: don't over-bump timeout

By updating the staging timeout every time we ran `_compute_state` while a
required status was still `pending`, we would actually bump the timeout on
*every successful CI status* except the last one. That was never the
intention, and it can add an hour or two to the mergebot-side timeout.

Instead, add an `updated_at` attribute to statuses (the name is taken from
the webhook payload, even though we don't use that field), read it when we
see `pending` required statuses, and update the staging timeout based on it
if necessary.

That way, as long as we don't receive *new* pending statuses, the timeout
doesn't get bumped.

Fixes #952
Xavier Morel 2024-09-20 12:17:17 +02:00
parent 154e610bbc
commit e309e1a3a2
3 changed files with 46 additions and 27 deletions
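
The core of the change is in `Stagings._compute_state` (second file below):
instead of resetting the timeout to `now() + ci_timeout` whenever *any*
required status is pending, the deadline is now derived from the `updated_at`
of the most recent pending status, and only ever moved forward. As a minimal
standalone sketch of that rule (the function name, parameters and the shape
of `statuses` are illustrative assumptions, not the actual model API):

# Sketch only: the real logic lives in Stagings._compute_state and reads the
# statuses cache of the staging's heads; names and shapes here are assumptions.
from datetime import datetime, timedelta

def bumped_timeout(statuses, current_limit, ci_timeout_minutes):
    # latest `updated_at` among still-pending statuses, '' if none are pending
    last_pending = max(
        (s.get('updated_at', '') for s in statuses.values()
         if s.get('state') == 'pending'),
        default='',
    )
    if not last_pending:
        # no pending status: leave the deadline untouched
        return current_limit
    candidate = datetime.fromisoformat(last_pending) + timedelta(minutes=ci_timeout_minutes)
    # only push the deadline forward, never pull it back
    return max(current_limit, candidate)

Because the deadline is anchored to the pending status's own timestamp rather
than to `now()`, re-running the compute after an unrelated successful status
is a no-op.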

View File

@@ -2,6 +2,7 @@ import hashlib
 import hmac
 import logging
 import json
+from datetime import datetime
 
 import sentry_sdk
 import werkzeug.exceptions
@@ -358,7 +359,8 @@ def handle_status(env, event):
         event['context']: {
             'state': event['state'],
             'target_url': event['target_url'],
-            'description': event['description']
+            'description': event['description'],
+            'updated_at': datetime.now().isoformat(timespec='seconds'),
         }
     })
     # create status, or merge update into commit *unless* the update is already

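With the extra field, the status payload recorded for a commit looks roughly
like the following; this is a hypothetical example (context, URL and timestamp
made up), only the shape matters:

# Hypothetical cache entry, for illustration only
cached_statuses = {
    'ci/runbot': {
        'state': 'pending',
        'target_url': 'https://example.org/build/1234',
        'description': None,
        'updated_at': '2024-09-20T12:17:17',
    },
}
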
View File

@@ -2133,6 +2133,7 @@ class Stagings(models.Model):
         if not self.env.user.has_group('runbot_merge.status'):
             raise AccessError("You are not allowed to post a status.")
 
+        now = datetime.datetime.now().isoformat(timespec='seconds')
         for s in self:
             if not s.target.project_id.staging_rpc:
                 continue
@@ -2145,6 +2146,7 @@
                 'state': status,
                 'target_url': target_url,
                 'description': description,
+                'updated_at': now,
             }
             s.statuses_cache = json.dumps(st)
@@ -2158,39 +2160,45 @@
         "heads.repository_id.status_ids.context",
     )
     def _compute_state(self):
-        for s in self:
-            if s.state != 'pending':
+        for staging in self:
+            if staging.state != 'pending':
                 continue
             # maps commits to the statuses they need
             required_statuses = [
-                (h.commit_id.sha, h.repository_id.status_ids._for_staging(s).mapped('context'))
-                for h in s.heads
+                (h.commit_id.sha, h.repository_id.status_ids._for_staging(staging).mapped('context'))
+                for h in staging.heads
             ]
-            cmap = json.loads(s.statuses_cache)
+            cmap = json.loads(staging.statuses_cache)
 
-            update_timeout_limit = False
-            st = 'success'
+            last_pending = ""
+            state = 'success'
             for head, reqs in required_statuses:
                 statuses = cmap.get(head) or {}
-                for v in map(lambda n: statuses.get(n, {}).get('state'), reqs):
-                    if st == 'failure' or v in ('error', 'failure'):
-                        st = 'failure'
+                for status in (statuses.get(n, {}) for n in reqs):
+                    v = status.get('state')
+                    if state == 'failure' or v in ('error', 'failure'):
+                        state = 'failure'
                     elif v is None:
-                        st = 'pending'
+                        state = 'pending'
                     elif v == 'pending':
-                        st = 'pending'
-                        update_timeout_limit = True
+                        state = 'pending'
+                        last_pending = max(last_pending, status.get('updated_at', ''))
                     else:
                         assert v == 'success'
 
-            s.state = st
-            if s.state != 'pending':
+            staging.state = state
+            if staging.state != 'pending':
                 self.env.ref("runbot_merge.merge_cron")._trigger()
 
-            if update_timeout_limit:
-                s.timeout_limit = datetime.datetime.now() + datetime.timedelta(minutes=s.target.project_id.ci_timeout)
-                self.env.ref("runbot_merge.merge_cron")._trigger(s.timeout_limit)
-                _logger.debug("%s got pending status, bumping timeout to %s (%s)", self, s.timeout_limit, cmap)
+            if last_pending:
+                timeout = datetime.datetime.fromisoformat(last_pending) \
+                    + datetime.timedelta(minutes=staging.target.project_id.ci_timeout)
+                if timeout > staging.timeout_limit:
+                    staging.timeout_limit = timeout
+                    self.env.ref("runbot_merge.merge_cron")._trigger(timeout)
+                    _logger.debug("%s got pending status, bumping timeout to %s", staging, timeout)
 
     def action_cancel(self):
         w = self.env['runbot_merge.stagings.cancel'].create({

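Note that `_compute_state` takes `max()` over the raw `updated_at` strings.
That works because the values are always written via
`isoformat(timespec='seconds')`: with that fixed-width ISO-8601 format,
lexicographic order matches chronological order, and `fromisoformat` parses
the winner back into a datetime. A quick self-contained check of that
assumption:

from datetime import datetime, timedelta

a = datetime(2024, 9, 20, 12, 17, 17)
b = a + timedelta(minutes=5)
sa = a.isoformat(timespec='seconds')  # '2024-09-20T12:17:17'
sb = b.isoformat(timespec='seconds')  # '2024-09-20T12:22:17'

assert max(sa, sb) == sb                 # string max picks the latest timestamp
assert datetime.fromisoformat(sb) == b   # and it round-trips to a datetime
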
View File

@@ -605,7 +605,6 @@ def test_staging_ci_timeout(env, repo, config, page, update_op: Callable[[int],
     assert dangerbox
     assert dangerbox[0].text == 'timed out (>60 minutes)'
 
-@pytest.mark.defaultstatuses
 def test_timeout_bump_on_pending(env, repo, config):
     with repo:
         [m, c] = repo.make_commits(
@@ -615,8 +614,9 @@ def test_timeout_bump_on_pending(env, repo, config):
         )
         repo.make_ref('heads/master', m)
-        prx = repo.make_pr(title='title', body='body', target='master', head=c)
-        repo.post_status(prx.head, 'success')
+        prx = repo.make_pr(target='master', head=c)
+        repo.post_status(prx.head, 'success', 'ci/runbot')
+        repo.post_status(prx.head, 'success', 'legal/cla')
         prx.post_comment('hansen r+', config['role_reviewer']['token'])
     env.run_crons()
@@ -624,9 +624,18 @@
     old_timeout = odoo.fields.Datetime.to_string(datetime.datetime.now() - datetime.timedelta(days=15))
     st.timeout_limit = old_timeout
     with repo:
-        repo.post_status('staging.master', 'pending')
+        repo.post_status('staging.master', 'pending', 'ci/runbot')
     env.run_crons(None)
-    assert st.timeout_limit > old_timeout
+    assert st.timeout_limit > old_timeout, "receiving a pending status should bump the timeout"
+
+    st.timeout_limit = old_timeout
+    # clear the statuses cache to remove the memoized status times
+    st.statuses_cache = "{}"
+    st.commit_ids.statuses = "{}"
+    with repo:
+        repo.post_status('staging.master', 'success', 'legal/cla')
+    env.run_crons(None)
+    assert st.timeout_limit == old_timeout, "receiving a success status should *not* bump the timeout"
 
 def test_staging_ci_failure_single(env, repo, users, config, page):
     """ on failure of single-PR staging, mark & notify failure
@@ -3327,8 +3336,8 @@ class TestUnknownPR:
         c = env['runbot_merge.commit'].search([('sha', '=', prx.head)])
         assert json.loads(c.statuses) == {
-            'legal/cla': {'state': 'success', 'target_url': None, 'description': None},
-            'ci/runbot': {'state': 'success', 'target_url': 'http://example.org/wheee', 'description': None}
+            'legal/cla': {'state': 'success', 'target_url': None, 'description': None, 'updated_at': matches("$$")},
+            'ci/runbot': {'state': 'success', 'target_url': 'http://example.org/wheee', 'description': None, 'updated_at': matches("$$")}
         }
 
         assert prx.comments == [
             seen(env, prx, users),