[FIX] runbot_merge: don't over-bump timeout

By updating the staging timeout every time we run `_compute_state` and
still have a `pending` status, we'd actually bump the timeout on
*every* CI success except the last one. That was never the intention,
and it could add an hour or two to the mergebot-side timeout.

Instead, add an `updated_at` attribute to statuses (the name is taken
from the webhook payload, even though we don't use the payload's
value), read it when we see `pending` required statuses, and update
the staging timeout based on it if necessary.

That way, as long as we don't get *new* pending statuses, the timeout
doesn't get updated.

Fixes #952
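
The rule the fix implements, as a standalone sketch (hypothetical helper, not the actual model code): the limit is derived from the `updated_at` of the most recent *pending* status, so recomputing the state after a success leaves it untouched.

```python
from datetime import datetime, timedelta

def bumped_timeout(statuses: dict, current_limit: datetime, ci_timeout: int) -> datetime:
    """Sketch of the fixed behaviour: only a *newer* pending status moves the limit."""
    # latest reception time among the pending statuses, as an ISO-8601 string
    last_pending = max(
        (s.get('updated_at', '') for s in statuses.values() if s.get('state') == 'pending'),
        default='',
    )
    if not last_pending:
        return current_limit
    candidate = datetime.fromisoformat(last_pending) + timedelta(minutes=ci_timeout)
    # later successes don't change `last_pending`, so recomputation yields
    # the same candidate and the limit is not bumped again
    return max(current_limit, candidate)
```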
Xavier Morel 2024-09-20 12:17:17 +02:00
parent 154e610bbc
commit e309e1a3a2
3 changed files with 46 additions and 27 deletions

View File

@@ -2,6 +2,7 @@ import hashlib
 import hmac
 import logging
 import json
+from datetime import datetime
 import sentry_sdk
 import werkzeug.exceptions
@@ -358,7 +359,8 @@ def handle_status(env, event):
         event['context']: {
             'state': event['state'],
             'target_url': event['target_url'],
-            'description': event['description']
+            'description': event['description'],
+            'updated_at': datetime.now().isoformat(timespec='seconds'),
         }
     })
     # create status, or merge update into commit *unless* the update is already
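
With this change, every status cached for a commit records when the mergebot received it, next to the fields copied from the webhook. For illustration only (the values below are made up), a cached entry now looks like:

```python
# illustrative shape of one commit's cached statuses (made-up values)
{
    'ci/runbot': {
        'state': 'pending',
        'target_url': 'https://example.org/build/1234',
        'description': None,
        'updated_at': '2024-09-20T10:17:17',
    },
}
```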

View File

@@ -2133,6 +2133,7 @@ class Stagings(models.Model):
         if not self.env.user.has_group('runbot_merge.status'):
             raise AccessError("You are not allowed to post a status.")
+        now = datetime.datetime.now().isoformat(timespec='seconds')
         for s in self:
             if not s.target.project_id.staging_rpc:
                 continue
@@ -2145,6 +2146,7 @@
                 'state': status,
                 'target_url': target_url,
                 'description': description,
+                'updated_at': now,
             }
             s.statuses_cache = json.dumps(st)
@@ -2158,39 +2160,45 @@
         "heads.repository_id.status_ids.context",
     )
     def _compute_state(self):
-        for s in self:
-            if s.state != 'pending':
+        for staging in self:
+            if staging.state != 'pending':
                 continue
             # maps commits to the statuses they need
             required_statuses = [
-                (h.commit_id.sha, h.repository_id.status_ids._for_staging(s).mapped('context'))
-                for h in s.heads
+                (h.commit_id.sha, h.repository_id.status_ids._for_staging(staging).mapped('context'))
+                for h in staging.heads
             ]
-            cmap = json.loads(s.statuses_cache)
+            cmap = json.loads(staging.statuses_cache)
-            update_timeout_limit = False
-            st = 'success'
+            last_pending = ""
+            state = 'success'
             for head, reqs in required_statuses:
                 statuses = cmap.get(head) or {}
-                for v in map(lambda n: statuses.get(n, {}).get('state'), reqs):
-                    if st == 'failure' or v in ('error', 'failure'):
-                        st = 'failure'
+                for status in (statuses.get(n, {}) for n in reqs):
+                    v = status.get('state')
+                    if state == 'failure' or v in ('error', 'failure'):
+                        state = 'failure'
                     elif v is None:
-                        st = 'pending'
+                        state = 'pending'
                     elif v == 'pending':
-                        st = 'pending'
-                        update_timeout_limit = True
+                        state = 'pending'
+                        last_pending = max(last_pending, status.get('updated_at', ''))
                     else:
                         assert v == 'success'
-            s.state = st
-            if s.state != 'pending':
+            staging.state = state
+            if staging.state != 'pending':
                 self.env.ref("runbot_merge.merge_cron")._trigger()
-            if update_timeout_limit:
-                s.timeout_limit = datetime.datetime.now() + datetime.timedelta(minutes=s.target.project_id.ci_timeout)
-                self.env.ref("runbot_merge.merge_cron")._trigger(s.timeout_limit)
-                _logger.debug("%s got pending status, bumping timeout to %s (%s)", self, s.timeout_limit, cmap)
+            if last_pending:
+                timeout = datetime.datetime.fromisoformat(last_pending) \
+                    + datetime.timedelta(minutes=staging.target.project_id.ci_timeout)
+                if timeout > staging.timeout_limit:
+                    staging.timeout_limit = timeout
+                    self.env.ref("runbot_merge.merge_cron")._trigger(timeout)
+                    _logger.debug("%s got pending status, bumping timeout to %s", staging, timeout)
     def action_cancel(self):
         w = self.env['runbot_merge.stagings.cancel'].create({
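
Two details of the rewritten loop are worth noting: `updated_at` values are second-resolution ISO-8601 strings, so `max()` compares them lexicographically, which for this fixed format matches chronological order, and `datetime.fromisoformat` turns the winning string back into a `datetime` for the limit computation. A quick standalone check (not project code):

```python
from datetime import datetime

a = datetime(2024, 9, 20, 10, 5).isoformat(timespec='seconds')   # '2024-09-20T10:05:00'
b = datetime(2024, 9, 20, 11, 30).isoformat(timespec='seconds')  # '2024-09-20T11:30:00'

assert max(a, b) == b                                # string order == time order
assert datetime.fromisoformat(b) == datetime(2024, 9, 20, 11, 30)
```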

View File

@@ -605,7 +605,6 @@ def test_staging_ci_timeout(env, repo, config, page, update_op: Callable[[int],
     assert dangerbox
     assert dangerbox[0].text == 'timed out (>60 minutes)'
-@pytest.mark.defaultstatuses
 def test_timeout_bump_on_pending(env, repo, config):
     with repo:
         [m, c] = repo.make_commits(
@@ -615,8 +614,9 @@ def test_timeout_bump_on_pending(env, repo, config):
     )
     repo.make_ref('heads/master', m)
-    prx = repo.make_pr(title='title', body='body', target='master', head=c)
-    repo.post_status(prx.head, 'success')
+    prx = repo.make_pr(target='master', head=c)
+    repo.post_status(prx.head, 'success', 'ci/runbot')
+    repo.post_status(prx.head, 'success', 'legal/cla')
     prx.post_comment('hansen r+', config['role_reviewer']['token'])
     env.run_crons()
@@ -624,9 +624,18 @@
     old_timeout = odoo.fields.Datetime.to_string(datetime.datetime.now() - datetime.timedelta(days=15))
     st.timeout_limit = old_timeout
     with repo:
-        repo.post_status('staging.master', 'pending')
+        repo.post_status('staging.master', 'pending', 'ci/runbot')
     env.run_crons(None)
-    assert st.timeout_limit > old_timeout
+    assert st.timeout_limit > old_timeout, "receiving a pending status should bump the timeout"
+    st.timeout_limit = old_timeout
+    # clear the statuses cache to remove the memoized status times
+    st.statuses_cache = "{}"
+    st.commit_ids.statuses = "{}"
+    with repo:
+        repo.post_status('staging.master', 'success', 'legal/cla')
+    env.run_crons(None)
+    assert st.timeout_limit == old_timeout, "receiving a success status should *not* bump the timeout"
 def test_staging_ci_failure_single(env, repo, users, config, page):
     """ on failure of single-PR staging, mark & notify failure
@@ -3327,8 +3336,8 @@ class TestUnknownPR:
         c = env['runbot_merge.commit'].search([('sha', '=', prx.head)])
         assert json.loads(c.statuses) == {
-            'legal/cla': {'state': 'success', 'target_url': None, 'description': None},
-            'ci/runbot': {'state': 'success', 'target_url': 'http://example.org/wheee', 'description': None}
+            'legal/cla': {'state': 'success', 'target_url': None, 'description': None, 'updated_at': matches("$$")},
+            'ci/runbot': {'state': 'success', 'target_url': 'http://example.org/wheee', 'description': None, 'updated_at': matches("$$")}
         }
         assert prx.comments == [
             seen(env, prx, users),