From 8a931adeacbc1e6cfe535a61892606d17bdb6faa Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Thu, 13 Feb 2025 12:51:43 +0100 Subject: [PATCH] [IMP] forwardport: better fw batch failure notification When an fw batch fails, log a message to its chatter (so the reason for the failure doesn't necessarily have to be hunted down in the logs, although depending on the quality of the error that might still be an issue). Also if a batch keeps failing (fails for more than a day at a retry every hour -- increased from a previous 30 minutes), stop retrying it and flag it in the list view, it's clearly not going to go through. This is because we hit an issue with a completion fw batch created on January 27th which kept failing until it was deleted on February 10th. Thankfully it failed on `git push` and git operations apparently are not rate limited at all, but still it's not great stewardship to keep trying a forwardport which keeps failing. Retrying in case of transient failure makes sense, but after 24 attempts over a day it's either not transient, or it's not working because GitHub is down and hammering it won't help. 
--- forwardport/data/queues.xml | 3 ++- forwardport/models/forwardport.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/forwardport/data/queues.xml b/forwardport/data/queues.xml index 8d552a2a..c7eaac00 100644 --- a/forwardport/data/queues.xml +++ b/forwardport/data/queues.xml @@ -8,7 +8,8 @@ Forward port batches forwardport.batches - + + diff --git a/forwardport/models/forwardport.py b/forwardport/models/forwardport.py index d5e63dbb..7f45133d 100644 --- a/forwardport/models/forwardport.py +++ b/forwardport/models/forwardport.py @@ -3,6 +3,7 @@ import builtins import collections import logging import re +import sys from collections.abc import Mapping from contextlib import ExitStack from datetime import datetime, timedelta @@ -64,6 +65,7 @@ class Queue: class ForwardPortTasks(models.Model, Queue): _name = 'forwardport.batches' + _inherit = ['mail.thread'] _description = 'batches which got merged and are candidates for forward-porting' limit = 10 @@ -76,6 +78,7 @@ class ForwardPortTasks(models.Model, Queue): ('complete', 'Complete ported batches'), ], required=True) retry_after = fields.Datetime(required=True, default='1900-01-01 01:01:01') + cannot_apply = fields.Boolean(compute='_compute_cannot_apply', store=True) retry_after_relative = fields.Char(compute="_compute_retry_after_relative") pr_id = fields.Many2one('runbot_merge.pull_requests') @@ -92,21 +95,31 @@ class ForwardPortTasks(models.Model, Queue): def _search_domain(self): return super()._search_domain() + [ + ('cannot_apply', '=', False), ('retry_after', '<=', fields.Datetime.to_string(fields.Datetime.now())), ] - @api.depends('retry_after') + @api.depends('retry_after', 'cannot_apply') def _compute_retry_after_relative(self): now = fields.Datetime.now() for t in self: - if t.retry_after <= now: + if t.cannot_apply: + t.retry_after_relative = "N/A" + elif t.retry_after <= now: t.retry_after_relative = "" else: t.retry_after_relative = 
format_timedelta(t.retry_after - now, locale=t.env.lang) + @api.depends('retry_after') + def _compute_cannot_apply(self): + for t in self: + t.cannot_apply = t.retry_after > (t.create_date + timedelta(days=1)) + def _on_failure(self): super()._on_failure() - self.retry_after = fields.Datetime.to_string(fields.Datetime.now() + timedelta(minutes=30)) + _, e, _ = sys.exc_info() + self._message_log(body=f"Error while processing forward-port batch: {e}") + self.retry_after = fields.Datetime.now() + timedelta(hours=1) def _process_item(self): batch = self.batch_id