[FIX] forwardport: don't break forward porting on huge conflicts

On forward-porting, odoo/odoo#170183 generates a conflict on pretty much every one of the 1111 files it touches, because they are modify/delete conflicts that generates a conflict message over 200 bytes per file, which is over 200kB of output. For this specific scenario, the commit message was being passed through arguments to the `git` command, resulting in a command line exceeding `MAX_ARG_STRLEN`[^1]. The obvious way to fix this is to pass the commit message via stdin as is done literally in the line above where we just copy a non-generated commit message. However I don't think hundreds of kbytes worth of stdout[^2] is of any use, so shorten that a bit, and stderr while at it. Don't touch the commit message size for now, possibly forever, but note that some log diving reveals a commit with a legit 18kB message (odoo/odoo@42a3b704f7) so if we want to restrict that the limit should be at least 32k, and possibly 64. But it might be a good idea to make that limit part of the ready / merge checks too, rather than cut things off or trigger errors during staging. Fixes #900 [^1]: Most resources on "Argument list too long" reference `ARG_MAX`, but on both my machine and the server it is 2097152 (25% of the default stack), which is ~10x larger than the commit message we tried to generate. The actual limit is `MAX_ARG_STRLEN` which can't be queried directly but is essentially hard-coded to PAGE_SIZE * 32 = 128kiB, which tracks. [^2]: Somewhat unexpectedly, that's where `git-cherry-pick` sends the conflict info.
2025-03-15 15:35:46 +07:00 · 2024-06-25 13:11:32 +02:00 · 2024-06-25 13:11:32 +02:00 · 0a839a4857
commit 0a839a4857
parent dc90a207d6
3 changed files with 106 additions and 17 deletions
--- a/forwardport/models/project.py
+++ b/forwardport/models/project.py
@ -32,11 +32,10 @@ from odoo.osv import expression
 from odoo.tools.misc import topological_sort, groupby
 from odoo.tools.appdirs import user_cache_dir
 from odoo.addons.base.models.res_partner import Partner
-from odoo.addons.runbot_merge import git
+from odoo.addons.runbot_merge import git, utils
 from odoo.addons.runbot_merge.models.pull_requests import Branch
 from odoo.addons.runbot_merge.models.stagings_create import Message
 DEFAULT_DELTA = dateutil.relativedelta.relativedelta(days=3)
 _logger = logging.getLogger('odoo.addons.forwardport')
@ -378,7 +377,7 @@ class PullRequests(models.Model):
                authors.add(to_tuple(c['author']))
                committers.add(to_tuple(c['committer']))
            fp_authorship = (project_id.fp_github_name, '', '')
-            author = fp_authorship if len(authors) != 1\
+            author = fp_authorship if len(authors) != 1 \
                else authors.pop() + (head_commit['author']['date'],)
            committer = fp_authorship if len(committers) != 1 \
                else committers.pop() + (head_commit['committer']['date'],)
@ -399,31 +398,33 @@ class PullRequests(models.Model):
                'merge.renamelimit=0',
                'merge.renames=copies',
                'merge.conflictstyle=zdiff3'
-            )\
+            ) \
-                .with_config(check=False)\
+                .with_config(check=False) \
                .cherry_pick(squashed, no_commit=True, strategy="ort")
            status = conf.stdout().status(short=True, untracked_files='no').stdout.decode()
            if err.strip():
                err = err.rstrip() + '\n----------\nstatus:\n' + status
            else:
                err = 'status:\n' + status
            # if there was a single commit, reuse its message when committing
            # the conflict
            # TODO: still add conflict information to this?
            if len(commits) == 1:
                msg = root._make_fp_message(commits[0])
                conf.with_config(input=str(msg).encode()) \
                    .commit(all=True, allow_empty=True, file='-')
            else:
-                conf.commit(
+                out = utils.shorten(out, 8*1024, '[...]')
-                    all=True, allow_empty=True,
+                err = utils.shorten(err, 8*1024, '[...]')
-                    message="""Cherry pick of %s failed
+                msg = f"""Cherry pick of {h} failed
 stdout:
-%s
+{out}
 stderr:
-%s
+{err}
-""" % (h, out, err))
+"""
            conf.with_config(input=str(msg).encode()) \
                .commit(all=True, allow_empty=True, file='-')
            return (h, out, err, [c['sha'] for c in commits]), working_copy
    def _cherry_pick(self, working_copy):
--- a/forwardport/tests/test_conflicts.py
+++ b/forwardport/tests/test_conflicts.py
@ -1,3 +1,4 @@
 import random
 import re
 import time
 from operator import itemgetter
@ -177,6 +178,94 @@ More info at https://github.com/odoo/odoo/wiki/Mergebot#forward-port
        'i': 'a',
    }
 def test_massive_conflict(env, config, make_repo):
    """If the conflict is large enough, the commit message may exceed ARG_MAX
     and trigger E2BIG.
    """
    # CONFLICT (modify/delete): <file> deleted in <commit> (<title>) and modified in HEAD.  Version HEAD of <file> left in tree.
    #
    # 107 + 2 * len(filename) + len(title) per conflicting file.
    # - filename: random.randbytes(10).hex() -> 20
    # - title: random.randbytes(20).hex() -> 40
    # -> 701 (!) files
    files = []
    while len(files) < 1500:
        files.append(random.randbytes(10).hex())
    # region setup
    project = env['runbot_merge.project'].create({
        'name': "thing",
        'github_token': config['github']['token'],
        'github_prefix': 'hansen',
        'fp_github_token': config['github']['token'],
        'fp_github_name': 'herbert',
        'branch_ids': [
            (0, 0, {'name': 'a', 'sequence': 100}),
            (0, 0, {'name': 'b', 'sequence': 80}),
        ],
    })
    repo = make_repo("repo")
    env['runbot_merge.events_sources'].create({'repository': repo.name})
    repo_id = env['runbot_merge.repository'].create({
        'project_id': project.id,
        'name': repo.name,
        'required_statuses': "default",
        'fp_remote_target': repo.name,
        'group_id': False,
    })
    env['res.partner'].search([
        ('github_login', '=', config['role_reviewer']['user'])
    ]).write({
        'review_rights': [(0, 0, {'repository_id': repo_id.id, 'review': True})]
    })
    with repo:
        # create branch with a ton of empty files
        repo.make_commits(
            None,
            Commit(
                random.randbytes(20).hex(),
                tree=dict.fromkeys(files, "xoxo"),
            ),
            ref='heads/a',
        )
        # removes all those files in the next branch
        repo.make_commits(
            'a',
            Commit(
                random.randbytes(20).hex(),
                tree=dict.fromkeys(files, "content!"),
            ),
            ref='heads/b',
        )
    # endregion setup
    with repo:
        # update all the files
        repo.make_commits(
            'a',
            Commit(random.randbytes(20).hex(), tree={'a': '1'}),
            Commit(random.randbytes(20).hex(), tree={'x': '1'}, reset=True),
            ref='heads/change',
        )
        pr = repo.make_pr(target='a', head='change')
        repo.post_status('refs/heads/change', 'success')
        pr.post_comment('hansen rebase-ff r+', config['role_reviewer']['token'])
    env.run_crons()
    with repo:
        repo.post_status('staging.a', 'success')
    env.run_crons()
    # we don't actually need more, the bug crashes the forward port entirely so
    # the PR is never even created
    _pra_id, _prb_id = env['runbot_merge.pull_requests'].search([], order='number')
 def test_conflict_deleted(env, config, make_repo):
    prod, other = make_basic(env, config, make_repo)
    # remove f from b
--- a/runbot_merge/utils.py
+++ b/runbot_merge/utils.py
@ -3,7 +3,7 @@ import itertools
 import time
-def shorten(text_ish, length):
+def shorten(text_ish, length, cont='...'):
    """ If necessary, cuts-off the text or bytes input and appends ellipsis to
    signal the cutoff, such that the result is below the provided length
    (according to whatever "len" means on the text-ish so bytes or codepoints
@ -12,11 +12,10 @@ def shorten(text_ish, length):
    if len(text_ish or ()) <= length:
        return text_ish
    cont = '...'
    if isinstance(text_ish, bytes):
        cont = cont.encode('ascii') # whatever
    # add enough room for the ellipsis
-    return text_ish[:length-3] + cont
+    return text_ish[:length-len(cont)] + cont
 BACKOFF_DELAYS = (0.1, 0.2, 0.4, 0.8, 1.6)
 def backoff(func=None, *, delays=BACKOFF_DELAYS, exc=Exception):