From 8f27773f8d158d53a7ac37197b48f173d0c5411c Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Fri, 27 Sep 2024 12:37:49 +0200 Subject: [PATCH] [IMP] forwardport: surfacing of modify/delete conflicts Given branch A, and branch B forked from it. If B removes a file which a PR to A later modifies, on forward port Git generates a modify/delete conflict (as in one side modifies a file which the other deleted). So far so good, except while it does notify the caller of the issue the modified file is just dumped as-is into the working copy (or commit), which essentially resurrects it. This is an issue, *especially* as the file is already part of a commit (rather tan just a U local file), if the developer fixes the conflict "in place" rather than re-doing the forward-port from scratch it's easy to miss the reincarnated file (and possibly the changes that were part of it too), which at best leaves parasitic dead code in the working copy. There is also no easy way for the runbot to check it as adding unimported standalone files while rare is not unknown e.g. utility scripts (to say nothing of JS where we can't really track the usages / imports at all). To resolve this issue, during conflict generation post-process modify/delete to insert artificial conflict markers, the file should be syntactically invalid so linters / checkers should complain, and the minimal check has a step looking for conflict markers which should fire and prevent merging the PR. Fixes #896 --- forwardport/models/project.py | 22 +++++- forwardport/tests/test_conflicts.py | 114 ++++++++++++++++++++++++---- runbot_merge/git.py | 44 ++++++++++- 3 files changed, 161 insertions(+), 19 deletions(-) diff --git a/forwardport/models/project.py b/forwardport/models/project.py index 0031b61b..f26ebfd5 100644 --- a/forwardport/models/project.py +++ b/forwardport/models/project.py @@ -346,7 +346,7 @@ class PullRequests(models.Model): '--merge-base', commits[0]['parents'][0]['sha'], target_branch.name, root.head, - ) + ).stdout.decode().splitlines(keepends=False)[0] # if there was a single commit, reuse its message when committing # the conflict if len(commits) == 1: @@ -362,10 +362,28 @@ stderr: {err} """ + # if a file is modified by the original PR and added by the forward + # port / conflict it's a modify/delete conflict: the file was + # deleted in the target branch, and the update (modify) in the + # original PR causes it to be added back + original_modified = set(conf.diff( + "--diff-filter=M", "--name-only", + "--merge-base", root.target.name, + root.head, + ).stdout.decode().splitlines(keepends=False)) + conflict_added = set(conf.diff( + "--diff-filter=A", "--name-only", + target_branch.name, + tree, + ).stdout.decode().splitlines(keepends=False)) + if modify_delete := (conflict_added & original_modified): + # rewrite the tree with conflict markers added to modify/deleted blobs + tree = conf.modify_delete(tree, modify_delete) + target_head = source.stdout().rev_parse(f"refs/heads/{target_branch.name}")\ .stdout.decode().strip() commit = conf.commit_tree( - tree=tree.stdout.decode().splitlines(keepends=False)[0], + tree=tree, parents=[target_head], message=str(msg), author=author, diff --git a/forwardport/tests/test_conflicts.py b/forwardport/tests/test_conflicts.py index 3c0bbda6..f6e23bbf 100644 --- a/forwardport/tests/test_conflicts.py +++ b/forwardport/tests/test_conflicts.py @@ -262,7 +262,7 @@ def test_massive_conflict(env, config, make_repo): def test_conflict_deleted(env, config, make_repo): - prod, other = make_basic(env, config, make_repo) + prod, other = make_basic(env, config, make_repo, statuses="default") # remove f from b with prod: prod.make_commits( @@ -277,18 +277,12 @@ def test_conflict_deleted(env, config, make_repo): ref='heads/conflicting' ) pr = prod.make_pr(target='a', head='conflicting') - prod.post_status(p_0, 'success', 'legal/cla') - prod.post_status(p_0, 'success', 'ci/runbot') + prod.post_status(p_0, 'success') pr.post_comment('hansen r+', config['role_reviewer']['token']) env.run_crons() with prod: - prod.post_status('staging.a', 'success', 'legal/cla') - prod.post_status('staging.a', 'success', 'ci/runbot') - - env.run_crons() - # wait a bit for PR webhook... ? - time.sleep(5) + prod.post_status('staging.a', 'success') env.run_crons() # should have created a new PR @@ -300,9 +294,14 @@ def test_conflict_deleted(env, config, make_repo): 'g': 'c', } assert pr1.state == 'opened' - # NOTE: no actual conflict markers because pr1 essentially adds f de-novo assert prod.read_tree(prod.commit(pr1.head)) == { - 'f': 'xxx', + 'f': matches("""\ +<<<\x3c<<< $$ +||||||| $$ +======= +xxx +>>>\x3e>>> $$ +"""), 'g': 'c', } @@ -333,6 +332,89 @@ def test_conflict_deleted(env, config, make_repo): } assert pr1.state == 'opened', "state should be open still" + +def test_conflict_deleted_deep(env, config, make_repo): + """ Same as the previous one but files are deeper than toplevel, and we only + want to see if the conflict post-processing works. + """ + # region: setup + prod = make_repo("test") + env['runbot_merge.events_sources'].create({'repository': prod.name}) + with prod: + [a, b] = prod.make_commits( + None, + Commit("a", tree={ + "foo/bar/baz": "1", + "foo/bar/qux": "1", + "corge/grault": "1", + }), + Commit("b", tree={"foo/bar/qux": "2"}, reset=True), + ) + prod.make_ref("heads/a", a) + prod.make_ref("heads/b", b) + + project = env['runbot_merge.project'].create({ + 'name': "test", + 'github_token': config['github']['token'], + 'github_prefix': 'hansen', + 'fp_github_token': config['github']['token'], + 'fp_github_name': 'herbert', + 'branch_ids': [ + (0, 0, {'name': 'a', 'sequence': 100}), + (0, 0, {'name': 'b', 'sequence': 80}), + ], + "repo_ids": [ + (0, 0, { + 'name': prod.name, + 'required_statuses': "default", + 'fp_remote_target': prod.fork().name, + 'group_id': False, + }) + ] + }) + env['res.partner'].search([ + ('github_login', '=', config['role_reviewer']['user']) + ]).write({ + 'review_rights': [(0, 0, {'repository_id': project.repo_ids.id, 'review': True})] + }) + # endregion + + with prod: + prod.make_commits( + 'a', + Commit("c", tree={ + "foo/bar/baz": "2", + "corge/grault": "insert funny number", + }), + ref="heads/conflicting", + ) + pr = prod.make_pr(target='a', head='conflicting') + prod.post_status("conflicting", "success") + pr.post_comment("hansen r+", config['role_reviewer']['token']) + env.run_crons() + + with prod: + prod.post_status('staging.a', 'success') + env.run_crons() + _, pr1 = env['runbot_merge.pull_requests'].search([], order='number') + assert prod.read_tree(prod.commit(pr1.head), recursive=True) == { + "foo/bar/qux": "2", + "foo/bar/baz": """\ +<<<<<<< HEAD +||||||| MERGE BASE +======= +2 +>>>>>>> FORWARD PORTED +""", + "corge/grault": """\ +<<<<<<< HEAD +||||||| MERGE BASE +======= +insert funny number +>>>>>>> FORWARD PORTED +""" + }, "the conflicting file should have had conflict markers fixed in" + def test_multiple_commits_same_authorship(env, config, make_repo): """ When a PR has multiple commits by the same author and its forward-porting triggers a conflict, the resulting (squashed) conflict @@ -436,13 +518,13 @@ def test_multiple_commits_different_authorship(env, config, make_repo, users, ro "author set to the bot but an empty email" assert get(c.committer) == (bot, '') - assert re.match(r'''<<<\x3c<<< HEAD + assert prod.read_tree(c)['g'] == matches('''<<<\x3c<<< b b -|||||||| parent of [\da-f]{7,}.* +||||||| $$ ======= 2 ->>>\x3e>>> [\da-f]{7,}.* -''', prod.read_tree(c)['g']) +>>>\x3e>>> $$ +''') # I'd like to fix the conflict so everything is clean and proper *but* # github's API apparently rejects creating commits with an empty email. @@ -459,7 +541,7 @@ b assert pr2.comments == [ seen(env, pr2, users), - (users['user'], matches('@%s @%s $$CONFLICT' % (users['user'], users['reviewer']))), + (users['user'], matches('@%(user)s @%(reviewer)s $$CONFLICT' % users)), (users['reviewer'], 'hansen r+'), (users['user'], f"@{users['user']} @{users['reviewer']} unable to stage: " "All commits must have author and committer email, " diff --git a/runbot_merge/git.py b/runbot_merge/git.py index caaa6e5d..d9937329 100644 --- a/runbot_merge/git.py +++ b/runbot_merge/git.py @@ -6,7 +6,8 @@ import pathlib import resource import stat import subprocess -from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict +from operator import methodcaller +from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict, Iterable from odoo.tools.appdirs import user_cache_dir from .github import MergeError, PrCommit @@ -245,6 +246,47 @@ class Repo: *itertools.chain.from_iterable(('-p', p) for p in parents), ) + + def modify_delete(self, tree: str, files: Iterable[str]) -> str: + """Updates ``files`` in ``tree`` to add conflict markers to show them + as being modify/delete-ed, rather than have only the original content. + + This is because having just content in a valid file is easy to miss, + causing file resurrections as they get committed rather than re-removed. + + TODO: maybe extract the diff information compared to before they were removed? idk + """ + # FIXME: either ignore or process binary files somehow (how does git show conflicts in binary files?) + repo = self.stdout().with_config(stderr=None, text=True, check=False, encoding="utf-8") + for f in files: + contents = repo.cat_file("-p", f"{tree}:{f}").stdout + # decorate the contents as if HEAD and BASE are empty + oid = repo\ + .with_config(input=f"""\ +<<<\x3c<<< HEAD +||||||| MERGE BASE +======= +{contents} +>>>\x3e>>> FORWARD PORTED +""")\ + .hash_object("-w", "--stdin", "--path", f)\ + .stdout.strip() + + # we need to rewrite every tree from the parent of `f` + while f: + f, _, local = f.rpartition("/") + # tree to update, `{tree}:` works as an alias for tree + lstree = repo.ls_tree(f"{tree}:{f}").stdout.splitlines(keepends=False) + new_tree = "".join( + # tab before name is critical to the format + f"{mode} {typ} {oid if name == local else sha}\t{name}\n" + for mode, typ, sha, name in map(methodcaller("split", None, 3), lstree) + ) + oid = repo.with_config(input=new_tree, check=True).mktree().stdout.strip() + tree = oid + return tree + + def check(p: subprocess.CompletedProcess) -> subprocess.CompletedProcess: if not p.returncode: return p