[IMP] forwardport: surfacing of modify/delete conflicts

Given branch A, and branch B forked from it. If B removes a file which
a PR to A later modifies, on forward port Git generates a
modify/delete conflict (as in one side modifies a file which the
other deleted).

So far so good, except while it does notify the caller of the issue
the modified file is just dumped as-is into the working copy (or
commit), which essentially resurrects it.

This is an issue, *especially* as the file is already part of a
commit (rather tan just a U local file), if the developer fixes the
conflict "in place" rather than re-doing the forward-port from scratch
it's easy to miss the reincarnated file (and possibly the changes that
were part of it too), which at best leaves parasitic dead code in the
working copy. There is also no easy way for the runbot to check it as
adding unimported standalone files while rare is not unknown
e.g. utility scripts (to say nothing of JS where we can't really track
the usages / imports at all).

To resolve this issue, during conflict generation post-process
modify/delete to insert artificial conflict markers, the file should
be syntactically invalid so linters / checkers should complain, and
the minimal check has a step looking for conflict markers which should
fire and prevent merging the PR.

Fixes #896
This commit is contained in:
Xavier Morel 2024-09-27 12:37:49 +02:00
parent ef22529620
commit 8f27773f8d
3 changed files with 161 additions and 19 deletions

View File

@ -346,7 +346,7 @@ class PullRequests(models.Model):
'--merge-base', commits[0]['parents'][0]['sha'],
target_branch.name,
root.head,
)
).stdout.decode().splitlines(keepends=False)[0]
# if there was a single commit, reuse its message when committing
# the conflict
if len(commits) == 1:
@ -362,10 +362,28 @@ stderr:
{err}
"""
# if a file is modified by the original PR and added by the forward
# port / conflict it's a modify/delete conflict: the file was
# deleted in the target branch, and the update (modify) in the
# original PR causes it to be added back
original_modified = set(conf.diff(
"--diff-filter=M", "--name-only",
"--merge-base", root.target.name,
root.head,
).stdout.decode().splitlines(keepends=False))
conflict_added = set(conf.diff(
"--diff-filter=A", "--name-only",
target_branch.name,
tree,
).stdout.decode().splitlines(keepends=False))
if modify_delete := (conflict_added & original_modified):
# rewrite the tree with conflict markers added to modify/deleted blobs
tree = conf.modify_delete(tree, modify_delete)
target_head = source.stdout().rev_parse(f"refs/heads/{target_branch.name}")\
.stdout.decode().strip()
commit = conf.commit_tree(
tree=tree.stdout.decode().splitlines(keepends=False)[0],
tree=tree,
parents=[target_head],
message=str(msg),
author=author,

View File

@ -262,7 +262,7 @@ def test_massive_conflict(env, config, make_repo):
def test_conflict_deleted(env, config, make_repo):
prod, other = make_basic(env, config, make_repo)
prod, other = make_basic(env, config, make_repo, statuses="default")
# remove f from b
with prod:
prod.make_commits(
@ -277,18 +277,12 @@ def test_conflict_deleted(env, config, make_repo):
ref='heads/conflicting'
)
pr = prod.make_pr(target='a', head='conflicting')
prod.post_status(p_0, 'success', 'legal/cla')
prod.post_status(p_0, 'success', 'ci/runbot')
prod.post_status(p_0, 'success')
pr.post_comment('hansen r+', config['role_reviewer']['token'])
env.run_crons()
with prod:
prod.post_status('staging.a', 'success', 'legal/cla')
prod.post_status('staging.a', 'success', 'ci/runbot')
env.run_crons()
# wait a bit for PR webhook... ?
time.sleep(5)
prod.post_status('staging.a', 'success')
env.run_crons()
# should have created a new PR
@ -300,9 +294,14 @@ def test_conflict_deleted(env, config, make_repo):
'g': 'c',
}
assert pr1.state == 'opened'
# NOTE: no actual conflict markers because pr1 essentially adds f de-novo
assert prod.read_tree(prod.commit(pr1.head)) == {
'f': 'xxx',
'f': matches("""\
<<<\x3c<<< $$
||||||| $$
=======
xxx
>>>\x3e>>> $$
"""),
'g': 'c',
}
@ -333,6 +332,89 @@ def test_conflict_deleted(env, config, make_repo):
}
assert pr1.state == 'opened', "state should be open still"
def test_conflict_deleted_deep(env, config, make_repo):
""" Same as the previous one but files are deeper than toplevel, and we only
want to see if the conflict post-processing works.
"""
# region: setup
prod = make_repo("test")
env['runbot_merge.events_sources'].create({'repository': prod.name})
with prod:
[a, b] = prod.make_commits(
None,
Commit("a", tree={
"foo/bar/baz": "1",
"foo/bar/qux": "1",
"corge/grault": "1",
}),
Commit("b", tree={"foo/bar/qux": "2"}, reset=True),
)
prod.make_ref("heads/a", a)
prod.make_ref("heads/b", b)
project = env['runbot_merge.project'].create({
'name': "test",
'github_token': config['github']['token'],
'github_prefix': 'hansen',
'fp_github_token': config['github']['token'],
'fp_github_name': 'herbert',
'branch_ids': [
(0, 0, {'name': 'a', 'sequence': 100}),
(0, 0, {'name': 'b', 'sequence': 80}),
],
"repo_ids": [
(0, 0, {
'name': prod.name,
'required_statuses': "default",
'fp_remote_target': prod.fork().name,
'group_id': False,
})
]
})
env['res.partner'].search([
('github_login', '=', config['role_reviewer']['user'])
]).write({
'review_rights': [(0, 0, {'repository_id': project.repo_ids.id, 'review': True})]
})
# endregion
with prod:
prod.make_commits(
'a',
Commit("c", tree={
"foo/bar/baz": "2",
"corge/grault": "insert funny number",
}),
ref="heads/conflicting",
)
pr = prod.make_pr(target='a', head='conflicting')
prod.post_status("conflicting", "success")
pr.post_comment("hansen r+", config['role_reviewer']['token'])
env.run_crons()
with prod:
prod.post_status('staging.a', 'success')
env.run_crons()
_, pr1 = env['runbot_merge.pull_requests'].search([], order='number')
assert prod.read_tree(prod.commit(pr1.head), recursive=True) == {
"foo/bar/qux": "2",
"foo/bar/baz": """\
<<<<<<< HEAD
||||||| MERGE BASE
=======
2
>>>>>>> FORWARD PORTED
""",
"corge/grault": """\
<<<<<<< HEAD
||||||| MERGE BASE
=======
insert funny number
>>>>>>> FORWARD PORTED
"""
}, "the conflicting file should have had conflict markers fixed in"
def test_multiple_commits_same_authorship(env, config, make_repo):
""" When a PR has multiple commits by the same author and its
forward-porting triggers a conflict, the resulting (squashed) conflict
@ -436,13 +518,13 @@ def test_multiple_commits_different_authorship(env, config, make_repo, users, ro
"author set to the bot but an empty email"
assert get(c.committer) == (bot, '')
assert re.match(r'''<<<\x3c<<< HEAD
assert prod.read_tree(c)['g'] == matches('''<<<\x3c<<< b
b
|||||||| parent of [\da-f]{7,}.*
||||||| $$
=======
2
>>>\x3e>>> [\da-f]{7,}.*
''', prod.read_tree(c)['g'])
>>>\x3e>>> $$
''')
# I'd like to fix the conflict so everything is clean and proper *but*
# github's API apparently rejects creating commits with an empty email.
@ -459,7 +541,7 @@ b
assert pr2.comments == [
seen(env, pr2, users),
(users['user'], matches('@%s @%s $$CONFLICT' % (users['user'], users['reviewer']))),
(users['user'], matches('@%(user)s @%(reviewer)s $$CONFLICT' % users)),
(users['reviewer'], 'hansen r+'),
(users['user'], f"@{users['user']} @{users['reviewer']} unable to stage: "
"All commits must have author and committer email, "

View File

@ -6,7 +6,8 @@ import pathlib
import resource
import stat
import subprocess
from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict
from operator import methodcaller
from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict, Iterable
from odoo.tools.appdirs import user_cache_dir
from .github import MergeError, PrCommit
@ -245,6 +246,47 @@ class Repo:
*itertools.chain.from_iterable(('-p', p) for p in parents),
)
def modify_delete(self, tree: str, files: Iterable[str]) -> str:
"""Updates ``files`` in ``tree`` to add conflict markers to show them
as being modify/delete-ed, rather than have only the original content.
This is because having just content in a valid file is easy to miss,
causing file resurrections as they get committed rather than re-removed.
TODO: maybe extract the diff information compared to before they were removed? idk
"""
# FIXME: either ignore or process binary files somehow (how does git show conflicts in binary files?)
repo = self.stdout().with_config(stderr=None, text=True, check=False, encoding="utf-8")
for f in files:
contents = repo.cat_file("-p", f"{tree}:{f}").stdout
# decorate the contents as if HEAD and BASE are empty
oid = repo\
.with_config(input=f"""\
<<<\x3c<<< HEAD
||||||| MERGE BASE
=======
{contents}
>>>\x3e>>> FORWARD PORTED
""")\
.hash_object("-w", "--stdin", "--path", f)\
.stdout.strip()
# we need to rewrite every tree from the parent of `f`
while f:
f, _, local = f.rpartition("/")
# tree to update, `{tree}:` works as an alias for tree
lstree = repo.ls_tree(f"{tree}:{f}").stdout.splitlines(keepends=False)
new_tree = "".join(
# tab before name is critical to the format
f"{mode} {typ} {oid if name == local else sha}\t{name}\n"
for mode, typ, sha, name in map(methodcaller("split", None, 3), lstree)
)
oid = repo.with_config(input=new_tree, check=True).mktree().stdout.strip()
tree = oid
return tree
def check(p: subprocess.CompletedProcess) -> subprocess.CompletedProcess:
if not p.returncode:
return p