mirror of
https://github.com/odoo/runbot.git
synced 2025-03-15 23:45:44 +07:00

Given branch A, and branch B forked from it. If B removes a file which a PR to A later modifies, on forward port Git generates a modify/delete conflict (as in one side modifies a file which the other deleted). So far so good, except while it does notify the caller of the issue the modified file is just dumped as-is into the working copy (or commit), which essentially resurrects it. This is an issue, *especially* as the file is already part of a commit (rather tan just a U local file), if the developer fixes the conflict "in place" rather than re-doing the forward-port from scratch it's easy to miss the reincarnated file (and possibly the changes that were part of it too), which at best leaves parasitic dead code in the working copy. There is also no easy way for the runbot to check it as adding unimported standalone files while rare is not unknown e.g. utility scripts (to say nothing of JS where we can't really track the usages / imports at all). To resolve this issue, during conflict generation post-process modify/delete to insert artificial conflict markers, the file should be syntactically invalid so linters / checkers should complain, and the minimal check has a step looking for conflict markers which should fire and prevent merging the PR. Fixes #896
315 lines
11 KiB
Python
315 lines
11 KiB
Python
import dataclasses
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import pathlib
|
|
import resource
|
|
import stat
|
|
import subprocess
|
|
from operator import methodcaller
|
|
from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict, Iterable
|
|
|
|
from odoo.tools.appdirs import user_cache_dir
|
|
from .github import MergeError, PrCommit
|
|
|
|
_logger = logging.getLogger(__name__)
|
|
|
|
def source_url(repository) -> str:
|
|
return 'https://{}@github.com/{}'.format(
|
|
repository.project_id.github_token,
|
|
repository.name,
|
|
)
|
|
|
|
def fw_url(repository) -> str:
|
|
return 'https://{}@github.com/{}'.format(
|
|
repository.project_id.fp_github_token,
|
|
repository.fp_remote_target,
|
|
)
|
|
|
|
Authorship = Union[Tuple[str, str], Tuple[str, str, str]]
|
|
|
|
def get_local(repository, *, clone: bool = True) -> 'Optional[Repo]':
|
|
repos_dir = pathlib.Path(user_cache_dir('mergebot'))
|
|
repos_dir.mkdir(parents=True, exist_ok=True)
|
|
# NB: `repository.name` is `$org/$name` so this will be a subdirectory, probably
|
|
repo_dir = repos_dir / repository.name
|
|
|
|
if repo_dir.is_dir():
|
|
return git(repo_dir)
|
|
elif clone:
|
|
_logger.info("Cloning out %s to %s", repository.name, repo_dir)
|
|
subprocess.run(['git', 'clone', '--bare', source_url(repository), str(repo_dir)], check=True)
|
|
# bare repos don't have fetch specs by default, and fetching *into*
|
|
# them is a pain in the ass, configure fetch specs so `git fetch`
|
|
# works properly
|
|
repo = git(repo_dir)
|
|
repo.config('--add', 'remote.origin.fetch', '+refs/heads/*:refs/heads/*')
|
|
# negative refspecs require git 2.29
|
|
repo.config('--add', 'remote.origin.fetch', '^refs/heads/tmp.*')
|
|
repo.config('--add', 'remote.origin.fetch', '^refs/heads/staging.*')
|
|
return repo
|
|
else:
|
|
_logger.warning(
|
|
"Unable to acquire %s: %s",
|
|
repo_dir,
|
|
"doesn't exist" if not repo_dir.exists()\
|
|
else oct(stat.S_IFMT(repo_dir.stat().st_mode))
|
|
)
|
|
return None
|
|
|
|
|
|
ALWAYS = ('gc.auto=0', 'maintenance.auto=0')
|
|
|
|
|
|
def _bypass_limits():
|
|
resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
|
|
|
|
|
|
def git(directory: str) -> 'Repo':
|
|
return Repo(directory, check=True)
|
|
|
|
|
|
Self = TypeVar("Self", bound="Repo")
|
|
class Repo:
|
|
def __init__(self, directory, **config) -> None:
|
|
self._directory = str(directory)
|
|
config.setdefault('stderr', subprocess.PIPE)
|
|
self._config = config
|
|
self._params = ()
|
|
|
|
def __getattr__(self, name: str) -> 'GitCommand':
|
|
return GitCommand(self, name.replace('_', '-'))
|
|
|
|
def _run(self, *args, **kwargs) -> subprocess.CompletedProcess:
|
|
opts = {**self._config, **kwargs}
|
|
args = ('git', '-C', self._directory)\
|
|
+ tuple(itertools.chain.from_iterable(('-c', p) for p in self._params + ALWAYS))\
|
|
+ args
|
|
try:
|
|
return subprocess.run(args, preexec_fn=_bypass_limits, **opts)
|
|
except subprocess.CalledProcessError as e:
|
|
stream = e.stderr or e.stdout
|
|
if stream:
|
|
_logger.error("git call error: %s", stream)
|
|
raise
|
|
|
|
def stdout(self, flag: bool = True) -> Self:
|
|
if flag is True:
|
|
return self.with_config(stdout=subprocess.PIPE)
|
|
elif flag is False:
|
|
return self.with_config(stdout=None)
|
|
return self.with_config(stdout=flag)
|
|
|
|
def check(self, flag: bool) -> Self:
|
|
return self.with_config(check=flag)
|
|
|
|
def with_config(self, **kw) -> Self:
|
|
opts = {**self._config, **kw}
|
|
r = Repo(self._directory, **opts)
|
|
r._params = self._params
|
|
return r
|
|
|
|
def with_params(self, *args) -> Self:
|
|
r = self.with_config()
|
|
r._params = args
|
|
return r
|
|
|
|
def clone(self, to: str, branch: Optional[str] = None) -> Self:
|
|
self._run(
|
|
'clone',
|
|
*([] if branch is None else ['-b', branch]),
|
|
self._directory, to,
|
|
)
|
|
return Repo(to)
|
|
|
|
def get_tree(self, commit_hash: str) -> str:
|
|
r = self.with_config(check=True).rev_parse(f'{commit_hash}^{{tree}}')
|
|
|
|
return r.stdout.strip()
|
|
|
|
def rebase(self, dest: str, commits: Sequence[PrCommit]) -> Tuple[str, Dict[str, str]]:
|
|
"""Implements rebase by hand atop plumbing so:
|
|
|
|
- we can work without a working copy
|
|
- we can track individual commits (and store the mapping)
|
|
|
|
It looks like `--merge-base` is not sufficient for `merge-tree` to
|
|
correctly keep track of history, so it loses contents. Therefore
|
|
implement in two passes as in the github version.
|
|
"""
|
|
repo = self.stdout().with_config(text=True, check=False)
|
|
|
|
logger = _logger.getChild('rebase')
|
|
if not commits:
|
|
raise MergeError("PR has no commits")
|
|
|
|
prev_tree = repo.get_tree(dest)
|
|
prev_original_tree = repo.get_tree(commits[0]['parents'][0]["sha"])
|
|
|
|
new_trees = []
|
|
parent = dest
|
|
for original in commits:
|
|
if len(original['parents']) != 1:
|
|
raise MergeError(
|
|
f"commits with multiple parents ({original['sha']}) can not be rebased, "
|
|
"either fix the branch to remove merges or merge without "
|
|
"rebasing")
|
|
|
|
new_trees.append(check(repo.merge_tree(parent, original['sha'])).stdout.strip())
|
|
# allow merging empty commits, but not empty*ing* commits while merging
|
|
if prev_original_tree != original['commit']['tree']['sha']:
|
|
if new_trees[-1] == prev_tree:
|
|
raise MergeError(
|
|
f"commit {original['sha']} results in an empty tree when "
|
|
f"merged, it is likely a duplicate of a merged commit, "
|
|
f"rebase and remove."
|
|
)
|
|
|
|
parent = check(repo.commit_tree(
|
|
tree=new_trees[-1],
|
|
parents=[parent, original['sha']],
|
|
message=f'temp rebase {original["sha"]}',
|
|
)).stdout.strip()
|
|
prev_tree = new_trees[-1]
|
|
prev_original_tree = original['commit']['tree']['sha']
|
|
|
|
mapping = {}
|
|
for original, tree in zip(commits, new_trees):
|
|
authorship = check(repo.show('--no-patch', '--pretty=%an%n%ae%n%ai%n%cn%n%ce', original['sha']))
|
|
author_name, author_email, author_date, committer_name, committer_email =\
|
|
authorship.stdout.splitlines()
|
|
|
|
c = check(repo.commit_tree(
|
|
tree=tree,
|
|
parents=[dest],
|
|
message=original['commit']['message'],
|
|
author=(author_name, author_email, author_date),
|
|
committer=(committer_name, committer_email),
|
|
)).stdout.strip()
|
|
|
|
logger.debug('copied %s to %s (parent: %s)', original['sha'], c, dest)
|
|
dest = mapping[original['sha']] = c
|
|
|
|
return dest, mapping
|
|
|
|
def merge(self, c1: str, c2: str, msg: str, *, author: Tuple[str, str]) -> str:
|
|
repo = self.stdout().with_config(text=True, check=False)
|
|
|
|
t = repo.merge_tree(c1, c2)
|
|
if t.returncode:
|
|
raise MergeError(t.stderr)
|
|
|
|
c = self.commit_tree(
|
|
tree=t.stdout.strip(),
|
|
message=msg,
|
|
parents=[c1, c2],
|
|
author=author,
|
|
)
|
|
if c.returncode:
|
|
raise MergeError(c.stderr)
|
|
return c.stdout.strip()
|
|
|
|
def commit_tree(
|
|
self, *, tree: str, message: str,
|
|
parents: Sequence[str] = (),
|
|
author: Optional[Authorship] = None,
|
|
committer: Optional[Authorship] = None,
|
|
) -> subprocess.CompletedProcess:
|
|
authorship = {}
|
|
if author:
|
|
authorship['GIT_AUTHOR_NAME'] = author[0]
|
|
authorship['GIT_AUTHOR_EMAIL'] = author[1]
|
|
if len(author) > 2:
|
|
authorship['GIT_AUTHOR_DATE'] = author[2]
|
|
if committer:
|
|
authorship['GIT_COMMITTER_NAME'] = committer[0]
|
|
authorship['GIT_COMMITTER_EMAIL'] = committer[1]
|
|
if len(committer) > 2:
|
|
authorship['GIT_COMMITTER_DATE'] = committer[2]
|
|
|
|
return self.with_config(
|
|
input=message,
|
|
stdout=subprocess.PIPE,
|
|
text=True,
|
|
env={
|
|
**os.environ,
|
|
**authorship,
|
|
# we don't want git to use the timezone of the machine it's
|
|
# running on: previously it used the timezone configured in
|
|
# github (?), which I think / assume defaults to a generic UTC
|
|
'TZ': 'UTC',
|
|
}
|
|
)._run(
|
|
'commit-tree',
|
|
tree,
|
|
'-F', '-',
|
|
*itertools.chain.from_iterable(('-p', p) for p in parents),
|
|
)
|
|
|
|
|
|
def modify_delete(self, tree: str, files: Iterable[str]) -> str:
|
|
"""Updates ``files`` in ``tree`` to add conflict markers to show them
|
|
as being modify/delete-ed, rather than have only the original content.
|
|
|
|
This is because having just content in a valid file is easy to miss,
|
|
causing file resurrections as they get committed rather than re-removed.
|
|
|
|
TODO: maybe extract the diff information compared to before they were removed? idk
|
|
"""
|
|
# FIXME: either ignore or process binary files somehow (how does git show conflicts in binary files?)
|
|
repo = self.stdout().with_config(stderr=None, text=True, check=False, encoding="utf-8")
|
|
for f in files:
|
|
contents = repo.cat_file("-p", f"{tree}:{f}").stdout
|
|
# decorate the contents as if HEAD and BASE are empty
|
|
oid = repo\
|
|
.with_config(input=f"""\
|
|
<<<\x3c<<< HEAD
|
|
||||||| MERGE BASE
|
|
=======
|
|
{contents}
|
|
>>>\x3e>>> FORWARD PORTED
|
|
""")\
|
|
.hash_object("-w", "--stdin", "--path", f)\
|
|
.stdout.strip()
|
|
|
|
# we need to rewrite every tree from the parent of `f`
|
|
while f:
|
|
f, _, local = f.rpartition("/")
|
|
# tree to update, `{tree}:` works as an alias for tree
|
|
lstree = repo.ls_tree(f"{tree}:{f}").stdout.splitlines(keepends=False)
|
|
new_tree = "".join(
|
|
# tab before name is critical to the format
|
|
f"{mode} {typ} {oid if name == local else sha}\t{name}\n"
|
|
for mode, typ, sha, name in map(methodcaller("split", None, 3), lstree)
|
|
)
|
|
oid = repo.with_config(input=new_tree, check=True).mktree().stdout.strip()
|
|
tree = oid
|
|
return tree
|
|
|
|
|
|
def check(p: subprocess.CompletedProcess) -> subprocess.CompletedProcess:
|
|
if not p.returncode:
|
|
return p
|
|
|
|
_logger.info("rebase failed at %s\nstdout:\n%s\nstderr:\n%s", p.args, p.stdout, p.stderr)
|
|
raise MergeError(p.stderr or 'merge conflict')
|
|
|
|
|
|
@dataclasses.dataclass
|
|
class GitCommand:
|
|
repo: Repo
|
|
name: str
|
|
|
|
def __call__(self, *args, **kwargs) -> subprocess.CompletedProcess:
|
|
return self.repo._run(self.name, *args, *self._to_options(kwargs))
|
|
|
|
def _to_options(self, d):
|
|
for k, v in d.items():
|
|
if len(k) == 1:
|
|
yield '-' + k
|
|
else:
|
|
yield '--' + k.replace('_', '-')
|
|
if v not in (None, True):
|
|
assert v is not False
|
|
yield str(v)
|