mirror of
https://github.com/odoo/runbot.git
synced 2025-03-15 15:35:46 +07:00

- ensure all its requirements are set - rename parameter as `rev-parse` can take an arbitrary rev expression not just a commit
356 lines
13 KiB
Python
356 lines
13 KiB
Python
import dataclasses
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import pathlib
|
|
import resource
|
|
import stat
|
|
import subprocess
|
|
from operator import methodcaller
|
|
from typing import Optional, TypeVar, Union, Sequence, Tuple, Dict, Iterator
|
|
from collections.abc import Iterable, Mapping, Callable
|
|
|
|
from odoo.tools.appdirs import user_cache_dir
|
|
from .github import MergeError, PrCommit
|
|
|
|
# Module-level logger, named after this module; `getChild` is used on it to
# derive more specific loggers (e.g. for rebases).
_logger = logging.getLogger(__name__)
|
|
|
|
try:
    from opentelemetry import trace
    from opentelemetry.propagate import inject
    tracer = trace.get_tracer(__name__)

    def git_tracing_params() -> Iterator[str]:
        """Return git CLI arguments carrying the current tracing context.

        The context injected by opentelemetry is converted to one
        ``-c http.extraHeader=<key>:<value>`` pair per entry.
        """
        carrier = {}
        inject(carrier)
        return (
            part
            for key, value in carrier.items()
            for part in ('-c', f'http.extraHeader={key}:{value}')
        )
except ImportError:
    # opentelemetry is optional: without it there is no tracing context
    trace = tracer = inject = None

    def git_tracing_params() -> Iterator[str]:
        """Return no extra arguments: tracing is not available."""
        return iter(())
|
|
|
|
def source_url(repository) -> str:
    """Return the authenticated github HTTPS URL of ``repository``.

    The project's ``github_token`` is embedded as the user part of the URL
    and ``repository.name`` is used as the path.
    """
    token = repository.project_id.github_token
    return f'https://{token}@github.com/{repository.name}'
|
|
|
|
def fw_url(repository) -> str:
    """Return the authenticated github HTTPS URL of the ``fp_remote_target``.

    Same shape as :func:`source_url` but built from the project's
    ``fp_github_token`` and the repository's ``fp_remote_target``
    (presumably the forward-port remote -- confirm against the models).
    """
    token = repository.project_id.fp_github_token
    return f'https://{token}@github.com/{repository.fp_remote_target}'
|
|
|
|
# Identity recorded on a commit, as consumed by `Repo.commit_tree`: either
# `(name, email)` or `(name, email, date)`.
Authorship = Union[Tuple[str, str], Tuple[str, str, str]]
|
|
|
|
def get_local(repository, *, clone: bool = True) -> 'Optional[Repo]':
    """Return the local bare mirror of ``repository``, cloning it if needed.

    Mirrors live under the ``mergebot`` user cache directory, in a
    subdirectory named after ``repository.name``.

    :param repository: object providing ``name`` and what
        :func:`source_url` needs to build the clone URL
    :param clone: whether a missing mirror should be cloned; when false a
        missing mirror is logged as a warning and ``None`` is returned
    :returns: a :class:`Repo` for the mirror, or ``None``
    :raises subprocess.CalledProcessError: if the clone fails
    """
    cache_root = pathlib.Path(user_cache_dir('mergebot'))
    cache_root.mkdir(parents=True, exist_ok=True)
    # NB: `repository.name` is `$org/$name` so this will be a subdirectory, probably
    repo_dir = cache_root / repository.name

    if repo_dir.is_dir():
        return git(repo_dir)

    if not clone:
        if repo_dir.exists():
            # something is there but it's not a directory: report its file type
            reason = oct(stat.S_IFMT(repo_dir.stat().st_mode))
        else:
            reason = "doesn't exist"
        _logger.warning("Unable to acquire %s: %s", repo_dir, reason)
        return None

    _logger.info("Cloning out %s to %s", repository.name, repo_dir)
    clone_command = [
        'git', *git_tracing_params(), 'clone', '--bare',
        source_url(repository), str(repo_dir),
    ]
    subprocess.run(clone_command, check=True)

    # bare repos don't have fetch specs by default, and fetching *into*
    # them is a pain, configure fetch specs so `git fetch` works properly
    repo = git(repo_dir)
    fetch_specs = (
        '+refs/heads/*:refs/heads/*',
        # negative refspecs require git 2.29
        '^refs/heads/tmp.*',
        '^refs/heads/staging.*',
    )
    for spec in fetch_specs:
        repo.config('--add', 'remote.origin.fetch', spec)
    return repo
|
|
|
|
|
|
# git configuration entries passed (as `-c <entry>`) to every command run
# through `Repo._run`, after the repo-specific params; both entries set the
# corresponding automatic housekeeping option to 0.
ALWAYS = ('gc.auto=0', 'maintenance.auto=0')
|
|
|
|
|
|
def _bypass_limits():
    """Lift the address-space limit (``RLIMIT_AS``) of the current process.

    Both the soft and the hard limit are set to ``RLIM_INFINITY``. It is
    passed as ``preexec_fn`` when running git, so it executes in the child
    process before git starts.
    """
    unlimited = resource.RLIM_INFINITY
    resource.setrlimit(resource.RLIMIT_AS, (unlimited, unlimited))
|
|
|
|
|
|
def git(directory: str) -> 'Repo':
    """Return a :class:`Repo` for ``directory`` configured with ``check=True``.

    With ``check=True`` a git command exiting with a non-zero status raises
    ``subprocess.CalledProcessError`` unless a derived repo overrides it.
    """
    checked_repo = Repo(directory, check=True)
    return checked_repo
|
|
|
|
|
|
Self = TypeVar("Self", bound="Repo")


class Repo:
    """Thin wrapper around the ``git`` command line for one repository.

    Any public attribute which is not a real method is resolved to a
    :class:`GitCommand`, underscores being converted to dashes, so
    ``repo.rev_parse('HEAD')`` runs ``git rev-parse HEAD``.

    Instances are treated as immutable: :meth:`stdout`, :meth:`check`,
    :meth:`with_config` and :meth:`with_params` return a *new* ``Repo``
    carrying the updated settings.

    ``config`` holds keyword arguments forwarded to the runner
    (``subprocess.run`` by default), ``stderr`` defaulting to a pipe.
    """

    def __init__(self, directory: str, **config: object) -> None:
        self._directory = str(directory)
        # capture stderr unless told otherwise, so errors can be reported
        config.setdefault('stderr', subprocess.PIPE)
        self._config = config
        # extra `-c` configuration entries, see `with_params`
        self._params = ()
        self.runner = subprocess.run

    def __getattr__(self, name: str) -> 'GitCommand':
        # Only called when regular lookup fails. Private and dunder names can
        # not be git commands (they would translate to `-something`), and
        # answering them would confuse protocol probes (copy, pickle,
        # hasattr...) as well as accesses on partially initialised instances.
        if name.startswith('_'):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}")
        return GitCommand(self, name.replace('_', '-'))

    def _run(self, *args, **kwargs) -> subprocess.CompletedProcess:
        """Run ``git`` with ``args`` in the repository's directory.

        The command line is ``git -C <directory>``, followed by a ``-c`` pair
        for each of the repo's params and of ``ALWAYS``, the tracing
        parameters, then ``args``. ``kwargs`` override the repo's config for
        this call only.

        :raises subprocess.CalledProcessError: when the runner raises it
            (``check=True``); the captured output is logged before
            re-raising
        """
        opts = {**self._config, **kwargs}
        args = tuple(itertools.chain(
            ('git', '-C', self._directory),
            itertools.chain.from_iterable(('-c', p) for p in self._params + ALWAYS),
            git_tracing_params(),
            args,
        ))
        try:
            return self.runner(args, preexec_fn=_bypass_limits, **opts)
        except subprocess.CalledProcessError as e:
            stream = e.stderr or e.stdout
            if stream:
                _logger.error("git call error: %s", stream)
            raise

    def stdout(self, flag: bool = True) -> Self:
        """Return a copy with stdout handling set according to ``flag``.

        ``True`` captures stdout through a pipe, ``False`` resets it to
        ``None``, any other value is forwarded as the ``stdout`` setting.
        """
        if flag is True:
            return self.with_config(stdout=subprocess.PIPE)
        elif flag is False:
            return self.with_config(stdout=None)
        return self.with_config(stdout=flag)

    def check(self, flag: bool) -> Self:
        """Return a copy whose ``check`` setting is ``flag``."""
        return self.with_config(check=flag)

    def with_config(self, **kw) -> Self:
        """Return a copy of this repo with ``kw`` merged over its config.

        The params and the runner of the current instance are carried over.
        """
        opts = {**self._config, **kw}
        r = Repo(self._directory, **opts)
        r._params = self._params
        # a customised runner must survive derivation, otherwise chained
        # calls (`repo.stdout()...`) silently fall back to `subprocess.run`
        r.runner = self.runner
        return r

    def with_params(self, *args) -> Self:
        """Return a copy whose extra ``-c`` entries are *replaced* by ``args``."""
        r = self.with_config()
        r._params = args
        return r

    def clone(self, to: str, branch: Optional[str] = None) -> Self:
        """Clone this repository to ``to``, optionally checking out ``branch``.

        NB: the returned :class:`Repo` is created with the default
        configuration, not the configuration of this instance.
        """
        self._run(
            'clone',
            *([] if branch is None else ['-b', branch]),
            self._directory, to,
        )
        return Repo(to)

    def get_tree(self, rev: str) -> str:
        """Return the id of the tree ``rev`` resolves to.

        ``rev`` can be any revision expression ``rev-parse`` understands.

        :raises subprocess.CalledProcessError: if ``rev`` can not be resolved
        """
        return self.stdout().with_config(check=True, encoding="utf-8")\
            .rev_parse(f'{rev}^{{tree}}')\
            .stdout.strip()

    def rebase(self, dest: str, commits: 'Sequence[PrCommit]') -> Tuple[str, Dict[str, str]]:
        """Implements rebase by hand atop plumbing so:

        - we can work without a working copy
        - we can track individual commits (and store the mapping)

        It looks like `--merge-base` is not sufficient for `merge-tree` to
        correctly keep track of history, so it loses contents. Therefore
        implement in two passes as in the github version.

        :param dest: revision to rebase the commits onto
        :param commits: commits to rebase, in order, each providing its
            ``sha``, ``parents`` and ``commit`` (``tree`` and ``message``)
        :returns: the id of the last rebased commit, and a mapping from
            original commit ids to rebased commit ids
        :raises MergeError: if there is nothing to rebase, if a commit does
            not have exactly one parent, if a non-empty commit becomes empty
            once merged, or if a git operation fails
        """
        repo = self.stdout().with_config(text=True, check=False)

        logger = _logger.getChild('rebase')
        if not commits:
            raise MergeError("PR has no commits")

        # validate every commit up-front: the parent of the first commit is
        # accessed below, and a parentless commit must be reported as a
        # merge error rather than crash with an IndexError
        for original in commits:
            if len(original['parents']) != 1:
                raise MergeError(
                    f"commits with multiple parents ({original['sha']}) can not be rebased, "
                    "either fix the branch to remove merges or merge without "
                    "rebasing")

        prev_tree = repo.get_tree(dest)
        prev_original_tree = repo.get_tree(commits[0]['parents'][0]["sha"])

        # first pass: compute the tree of each rebased commit by merging the
        # originals one by one on top of `dest`, through temporary commits
        new_trees = []
        parent = dest
        for original in commits:
            new_trees.append(check(repo.merge_tree(parent, original['sha'])).stdout.strip())
            # allow merging empty commits, but not empty*ing* commits while merging
            if prev_original_tree != original['commit']['tree']['sha']:
                if new_trees[-1] == prev_tree:
                    raise MergeError(
                        f"commit {original['sha']} results in an empty tree when "
                        f"merged, it is likely a duplicate of a merged commit, "
                        f"rebase and remove."
                    )

            parent = check(repo.commit_tree(
                tree=new_trees[-1],
                parents=[parent, original['sha']],
                message=f'temp rebase {original["sha"]}',
            )).stdout.strip()
            prev_tree = new_trees[-1]
            prev_original_tree = original['commit']['tree']['sha']

        # second pass: create the actual linear history, copying the message
        # and authorship of each original commit onto the computed trees
        mapping = {}
        for original, tree in zip(commits, new_trees):
            authorship = check(repo.show('--no-patch', '--pretty=%an%n%ae%n%ai%n%cn%n%ce', original['sha']))
            author_name, author_email, author_date, committer_name, committer_email =\
                authorship.stdout.splitlines()

            c = check(repo.commit_tree(
                tree=tree,
                parents=[dest],
                message=original['commit']['message'],
                author=(author_name, author_email, author_date),
                committer=(committer_name, committer_email),
            )).stdout.strip()

            logger.debug('copied %s to %s (parent: %s)', original['sha'], c, dest)
            dest = mapping[original['sha']] = c

        return dest, mapping

    def merge(self, c1: str, c2: str, msg: str, *, author: Tuple[str, str]) -> str:
        """Create a merge commit of ``c1`` and ``c2`` and return its id.

        :param c1: first parent of the merge
        :param c2: second parent of the merge
        :param msg: message of the merge commit
        :param author: ``(name, email)`` to set as the commit's author
        :raises MergeError: if the trees can not be merged or the commit can
            not be created
        """
        repo = self.stdout().with_config(text=True, check=False)

        t = repo.merge_tree(c1, c2)
        if t.returncode:
            raise MergeError(t.stderr)

        # use the unchecked repo: going through `self` would raise a
        # CalledProcessError when `self` is checked, bypassing the
        # conversion to MergeError below
        c = repo.commit_tree(
            tree=t.stdout.strip(),
            message=msg,
            parents=[c1, c2],
            author=author,
        )
        if c.returncode:
            raise MergeError(c.stderr)
        return c.stdout.strip()

    def commit_tree(
        self, *, tree: str, message: str,
        parents: Sequence[str] = (),
        author: 'Optional[Authorship]' = None,
        committer: 'Optional[Authorship]' = None,
    ) -> subprocess.CompletedProcess:
        """Run ``git commit-tree`` for ``tree``, reading ``message`` on stdin.

        :param tree: id of the tree to commit
        :param message: commit message
        :param parents: ids of the parent commits, in order
        :param author: ``(name, email[, date])`` exported through the
            ``GIT_AUTHOR_*`` environment variables
        :param committer: ``(name, email[, date])`` exported through the
            ``GIT_COMMITTER_*`` environment variables
        :returns: the completed process, its (text) stdout holds the id of
            the new commit
        """
        authorship = {}
        if author:
            authorship['GIT_AUTHOR_NAME'] = author[0]
            authorship['GIT_AUTHOR_EMAIL'] = author[1]
            if len(author) > 2:
                authorship['GIT_AUTHOR_DATE'] = author[2]
        if committer:
            authorship['GIT_COMMITTER_NAME'] = committer[0]
            authorship['GIT_COMMITTER_EMAIL'] = committer[1]
            if len(committer) > 2:
                authorship['GIT_COMMITTER_DATE'] = committer[2]

        return self.with_config(
            input=message,
            stdout=subprocess.PIPE,
            text=True,
            env={
                **os.environ,
                **authorship,
                # we don't want git to use the timezone of the machine it's
                # running on: previously it used the timezone configured in
                # github (?), which I think / assume defaults to a generic UTC
                'TZ': 'UTC',
            }
        )._run(
            'commit-tree',
            tree,
            '-F', '-',
            *itertools.chain.from_iterable(('-p', p) for p in parents),
        )

    def update_tree(self, tree: str, files: Mapping[str, Callable[[Self, str], str]]) -> str:
        """Return the id of a copy of ``tree`` in which ``files`` are rewritten.

        :param tree: id of the tree to update
        :param files: maps file paths (``/``-separated, relative to the
            tree) to callables receiving a repo and the path, and returning
            the new content of the file
        :returns: the id of the updated tree (``tree`` itself if ``files``
            is empty)
        """
        # FIXME: either ignore or process binary files somehow (how does git show conflicts in binary files?)
        # NOTE(review): entries missing from a listing are always added as
        #               `100644 blob`, which looks wrong for intermediate
        #               directories that don't exist yet -- confirm whether
        #               callers can hit that case
        repo = self.stdout().with_config(stderr=None, text=True, check=False, encoding="utf-8")
        for f, c in files.items():
            new_contents = c(repo, f)
            oid = repo \
                .with_config(input=new_contents) \
                .hash_object("-w", "--stdin", "--path", f) \
                .stdout.strip()

            # we need to rewrite every tree from the parent of `f`
            while f:
                f, _, local = f.rpartition("/")
                # tree to update, `{tree}:` works as an alias for tree
                lstree = repo.ls_tree(f"{tree}:{f}").stdout.splitlines(keepends=False)
                new_tree = []
                seen = False
                for mode, typ, sha, name in map(methodcaller("split", None, 3), lstree):
                    if name == local:
                        sha = oid
                        seen = True
                    # tab before name is critical to the format
                    new_tree.append(f"{mode} {typ} {sha}\t{name}\n")
                if not seen:
                    new_tree.append(f"100644 blob {oid}\t{local}\n")
                oid = repo.with_config(input="".join(new_tree), check=True).mktree().stdout.strip()
            # following files are applied on top of the updated tree
            tree = oid
        return tree

    def modify_delete(self, tree: str, files: Iterable[str]) -> str:
        """Updates ``files`` in ``tree`` to add conflict markers to show them
        as being modify/delete-ed, rather than have only the original content.

        This is because having just content in a valid file is easy to miss,
        causing file resurrections as they get committed rather than re-removed.

        TODO: maybe extract the diff information compared to before they were removed? idk

        :param tree: id of the tree containing the files
        :param files: paths of the files to wrap in conflict markers
        :returns: the id of the updated tree
        """
        def rewriter(r: Self, f: str) -> str:
            contents = r.cat_file("-p", f"{tree}:{f}").stdout
            # the markers are partially escaped so this very file does not
            # look like it contains conflict markers
            return f"""\
<<<\x3c<<< HEAD
||||||| MERGE BASE
=======
{contents}
>>>\x3e>>> FORWARD PORTED
"""
        return self.update_tree(tree, dict.fromkeys(files, rewriter))
|
|
|
|
|
|
def check(p: subprocess.CompletedProcess) -> subprocess.CompletedProcess:
    """Return ``p`` if it succeeded, raise a ``MergeError`` otherwise.

    On failure the arguments and both output streams of the process are
    logged, and the error carries the process' stderr (or
    ``'merge conflict'`` if stderr is empty).
    """
    if p.returncode:
        _logger.info("rebase failed at %s\nstdout:\n%s\nstderr:\n%s", p.args, p.stdout, p.stderr)
        raise MergeError(p.stderr or 'merge conflict')

    return p
|
|
|
|
@dataclasses.dataclass
class GitCommand:
    """A git subcommand bound to a repository, run by calling the instance.

    Positional arguments are passed through as-is after the command name.
    Keyword arguments are converted to options and appended after them:
    single-letter names become short options (``n`` -> ``-n``), other names
    become long options with underscores replaced by dashes (``no_patch`` ->
    ``--no-patch``). A value of ``None`` or ``True`` yields a bare flag, any
    other value is stringified and passed as the next argument.
    """
    # repository the command is run against (through its `_run` method)
    repo: 'Repo'
    # name of the git subcommand, e.g. `rev-parse`
    name: str

    def __call__(self, *args, **kwargs) -> subprocess.CompletedProcess:
        """Run the command and return the result of ``repo._run``.

        When tracing is available (``tracer`` is set), the call is wrapped
        in a ``git.<name>`` span.
        """
        argv = (self.name, *args, *self._to_options(kwargs))
        if not tracer:
            return self.repo._run(*argv)

        with tracer.start_as_current_span(f"git.{self.name}", attributes={
            "http.target": None,
            "http.user_agent": "git",
        }):
            return self.repo._run(*argv)

    def _to_options(self, d):
        """Yield the command line arguments for the keyword options ``d``."""
        for k, v in d.items():
            if len(k) == 1:
                yield '-' + k
            else:
                yield '--' + k.replace('_', '-')
            # identity checks on purpose: `v in (None, True)` compares by
            # equality, and `1 == True` so a value of 1 (e.g. `n=1`) would be
            # silently dropped
            if v is None or v is True:
                continue
            # there is no way to express a disabled flag, omit the option
            # instead of passing False
            assert v is not False
            yield str(v)
|