Mirror of https://github.com/odoo/runbot.git
[FIX] forwardport: storage of old garbage, repo cache sizes
Since the forwardport bot works off of PRs, when it was created leveraging
the magic refs of github (refs/pull/*/head) seemed sensible: when updating
the cache repo, the magic refs would be updated alongside it and the
forward-porting would have all the commits available.

Turns out there are a few issues with this:

- the magic refs have become a bit unreliable, so we need a fallback
  (b1c16fce8768080d30430f4e6d3788b60ce13de7)
- the forwardport bot only needs the commits on a transient basis, but the
  magic refs live forever and diverge from all other content (since we
  rarely `merge` PRs); for a large repo with lots of PRs this leads to a
  significant inflation of the repo (both on-disk size and object count),
  e.g. odoo/odoo has about 25% more objects with the pull refs included
  (3486550 to 4395319) and takes nearly 50% more space on disk (1.21G to 1.77G)

As a result, we can just stop configuring the pull refs entirely, and
instead fetch the heads (~refs) we need as we need them. This can be a
touch more expensive at times as it requires two `fetch` calls, and we'll
have a few redundant calls as every forward port of a chain will require a
fetch of the root's head. *However*, it will avoid retrieving PR data for
e.g. PRs to master, as they don't get forward-ported, and they also won't
get parasite updates from PRs being ignored or eventually closed.

Eventually this could be optimised further by:

- performing an update of the cache repo at the start of the cron iff there
  are batches to port
- creating a temp clone for the batch
- fetching the heads of all PRs to forward port into the temp clone in a
  single `fetch`
- performing all the ports by cloning the temp clone (and not `fetch`-ing
  into those)
- then cleaning up the temp clone after having cleaned up the individual
  forward port clones

Small followup for #489
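For illustration, here is a minimal sketch of the two-`fetch` flow described
above, written with plain `git` through `subprocess`. It is not the runbot
code itself: the paths, URL, branch name and SHA are placeholders made up
for the example, and error handling is reduced to a single check.

    # Hypothetical sketch of the on-demand fetch flow; paths, URL, branch
    # and SHA below are placeholders, not runbot configuration.
    import subprocess

    CACHE_DIR = "/data/cache/odoo.git"            # bare cache repo, heads only
    WORK_DIR = "/tmp/fp-working-copy"             # throwaway clone for one forward port
    SOURCE_URL = "https://github.com/odoo/odoo"   # where the PR head actually lives
    TARGET_BRANCH = "some-target-branch"          # branch the PR is being ported to
    root_head = "0123456789abcdef0123456789abcdef01234567"  # head SHA of the root PR

    def git(*args, cwd=None, check=True):
        # run git and capture combined output so it can be logged
        return subprocess.run(
            ["git", *args], cwd=cwd, check=check,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        )

    # the bare cache repo only tracks refs/heads/*, no refs/pull/*/head,
    # so it stays small
    git("config", "--add", "remote.origin.fetch",
        "+refs/heads/*:refs/heads/*", cwd=CACHE_DIR)

    # fetch call #1: refresh the cache repo (heads only)
    git("fetch", "origin", cwd=CACHE_DIR)

    # clone the cache repo into a working copy on the target branch
    git("clone", "--branch", TARGET_BRANCH, CACHE_DIR, WORK_DIR)

    # fetch call #2: pull just the root PR's head into the working copy
    git("fetch", SOURCE_URL, root_head, cwd=WORK_DIR)

    # make sure the commit is really there before cherry-picking onto the target
    present = git("cat-file", "-e", root_head, cwd=WORK_DIR, check=False).returncode == 0
    if not present:
        raise RuntimeError("expected head %s not found" % root_head)

The diff below wires the same idea into the bot: `_get_local_directory`
stops configuring the `refs/pull/*/head` fetch spec, and `_create_fp_branch`
fetches the root's head straight into the working copy via the new
`_source_url` property.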
commit e20277c6ad (parent c35b721f0e)
@@ -793,6 +793,14 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
         b.prs[0]._schedule_fp_followup()
         return b
 
+    @property
+    def _source_url(self):
+        return 'https://{}:{}@github.com/{}'.format(
+            self.repository.project_id.fp_github_name or '',
+            self.repository.project_id.fp_github_token,
+            self.repository.name,
+        )
+
     def _create_fp_branch(self, target_branch, fp_branch_name, cleanup):
         """ Creates a forward-port for the current PR to ``target_branch`` under
         ``fp_branch_name``.
@@ -804,7 +812,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
         present the conflict information is composed of the hash of the
         conflicting commit, the stderr and stdout of the failed
         cherrypick and a list of all PR commit hashes
-        :rtype: (None | (str, str, str, list[str]), Repo)
+        :rtype: (None | (str, str, str, list[commit]), Repo)
         """
         logger = _logger.getChild(str(self.id))
         root = self._get_root()
@@ -813,21 +821,8 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
             self.display_name, root.display_name, target_branch.name
         )
         source = self._get_local_directory()
-        check_commit = partial(source.check(False).cat_file, e=root.head)
-        r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\
-            .fetch('origin')
-        logger.info("Updated %s:\n%s", source._directory, r.stdout.decode())
-        if check_commit().returncode:
-            # fallback: try to fetch the commit directly if the magic ref doesn't work?
-            r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT) \
-                .fetch('origin', root.head)
-            logger.info("Updated %s:\n%s", source._directory, r.stdout.decode())
-            if check_commit().returncode:
-                raise ForwardPortError(
-                    f"During forward port of {self.display_name}, unable to find "
-                    f"expected head of {root.display_name} ({root.head})"
-                )
-
+        r = source.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT).fetch()
+        logger.info("Updated cache repo %s:\n%s", source._directory, r.stdout.decode())
         logger.info("Create working copy...")
         working_copy = source.clone(
@@ -841,6 +836,21 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
             )),
             branch=target_branch.name
         )
+
+        r = working_copy.with_config(stdout=subprocess.PIPE, stderr=subprocess.STDOUT) \
+            .fetch(self._source_url, root.head)
+        logger.info(
+            "Fetched head of %s into %s:\n%s",
+            root.display_name,
+            working_copy._directory,
+            r.stdout.decode()
+        )
+        if working_copy.check(False).cat_file(e=root.head).returncode:
+            raise ForwardPortError(
+                f"During forward port of {self.display_name}, unable to find "
+                f"expected head of {root.display_name} ({root.head})"
+            )
+
         project_id = self.repository.project_id
         # add target remote
         working_copy.remote(
@@ -857,17 +867,17 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
             root._cherry_pick(working_copy)
             return None, working_copy
         except CherrypickError as e:
+            h, out, err, commits = e.args
+
             # using git diff | git apply -3 to get the entire conflict set
             # turns out to not work correctly: in case files have been moved
             # / removed (which turns out to be a common source of conflicts
             # when forward-porting) it'll just do nothing to the working copy
             # so the "conflict commit" will be empty
             # switch to a squashed-pr branch
-            root_branch = 'origin/pull/%d' % root.number
-            working_copy.checkout('-bsquashed', root_branch)
-            root_commits = root.commits()
+            working_copy.check(True).checkout('-bsquashed', root.head)
             # commits returns oldest first, so youngest (head) last
-            head_commit = root_commits[-1]['commit']
+            head_commit = commits[-1]['commit']
 
             to_tuple = operator.itemgetter('name', 'email')
             to_dict = lambda term, vals: {
@@ -876,7 +886,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
                 'GIT_%s_DATE' % term: vals[2],
             }
             authors, committers = set(), set()
-            for c in (c['commit'] for c in root_commits):
+            for c in (c['commit'] for c in commits):
                 authors.add(to_tuple(c['author']))
                 committers.add(to_tuple(c['committer']))
             fp_authorship = (project_id.fp_github_name, '', '')
@@ -890,7 +900,7 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
                 'GIT_COMMITTER_DATE': '',
             })
             # squash to a single commit
-            conf.reset('--soft', root_commits[0]['parents'][0]['sha'])
+            conf.reset('--soft', commits[0]['parents'][0]['sha'])
             conf.commit(a=True, message="temp")
             squashed = conf.stdout().rev_parse('HEAD').stdout.strip().decode()
 
@@ -901,7 +911,6 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
                 .with_config(check=False)\
                 .cherry_pick(squashed, no_commit=True)
             status = conf.stdout().status(short=True, untracked_files='no').stdout.decode()
-            h, out, err, hh = e.args
             if err.strip():
                 err = err.rstrip() + '\n----------\nstatus:\n' + status
             else:
@@ -909,9 +918,9 @@ This PR targets %s and is part of the forward-port chain. Further PRs will be cr
             # if there was a single commit, reuse its message when committing
             # the conflict
             # TODO: still add conflict information to this?
-            if len(root_commits) == 1:
-                msg = root._make_fp_message(root_commits[0])
-                conf.with_config(input=str(msg).encode())\
+            if len(commits) == 1:
+                msg = root._make_fp_message(commits[0])
+                conf.with_config(input=str(msg).encode()) \
                     .commit(all=True, allow_empty=True, file='-')
             else:
                 conf.commit(
@@ -923,7 +932,7 @@ stdout:
 stderr:
 %s
 """ % (h, out, err))
-            return (h, out, err, hh), working_copy
+            return (h, out, err, [c['sha'] for c in commits]), working_copy
 
     def _cherry_pick(self, working_copy):
         """ Cherrypicks ``self`` into the working copy
@@ -982,7 +991,7 @@ stderr:
                     commit_sha,
                     r.stdout.decode(),
                     _clean_rename(r.stderr.decode()),
-                    [commit['sha'] for commit in commits]
+                    commits
                 )
 
             msg = self._make_fp_message(commit)
@@ -1033,22 +1042,15 @@ stderr:
             return git(repo_dir)
         else:
             _logger.info("Cloning out %s to %s", self.repository.name, repo_dir)
-            subprocess.run([
-                'git', 'clone', '--bare',
-                'https://{}:{}@github.com/{}'.format(
-                    self.repository.project_id.fp_github_name or '',
-                    self.repository.project_id.fp_github_token,
-                    self.repository.name,
-                ),
-                str(repo_dir)
-            ], check=True)
-            # add PR branches as local but namespaced (?)
+            subprocess.run(['git', 'clone', '--bare', self._source_url, str(repo_dir)], check=True)
+            # bare repos don't have fetch specs by default, and fetching *into*
+            # them is a pain in the ass, configure fetch specs so `git fetch`
+            # works properly
             repo = git(repo_dir)
-            # bare repos don't have a fetch spec by default (!) so adding one
-            # removes the default behaviour and stops fetching the base
-            # branches unless we add an explicit fetch spec for them
             repo.config('--add', 'remote.origin.fetch', '+refs/heads/*:refs/heads/*')
-            repo.config('--add', 'remote.origin.fetch', '+refs/pull/*/head:refs/heads/pull/*')
+            # negative refspecs require
+            # repo.config('--add', 'remote.origin.fetch', '^refs/heads/tmp.*')
+            # repo.config('--add', 'remote.origin.fetch', '^refs/heads/staging.*')
             return repo
 
     def _outstanding(self, cutoff):
|