[FIX] runbot_merge: normalisation of patches before parsing

- Apparently if a user is on windows the ACE editor can swap out their
  line end from unix to windows. The patch parsers were predicated
  upon all patches being in unix mode (because git, and diff).

  Fixup both parsers to convert windows-style line end to unix before
  trying to parse the patch data. Also add a few fallbacks to limit
  the odds of an unhelpful `StopIteration` (though that might hide
  errors more than reveal them...)
- Make sure we support `format-patch --no-signature`, just requires
  using the correct partition direction: I assume I used `rpartition`
  as a form of micro-optimisation *but*

  - If the separator is not found the "patch body" ends up in the
    third parameter rather than the first, which makes the fallback
    difficult.
  - There doesn't seem to be anything preventing *multiple* signature
    separators in a message, and logically the first one should hold
    and the rest is all part of the signature.

  As a result, for both reasons we need to look *forwards* for the
  signature separator, not backwards. Hence `str.partition`.

Fixes #992
This commit is contained in:
Xavier Morel 2024-11-28 13:46:44 +01:00
parent 749382a1e7
commit 7f7589c50e
2 changed files with 18 additions and 10 deletions

View File

@ -53,20 +53,20 @@ def expect(line: str, starts_with: str, message: str) -> str:
def parse_show(p: Patch) -> ParseResult: def parse_show(p: Patch) -> ParseResult:
# headers are Author, Date or Author, AuthorDate, Commit, CommitDate # headers are Author, Date or Author, AuthorDate, Commit, CommitDate
# commit message is indented 4 spaces # commit message is indented 4 spaces
lines = iter(p.patch.splitlines(keepends=True)) lines = (l + '\n' for l in p.patch.splitlines(keepends=False))
if not next(lines).startswith("commit "): if not next(lines, '').startswith("commit "):
raise ValidationError("Invalid patch") raise ValidationError("Invalid patch")
name, email = parseaddr( name, email = parseaddr(
expect(next(lines), "Author:", "Missing author") expect(next(lines, ''), "Author:", "Missing author")
.split(maxsplit=1)[1]) .split(maxsplit=1)[1])
date: str = next(lines) date: str = next(lines, '')
header, date = date.split(maxsplit=1) header, date = date.split(maxsplit=1)
author = (name, email, date) author = (name, email, date)
if header.startswith("Date:"): if header.startswith("Date:"):
committer = author committer = author
elif header.startswith("AuthorDate:"): elif header.startswith("AuthorDate:"):
commit = expect(next(lines), "Commit:", "Missing committer") commit = expect(next(lines, ''), "Commit:", "Missing committer")
commit_date = expect(next(lines), "CommitDate:", "Missing commit date") commit_date = expect(next(lines, ''), "CommitDate:", "Missing commit date")
name, email = parseaddr(commit.split(maxsplit=1)[1]) name, email = parseaddr(commit.split(maxsplit=1)[1])
committer = (name, email, commit_date.split(maxsplit=1)[1]) committer = (name, email, commit_date.split(maxsplit=1)[1])
else: else:
@ -75,11 +75,11 @@ def parse_show(p: Patch) -> ParseResult:
f"found {header}.\nOnly 'medium' and 'fuller' formats are supported") f"found {header}.\nOnly 'medium' and 'fuller' formats are supported")
# skip possible extra headers before the message # skip possible extra headers before the message
while next(lines) != ' \n': while next(lines, ' \n') != ' \n':
continue continue
body = [] body = []
while (l := next(lines)) != ' \n': while (l := next(lines, ' \n')) != ' \n':
body.append(l.removeprefix(' ')) body.append(l.removeprefix(' '))
# remainder should be the patch # remainder should be the patch
@ -101,11 +101,11 @@ def parse_format_patch(p: Patch) -> ParseResult:
msg = re.sub(r'^\[PATCH( \d+/\d+)?\] ', '', m['subject']) msg = re.sub(r'^\[PATCH( \d+/\d+)?\] ', '', m['subject'])
body, _, rest = m.get_payload().partition('---\n') body, _, rest = m.get_payload().partition('---\n')
if body: if body:
msg += '\n\n' + body msg += '\n\n' + body.replace('\r\n', '\n')
# split off the signature, per RFC 3676 § 4.3. # split off the signature, per RFC 3676 § 4.3.
# leave the diffstat in as it *should* not confuse tooling? # leave the diffstat in as it *should* not confuse tooling?
patch, _, _ = rest.rpartition("-- \n") patch, _, _ = rest.partition("-- \n")
# git (diff, show, format-patch) adds command and index headers to every # git (diff, show, format-patch) adds command and index headers to every
# file header, which patch(1) chokes on, strip them... but maybe this should # file header, which patch(1) chokes on, strip them... but maybe this should
# extract the udiff sections instead? # extract the udiff sections instead?

View File

@ -196,6 +196,14 @@ def test_apply_udiff(env, project, repo, users):
@pytest.mark.parametrize('patch', [ @pytest.mark.parametrize('patch', [
pytest.param(FORMAT_PATCH_XMO, id='xmo'), pytest.param(FORMAT_PATCH_XMO, id='xmo'),
pytest.param(FORMAT_PATCH_MAT, id='mat'), pytest.param(FORMAT_PATCH_MAT, id='mat'),
pytest.param(
FORMAT_PATCH_XMO.replace('\n', '\r\n'),
id='windows',
),
pytest.param(
FORMAT_PATCH_XMO.rsplit('-- \n')[0],
id='no-signature',
)
]) ])
def test_apply_format_patch(env, project, repo, users, patch): def test_apply_format_patch(env, project, repo, users, patch):
p = env['runbot_merge.patch'].create({ p = env['runbot_merge.patch'].create({