runbot/runbot_merge/models/utils.py
Xavier Morel d6bb18e358 [ADD] runbot_merge: rendering of PR descriptions
Previously PR descriptions were displayed as raw text in the PR
dashboard. While not wrong per se, this was pretty ugly and not always
convenient as e.g. links had to be copied by hand.

Push descriptions through pymarkdown for rendering them, with a few
customisations:

- Enabled footnotes & tables & fenced code blocks because GFM has
  that, this doesn't quite put pymarkdown's base behaviour on par with
  gfm (and py-gfm ultimately gave up on that effort moving to just
  wrap github's own markdown renderer instead).
- Don't allow raw html because too much of a hassle to do it
  correctly, and very few people ever do it (mostly me I think).
- Added a bespoke handler / renderer for github-style references.

  Note: uses positional captures because it started that way and named
  captures are not removed from that sequence so mixing and matching
  is not very useful, plus python does not support identically named
  groups (even exclusive) so all 4 repo captures and all 3 issue
  number captures would need different names...
- And added a second bespoke handler for our own opw/issue references
  leading to odoo.com, that's something we can't do via github[^1] so
  it's a genuine value-add.

Fixes #889

[^1]: github can do it (though possibly not with the arbitrary
    unspecified nonsense I got when I tried to list some of the
    reference styles, some folks need therapy), but it's not available
    on our plan
2024-07-15 10:28:28 +02:00

202 lines
7.4 KiB
Python

import logging
from contextvars import ContextVar
from typing import Tuple
from xml.etree.ElementTree import Element, tostring
import markdown.inlinepatterns
import markdown.treeprocessors
from markupsafe import escape, Markup
def enum(model: str, field: str) -> Tuple[str, str]:
    """Build the ``<model>_<field>_type`` identifier for a model field.

    Dots in ``model`` are replaced by underscores so the result is a plain
    identifier, e.g. ``("a.b", "c")`` yields ``"a_b_c_type"``.

    :param model: dotted model name
    :param field: field name
    :returns: a pair holding the same identifier twice
    """
    type_name = "{}_{}_type".format(model.replace(".", "_"), field)
    return type_name, type_name
def readonly(_):
    """Reject any write: unconditionally raises ``TypeError``.

    The single positional argument is accepted and ignored.

    :raises TypeError: always
    """
    message = "Field is readonly"
    raise TypeError(message)
# Full name of the repository whose text is currently being rendered. It is
# set by ``dfm()`` for the duration of a conversion and read by
# ``GithubLinking.handleMatch`` to resolve contextual references. The default
# (empty string) means "no context available".
DFM_CONTEXT_REPO = ContextVar("dfm_context", default="")
def dfm(repository: str, text: str) -> Markup:
    """ Converts the input text from markup to HTML using the Odoo PR
    Description Rules, which are basically:

    - GFM
    - minus raw HTML (?)
    - + github's autolinking (https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls)
    - + bespoke autolinking of OPW and Task links to odoo.com

    :param repository: full name of the repository the text belongs to, it
                       is published through ``DFM_CONTEXT_REPO`` while the
                       conversion runs so contextual github references can
                       be resolved
    :param text: the raw description, it is escaped before being rendered
                 so any HTML it contains is not interpreted
    :returns: the rendered HTML, marked as safe
    """
    t = DFM_CONTEXT_REPO.set(repository)
    try:
        # The renderer is a single shared instance and python-markdown keeps
        # per-document state on it (e.g. the footnotes collected by the
        # footnotes extension): reset it so nothing from a previous
        # description leaks into this one.
        dfm_renderer.reset()
        return Markup(dfm_renderer.convert(escape(text)))
    finally:
        # always restore the previous context, even if rendering fails
        DFM_CONTEXT_REPO.reset(t)
class DfmExtension(markdown.extensions.Extension):
    """Markdown extension bundling everything the PR description renderer
    needs: a handful of stock extensions, the two bespoke inline linkers and
    the tree processor which strips unwanted links.
    """

    def extendMarkdown(self, md):
        stock_extensions = ['fenced_code', 'footnotes', 'nl2br', 'sane_lists', 'tables']
        md.registerExtensions(stock_extensions, configs={})

        # NOTE(review): priorities 123 / 124 are presumably picked relative
        # to the built-in inline patterns -- confirm against the registry of
        # the python-markdown version in use.
        inline_patterns = md.inlinePatterns
        inline_patterns.register(GithubLinking(md), 'githublinking', 123)
        inline_patterns.register(OdooLinking(md), 'odoolinking', 124)

        # Ideally the unlinker would run before the prettifier so that
        # prettification is done correctly, but it seems unlikely the
        # prettifier copes with the variable nature of links, and we likely
        # want to run after the unescaper.
        tree_processors = md.treeprocessors
        tree_processors.register(Unlinker(), "unlinker", -10)
class GithubLinking(markdown.inlinepatterns.InlineProcessor):
    """Inline processor turning github-style references into links.

    Github references are both *very* varied and *contextual*: how they
    resolve depends on the repository they are written in (github itself
    also checks that the people & objects exist, which is not an option
    here).

    The processor has no access to that context directly, so it is faked
    through a context variable: ``DFM_CONTEXT_REPO`` should hold the full
    name of the repository the text is being resolved from. When it is empty
    a warning is logged and the matched text is left as-is.

    Recognised forms (capture groups in parentheses):

    - full issue / PR URL, with optional fragment (1, 2, 3)
    - full commit URL (4, 5)
    - ``owner/repo#number`` (6, 7)
    - ``GH-number`` or ``#number`` (8, 9)
    - ``owner@sha`` or ``owner/repo@sha`` (10, 11)
    - bare sha containing at least one letter (12)
    """

    def __init__(self, md=None):
        super().__init__(r"""(?xi)
        (?:
        \bhttps://github.com/([\w\.-]+/[\w\.-]+)/(?:issues|pull)/(\d+)(\#[\w-]+)?
        | \bhttps://github.com/([\w\.-]+/[\w\.-]+)/commit/([a-f0-9]+)
        | \b([\w\.-]+/[\w\.-]+)\#(\d+)
        | (\bGH-|(?:^|(?<=\s))\#)(\d+)
        | \b(?:
        # user@sha or user/repo@sha
        ([\w\.-]+(?:/[\w\.-]+)?)
        @
        ([0-9a-f]{7,40})
        )
        | \b(
        # a sha is 7~40 hex digits but that means any million+ number matches
        # which is probably wrong. So ensure there's at least one letter in the
        # set by using a positive lookahead which looks for a sequence of at
        # least 0 numbers followed by a-f
        (?=[0-9]{0,39}?[a-f])
        [0-9a-f]{7,40}
        )
        )
        \b
        """, md)

    def handleMatch(self, m, data):
        start, end = m.span()
        context = DFM_CONTEXT_REPO.get()
        if not context:
            logger = logging.getLogger(__name__).getChild("github_links")
            logger.warning("missing context for rewriting github links, skipping")
            # hand the matched text back untouched
            return m[0], start, end

        # positional captures, in the order of the alternatives of the pattern
        (
            url_issue_repo, url_issue, url_fragment,
            url_commit_repo, url_commit,
            short_repo, short_issue,
            bare_prefix, bare_issue,
            at_repo, at_commit,
            lone_commit,
        ) = m.groups()

        if url_issue:  # full issue / PR
            link = self._issue_link(context, url_issue_repo, url_issue, url_fragment)
        elif url_commit:  # long hash
            link = self._commit_link(context, url_commit_repo, url_commit)
        elif short_issue:  # short issue with repo
            link = self._issue_link(context, short_repo, short_issue, url_fragment)
        elif bare_issue:  # short issue without repo
            # "GH" is a marker telling the link builder to keep the GH- label
            marker = None if bare_prefix == '#' else "GH"
            link = self._issue_link(context, marker, bare_issue, url_fragment)
        elif at_commit:  # medium hash
            link = self._commit_link(context, at_repo, at_commit)
        else:  # hash only
            link = self._commit_link(context, None, lone_commit)
        return link, start, end

    @staticmethod
    def _issue_link(context, repo, issue, fragment):
        """Build the ``<a>`` element for an issue / PR reference."""
        if repo == "GH":
            label = f"GH-{issue}"
            repo = context
        elif repo is None or repo == context:
            # same repository as the context: the short form is enough
            label = f"#{issue}"
            repo = context
        else:
            label = f"{repo}#{issue}"

        # only fragments pointing at a comment are kept on the link
        if fragment and fragment.startswith('#issuecomment-'):
            label += ' (comment)'
        else:
            fragment = ''

        link = Element("a")
        link.text = label
        link.set('href', f"https://github.com/{repo}/issues/{issue}{fragment}")
        return link

    @staticmethod
    def _commit_link(context, repo, commit):
        """Build the ``<a>`` element for a commit reference."""
        context_name = context.split('/')[-1]
        if repo is None or repo == context:
            label_repo = ""
            repo = context
        elif '/' not in repo:  # owner-only
            label_repo = repo
            # NOTE: I assume in reality we're supposed to find the actual
            # fork if unambiguous...
            repo = repo + '/' + context_name
        elif repo.split('/')[-1] == context_name:
            # NOTE: here we assume that the same repository name under a
            # different owner is a fork
            label_repo = repo.split('/')[0]
        else:
            label_repo = repo

        link = Element("a")
        link.text = f"{label_repo}@{commit}" if label_repo else commit
        link.set("href", f"https://github.com/{repo}/commit/{commit}")
        return link
class OdooLinking(markdown.inlinepatterns.InlineProcessor):
    """Inline processor linking opw / task references to the matching
    ``project.task`` record on odoo.com.
    """

    def __init__(self, md=None):
        # Weirder variations exist but are deliberately not supported. This
        # matches (case-insensitively) "opw", "task", "task-id" or "taskid",
        # then an optional "-" or ":" which may be surrounded by whitespace,
        # then digits.
        super().__init__(r"(?i)\b(task(?:-?id)?|opw)\s*[-:]?\s*(\d+)\b", md)

    def handleMatch(self, m, data):
        keyword, number = m[1], m[2]
        # every task-ish spelling is normalised to "task-<id>"
        prefix = "opw" if keyword.lower() == 'opw' else "task"

        link = Element("a", href='https://www.odoo.com/web#model=project.task&id=' + number)
        link.text = f"{prefix}-{number}"
        return link, *m.span()
class Unlinker(markdown.treeprocessors.Treeprocessor):
    """Tree processor removing every ``<a>`` whose ``href`` does not start
    with ``http:`` or ``https:``, while keeping the content of the link (text,
    children and tail) in place.
    """

    def run(self, root):
        # ElementTree has no parent pointers, so links can't be replaced in
        # place: look for the elements *containing* links instead and rebuild
        # their list of children.
        for container in root.iterfind('.//*[a]'):
            former_children = container[:]
            # not clear(): that would also drop the attributes and text/tail
            del container[:]
            for node in former_children:
                if node.tag == 'a' and not node.get('href', '').startswith(('https:', 'http:')):
                    # this is a weird link, unwrap it: text first, then the
                    # children, then the tail
                    self._append_text(container, node.text)
                    if len(node):
                        container.extend(node[:])
                    self._append_text(container, node.tail)
                else:
                    container.append(node)
        return None

    @staticmethod
    def _append_text(container, text):
        """Append ``text`` after the current content of ``container``: on the
        tail of its last child if it has one, on its own text otherwise.
        """
        if not text:
            return
        if len(container):
            last = container[-1]
            last.tail = (last.tail or '') + text
        else:
            container.text = (container.text or '') + text
# Module-level renderer shared by every ``dfm()`` call, configured solely
# through ``DfmExtension`` and emitting HTML5.
#
# NOTE(review): python-markdown instances carry per-document state between
# ``convert()`` calls, anything reusing this instance should take that into
# account.
#
# Alternatively, use cmarkgfm? The maintainer of py-gfm (implemented over
# python-markdown) ultimately gave up, apparently mostly because of
# python-markdown's tendency to break its API all the time.
dfm_renderer = markdown.Markdown(
    extensions=[DfmExtension()],
    output_format='html5',
)