runbot/runbot_merge/models/utils.py

203 lines
7.5 KiB
Python
Raw Permalink Normal View History

import logging
from contextvars import ContextVar
from typing import Tuple
from xml.etree.ElementTree import Element, tostring
import markdown.inlinepatterns
import markdown.treeprocessors
from markupsafe import escape, Markup
def enum(model: str, field: str) -> Tuple[str, str]:
n = f'{model.replace(".", "_")}_{field}_type'
return n, n
def readonly(_):
raise TypeError("Field is readonly")
DFM_CONTEXT_REPO = ContextVar("dfm_context", default="")
def dfm(repository: str, text: str) -> Markup:
""" Converts the input text from markup to HTML using the Odoo PR
Description Rules, which are basically:
- GFM
- minus raw HTML (?)
- + github's autolinking (https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls)
- + bespoke autolinking of OPW and Task links to odoo.com
"""
t = DFM_CONTEXT_REPO.set(repository)
try:
dfm_renderer.reset()
return Markup(dfm_renderer.convert(escape(text)))
finally:
DFM_CONTEXT_REPO.reset(t)
class DfmExtension(markdown.extensions.Extension):
def extendMarkdown(self, md):
md.registerExtensions(['fenced_code', 'footnotes', 'nl2br', 'sane_lists', 'tables'], configs={})
md.inlinePatterns.register(GithubLinking(md), 'githublinking', 123)
md.inlinePatterns.register(OdooLinking(md), 'odoolinking', 124)
# ideally the unlinker should run before the prettifier so the
# prettification is done correctly, but it seems unlikely the prettifier
# handles the variable nature of links correctly, and we likely want to
# run after the unescaper
md.treeprocessors.register(Unlinker(), "unlinker", -10)
class GithubLinking(markdown.inlinepatterns.InlineProcessor):
"""Aside from being *very* varied github links are *contextual*. That is,
their resolution depends on the repository they're being called from
(technically they also need all the information from the github backend to
know the people & objects exist but we don't have that option).
Context is not available to us, but we can fake it through the application
of contextvars: ``DFM_CONTEXT_REPO`` should contain the full name of the
repository this is being resolved from.
If ``DFM_CONTEXT_REPO`` is empty and needed, this processor emits a warning.
"""
def __init__(self, md=None):
super().__init__(r"""(?xi)
(?:
\bhttps://github.com/([\w\.-]+/[\w\.-]+)/(?:issues|pull)/(\d+)(\#[\w-]+)?
| \bhttps://github.com/([\w\.-]+/[\w\.-]+)/commit/([a-f0-9]+)
| \b([\w\.-]+/[\w\.-]+)\#(\d+)
| (\bGH-|(?:^|(?<=\s))\#)(\d+)
| \b(?:
# user@sha or user/repo@sha
([\w\.-]+(?:/[\w\.-]+)?)
@
([0-9a-f]{7,40})
)
| \b(
# a sha is 7~40 hex digits but that means any million+ number matches
# which is probably wrong. So ensure there's at least one letter in the
# set by using a positive lookahead which looks for a sequence of at
# least 0 numbers followed by a-f
(?=[0-9]{0,39}?[a-f])
[0-9a-f]{7,40}
)
)
\b
""", md)
def handleMatch(self, m, data):
ctx = DFM_CONTEXT_REPO.get()
if not ctx:
logging.getLogger(__name__)\
.getChild("github_links")\
.warning("missing context for rewriting github links, skipping")
return m[0], *m.span()
repo = issue = commit = None
if m[2]: # full issue / PR
repo = m[1]
issue = m[2]
elif m[5]: # long hash
repo = m[4]
commit = m[5]
elif m[7]: # short issue with repo
repo = m[6]
issue = m[7]
elif m[9]: # short issue without repo
repo = None if m[8] == '#' else "GH"
issue = m[9]
elif m[11]: # medium hash
repo = m[10]
commit = m[11]
else: # hash only
commit = m[12]
el = Element("a")
if issue is not None:
if repo == "GH":
el.text = f"GH-{issue}"
repo = ctx
elif repo in (None, ctx):
repo = ctx
el.text = f"#{issue}"
else:
el.text = f"{repo}#{issue}"
if (fragment := m[3]) and fragment.startswith('#issuecomment-'):
el.text += ' (comment)'
else:
fragment = ''
el.set('href', f"https://github.com/{repo}/issues/{issue}{fragment}")
else:
if repo in (None, ctx):
label_repo = ""
repo = ctx
elif '/' not in repo: # owner-only
label_repo = repo
# NOTE: I assume in reality we're supposed to find the actual fork if unambiguous...
repo = repo + '/' + ctx.split('/')[-1]
elif repo.split('/')[-1] == ctx.split('/')[-1]:
# NOTE: here we assume if it's the same repo in a different owner it's a fork
label_repo = repo.split('/')[0]
else:
label_repo = repo
el.text = f"{label_repo}@{commit}" if label_repo else commit
el.set("href", f"https://github.com/{repo}/commit/{commit}")
return el, *m.span()
class OdooLinking(markdown.inlinepatterns.InlineProcessor):
def __init__(self, md=None):
# there are other weirder variations but fuck em, this matches
# "opw", "task", "task-id" or "taskid" followed by an optional - or :
# followed by digits
super().__init__(r"(?i)\b(task(?:-?id)?|opw)\s*[-:]?\s*(\d+)\b", md)
def handleMatch(self, m, data):
el = Element("a", href='https://www.odoo.com/web#model=project.task&id=' + m[2])
if m[1].lower() == 'opw':
el.text = f"opw-{m[2]}"
else:
el.text = f"task-{m[2]}"
return el, *m.span()
class Unlinker(markdown.treeprocessors.Treeprocessor):
def run(self, root):
# find all elements which contain a link, as ElementTree does not have
# parent links we can't really replace links in place
for parent in root.iterfind('.//*[a]'):
children = parent[:]
# can't use clear because that clears the attributes and tail/text
del parent[:]
for el in children:
if el.tag != 'a' or el.get('href', '').startswith(('https:', 'http:')):
parent.append(el)
continue
# this is a weird link, remove it
if el.text: # first attach its text to the previous element
if len(parent): # prev is not parent
parent[-1].tail = (parent[-1].tail or '') + el.text
else:
parent.text = (parent.text or '') + el.text
if len(el): # then unpack all its children
parent.extend(el[:])
if el.tail: # then attach tail to previous element
if len(parent): # prev is not parent
parent[-1].tail = (parent[-1].tail or '') + el.tail
else:
parent.text = (parent.text or '') + el.tail
return None
# alternatively, use cmarkgfm? The maintainer of py-gfm (impl'd over
# python-markdown) ultimately gave up, if apparently mostly due to pymarkdown's
# tendency to break its API all the time
dfm_renderer = markdown.Markdown(
extensions=[DfmExtension()],
output_format='html5',
)