runbot/runbot_merge/models/utils.py
2025-02-28 16:12:40 +01:00

203 lines
7.4 KiB
Python

import logging
from contextvars import ContextVar
from typing import Tuple
from xml.etree.ElementTree import Element
import markdown.inlinepatterns
import markdown.treeprocessors
from markupsafe import escape, Markup
def enum(model: str, field: str) -> Tuple[str, str]:
    """Derive the enum type name for ``field`` of ``model``.

    The name is built as ``<model>_<field>_type``, with every ``.`` in the
    model name replaced by ``_``.

    :param model: dotted model name, e.g. ``"foo.bar"``
    :param field: name of the field on that model
    :return: a 2-tuple holding the derived name twice
    """
    type_name = "{}_{}_type".format(model.replace(".", "_"), field)
    return (type_name, type_name)
def readonly(_):
    """Unconditionally refuse the call.

    The single positional argument is accepted and ignored.

    :raises TypeError: always, with the message ``Field is readonly``
    """
    message = "Field is readonly"
    raise TypeError(message)
# Full name of the repository a description is currently being rendered for.
# ``dfm`` sets it for the duration of a conversion and ``GithubLinking`` reads
# it to resolve contextual github references; the default (empty string)
# means "no context available".
DFM_CONTEXT_REPO = ContextVar("dfm_context", default="")
def dfm(repository: str, text: str) -> Markup:
    """Render ``text`` from Markdown to HTML following the Odoo PR
    Description Rules.

    Those rules are basically:

    - GFM
    - minus raw HTML (?) -- the input is escaped before being converted
    - plus github's autolinking
      (https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls)
    - plus bespoke autolinking of OPW and Task links to odoo.com

    :param repository: full name of the repository the text belongs to,
                       published through ``DFM_CONTEXT_REPO`` while the
                       conversion runs
    :param text: the source text to render
    :return: the rendered HTML, wrapped in ``Markup``
    """
    token = DFM_CONTEXT_REPO.set(repository)
    try:
        # the renderer is a shared module-level object, clear whatever state
        # the previous conversion left behind before reusing it
        dfm_renderer.reset()
        html = dfm_renderer.convert(escape(text))
        return Markup(html)
    finally:
        # always restore the previous context, even if the conversion failed
        DFM_CONTEXT_REPO.reset(token)
class DfmExtension(markdown.extensions.Extension):
    """Markdown extension bundling everything ``dfm`` rendering needs: a set
    of stock python-markdown extensions, the github and odoo autolinkers, and
    the tree processor stripping non-http(s) links.
    """

    def extendMarkdown(self, md):
        """Register the stock extensions and the custom processors on ``md``."""
        stock_extensions = [
            'fenced_code',
            'footnotes',
            'nl2br',
            'sane_lists',
            'tables',
        ]
        md.registerExtensions(stock_extensions, configs={})

        inline_patterns = md.inlinePatterns
        inline_patterns.register(GithubLinking(md), 'githublinking', 123)
        inline_patterns.register(OdooLinking(md), 'odoolinking', 124)

        # Ideally the unlinker would run before the prettifier so that the
        # prettification is done correctly. However it seems unlikely that
        # the prettifier copes with the variable nature of links anyway, and
        # we likely want to run after the unescaper.
        md.treeprocessors.register(Unlinker(), "unlinker", -10)
class GithubLinking(markdown.inlinepatterns.InlineProcessor):
    """Inline processor turning github references into links.

    Github references are not only *very* varied, they are also *contextual*:
    what they resolve to depends on the repository they are written in
    (technically github also checks that the people & objects exist, but we
    do not have access to that information).

    The markdown API gives us no context, so it is faked with a contextvar:
    ``DFM_CONTEXT_REPO`` should hold the full name of the repository the text
    is being resolved from.

    When ``DFM_CONTEXT_REPO`` is empty, a warning is logged and the matched
    text is left as it is.
    """

    def __init__(self, md=None):
        # Verbose, case-insensitive pattern. Capture groups:
        #    1, 2, 3: repository, number and fragment of an issue / PR URL
        #    4, 5:    repository and hash of a commit URL
        #    6, 7:    ``owner/repo#number``
        #    8, 9:    ``GH-number`` or ``#number``
        #   10, 11:   ``owner@hash`` or ``owner/repo@hash``
        #   12:       bare hash
        # The lines of the literal are deliberately flush-left so that its
        # content stays unchanged; whitespace is ignored in verbose mode.
        pattern = r"""(?xi)
(?:
\bhttps://github.com/([\w\.-]+/[\w\.-]+)/(?:issues|pull)/(\d+)(\#[\w-]+)?
| \bhttps://github.com/([\w\.-]+/[\w\.-]+)/commit/([a-f0-9]+)
| \b([\w\.-]+/[\w\.-]+)\#(\d+)
| (\bGH-|(?:^|(?<=\s))\#)(\d+)
| \b(?:
# user@sha or user/repo@sha
([\w\.-]+(?:/[\w\.-]+)?)
@
([0-9a-f]{7,40})
)
| \b(
# a sha is 7~40 hex digits but that means any million+ number matches
# which is probably wrong. So ensure there's at least one letter in the
# set by using a positive lookahead which looks for a sequence of at
# least 0 numbers followed by a-f
(?=[0-9]{0,39}?[a-f])
[0-9a-f]{7,40}
)
)
\b
"""
        super().__init__(pattern, md)

    def handleMatch(self, m, data):
        """Build the link for a matched reference.

        :param m: the match object for the pattern set up in ``__init__``
        :param data: the text being processed (unused)
        :return: ``(node, start, end)`` where ``node`` is the ``a`` element,
                 or the raw matched text when no repository context is set,
                 and ``start`` / ``end`` are the bounds of the match
        """
        start, end = m.span()
        context = DFM_CONTEXT_REPO.get()
        if not context:
            log = logging.getLogger(__name__).getChild("github_links")
            log.warning("missing context for rewriting github links, skipping")
            return m[0], start, end

        # only the full issue / PR URL form can carry a fragment
        fragment = m[3]
        if m[2]:  # full issue / PR URL
            link = self._issue_link(context, m[1], m[2], fragment)
        elif m[5]:  # full commit URL
            link = self._commit_link(context, m[4], m[5])
        elif m[7]:  # owner/repo#number
            link = self._issue_link(context, m[6], m[7], fragment)
        elif m[9]:  # GH-number or #number
            link = self._issue_link(
                context, None, m[9], fragment, gh_prefixed=m[8] != '#')
        elif m[11]:  # owner@hash or owner/repo@hash
            link = self._commit_link(context, m[10], m[11])
        else:  # bare hash
            link = self._commit_link(context, None, m[12])
        return link, start, end

    @staticmethod
    def _issue_link(context, repo, issue, fragment, gh_prefixed=False):
        """Create the ``a`` element for an issue / PR reference.

        ``repo`` is ``None`` when the reference does not name a repository,
        ``gh_prefixed`` tells a ``GH-number`` reference from ``#number``.
        """
        if gh_prefixed:
            label = f"GH-{issue}"
            repo = context
        elif repo is None or repo == context:
            label = f"#{issue}"
            repo = context
        else:
            label = f"{repo}#{issue}"

        # comment fragments are kept and flagged in the label, any other
        # fragment is dropped from the link
        if fragment and fragment.startswith('#issuecomment-'):
            label += ' (comment)'
        else:
            fragment = ''

        anchor = Element("a")
        anchor.text = label
        anchor.set('href', f"https://github.com/{repo}/issues/{issue}{fragment}")
        return anchor

    @staticmethod
    def _commit_link(context, repo, commit):
        """Create the ``a`` element for a commit reference.

        ``repo`` is ``None`` for a bare hash, an owner name for ``owner@hash``
        and a full repository name otherwise.
        """
        context_name = context.split('/')[-1]
        if repo is None or repo == context:
            prefix = ""
            repo = context
        elif '/' not in repo:  # owner-only
            prefix = repo
            # NOTE: in reality we are presumably supposed to find the actual
            #       fork if unambiguous...
            repo = repo + '/' + context_name
        elif repo.split('/')[-1] == context_name:
            # NOTE: the same repository name under a different owner is
            #       assumed to be a fork, so only the owner is displayed
            prefix = repo.split('/')[0]
        else:
            prefix = repo

        anchor = Element("a")
        if prefix:
            anchor.text = f"{prefix}@{commit}"
        else:
            anchor.text = commit
        anchor.set("href", f"https://github.com/{repo}/commit/{commit}")
        return anchor
class OdooLinking(markdown.inlinepatterns.InlineProcessor):
    """Inline processor linking OPW and task references to the matching
    ``project.task`` record on odoo.com.
    """

    def __init__(self, md=None):
        # There are other, weirder, variations but those are ignored. This
        # matches (case-insensitively) "opw", "task", "task-id" or "taskid",
        # followed by an optional "-" or ":" separator (whitespace allowed on
        # either side of it), followed by digits.
        pattern = r"(?i)\b(task(?:-?id)?|opw)\s*[-:]?\s*(\d+)\b"
        super().__init__(pattern, md)

    def handleMatch(self, m, data):
        """Build the link for a matched reference.

        :param m: the match object, group 1 is the keyword and group 2 the
                  numeric identifier
        :param data: the text being processed (unused)
        :return: ``(element, start, end)``, the element being an ``a`` whose
                 label is normalised to ``opw-<id>`` or ``task-<id>``
        """
        keyword, number = m[1], m[2]

        link = Element("a")
        link.set("href", 'https://www.odoo.com/web#model=project.task&id=' + number)
        label_prefix = "opw-" if keyword.lower() == 'opw' else "task-"
        link.text = label_prefix + number

        start, end = m.span()
        return link, start, end
class Unlinker(markdown.treeprocessors.Treeprocessor):
    """Tree processor removing every link whose ``href`` does not start with
    ``https:`` or ``http:``, while keeping the content of the link in place.
    """

    def run(self, root):
        """Strip the unwanted links from ``root``, in place.

        :param root: the root element of the tree to process
        :return: ``None``, the tree is modified in place
        """
        # ElementTree has no parent links, so links can't really be replaced
        # in place: look for the elements *containing* a link and rebuild
        # their list of children instead.
        for container in root.iterfind('.//*[a]'):
            former_children = list(container)
            # ``clear`` can't be used as it also drops the attributes and
            # the text / tail of the element
            del container[:]
            for child in former_children:
                is_link = child.tag == 'a'
                if not is_link or child.get('href', '').startswith(('https:', 'http:')):
                    container.append(child)
                    continue

                # this is a weird link, remove it: its text goes first, then
                # its children are unpacked, and finally its tail is attached
                self._attach_text(container, child.text)
                if len(child):
                    container.extend(child[:])
                self._attach_text(container, child.tail)
        return None

    @staticmethod
    def _attach_text(container, text):
        """Append ``text`` after the current content of ``container``: to the
        tail of its last child if it has one, to its own text otherwise.
        """
        if not text:
            return
        if len(container):
            last = container[-1]
            last.tail = (last.tail or '') + text
        else:
            container.text = (container.text or '') + text
# Module-level renderer used by ``dfm``, which resets it before each
# conversion.
#
# Alternatively, use cmarkgfm? The maintainer of py-gfm (implemented over
# python-markdown) ultimately gave up, apparently mostly due to pymarkdown's
# tendency to break its API all the time.
dfm_renderer = markdown.Markdown(
    extensions=[DfmExtension()],
    output_format='html5',
)