import logging
from contextvars import ContextVar
from typing import Tuple
from xml.etree.ElementTree import Element, tostring

import markdown.extensions
import markdown.inlinepatterns
import markdown.treeprocessors
from markupsafe import escape, Markup


def enum(model: str, field: str) -> Tuple[str, str]:
    """Return a pair of identical names derived from ``model`` and ``field``.

    The name is ``<model with dots replaced by underscores>_<field>_type``,
    returned twice as a 2-tuple.
    """
    n = f'{model.replace(".", "_")}_{field}_type'
    return n, n


def readonly(_):
    """Unconditionally raise :class:`TypeError`, ignoring the argument."""
    raise TypeError("Field is readonly")


# Full name of the repository the text currently being rendered belongs to.
# Set by `dfm` for the duration of a conversion and read by `GithubLinking`.
DFM_CONTEXT_REPO = ContextVar("dfm_context", default="")


def dfm(repository: str, text: str) -> Markup:
    """ Converts the input text from markdown to HTML using the Odoo PR
    Description Rules, which are basically:

    - GFM
    - minus raw HTML (?)
    - + github's autolinking (https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls)
    - + bespoke autolinking of OPW and Task links to odoo.com

    :param repository: full name of the repository the text comes from, made
                       available to the link processors through
                       ``DFM_CONTEXT_REPO`` while the conversion runs
    :param text: source text, it is escaped before being converted
    :returns: the rendered HTML, wrapped in ``Markup``
    """
    t = DFM_CONTEXT_REPO.set(repository)
    try:
        # The renderer is a shared module-level instance: without a reset,
        # per-document state (link reference definitions, footnotes, the HTML
        # stash) kept by Markdown and its extensions carries over from one
        # conversion to the next.
        dfm_renderer.reset()
        return Markup(dfm_renderer.convert(escape(text)))
    finally:
        # always restore the previous context, even if the conversion fails
        DFM_CONTEXT_REPO.reset(t)


class DfmExtension(markdown.extensions.Extension):
    """Markdown extension bundling everything `dfm` needs.

    Registers a set of built-in extensions, the github and odoo inline
    linkers, and the `Unlinker` tree processor.
    """

    def extendMarkdown(self, md):
        md.registerExtensions(['fenced_code', 'footnotes', 'nl2br', 'sane_lists', 'tables'], configs={})
        md.inlinePatterns.register(GithubLinking(md), 'githublinking', 123)
        md.inlinePatterns.register(OdooLinking(md), 'odoolinking', 124)
        # ideally the unlinker should run before the prettifier so the
        # prettification is done correctly, but it seems unlikely the prettifier
        # handles the variable nature of links correctly, and we likely want to
        # run after the unescaper
        md.treeprocessors.register(Unlinker(), "unlinker", -10)


class GithubLinking(markdown.inlinepatterns.InlineProcessor):
    """Aside from being *very* varied github links are *contextual*. That is,
    their resolution depends on the repository they're being called from
    (technically they also need all the information from the github backend to
    know the people & objects exist but we don't have that option).

    Context is not available to us, but we can fake it through the application
    of contextvars: ``DFM_CONTEXT_REPO`` should contain the full name of the
    repository this is being resolved from.

    If ``DFM_CONTEXT_REPO`` is empty and needed, this processor emits a warning.
    """

    def __init__(self, md=None):
        # Capture groups, as consumed by `handleMatch`:
        #   1, 2, 3  repository, number and fragment of a full issue / PR URL
        #   4, 5     repository and hash of a full commit URL
        #   6, 7     repository and number of ``owner/repo#123``
        #   8, 9     prefix (``GH-`` or ``#``) and number of a bare issue ref
        #   10, 11   owner or owner/repo, and hash, of ``owner[/repo]@sha``
        #   12       bare hash
        super().__init__(r"""(?xi)
(?:
    \bhttps://github.com/([\w\.-]+/[\w\.-]+)/(?:issues|pull)/(\d+)(\#[\w-]+)?
|   \bhttps://github.com/([\w\.-]+/[\w\.-]+)/commit/([a-f0-9]+)
|   \b([\w\.-]+/[\w\.-]+)\#(\d+)
|   (\bGH-|(?:^|(?<=\s))\#)(\d+)
|   \b(?:
        # user@sha or user/repo@sha
        ([\w\.-]+(?:/[\w\.-]+)?)
        @
        ([0-9a-f]{7,40})
    )
|   \b(
        # a sha is 7~40 hex digits but that means any million+ number matches
        # which is probably wrong. So ensure there's at least one letter in the
        # set by using a positive lookahead which looks for a sequence of at
        # least 0 numbers followed by a-f
        (?=[0-9]{0,39}?[a-f])
        [0-9a-f]{7,40}
    )
)
\b
""", md)

    def handleMatch(self, m, data):
        """Turn a matched github reference into an ``a`` element.

        :param m: match object for the pattern compiled in ``__init__``
        :param data: text being processed (unused)
        :returns: ``(node, start, end)`` where ``node`` is the link element,
                  or the matched text itself when no repository context is
                  set, and ``start`` / ``end`` are the span of the match
        """
        ctx = DFM_CONTEXT_REPO.get()
        if not ctx:
            logging.getLogger(__name__)\
                .getChild("github_links")\
                .warning("missing context for rewriting github links, skipping")
            # hand the matched text back untouched
            return m[0], *m.span()

        repo = issue = commit = None
        if m[2]:  # full issue / PR
            repo = m[1]
            issue = m[2]
        elif m[5]:  # long hash
            repo = m[4]
            commit = m[5]
        elif m[7]:  # short issue with repo
            repo = m[6]
            issue = m[7]
        elif m[9]:  # short issue without repo
            # "GH" is a marker remembering that the GH- prefix was used, so
            # the label can reproduce it below
            repo = None if m[8] == '#' else "GH"
            issue = m[9]
        elif m[11]:  # medium hash
            repo = m[10]
            commit = m[11]
        else:  # hash only
            commit = m[12]

        el = Element("a")
        if issue is not None:
            if repo == "GH":
                el.text = f"GH-{issue}"
                repo = ctx
            elif repo in (None, ctx):
                # references to the context repository use the short form
                repo = ctx
                el.text = f"#{issue}"
            else:
                el.text = f"{repo}#{issue}"

            # only comment fragments are kept, any other one is dropped
            if (fragment := m[3]) and fragment.startswith('#issuecomment-'):
                el.text += ' (comment)'
            else:
                fragment = ''
            el.set('href', f"https://github.com/{repo}/issues/{issue}{fragment}")
        else:
            if repo in (None, ctx):
                label_repo = ""
                repo = ctx
            elif '/' not in repo:  # owner-only
                label_repo = repo
                # NOTE: I assume in reality we're supposed to find the actual fork if unambiguous...
                repo = repo + '/' + ctx.split('/')[-1]
            elif repo.split('/')[-1] == ctx.split('/')[-1]:
                # NOTE: here we assume if it's the same repo in a different owner it's a fork
                label_repo = repo.split('/')[0]
            else:
                label_repo = repo
            el.text = f"{label_repo}@{commit}" if label_repo else commit
            el.set("href", f"https://github.com/{repo}/commit/{commit}")
        return el, *m.span()


class OdooLinking(markdown.inlinepatterns.InlineProcessor):
    """Links OPW and task references to the matching task on odoo.com."""

    def __init__(self, md=None):
        # there are other, weirder variations which are not handled; this
        # matches "opw", "task", "task-id" or "taskid" followed by an
        # optional - or : followed by digits
        super().__init__(r"(?i)\b(task(?:-?id)?|opw)\s*[-:]?\s*(\d+)\b", md)

    def handleMatch(self, m, data):
        """Build the link for a matched reference.

        Both kinds of reference point to the same URL pattern, only the label
        differs: ``opw-<id>`` for OPW references, ``task-<id>`` for all others.

        :returns: ``(element, start, end)``
        """
        el = Element("a", href='https://www.odoo.com/web#model=project.task&id=' + m[2])
        if m[1].lower() == 'opw':
            el.text = f"opw-{m[2]}"
        else:
            el.text = f"task-{m[2]}"
        return el, *m.span()


class Unlinker(markdown.treeprocessors.Treeprocessor):
    """Strips links whose ``href`` does not start with ``https:`` or
    ``http:``, keeping their content in place.
    """

    def run(self, root):
        """Replace every non-http(s) link under ``root`` by its content.

        The tree is modified in place, hence the ``None`` return value.
        """
        # find all elements which contain a link, as ElementTree does not have
        # parent links we can't really replace links in place
        for parent in root.iterfind('.//*[a]'):
            children = parent[:]
            # can't use clear because that clears the attributes and tail/text
            del parent[:]
            # rebuild the list of children, splicing in the content of the
            # links being removed
            for el in children:
                if el.tag != 'a' or el.get('href', '').startswith(('https:', 'http:')):
                    parent.append(el)
                    continue

                # this is a weird link, remove it

                if el.text:  # first attach its text to the previous element
                    if len(parent):  # prev is not parent
                        parent[-1].tail = (parent[-1].tail or '') + el.text
                    else:
                        parent.text = (parent.text or '') + el.text

                if len(el):  # then unpack all its children
                    parent.extend(el[:])

                if el.tail:  # then attach tail to previous element
                    if len(parent):  # prev is not parent
                        parent[-1].tail = (parent[-1].tail or '') + el.tail
                    else:
                        parent.text = (parent.text or '') + el.tail

        return None


# alternatively, use cmarkgfm? The maintainer of py-gfm (impl'd over
# python-markdown) ultimately gave up, if apparently mostly due to pymarkdown's
# tendency to break its API all the time
#
# NOTE: this instance is shared by every call to `dfm`, which resets it before
# each conversion.
dfm_renderer = markdown.Markdown(
    extensions=[DfmExtension()],
    output_format='html5',
)