[ADD] runbot_merge: rendering of PR descriptions

Previously PR descriptions were displayed as raw text in the PR
dashboard. While not wrong per se, this was pretty ugly and not always
convenient as e.g. links had to be copied by hand.

Push descriptions through pymarkdown for rendering them, with a few
customisations:

- Enabled footnotes & tables & fenced code blocks because GFM has
  that, this doesn't quite put pymarkdown's base behaviour on par with
  gfm (and py-gfm ultimately gave up on that effort moving to just
  wrap github's own markdown renderer instead).
- Don't allow raw html because too much of a hassle to do it
  correctly, and very few people ever do it (mostly me I think).
- Added a bespoke handler / renderer for github-style references.

  Note: uses positional captures because it started that way and named
  captures are not removed from that sequence so mixing and matching
  is not very useful, plus python does not support identically named
  groups (even exclusive) so all 4 repo captures and all 3 issue
  number captures would need different names...
- And added a second bespoke handler for our own opw/issue references
  leading to odoo.com, that's something we can't do via github[^1] so
  it's a genuine value-add.

Fixes #889

[^1]: github can do it (though possibly not with the arbitrary
    unspecified nonsense I got when I tried to list some of the
    reference styles, some folks need therapy), but it's not available
    on our plan
This commit is contained in:
Xavier Morel 2024-07-11 11:53:01 +02:00
parent 02013a53d9
commit d6bb18e358
5 changed files with 291 additions and 2 deletions

View File

@ -0,0 +1,10 @@
IMP: PR descriptions are now markdown-rendered in the dashboard
Previously the raw text was displayed. The main advantage of rendering, aside
from not splatting huge links in the middle of the thing, is that we can
autolink *odoo tasks* if they're of a pattern we recognize. Some support has
also been added for github's references to mirror GFM rendering.
This would be a lot less useful (and in fact pretty much useless) if we could
use github's built-in [references to external resources](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/configuring-autolinks-to-reference-external-resources)
sadly that seems to not be available on our plan.

View File

@ -16,13 +16,14 @@ from typing import Optional, Union, List, Iterator, Tuple
import psycopg2
import sentry_sdk
import werkzeug
from markupsafe import Markup
from odoo import api, fields, models, tools, Command
from odoo.exceptions import AccessError
from odoo.osv import expression
from odoo.tools import html_escape, Reverse
from . import commands
from .utils import enum, readonly
from .utils import enum, readonly, dfm
from .. import github, exceptions, controllers, utils
@ -369,6 +370,7 @@ class PullRequests(models.Model):
)
refname = fields.Char(compute='_compute_refname')
message = fields.Text(required=True)
message_html = fields.Html(compute='_compute_message_html', sanitize=False)
draft = fields.Boolean(
default=False, required=True, tracking=True,
help="A draft PR can not be merged",
@ -490,6 +492,20 @@ class PullRequests(models.Model):
for pr in self:
pr.message_title = next(iter(pr.message.splitlines()), '')
@api.depends("message")
def _compute_message_html(self):
for pr in self:
match pr.message.split('\n\n', 1):
case [title]:
pr.message_html = Markup('<h3>%s<h3>') % title
case [title, description]:
pr.message_html = Markup('<h3>%s</h3>\n%s') % (
title,
dfm(pr.repository.name, description),
)
case _:
pr.message_html = ""
@api.depends('repository.name', 'number', 'message')
def _compute_display_name(self):
return super()._compute_display_name()

View File

@ -1,4 +1,11 @@
import logging
from contextvars import ContextVar
from typing import Tuple
from xml.etree.ElementTree import Element, tostring
import markdown.inlinepatterns
import markdown.treeprocessors
from markupsafe import escape, Markup
def enum(model: str, field: str) -> Tuple[str, str]:
@ -8,3 +15,187 @@ def enum(model: str, field: str) -> Tuple[str, str]:
def readonly(_):
raise TypeError("Field is readonly")
DFM_CONTEXT_REPO = ContextVar("dfm_context", default="")
def dfm(repository: str, text: str) -> Markup:
""" Converts the input text from markup to HTML using the Odoo PR
Description Rules, which are basically:
- GFM
- minus raw HTML (?)
- + github's autolinking (https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls)
- + bespoke autolinking of OPW and Task links to odoo.com
"""
t = DFM_CONTEXT_REPO.set(repository)
try:
return Markup(dfm_renderer.convert(escape(text)))
finally:
DFM_CONTEXT_REPO.reset(t)
class DfmExtension(markdown.extensions.Extension):
def extendMarkdown(self, md):
md.registerExtensions(['fenced_code', 'footnotes', 'nl2br', 'sane_lists', 'tables'], configs={})
md.inlinePatterns.register(GithubLinking(md), 'githublinking', 123)
md.inlinePatterns.register(OdooLinking(md), 'odoolinking', 124)
# ideally the unlinker should run before the prettifier so the
# prettification is done correctly, but it seems unlikely the prettifier
# handles the variable nature of links correctly, and we likely want to
# run after the unescaper
md.treeprocessors.register(Unlinker(), "unlinker", -10)
class GithubLinking(markdown.inlinepatterns.InlineProcessor):
"""Aside from being *very* varied github links are *contextual*. That is,
their resolution depends on the repository they're being called from
(technically they also need all the information from the github backend to
know the people & objects exist but we don't have that option).
Context is not available to us, but we can fake it through the application
of contextvars: ``DFM_CONTEXT_REPO`` should contain the full name of the
repository this is being resolved from.
If ``DFM_CONTEXT_REPO`` is empty and needed, this processor emits a warning.
"""
def __init__(self, md=None):
super().__init__(r"""(?xi)
(?:
\bhttps://github.com/([\w\.-]+/[\w\.-]+)/(?:issues|pull)/(\d+)(\#[\w-]+)?
| \bhttps://github.com/([\w\.-]+/[\w\.-]+)/commit/([a-f0-9]+)
| \b([\w\.-]+/[\w\.-]+)\#(\d+)
| (\bGH-|(?:^|(?<=\s))\#)(\d+)
| \b(?:
# user@sha or user/repo@sha
([\w\.-]+(?:/[\w\.-]+)?)
@
([0-9a-f]{7,40})
)
| \b(
# a sha is 7~40 hex digits but that means any million+ number matches
# which is probably wrong. So ensure there's at least one letter in the
# set by using a positive lookahead which looks for a sequence of at
# least 0 numbers followed by a-f
(?=[0-9]{0,39}?[a-f])
[0-9a-f]{7,40}
)
)
\b
""", md)
def handleMatch(self, m, data):
ctx = DFM_CONTEXT_REPO.get()
if not ctx:
logging.getLogger(__name__)\
.getChild("github_links")\
.warning("missing context for rewriting github links, skipping")
return m[0], *m.span()
repo = issue = commit = None
if m[2]: # full issue / PR
repo = m[1]
issue = m[2]
elif m[5]: # long hash
repo = m[4]
commit = m[5]
elif m[7]: # short issue with repo
repo = m[6]
issue = m[7]
elif m[9]: # short issue without repo
repo = None if m[8] == '#' else "GH"
issue = m[9]
elif m[11]: # medium hash
repo = m[10]
commit = m[11]
else: # hash only
commit = m[12]
el = Element("a")
if issue is not None:
if repo == "GH":
el.text = f"GH-{issue}"
repo = ctx
elif repo in (None, ctx):
repo = ctx
el.text = f"#{issue}"
else:
el.text = f"{repo}#{issue}"
if (fragment := m[3]) and fragment.startswith('#issuecomment-'):
el.text += ' (comment)'
else:
fragment = ''
el.set('href', f"https://github.com/{repo}/issues/{issue}{fragment}")
else:
if repo in (None, ctx):
label_repo = ""
repo = ctx
elif '/' not in repo: # owner-only
label_repo = repo
# NOTE: I assume in reality we're supposed to find the actual fork if unambiguous...
repo = repo + '/' + ctx.split('/')[-1]
elif repo.split('/')[-1] == ctx.split('/')[-1]:
# NOTE: here we assume if it's the same repo in a different owner it's a fork
label_repo = repo.split('/')[0]
else:
label_repo = repo
el.text = f"{label_repo}@{commit}" if label_repo else commit
el.set("href", f"https://github.com/{repo}/commit/{commit}")
return el, *m.span()
class OdooLinking(markdown.inlinepatterns.InlineProcessor):
def __init__(self, md=None):
# there are other weirder variations but fuck em, this matches
# "opw", "task", "task-id" or "taskid" followed by an optional - or :
# followed by digits
super().__init__(r"(?i)\b(task(?:-?id)?|opw)\s*[-:]?\s*(\d+)\b", md)
def handleMatch(self, m, data):
el = Element("a", href='https://www.odoo.com/web#model=project.task&id=' + m[2])
if m[1].lower() == 'opw':
el.text = f"opw-{m[2]}"
else:
el.text = f"task-{m[2]}"
return el, *m.span()
class Unlinker(markdown.treeprocessors.Treeprocessor):
def run(self, root):
# find all elements which contain a link, as ElementTree does not have
# parent links we can't really replace links in place
for parent in root.iterfind('.//*[a]'):
children = parent[:]
# can't use clear because that clears the attributes and tail/text
del parent[:]
for el in children:
if el.tag != 'a' or el.get('href', '').startswith(('https:', 'http:')):
parent.append(el)
continue
# this is a weird link, remove it
if el.text: # first attach its text to the previous element
if len(parent): # prev is not parent
parent[-1].tail = (parent[-1].tail or '') + el.text
else:
parent.text = (parent.text or '') + el.text
if len(el): # then unpack all its children
parent.extend(el[:])
if el.tail: # then attach tail to previous element
if len(parent): # prev is not parent
parent[-1].tail = (parent[-1].tail or '') + el.tail
else:
parent.text = (parent.text or '') + el.tail
return None
# alternatively, use cmarkgfm? The maintainer of py-gfm (impl'd over
# python-markdown) ultimately gave up, if apparently mostly due to pymarkdown's
# tendency to break its API all the time
dfm_renderer = markdown.Markdown(
extensions=[DfmExtension()],
output_format='html5',
)

View File

@ -0,0 +1,72 @@
from odoo.addons.runbot_merge.models.utils import dfm
def test_odoo_links():
assert dfm("", "OPW-42") == '<p><a href="https://www.odoo.com/web#model=project.task&amp;id=42">opw-42</a></p>'
assert dfm("", "taskid : 42") == '<p><a href="https://www.odoo.com/web#model=project.task&amp;id=42">task-42</a></p>'
assert dfm("", "I was doing task foo") == '<p>I was doing task foo</p>'
assert dfm("", "Task 687d3") == "<p>Task 687d3</p>"
def p(*content):
return f'<p>{"".join(content)}</p>'
def a(label, url):
return f'<a href="{url}">{label}</a>'
def test_gh_issue_links():
# same-repository link
assert dfm("odoo/runbot", "thing thing #26") == p("thing thing ", a('#26', 'https://github.com/odoo/runbot/issues/26'))
assert dfm("odoo/runbot", "GH-26") == p(a('GH-26', 'https://github.com/odoo/runbot/issues/26'))
assert dfm(
"odoo/runbot", "https://github.com/odoo/runbot/issues/26"
) == p(a('#26', 'https://github.com/odoo/runbot/issues/26'))
# cross-repo link
assert dfm(
"odoo/runbot", "jlord/sheetsee.js#26"
) == p(a('jlord/sheetsee.js#26', 'https://github.com/jlord/sheetsee.js/issues/26'))
assert dfm(
"odoo/runbot", "https://github.com/jlord/sheetsee.js/pull/26"
) == p(a('jlord/sheetsee.js#26', 'https://github.com/jlord/sheetsee.js/issues/26'))
# cross-repo link with comment
assert dfm(
"odoo/runbot", "https://github.com/odoo/odoo/pull/173061#issuecomment-2227874482"
) == p(a("odoo/odoo#173061 (comment)", "https://github.com/odoo/odoo/issues/173061#issuecomment-2227874482"))
def test_gh_commit_link():
# same repository
assert dfm(
"odoo/runbot", "https://github.com/odoo/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/odoo/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
# cross fork
assert dfm(
"odoo/runbot", "jlord@a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("jlord@a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/jlord/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
assert dfm(
"odoo/runbot", "https://github.com/jlord/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("jlord@a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/jlord/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
# cross repo
assert dfm(
"odoo/runbot", "jlord/sheetsee.js@a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("jlord/sheetsee.js@a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/jlord/sheetsee.js/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
assert dfm(
"odoo/runbot", "https://github.com/jlord/sheetsee.js/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("jlord/sheetsee.js@a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/jlord/sheetsee.js/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
def test_standalone_hash():
assert dfm(
"odoo/runbot", "a5c3785ed8d6a35868bc169f07e40e889087fd2e"
) == p(a("a5c3785ed8d6a35868bc169f07e40e889087fd2e", "https://github.com/odoo/runbot/commit/a5c3785ed8d6a35868bc169f07e40e889087fd2e"))
assert dfm(
"odoo/runbot", "a5c3785ed8d6a35868bc169f07e4"
) == p(a("a5c3785ed8d6a35868bc169f07e4", "https://github.com/odoo/runbot/commit/a5c3785ed8d6a35868bc169f07e4"))
assert dfm(
"odoo/runbot", "a5c3785"
) == p(a("a5c3785", "https://github.com/odoo/runbot/commit/a5c3785"))
assert dfm(
"odoo/runbot", "a5c378"
) == p("a5c378")
def test_ignore_tel():
assert dfm("", "[ok](https://github.com)") == p(a("ok", "https://github.com"))
assert dfm("", "[nope](tel:+1-212-555-0100)") == "<p>nope</p>"
assert dfm("", "[lol](rdar://10198949)") == "<p>lol</p>"

View File

@ -446,7 +446,7 @@
<dd><a t-attf-href="{{pr.github_url}}/commits/{{pr.head}}"><span t-field="pr.head"/></a></dd>
</dl>
<t t-call="runbot_merge.dashboard-table"/>
<p t-field="pr.message"/>
<p t-field="pr.message_html"/>
</div></div>
</t>
</template>