pfmarkdown.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. # This program is free software; you can redistribute it and/or
  2. # modify it under the terms of the GNU General Public License
  3. # as published by the Free Software Foundation; either version 2
  4. # of the License, or (at your option) any later version.
  5. #
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with this program; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  14. # USA.
  15. """ Pagure-flavored Markdown
  16. Author: Ralph Bean <rbean@redhat.com>
  17. Pierre-Yves Chibon <pingou@pingoured.fr>
  18. """
  19. from __future__ import absolute_import, unicode_literals
  20. import logging
  21. import re
  22. import flask
  23. import markdown.inlinepatterns
  24. import markdown.postprocessors
  25. import markdown.preprocessors
  26. import markdown.util
  27. import pygit2
  28. import six
  29. import xml.etree.ElementTree as etree
  30. import pagure.lib.query
  31. from pagure.config import config as pagure_config
  32. try:
  33. from markdown.inlinepatterns import ImagePattern as ImagePattern
  34. MK_VERSION = 2
  35. except ImportError:
  36. from markdown.inlinepatterns import ImageInlineProcessor as ImagePattern
  37. MK_VERSION = 3
# Module-level logger used by every pattern/processor below.
_log = logging.getLogger(__name__)

# The (?<!\w) (and variants) we use a lot in all these regexes is a
# negative lookbehind assertion. It means 'match when the preceding
# character is not in the \w class'. This stops us from starting a
# match in the middle of a word (e.g. someone@something in the
# MENTION_RE regex). Note that it is a zero-length match - it does
# not capture or consume any of the string - and it does not appear
# as a group for the match object.

# ``@name``: an ``@`` followed by one or more word characters, as long as
# the ``@`` is not preceded by a word character or one of the listed
# punctuation characters. The name is the only capturing group.
MENTION_RE = r"(?<![\w\-\"\'\`\$\!\*\+#%&/=^{}|~])@(\w+)"

# Each line below corresponds to a line of the regex:
# 1) Don't start matching in the middle of a word
# 2) See if there is a `forks/` at the start
# 3) See if we have a `user/`
# 4) See if we have a `namespace/`
# 5) Get the last part `project`
# 6) Get the identifier `#<id>`
EXPLICIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[0-9]+)"
)

# Same layout as EXPLICIT_LINK_RE, except that the identifier after the
# `#` is exactly 40 word characters instead of a run of digits.
COMMIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[\w]{40})"
)

# PREPROCIMPLLINK is used by ImplicitIssuePreprocessor to replace the
# '#' when a line starts with an implicit issue link, to prevent
# markdown parsing it as a header; we have to handle it here
IMPLICIT_ISSUE_RE = r"(?<!\w)(?:PREPROCIMPLLINK|#)([0-9]+)"

# `PR#<digits>` not preceded by a word character; captures the digits.
IMPLICIT_PR_RE = r"(?<!\w)PR#([0-9]+)"

# 7 to 40 characters from [a-f0-9], not preceded by `<`, `#` or a word
# character; captures the whole run.
IMPLICIT_COMMIT_RE = r"(?<![<\w#])([a-f0-9]{7,40})"

# Shortest possible run of text enclosed between two `~~` markers.
STRIKE_THROUGH_RE = r"~~(.*?)~~"
  77. class MentionPattern(markdown.inlinepatterns.Pattern):
  78. """@user pattern class."""
  79. def handleMatch(self, m):
  80. """When the pattern matches, update the text."""
  81. _log.debug("MentionPattern: %s", m.groups())
  82. name = markdown.util.AtomicString(m.group(2))
  83. text = "@%s" % name
  84. user = pagure.lib.query.search_user(flask.g.session, username=name)
  85. if not user:
  86. return text
  87. element = etree.Element("a")
  88. base_url = pagure_config["APP_URL"]
  89. if base_url.endswith("/"):
  90. base_url = base_url[:-1]
  91. url = "%s/user/%s" % (base_url, user.username)
  92. element.set("href", url)
  93. element.text = text
  94. return element
  95. class ExplicitLinkPattern(markdown.inlinepatterns.Pattern):
  96. """Explicit link pattern."""
  97. def handleMatch(self, m):
  98. """When the pattern matches, update the text."""
  99. _log.debug("ExplicitLinkPattern: %s", m.groups())
  100. is_fork = m.group(2)
  101. user = m.group(3)
  102. namespace = m.group(4)
  103. repo = m.group(5)
  104. idx = m.group(6)
  105. text = "%s#%s" % (repo, idx)
  106. if not is_fork and user:
  107. namespace = user
  108. user = None
  109. if namespace:
  110. namespace = namespace.rstrip("/")
  111. text = "%s/%s" % (namespace, text)
  112. if user:
  113. user = user.rstrip("/")
  114. text = "%s/%s" % (user.rstrip("/"), text)
  115. try:
  116. idx = int(idx)
  117. except (ValueError, TypeError):
  118. return text
  119. issue = _issue_exists(user, namespace, repo, idx)
  120. if issue:
  121. return _obj_anchor_tag(user, namespace, repo, issue, text)
  122. request = _pr_exists(user, namespace, repo, idx)
  123. if request:
  124. return _obj_anchor_tag(user, namespace, repo, request, text)
  125. return text
  126. class CommitLinkPattern(markdown.inlinepatterns.Pattern):
  127. """Commit link pattern."""
  128. def handleMatch(self, m):
  129. """When the pattern matches, update the text."""
  130. _log.debug("CommitLinkPattern: %s", m.groups())
  131. is_fork = m.group(2)
  132. user = m.group(3)
  133. namespace = m.group(4)
  134. repo = m.group(5)
  135. commitid = m.group(6)
  136. text = "%s#%s" % (repo, commitid)
  137. if not is_fork and user:
  138. namespace = user
  139. user = None
  140. if namespace:
  141. namespace = namespace.rstrip("/")
  142. text = "%s/%s" % (namespace, text)
  143. if user:
  144. user = user.rstrip("/")
  145. text = "%s/%s" % (user.rstrip("/"), text)
  146. if pagure.lib.query.search_projects(
  147. flask.g.session,
  148. username=user,
  149. fork=is_fork,
  150. namespace=namespace,
  151. pattern=repo,
  152. ):
  153. return _obj_anchor_tag(user, namespace, repo, commitid, text)
  154. return text
  155. class ImplicitIssuePreprocessor(markdown.preprocessors.Preprocessor):
  156. """
  157. Preprocessor which handles lines starting with an implicit
  158. link. We have to modify these so that markdown doesn't interpret
  159. them as headers.
  160. """
  161. def run(self, lines):
  162. """
  163. If a line starts with an implicit issue link like #152,
  164. we replace the # with PREPROCIMPLLINK. This prevents markdown
  165. parsing the line as a header. ImplicitIssuePattern will catch
  166. and parse the text later. Otherwise, we change nothing.
  167. """
  168. _log.debug("ImplicitIssuePreprocessor")
  169. # match a # character, then any number of digits
  170. regex = re.compile(r"#([0-9]+)")
  171. new_lines = []
  172. for line in lines:
  173. # avoid calling the regex if line doesn't start with #
  174. if line.startswith("#"):
  175. match = regex.match(line)
  176. if match:
  177. idx = int(match.group(1))
  178. # we have to check if this is a real issue or PR now.
  179. # we can't just 'tag' the text somehow and leave it to
  180. # the pattern to check, as if it's *not* one we want
  181. # the line treated as a header, so we need the block
  182. # processor to see it unmodified.
  183. try:
  184. namespace, repo, user = _get_ns_repo_user()
  185. except RuntimeError:
  186. # non-match path, keep original line
  187. new_lines.append(line)
  188. continue
  189. if _issue_exists(user, namespace, repo, idx) or _pr_exists(
  190. user, namespace, repo, idx
  191. ):
  192. # tweak the text
  193. new_lines.append("PREPROCIMPLLINK" + line[1:])
  194. continue
  195. # this is a non-match path, keep original line
  196. new_lines.append(line)
  197. continue
  198. return new_lines
  199. class ImplicitIssuePattern(markdown.inlinepatterns.Pattern):
  200. """Implicit issue pattern."""
  201. def handleMatch(self, m):
  202. """When the pattern matches, update the text."""
  203. _log.debug("ImplicitIssuePattern: %s", m.groups())
  204. idx = markdown.util.AtomicString(m.group(2))
  205. text = "#%s" % idx
  206. try:
  207. idx = int(idx)
  208. except (ValueError, TypeError):
  209. _log.debug("Invalid integer for %s, bailing", idx)
  210. return text
  211. try:
  212. namespace, repo, user = _get_ns_repo_user()
  213. except RuntimeError:
  214. _log.debug("No repo found associated with this context, bailing")
  215. return text
  216. _log.debug(
  217. "Checking ns: %s, name: %s, user:%s for id: %s",
  218. namespace,
  219. repo,
  220. user,
  221. idx,
  222. )
  223. issue = _issue_exists(user, namespace, repo, idx)
  224. if issue:
  225. _log.debug("Linking to an issue")
  226. return _obj_anchor_tag(user, namespace, repo, issue, text)
  227. request = _pr_exists(user, namespace, repo, idx)
  228. if request:
  229. _log.debug("Linking to an PR")
  230. return _obj_anchor_tag(user, namespace, repo, request, text)
  231. _log.debug("Bailing, return text as is")
  232. return text
  233. class ImplicitPRPattern(markdown.inlinepatterns.Pattern):
  234. """Implicit pull-request pattern."""
  235. def handleMatch(self, m):
  236. """When the pattern matches, update the text."""
  237. _log.debug("ImplicitPRPattern: %s", m.groups())
  238. idx = markdown.util.AtomicString(m.group(2))
  239. text = "PR#%s" % idx
  240. try:
  241. idx = int(idx)
  242. except (ValueError, TypeError):
  243. return text
  244. try:
  245. namespace, repo, user = _get_ns_repo_user()
  246. except RuntimeError:
  247. return text
  248. issue = _issue_exists(user, namespace, repo, idx)
  249. if issue:
  250. return _obj_anchor_tag(user, namespace, repo, issue, text)
  251. request = _pr_exists(user, namespace, repo, idx)
  252. if request:
  253. return _obj_anchor_tag(user, namespace, repo, request, text)
  254. return text
  255. class ImplicitCommitPattern(markdown.inlinepatterns.Pattern):
  256. """Implicit commit pattern."""
  257. def handleMatch(self, m):
  258. """When the pattern matches, update the text."""
  259. _log.debug("ImplicitCommitPattern: %s", m.groups())
  260. githash = markdown.util.AtomicString(m.group(2))
  261. text = "%s" % githash
  262. try:
  263. namespace, repo, user = _get_ns_repo_user()
  264. except RuntimeError:
  265. return text
  266. if pagure.lib.query.search_projects(
  267. flask.g.session, username=user, namespace=namespace, pattern=repo
  268. ) and _commit_exists(user, namespace, repo, githash):
  269. return _obj_anchor_tag(user, namespace, repo, githash, text[:7])
  270. return text
  271. class StrikeThroughPattern(markdown.inlinepatterns.Pattern):
  272. """~~striked~~ pattern class."""
  273. def handleMatch(self, m):
  274. """When the pattern matches, update the text."""
  275. _log.debug("StrikeThroughPattern: %s", m.groups())
  276. text = markdown.util.AtomicString(m.group(2))
  277. element = etree.Element("del")
  278. element.text = text
  279. return element
  280. class AutolinkPattern2(markdown.inlinepatterns.Pattern):
  281. """Return a link Element given an autolink (`<http://example/com>`)."""
  282. def handleMatch(self, m):
  283. """When the pattern matches, update the text.
  284. :arg m: the matched object
  285. """
  286. _log.debug("AutolinkPattern2: %s", m.groups())
  287. url = m.group(2)
  288. if url.startswith("<"):
  289. url = url[1:]
  290. if url.endswith(">"):
  291. url = url[:-1]
  292. el = etree.Element("a")
  293. el.set("href", self.unescape(url))
  294. el.text = markdown.util.AtomicString(url)
  295. return el
  296. class ImagePatternLazyLoad(ImagePattern):
  297. """Customize the image element matched for lazyloading."""
  298. def handleMatch(self, m, *args):
  299. out = super(ImagePatternLazyLoad, self).handleMatch(m, *args)
  300. if MK_VERSION == 3:
  301. el = out[0]
  302. else:
  303. el = out
  304. # Add a noscript tag with the untouched img tag
  305. noscript = etree.Element("noscript")
  306. noscript.append(el)
  307. # Modify the origina img tag
  308. img = etree.Element("img")
  309. img.set("data-src", el.get("src"))
  310. img.set("src", "")
  311. img.set("alt", el.get("alt"))
  312. img.set("class", "lazyload")
  313. # Create a global span in which we add both the new img tag and the
  314. # noscript one
  315. outel = etree.Element("span")
  316. outel.append(img)
  317. outel.append(noscript)
  318. output = outel
  319. if MK_VERSION == 3:
  320. output = (outel, out[1], out[2])
  321. return output
  322. class EncapsulateMarkdownPostprocessor(markdown.postprocessors.Postprocessor):
  323. def run(self, text):
  324. return '<div class="markdown">' + text + "</div>"
  325. class PagureExtension(markdown.extensions.Extension):
  326. def extendMarkdown(self, md, *args):
  327. # First, make it so that bare links get automatically linkified.
  328. AUTOLINK_RE = "(%s)" % "|".join(
  329. [
  330. r"<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>",
  331. r"\b(?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^)<>\s]+[^.,)<>\s]",
  332. r"<(Ii][Rr][Cc][Ss]?://[^>]*)>",
  333. r"\b[Ii][Rr][Cc][Ss]?://[^)<>\s]+[^.,)<>\s]",
  334. ]
  335. )
  336. def _old_mardkown_way():
  337. markdown.inlinepatterns.AUTOLINK_RE = AUTOLINK_RE
  338. md.preprocessors["implicit_issue"] = ImplicitIssuePreprocessor()
  339. md.inlinePatterns["mention"] = MentionPattern(MENTION_RE)
  340. # Customize the image linking to support lazy loading
  341. md.inlinePatterns["image_link"] = ImagePatternLazyLoad(
  342. markdown.inlinepatterns.IMAGE_LINK_RE, md
  343. )
  344. md.inlinePatterns["implicit_commit"] = ImplicitCommitPattern(
  345. IMPLICIT_COMMIT_RE
  346. )
  347. md.inlinePatterns["commit_links"] = CommitLinkPattern(
  348. COMMIT_LINK_RE
  349. )
  350. md.inlinePatterns["autolink"] = AutolinkPattern2(AUTOLINK_RE, md)
  351. if pagure_config.get("ENABLE_TICKETS", True):
  352. md.inlinePatterns["implicit_pr"] = ImplicitPRPattern(
  353. IMPLICIT_PR_RE
  354. )
  355. md.inlinePatterns["explicit_fork_issue"] = ExplicitLinkPattern(
  356. EXPLICIT_LINK_RE
  357. )
  358. md.inlinePatterns["implicit_issue"] = ImplicitIssuePattern(
  359. IMPLICIT_ISSUE_RE
  360. )
  361. md.inlinePatterns["striked"] = StrikeThroughPattern(
  362. STRIKE_THROUGH_RE
  363. )
  364. md.postprocessors[
  365. "encapsulate"
  366. ] = EncapsulateMarkdownPostprocessor()
  367. def _new_markdown_way():
  368. idx = md.inlinePatterns.get_index_for_name("autolink")
  369. md.inlinePatterns[idx].AUTOLINK_RE = AUTOLINK_RE
  370. # The number at the end is the priority, the highest priorities are
  371. # processed first.
  372. md.preprocessors.register(
  373. ImplicitIssuePreprocessor(), "implicit_issue", 100
  374. )
  375. md.inlinePatterns.register(
  376. MentionPattern(MENTION_RE), "mention", 95
  377. )
  378. # Customize the image linking to support lazy loading
  379. md.inlinePatterns.register(
  380. ImagePatternLazyLoad(
  381. markdown.inlinepatterns.IMAGE_LINK_RE, md
  382. ),
  383. "image_link",
  384. 90,
  385. )
  386. md.inlinePatterns.register(
  387. ImplicitCommitPattern(IMPLICIT_COMMIT_RE),
  388. "implicit_commit",
  389. 85,
  390. )
  391. md.inlinePatterns.register(
  392. CommitLinkPattern(COMMIT_LINK_RE), "autolink2", 80
  393. )
  394. md.inlinePatterns.register(
  395. AutolinkPattern2(AUTOLINK_RE, md), "commit_links", 75
  396. )
  397. if pagure_config.get("ENABLE_TICKETS", True):
  398. md.inlinePatterns.register(
  399. ImplicitPRPattern(IMPLICIT_PR_RE), "implicit_pr", 70
  400. )
  401. md.inlinePatterns.register(
  402. ExplicitLinkPattern(EXPLICIT_LINK_RE),
  403. "explicit_fork_issue",
  404. 65,
  405. )
  406. md.inlinePatterns.register(
  407. ImplicitIssuePattern(IMPLICIT_ISSUE_RE),
  408. "implicit_issue",
  409. 60,
  410. )
  411. md.inlinePatterns.register(
  412. StrikeThroughPattern(STRIKE_THROUGH_RE), "striked", 50
  413. )
  414. md.postprocessors.register(
  415. EncapsulateMarkdownPostprocessor(), "encapsulate", 100
  416. )
  417. if hasattr(md.inlinePatterns, "get_index_for_name"):
  418. _new_markdown_way()
  419. else:
  420. _old_mardkown_way()
  421. md.registerExtension(self)
  422. def makeExtension(*arg, **kwargs):
  423. return PagureExtension(**kwargs)
  424. def _issue_exists(user, namespace, repo, idx):
  425. """Utility method checking if a given issue exists."""
  426. repo_obj = pagure.lib.query.get_authorized_project(
  427. flask.g.session, project_name=repo, user=user, namespace=namespace
  428. )
  429. if not repo_obj:
  430. return False
  431. issue_obj = pagure.lib.query.search_issues(
  432. flask.g.session, repo=repo_obj, issueid=idx
  433. )
  434. if not issue_obj:
  435. return False
  436. return issue_obj
  437. def _pr_exists(user, namespace, repo, idx):
  438. """Utility method checking if a given PR exists."""
  439. repo_obj = pagure.lib.query.get_authorized_project(
  440. flask.g.session, project_name=repo, user=user, namespace=namespace
  441. )
  442. if not repo_obj:
  443. return False
  444. pr_obj = pagure.lib.query.search_pull_requests(
  445. flask.g.session, project_id=repo_obj.id, requestid=idx
  446. )
  447. if not pr_obj:
  448. return False
  449. return pr_obj
  450. def _commit_exists(user, namespace, repo, githash):
  451. """Utility method checking if a given commit exists."""
  452. repo_obj = pagure.lib.query.get_authorized_project(
  453. flask.g.session, project_name=repo, user=user, namespace=namespace
  454. )
  455. if not repo_obj:
  456. return False
  457. reponame = pagure.utils.get_repo_path(repo_obj)
  458. git_repo = pygit2.Repository(reponame)
  459. return githash in git_repo
  460. def _obj_anchor_tag(user, namespace, repo, obj, text):
  461. """
  462. Utility method generating the link to an issue or a PR.
  463. :return: An element tree containing the href to the issue or PR
  464. :rtype: xml.etree.ElementTree.Element
  465. """
  466. if isinstance(obj, six.string_types):
  467. url = flask.url_for(
  468. "ui_ns.view_commit",
  469. username=user,
  470. namespace=namespace,
  471. repo=repo,
  472. commitid=obj,
  473. )
  474. title = "Commit %s" % obj
  475. elif obj.isa == "issue":
  476. url = flask.url_for(
  477. "ui_ns.view_issue",
  478. username=user,
  479. namespace=namespace,
  480. repo=repo,
  481. issueid=obj.id,
  482. )
  483. if obj.private:
  484. title = "Private issue"
  485. else:
  486. if obj.status:
  487. title = "[%s] %s" % (obj.status, obj.title)
  488. else:
  489. title = obj.title
  490. else:
  491. url = flask.url_for(
  492. "ui_ns.request_pull",
  493. username=user,
  494. namespace=namespace,
  495. repo=repo,
  496. requestid=obj.id,
  497. )
  498. if obj.status:
  499. title = "[%s] %s" % (obj.status, obj.title)
  500. else:
  501. title = obj.title
  502. element = etree.Element("a")
  503. element.set("href", url)
  504. element.set("title", title)
  505. element.text = text
  506. return element
  507. def _get_ns_repo_user():
  508. """Return the namespace, repo, user corresponding to the given request
  509. :return: A tuple of three string corresponding to namespace, repo, user
  510. :rtype: tuple(str, str, str)
  511. """
  512. root = flask.request.url_root
  513. url = flask.request.url
  514. user = flask.request.args.get("user") or None
  515. namespace = flask.request.args.get("namespace") or None
  516. repo = flask.request.args.get("repo") or None
  517. if not user and not repo:
  518. _log.debug("Extracting repo info from url: %s", url)
  519. if "fork/" in url:
  520. user, ext = url.split("fork/")[1].split("/", 1)
  521. else:
  522. ext = url.split(root)[1]
  523. if ext.count("/") >= 3:
  524. namespace, repo = ext.split("/", 2)[:2]
  525. else:
  526. repo = ext.split("/", 1)[0]
  527. return (namespace, repo, user)