pfmarkdown.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. # This program is free software; you can redistribute it and/or
  2. # modify it under the terms of the GNU General Public License
  3. # as published by the Free Software Foundation; either version 2
  4. # of the License, or (at your option) any later version.
  5. #
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with this program; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  14. # USA.
  15. """ Pagure-flavored Markdown
  16. Author: Ralph Bean <rbean@redhat.com>
  17. Pierre-Yves Chibon <pingou@pingoured.fr>
  18. """
  19. from __future__ import absolute_import, unicode_literals
  20. import logging
  21. import re
  22. import flask
  23. import markdown.inlinepatterns
  24. import markdown.postprocessors
  25. import markdown.preprocessors
  26. import markdown.util
  27. import pygit2
  28. import six
  29. import pagure.lib.query
  30. from pagure.config import config as pagure_config
  31. try:
  32. from markdown.inlinepatterns import ImagePattern as ImagePattern
  33. MK_VERSION = 2
  34. except ImportError:
  35. from markdown.inlinepatterns import ImageInlineProcessor as ImagePattern
  36. MK_VERSION = 3
# Module-level logger shared by every pattern and helper in this file.
_log = logging.getLogger(__name__)

# The (?<!\w) (and variants) we use a lot in all these regexes is a
# negative lookbehind assertion. It means 'match when the preceding
# character is not in the \w class'. This stops us from starting a
# match in the middle of a word (e.g. someone@something in the
# MENTION_RE regex). Note that it is a zero-length match - it does
# not capture or consume any of the string - and it does not appear
# as a group for the match object.
MENTION_RE = r"(?<![\w\-\"\'\`\$\!\*\+#%&/=^{}|~])@(\w+)"

# Each line below corresponds to a line of the regex:
#  1) Don't start matching in the middle of a word
#  2) See if there is a `fork/` (or `forks/`) at the start
#  3) See if we have a `user/`
#  4) See if we have a `namespace/`
#  5) Get the last part `project`
#  6) Get the identifier `#<id>`
EXPLICIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[0-9]+)"
)

# Same layout as EXPLICIT_LINK_RE, except that the identifier after the
# `#` is exactly 40 word characters (a full-length commit hash) instead
# of a run of digits.
COMMIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[\w]{40})"
)

# PREPROCIMPLLINK is used by ImplicitIssuePreprocessor to replace the
# '#' when a line starts with an implicit issue link, to prevent
# markdown parsing it as a header; we have to handle it here
IMPLICIT_ISSUE_RE = r"(?<!\w)(?:PREPROCIMPLLINK|#)([0-9]+)"

# `PR#<digits>` that does not start in the middle of a word.
IMPLICIT_PR_RE = r"(?<!\w)PR#([0-9]+)"

# 7 to 40 lowercase hexadecimal characters that are not preceded by a
# `<`, a word character or a `#`.
IMPLICIT_COMMIT_RE = r"(?<![<\w#])([a-f0-9]{7,40})"

# Text wrapped in double tildes; the content is matched non-greedily.
STRIKE_THROUGH_RE = r"~~(.*?)~~"
  76. class MentionPattern(markdown.inlinepatterns.Pattern):
  77. """@user pattern class."""
  78. def handleMatch(self, m):
  79. """When the pattern matches, update the text."""
  80. _log.debug("MentionPattern: %s", m.groups())
  81. name = markdown.util.AtomicString(m.group(2))
  82. text = "@%s" % name
  83. user = pagure.lib.query.search_user(flask.g.session, username=name)
  84. if not user:
  85. return text
  86. element = markdown.util.etree.Element("a")
  87. base_url = pagure_config["APP_URL"]
  88. if base_url.endswith("/"):
  89. base_url = base_url[:-1]
  90. url = "%s/user/%s" % (base_url, user.username)
  91. element.set("href", url)
  92. element.text = text
  93. return element
  94. class ExplicitLinkPattern(markdown.inlinepatterns.Pattern):
  95. """Explicit link pattern."""
  96. def handleMatch(self, m):
  97. """When the pattern matches, update the text."""
  98. _log.debug("ExplicitLinkPattern: %s", m.groups())
  99. is_fork = m.group(2)
  100. user = m.group(3)
  101. namespace = m.group(4)
  102. repo = m.group(5)
  103. idx = m.group(6)
  104. text = "%s#%s" % (repo, idx)
  105. if not is_fork and user:
  106. namespace = user
  107. user = None
  108. if namespace:
  109. namespace = namespace.rstrip("/")
  110. text = "%s/%s" % (namespace, text)
  111. if user:
  112. user = user.rstrip("/")
  113. text = "%s/%s" % (user.rstrip("/"), text)
  114. try:
  115. idx = int(idx)
  116. except (ValueError, TypeError):
  117. return text
  118. issue = _issue_exists(user, namespace, repo, idx)
  119. if issue:
  120. return _obj_anchor_tag(user, namespace, repo, issue, text)
  121. request = _pr_exists(user, namespace, repo, idx)
  122. if request:
  123. return _obj_anchor_tag(user, namespace, repo, request, text)
  124. return text
  125. class CommitLinkPattern(markdown.inlinepatterns.Pattern):
  126. """Commit link pattern."""
  127. def handleMatch(self, m):
  128. """When the pattern matches, update the text."""
  129. _log.debug("CommitLinkPattern: %s", m.groups())
  130. is_fork = m.group(2)
  131. user = m.group(3)
  132. namespace = m.group(4)
  133. repo = m.group(5)
  134. commitid = m.group(6)
  135. text = "%s#%s" % (repo, commitid)
  136. if not is_fork and user:
  137. namespace = user
  138. user = None
  139. if namespace:
  140. namespace = namespace.rstrip("/")
  141. text = "%s/%s" % (namespace, text)
  142. if user:
  143. user = user.rstrip("/")
  144. text = "%s/%s" % (user.rstrip("/"), text)
  145. if pagure.lib.query.search_projects(
  146. flask.g.session,
  147. username=user,
  148. fork=is_fork,
  149. namespace=namespace,
  150. pattern=repo,
  151. ):
  152. return _obj_anchor_tag(user, namespace, repo, commitid, text)
  153. return text
  154. class ImplicitIssuePreprocessor(markdown.preprocessors.Preprocessor):
  155. """
  156. Preprocessor which handles lines starting with an implicit
  157. link. We have to modify these so that markdown doesn't interpret
  158. them as headers.
  159. """
  160. def run(self, lines):
  161. """
  162. If a line starts with an implicit issue link like #152,
  163. we replace the # with PREPROCIMPLLINK. This prevents markdown
  164. parsing the line as a header. ImplicitIssuePattern will catch
  165. and parse the text later. Otherwise, we change nothing.
  166. """
  167. _log.debug("ImplicitIssuePreprocessor")
  168. # match a # character, then any number of digits
  169. regex = re.compile(r"#([0-9]+)")
  170. new_lines = []
  171. for line in lines:
  172. # avoid calling the regex if line doesn't start with #
  173. if line.startswith("#"):
  174. match = regex.match(line)
  175. if match:
  176. idx = int(match.group(1))
  177. # we have to check if this is a real issue or PR now.
  178. # we can't just 'tag' the text somehow and leave it to
  179. # the pattern to check, as if it's *not* one we want
  180. # the line treated as a header, so we need the block
  181. # processor to see it unmodified.
  182. try:
  183. namespace, repo, user = _get_ns_repo_user()
  184. except RuntimeError:
  185. # non-match path, keep original line
  186. new_lines.append(line)
  187. continue
  188. if _issue_exists(user, namespace, repo, idx) or _pr_exists(
  189. user, namespace, repo, idx
  190. ):
  191. # tweak the text
  192. new_lines.append("PREPROCIMPLLINK" + line[1:])
  193. continue
  194. # this is a non-match path, keep original line
  195. new_lines.append(line)
  196. continue
  197. return new_lines
  198. class ImplicitIssuePattern(markdown.inlinepatterns.Pattern):
  199. """Implicit issue pattern."""
  200. def handleMatch(self, m):
  201. """When the pattern matches, update the text."""
  202. _log.debug("ImplicitIssuePattern: %s", m.groups())
  203. idx = markdown.util.AtomicString(m.group(2))
  204. text = "#%s" % idx
  205. try:
  206. idx = int(idx)
  207. except (ValueError, TypeError):
  208. _log.debug("Invalid integer for %s, bailing", idx)
  209. return text
  210. try:
  211. namespace, repo, user = _get_ns_repo_user()
  212. except RuntimeError:
  213. _log.debug("No repo found associated with this context, bailing")
  214. return text
  215. _log.debug(
  216. "Checking ns: %s, name: %s, user:%s for id: %s",
  217. namespace,
  218. repo,
  219. user,
  220. idx,
  221. )
  222. issue = _issue_exists(user, namespace, repo, idx)
  223. if issue:
  224. _log.debug("Linking to an issue")
  225. return _obj_anchor_tag(user, namespace, repo, issue, text)
  226. request = _pr_exists(user, namespace, repo, idx)
  227. if request:
  228. _log.debug("Linking to an PR")
  229. return _obj_anchor_tag(user, namespace, repo, request, text)
  230. _log.debug("Bailing, return text as is")
  231. return text
  232. class ImplicitPRPattern(markdown.inlinepatterns.Pattern):
  233. """Implicit pull-request pattern."""
  234. def handleMatch(self, m):
  235. """When the pattern matches, update the text."""
  236. _log.debug("ImplicitPRPattern: %s", m.groups())
  237. idx = markdown.util.AtomicString(m.group(2))
  238. text = "PR#%s" % idx
  239. try:
  240. idx = int(idx)
  241. except (ValueError, TypeError):
  242. return text
  243. try:
  244. namespace, repo, user = _get_ns_repo_user()
  245. except RuntimeError:
  246. return text
  247. issue = _issue_exists(user, namespace, repo, idx)
  248. if issue:
  249. return _obj_anchor_tag(user, namespace, repo, issue, text)
  250. request = _pr_exists(user, namespace, repo, idx)
  251. if request:
  252. return _obj_anchor_tag(user, namespace, repo, request, text)
  253. return text
  254. class ImplicitCommitPattern(markdown.inlinepatterns.Pattern):
  255. """Implicit commit pattern."""
  256. def handleMatch(self, m):
  257. """When the pattern matches, update the text."""
  258. _log.debug("ImplicitCommitPattern: %s", m.groups())
  259. githash = markdown.util.AtomicString(m.group(2))
  260. text = "%s" % githash
  261. try:
  262. namespace, repo, user = _get_ns_repo_user()
  263. except RuntimeError:
  264. return text
  265. if pagure.lib.query.search_projects(
  266. flask.g.session, username=user, namespace=namespace, pattern=repo
  267. ) and _commit_exists(user, namespace, repo, githash):
  268. return _obj_anchor_tag(user, namespace, repo, githash, text[:7])
  269. return text
  270. class StrikeThroughPattern(markdown.inlinepatterns.Pattern):
  271. """~~striked~~ pattern class."""
  272. def handleMatch(self, m):
  273. """When the pattern matches, update the text."""
  274. _log.debug("StrikeThroughPattern: %s", m.groups())
  275. text = markdown.util.AtomicString(m.group(2))
  276. element = markdown.util.etree.Element("del")
  277. element.text = text
  278. return element
  279. class AutolinkPattern2(markdown.inlinepatterns.Pattern):
  280. """Return a link Element given an autolink (`<http://example/com>`)."""
  281. def handleMatch(self, m):
  282. """When the pattern matches, update the text.
  283. :arg m: the matched object
  284. """
  285. _log.debug("AutolinkPattern2: %s", m.groups())
  286. url = m.group(2)
  287. if url.startswith("<"):
  288. url = url[1:]
  289. if url.endswith(">"):
  290. url = url[:-1]
  291. el = markdown.util.etree.Element("a")
  292. el.set("href", self.unescape(url))
  293. el.text = markdown.util.AtomicString(url)
  294. return el
  295. class ImagePatternLazyLoad(ImagePattern):
  296. """Customize the image element matched for lazyloading."""
  297. def handleMatch(self, m, *args):
  298. out = super(ImagePatternLazyLoad, self).handleMatch(m, *args)
  299. if MK_VERSION == 3:
  300. el = out[0]
  301. else:
  302. el = out
  303. # Add a noscript tag with the untouched img tag
  304. noscript = markdown.util.etree.Element("noscript")
  305. noscript.append(el)
  306. # Modify the origina img tag
  307. img = markdown.util.etree.Element("img")
  308. img.set("data-src", el.get("src"))
  309. img.set("src", "")
  310. img.set("alt", el.get("alt"))
  311. img.set("class", "lazyload")
  312. # Create a global span in which we add both the new img tag and the
  313. # noscript one
  314. outel = markdown.util.etree.Element("span")
  315. outel.append(img)
  316. outel.append(noscript)
  317. output = outel
  318. if MK_VERSION == 3:
  319. output = (outel, out[1], out[2])
  320. return output
  321. class EncapsulateMarkdownPostprocessor(markdown.postprocessors.Postprocessor):
  322. def run(self, text):
  323. return '<div class="markdown">' + text + "</div>"
  324. class PagureExtension(markdown.extensions.Extension):
  325. def extendMarkdown(self, md, *args):
  326. # First, make it so that bare links get automatically linkified.
  327. AUTOLINK_RE = "(%s)" % "|".join(
  328. [
  329. r"<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>",
  330. r"\b(?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^)<>\s]+[^.,)<>\s]",
  331. r"<(Ii][Rr][Cc][Ss]?://[^>]*)>",
  332. r"\b[Ii][Rr][Cc][Ss]?://[^)<>\s]+[^.,)<>\s]",
  333. ]
  334. )
  335. def _old_mardkown_way():
  336. markdown.inlinepatterns.AUTOLINK_RE = AUTOLINK_RE
  337. md.preprocessors["implicit_issue"] = ImplicitIssuePreprocessor()
  338. md.inlinePatterns["mention"] = MentionPattern(MENTION_RE)
  339. # Customize the image linking to support lazy loading
  340. md.inlinePatterns["image_link"] = ImagePatternLazyLoad(
  341. markdown.inlinepatterns.IMAGE_LINK_RE, md
  342. )
  343. md.inlinePatterns["implicit_commit"] = ImplicitCommitPattern(
  344. IMPLICIT_COMMIT_RE
  345. )
  346. md.inlinePatterns["commit_links"] = CommitLinkPattern(
  347. COMMIT_LINK_RE
  348. )
  349. md.inlinePatterns["autolink"] = AutolinkPattern2(AUTOLINK_RE, md)
  350. if pagure_config.get("ENABLE_TICKETS", True):
  351. md.inlinePatterns["implicit_pr"] = ImplicitPRPattern(
  352. IMPLICIT_PR_RE
  353. )
  354. md.inlinePatterns["explicit_fork_issue"] = ExplicitLinkPattern(
  355. EXPLICIT_LINK_RE
  356. )
  357. md.inlinePatterns["implicit_issue"] = ImplicitIssuePattern(
  358. IMPLICIT_ISSUE_RE
  359. )
  360. md.inlinePatterns["striked"] = StrikeThroughPattern(
  361. STRIKE_THROUGH_RE
  362. )
  363. md.postprocessors[
  364. "encapsulate"
  365. ] = EncapsulateMarkdownPostprocessor()
  366. def _new_markdown_way():
  367. idx = md.inlinePatterns.get_index_for_name("autolink")
  368. md.inlinePatterns[idx].AUTOLINK_RE = AUTOLINK_RE
  369. # The number at the end is the priority, the highest priorities are
  370. # processed first.
  371. md.preprocessors.register(
  372. ImplicitIssuePreprocessor(), "implicit_issue", 100
  373. )
  374. md.inlinePatterns.register(
  375. MentionPattern(MENTION_RE), "mention", 95
  376. )
  377. # Customize the image linking to support lazy loading
  378. md.inlinePatterns.register(
  379. ImagePatternLazyLoad(
  380. markdown.inlinepatterns.IMAGE_LINK_RE, md
  381. ),
  382. "image_link",
  383. 90,
  384. )
  385. md.inlinePatterns.register(
  386. ImplicitCommitPattern(IMPLICIT_COMMIT_RE),
  387. "implicit_commit",
  388. 85,
  389. )
  390. md.inlinePatterns.register(
  391. CommitLinkPattern(COMMIT_LINK_RE), "autolink2", 80
  392. )
  393. md.inlinePatterns.register(
  394. AutolinkPattern2(AUTOLINK_RE, md), "commit_links", 75
  395. )
  396. if pagure_config.get("ENABLE_TICKETS", True):
  397. md.inlinePatterns.register(
  398. ImplicitPRPattern(IMPLICIT_PR_RE), "implicit_pr", 70
  399. )
  400. md.inlinePatterns.register(
  401. ExplicitLinkPattern(EXPLICIT_LINK_RE),
  402. "explicit_fork_issue",
  403. 65,
  404. )
  405. md.inlinePatterns.register(
  406. ImplicitIssuePattern(IMPLICIT_ISSUE_RE),
  407. "implicit_issue",
  408. 60,
  409. )
  410. md.inlinePatterns.register(
  411. StrikeThroughPattern(STRIKE_THROUGH_RE), "striked", 50
  412. )
  413. md.postprocessors.register(
  414. EncapsulateMarkdownPostprocessor(), "encapsulate", 100
  415. )
  416. if hasattr(md.inlinePatterns, "get_index_for_name"):
  417. _new_markdown_way()
  418. else:
  419. _old_mardkown_way()
  420. md.registerExtension(self)
  421. def makeExtension(*arg, **kwargs):
  422. return PagureExtension(**kwargs)
  423. def _issue_exists(user, namespace, repo, idx):
  424. """Utility method checking if a given issue exists."""
  425. repo_obj = pagure.lib.query.get_authorized_project(
  426. flask.g.session, project_name=repo, user=user, namespace=namespace
  427. )
  428. if not repo_obj:
  429. return False
  430. issue_obj = pagure.lib.query.search_issues(
  431. flask.g.session, repo=repo_obj, issueid=idx
  432. )
  433. if not issue_obj:
  434. return False
  435. return issue_obj
  436. def _pr_exists(user, namespace, repo, idx):
  437. """Utility method checking if a given PR exists."""
  438. repo_obj = pagure.lib.query.get_authorized_project(
  439. flask.g.session, project_name=repo, user=user, namespace=namespace
  440. )
  441. if not repo_obj:
  442. return False
  443. pr_obj = pagure.lib.query.search_pull_requests(
  444. flask.g.session, project_id=repo_obj.id, requestid=idx
  445. )
  446. if not pr_obj:
  447. return False
  448. return pr_obj
  449. def _commit_exists(user, namespace, repo, githash):
  450. """Utility method checking if a given commit exists."""
  451. repo_obj = pagure.lib.query.get_authorized_project(
  452. flask.g.session, project_name=repo, user=user, namespace=namespace
  453. )
  454. if not repo_obj:
  455. return False
  456. reponame = pagure.utils.get_repo_path(repo_obj)
  457. git_repo = pygit2.Repository(reponame)
  458. return githash in git_repo
  459. def _obj_anchor_tag(user, namespace, repo, obj, text):
  460. """
  461. Utility method generating the link to an issue or a PR.
  462. :return: An element tree containing the href to the issue or PR
  463. :rtype: xml.etree.ElementTree.Element
  464. """
  465. if isinstance(obj, six.string_types):
  466. url = flask.url_for(
  467. "ui_ns.view_commit",
  468. username=user,
  469. namespace=namespace,
  470. repo=repo,
  471. commitid=obj,
  472. )
  473. title = "Commit %s" % obj
  474. elif obj.isa == "issue":
  475. url = flask.url_for(
  476. "ui_ns.view_issue",
  477. username=user,
  478. namespace=namespace,
  479. repo=repo,
  480. issueid=obj.id,
  481. )
  482. if obj.private:
  483. title = "Private issue"
  484. else:
  485. if obj.status:
  486. title = "[%s] %s" % (obj.status, obj.title)
  487. else:
  488. title = obj.title
  489. else:
  490. url = flask.url_for(
  491. "ui_ns.request_pull",
  492. username=user,
  493. namespace=namespace,
  494. repo=repo,
  495. requestid=obj.id,
  496. )
  497. if obj.status:
  498. title = "[%s] %s" % (obj.status, obj.title)
  499. else:
  500. title = obj.title
  501. element = markdown.util.etree.Element("a")
  502. element.set("href", url)
  503. element.set("title", title)
  504. element.text = text
  505. return element
  506. def _get_ns_repo_user():
  507. """Return the namespace, repo, user corresponding to the given request
  508. :return: A tuple of three string corresponding to namespace, repo, user
  509. :rtype: tuple(str, str, str)
  510. """
  511. root = flask.request.url_root
  512. url = flask.request.url
  513. user = flask.request.args.get("user") or None
  514. namespace = flask.request.args.get("namespace") or None
  515. repo = flask.request.args.get("repo") or None
  516. if not user and not repo:
  517. _log.debug("Extracting repo info from url: %s", url)
  518. if "fork/" in url:
  519. user, ext = url.split("fork/")[1].split("/", 1)
  520. else:
  521. ext = url.split(root)[1]
  522. if ext.count("/") >= 3:
  523. namespace, repo = ext.split("/", 2)[:2]
  524. else:
  525. repo = ext.split("/", 1)[0]
  526. return (namespace, repo, user)