pfmarkdown.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. # This program is free software; you can redistribute it and/or
  2. # modify it under the terms of the GNU General Public License
  3. # as published by the Free Software Foundation; either version 2
  4. # of the License, or (at your option) any later version.
  5. #
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with this program; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  14. # USA.
  15. """ Pagure-flavored Markdown
  16. Author: Ralph Bean <rbean@redhat.com>
  17. Pierre-Yves Chibon <pingou@pingoured.fr>
  18. """
  19. from __future__ import unicode_literals, absolute_import
  20. import flask
  21. import markdown.inlinepatterns
  22. import markdown.preprocessors
  23. import markdown.util
  24. import pygit2
  25. import re
  26. import six
  27. import pagure.lib.query
  28. from pagure.config import config as pagure_config
  29. try:
  30. from markdown.inlinepatterns import ImagePattern as ImagePattern
  31. MK_VERSION = 2
  32. except ImportError:
  33. from markdown.inlinepatterns import ImageInlineProcessor as ImagePattern
  34. MK_VERSION = 3
# The (?<!\w) (and variants) we use a lot in all these regexes is a
# negative lookbehind assertion. It means 'match when the preceding
# character is not in the \w class'. This stops us from starting a
# match in the middle of a word (e.g. someone@something in the
# MENTION_RE regex). Note that it is a zero-length match - it does
# not capture or consume any of the string - and it does not appear
# as a group for the match object.
MENTION_RE = r"(?<!\w)@(\w+)"

# Each line below corresponds to a line of the regex:
# 1) Don't start matching in the middle of a word
# 2) See if there is a `fork/` or `forks/` at the start
# 3) See if we have a `user/`
# 4) See if we have a `namespace/`
# 5) Get the last part `project`
# 6) Get the identifier `#<id>`
EXPLICIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[0-9]+)"
)

# Same layout as EXPLICIT_LINK_RE, except that the identifier after the
# `#` is exactly 40 word characters (\w) instead of a run of digits.
COMMIT_LINK_RE = (
    r"(?<!\w)"
    r"(fork[s]?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]*?/)?"
    r"([a-zA-Z0-9_-]+)"
    r"#(?P<id>[\w]{40})"
)

# PREPROCIMPLLINK is used by ImplicitIssuePreprocessor to replace the
# '#' when a line starts with an implicit issue link, to prevent
# markdown parsing it as a header; we have to handle it here
IMPLICIT_ISSUE_RE = r"(?<!\w)(?:PREPROCIMPLLINK|#)([0-9]+)"

# `PR#<digits>`, not starting in the middle of a word.
IMPLICIT_PR_RE = r"(?<!\w)PR#([0-9]+)"

# 7 to 40 lowercase hexadecimal characters that are not preceded by a
# `<`, a word character or a `#`.
IMPLICIT_COMMIT_RE = r"(?<![<\w#])([a-f0-9]{7,40})"

# Anything (non-greedy) enclosed between two `~~` markers.
STRIKE_THROUGH_RE = r"~~(.*?)~~"
  73. class MentionPattern(markdown.inlinepatterns.Pattern):
  74. """ @user pattern class. """
  75. def handleMatch(self, m):
  76. """ When the pattern matches, update the text. """
  77. name = markdown.util.AtomicString(m.group(2))
  78. text = "@%s" % name
  79. user = pagure.lib.query.search_user(flask.g.session, username=name)
  80. if not user:
  81. return text
  82. element = markdown.util.etree.Element("a")
  83. base_url = pagure_config["APP_URL"]
  84. if base_url.endswith("/"):
  85. base_url = base_url[:-1]
  86. url = "%s/user/%s" % (base_url, user.username)
  87. element.set("href", url)
  88. element.text = text
  89. return element
  90. class ExplicitLinkPattern(markdown.inlinepatterns.Pattern):
  91. """ Explicit link pattern. """
  92. def handleMatch(self, m):
  93. """ When the pattern matches, update the text. """
  94. is_fork = m.group(2)
  95. user = m.group(3)
  96. namespace = m.group(4)
  97. repo = m.group(5)
  98. idx = m.group(6)
  99. text = "%s#%s" % (repo, idx)
  100. if not is_fork and user:
  101. namespace = user
  102. user = None
  103. if namespace:
  104. namespace = namespace.rstrip("/")
  105. text = "%s/%s" % (namespace, text)
  106. if user:
  107. user = user.rstrip("/")
  108. text = "%s/%s" % (user.rstrip("/"), text)
  109. try:
  110. idx = int(idx)
  111. except (ValueError, TypeError):
  112. return text
  113. issue = _issue_exists(user, namespace, repo, idx)
  114. if issue:
  115. return _obj_anchor_tag(user, namespace, repo, issue, text)
  116. request = _pr_exists(user, namespace, repo, idx)
  117. if request:
  118. return _obj_anchor_tag(user, namespace, repo, request, text)
  119. return text
  120. class CommitLinkPattern(markdown.inlinepatterns.Pattern):
  121. """ Commit link pattern. """
  122. def handleMatch(self, m):
  123. """ When the pattern matches, update the text. """
  124. is_fork = m.group(2)
  125. user = m.group(3)
  126. namespace = m.group(4)
  127. repo = m.group(5)
  128. commitid = m.group(6)
  129. text = "%s#%s" % (repo, commitid)
  130. if not is_fork and user:
  131. namespace = user
  132. user = None
  133. if namespace:
  134. namespace = namespace.rstrip("/")
  135. text = "%s/%s" % (namespace, text)
  136. if user:
  137. user = user.rstrip("/")
  138. text = "%s/%s" % (user.rstrip("/"), text)
  139. if pagure.lib.query.search_projects(
  140. flask.g.session,
  141. username=user,
  142. fork=is_fork,
  143. namespace=namespace,
  144. pattern=repo,
  145. ):
  146. return _obj_anchor_tag(user, namespace, repo, commitid, text)
  147. return text
  148. class ImplicitIssuePreprocessor(markdown.preprocessors.Preprocessor):
  149. """
  150. Preprocessor which handles lines starting with an implicit
  151. link. We have to modify these so that markdown doesn't interpret
  152. them as headers.
  153. """
  154. def run(self, lines):
  155. """
  156. If a line starts with an implicit issue link like #152,
  157. we replace the # with PREPROCIMPLLINK. This prevents markdown
  158. parsing the line as a header. ImplicitIssuePattern will catch
  159. and parse the text later. Otherwise, we change nothing.
  160. """
  161. # match a # character, then any number of digits
  162. regex = re.compile(r"#([0-9]+)")
  163. new_lines = []
  164. for line in lines:
  165. # avoid calling the regex if line doesn't start with #
  166. if line.startswith("#"):
  167. match = regex.match(line)
  168. if match:
  169. idx = int(match.group(1))
  170. # we have to check if this is a real issue or PR now.
  171. # we can't just 'tag' the text somehow and leave it to
  172. # the pattern to check, as if it's *not* one we want
  173. # the line treated as a header, so we need the block
  174. # processor to see it unmodified.
  175. try:
  176. namespace, repo, user = _get_ns_repo_user()
  177. except RuntimeError:
  178. # non-match path, keep original line
  179. new_lines.append(line)
  180. continue
  181. if _issue_exists(user, namespace, repo, idx) or _pr_exists(
  182. user, namespace, repo, idx
  183. ):
  184. # tweak the text
  185. new_lines.append("PREPROCIMPLLINK" + line[1:])
  186. continue
  187. # this is a non-match path, keep original line
  188. new_lines.append(line)
  189. continue
  190. return new_lines
  191. class ImplicitIssuePattern(markdown.inlinepatterns.Pattern):
  192. """ Implicit issue pattern. """
  193. def handleMatch(self, m):
  194. """ When the pattern matches, update the text. """
  195. idx = markdown.util.AtomicString(m.group(2))
  196. text = "#%s" % idx
  197. try:
  198. idx = int(idx)
  199. except (ValueError, TypeError):
  200. return text
  201. try:
  202. namespace, repo, user = _get_ns_repo_user()
  203. except RuntimeError:
  204. return text
  205. issue = _issue_exists(user, namespace, repo, idx)
  206. if issue:
  207. return _obj_anchor_tag(user, namespace, repo, issue, text)
  208. request = _pr_exists(user, namespace, repo, idx)
  209. if request:
  210. return _obj_anchor_tag(user, namespace, repo, request, text)
  211. return text
  212. class ImplicitPRPattern(markdown.inlinepatterns.Pattern):
  213. """ Implicit pull-request pattern. """
  214. def handleMatch(self, m):
  215. """ When the pattern matches, update the text. """
  216. idx = markdown.util.AtomicString(m.group(2))
  217. text = "PR#%s" % idx
  218. try:
  219. idx = int(idx)
  220. except (ValueError, TypeError):
  221. return text
  222. try:
  223. namespace, repo, user = _get_ns_repo_user()
  224. except RuntimeError:
  225. return text
  226. issue = _issue_exists(user, namespace, repo, idx)
  227. if issue:
  228. return _obj_anchor_tag(user, namespace, repo, issue, text)
  229. request = _pr_exists(user, namespace, repo, idx)
  230. if request:
  231. return _obj_anchor_tag(user, namespace, repo, request, text)
  232. return text
  233. class ImplicitCommitPattern(markdown.inlinepatterns.Pattern):
  234. """ Implicit commit pattern. """
  235. def handleMatch(self, m):
  236. """ When the pattern matches, update the text. """
  237. githash = markdown.util.AtomicString(m.group(2))
  238. text = "%s" % githash
  239. try:
  240. namespace, repo, user = _get_ns_repo_user()
  241. except RuntimeError:
  242. return text
  243. if pagure.lib.query.search_projects(
  244. flask.g.session, username=user, namespace=namespace, pattern=repo
  245. ) and _commit_exists(user, namespace, repo, githash):
  246. return _obj_anchor_tag(user, namespace, repo, githash, text[:7])
  247. return text
  248. class StrikeThroughPattern(markdown.inlinepatterns.Pattern):
  249. """ ~~striked~~ pattern class. """
  250. def handleMatch(self, m):
  251. """ When the pattern matches, update the text. """
  252. text = markdown.util.AtomicString(m.group(2))
  253. element = markdown.util.etree.Element("del")
  254. element.text = text
  255. return element
  256. class AutolinkPattern2(markdown.inlinepatterns.Pattern):
  257. """ Return a link Element given an autolink (`<http://example/com>`). """
  258. def handleMatch(self, m):
  259. """ When the pattern matches, update the text.
  260. :arg m: the matched object
  261. """
  262. url = m.group(2)
  263. if url.startswith("<"):
  264. url = url[1:]
  265. if url.endswith(">"):
  266. url = url[:-1]
  267. el = markdown.util.etree.Element("a")
  268. el.set("href", self.unescape(url))
  269. el.text = markdown.util.AtomicString(url)
  270. return el
  271. class ImagePatternLazyLoad(ImagePattern):
  272. """ Customize the image element matched for lazyloading. """
  273. def handleMatch(self, m, *args):
  274. out = super(ImagePatternLazyLoad, self).handleMatch(m, *args)
  275. if MK_VERSION == 3:
  276. el = out[0]
  277. else:
  278. el = out
  279. # Add a noscript tag with the untouched img tag
  280. noscript = markdown.util.etree.Element("noscript")
  281. noscript.append(el)
  282. # Modify the origina img tag
  283. img = markdown.util.etree.Element("img")
  284. img.set("data-src", el.get("src"))
  285. img.set("src", "")
  286. img.set("alt", el.get("alt"))
  287. img.set("class", "lazyload")
  288. # Create a global span in which we add both the new img tag and the
  289. # noscript one
  290. outel = markdown.util.etree.Element("span")
  291. outel.append(img)
  292. outel.append(noscript)
  293. output = outel
  294. if MK_VERSION == 3:
  295. output = (outel, out[1], out[2])
  296. return output
  297. class PagureExtension(markdown.extensions.Extension):
  298. def extendMarkdown(self, md, md_globals):
  299. # First, make it so that bare links get automatically linkified.
  300. AUTOLINK_RE = "(%s)" % "|".join(
  301. [
  302. r"<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>",
  303. r"\b(?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^)<>\s]+[^.,)<>\s]",
  304. r"<(Ii][Rr][Cc][Ss]?://[^>]*)>",
  305. r"\b[Ii][Rr][Cc][Ss]?://[^)<>\s]+[^.,)<>\s]",
  306. ]
  307. )
  308. markdown.inlinepatterns.AUTOLINK_RE = AUTOLINK_RE
  309. md.preprocessors["implicit_issue"] = ImplicitIssuePreprocessor()
  310. md.inlinePatterns["mention"] = MentionPattern(MENTION_RE)
  311. # Customize the image linking to support lazy loading
  312. md.inlinePatterns["image_link"] = ImagePatternLazyLoad(
  313. markdown.inlinepatterns.IMAGE_LINK_RE, md
  314. )
  315. md.inlinePatterns["implicit_commit"] = ImplicitCommitPattern(
  316. IMPLICIT_COMMIT_RE
  317. )
  318. md.inlinePatterns["commit_links"] = CommitLinkPattern(COMMIT_LINK_RE)
  319. md.inlinePatterns["autolink"] = AutolinkPattern2(AUTOLINK_RE, md)
  320. if pagure_config.get("ENABLE_TICKETS", True):
  321. md.inlinePatterns["implicit_pr"] = ImplicitPRPattern(
  322. IMPLICIT_PR_RE
  323. )
  324. md.inlinePatterns["explicit_fork_issue"] = ExplicitLinkPattern(
  325. EXPLICIT_LINK_RE
  326. )
  327. md.inlinePatterns["implicit_issue"] = ImplicitIssuePattern(
  328. IMPLICIT_ISSUE_RE
  329. )
  330. md.inlinePatterns["striked"] = StrikeThroughPattern(STRIKE_THROUGH_RE)
  331. md.registerExtension(self)
  332. def makeExtension(*arg, **kwargs):
  333. return PagureExtension(**kwargs)
  334. def _issue_exists(user, namespace, repo, idx):
  335. """ Utility method checking if a given issue exists. """
  336. repo_obj = pagure.lib.query.get_authorized_project(
  337. flask.g.session, project_name=repo, user=user, namespace=namespace
  338. )
  339. if not repo_obj:
  340. return False
  341. issue_obj = pagure.lib.query.search_issues(
  342. flask.g.session, repo=repo_obj, issueid=idx
  343. )
  344. if not issue_obj:
  345. return False
  346. return issue_obj
  347. def _pr_exists(user, namespace, repo, idx):
  348. """ Utility method checking if a given PR exists. """
  349. repo_obj = pagure.lib.query.get_authorized_project(
  350. flask.g.session, project_name=repo, user=user, namespace=namespace
  351. )
  352. if not repo_obj:
  353. return False
  354. pr_obj = pagure.lib.query.search_pull_requests(
  355. flask.g.session, project_id=repo_obj.id, requestid=idx
  356. )
  357. if not pr_obj:
  358. return False
  359. return pr_obj
  360. def _commit_exists(user, namespace, repo, githash):
  361. """ Utility method checking if a given commit exists. """
  362. repo_obj = pagure.lib.query.get_authorized_project(
  363. flask.g.session, project_name=repo, user=user, namespace=namespace
  364. )
  365. if not repo_obj:
  366. return False
  367. reponame = pagure.utils.get_repo_path(repo_obj)
  368. git_repo = pygit2.Repository(reponame)
  369. return githash in git_repo
  370. def _obj_anchor_tag(user, namespace, repo, obj, text):
  371. """
  372. Utility method generating the link to an issue or a PR.
  373. :return: An element tree containing the href to the issue or PR
  374. :rtype: xml.etree.ElementTree.Element
  375. """
  376. if isinstance(obj, six.string_types):
  377. url = flask.url_for(
  378. "ui_ns.view_commit",
  379. username=user,
  380. namespace=namespace,
  381. repo=repo,
  382. commitid=obj,
  383. )
  384. title = "Commit %s" % obj
  385. elif obj.isa == "issue":
  386. url = flask.url_for(
  387. "ui_ns.view_issue",
  388. username=user,
  389. namespace=namespace,
  390. repo=repo,
  391. issueid=obj.id,
  392. )
  393. if obj.private:
  394. title = "Private issue"
  395. else:
  396. if obj.status:
  397. title = "[%s] %s" % (obj.status, obj.title)
  398. else:
  399. title = obj.title
  400. else:
  401. url = flask.url_for(
  402. "ui_ns.request_pull",
  403. username=user,
  404. namespace=namespace,
  405. repo=repo,
  406. requestid=obj.id,
  407. )
  408. if obj.status:
  409. title = "[%s] %s" % (obj.status, obj.title)
  410. else:
  411. title = obj.title
  412. element = markdown.util.etree.Element("a")
  413. element.set("href", url)
  414. element.set("title", title)
  415. element.text = text
  416. return element
  417. def _get_ns_repo_user():
  418. """ Return the namespace, repo, user corresponding to the given request
  419. :return: A tuple of three string corresponding to namespace, repo, user
  420. :rtype: tuple(str, str, str)
  421. """
  422. root = flask.request.url_root
  423. url = flask.request.url
  424. user = flask.request.args.get("user")
  425. namespace = flask.request.args.get("namespace")
  426. repo = flask.request.args.get("repo")
  427. if not user and not repo:
  428. if "fork/" in url:
  429. user, ext = url.split("fork/")[1].split("/", 1)
  430. else:
  431. ext = url.split(root)[1]
  432. if ext.count("/") >= 3:
  433. namespace, repo = ext.split("/", 2)[:2]
  434. else:
  435. repo = ext.split("/", 1)[0]
  436. return (namespace, repo, user)