clone.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. # -*- coding: utf-8 -*-
  2. """
  3. (c) 2014-2018 - Copyright Red Hat Inc
  4. Authors:
  5. Patrick Uiterwijk <puiterwijk@redhat.com>
  6. """
  7. from __future__ import absolute_import, unicode_literals
  8. import base64
  9. import logging
  10. import os
  11. import subprocess
  12. import tempfile
  13. import flask
  14. import requests
  15. import werkzeug.wsgi
  16. import pagure.exceptions
  17. import pagure.forms
  18. import pagure.lib.git
  19. import pagure.lib.mimetype
  20. import pagure.lib.plugins
  21. import pagure.lib.query
  22. import pagure.lib.tasks
  23. import pagure.ui.plugins
  24. from pagure.config import config as pagure_config
  25. from pagure.ui import UI_NS
  26. _log = logging.getLogger(__name__)
  27. _auth_log = logging.getLogger("pagure_auth")
  28. def _get_remote_user(project):
  29. """Returns the remote user using either the content of
  30. ``flask.g.remote_user`` or checking the headers for ``Authorization``
  31. and check if the provided API token is valid.
  32. """
  33. remote_user = flask.request.remote_user
  34. if not remote_user:
  35. # Check the headers
  36. if "Authorization" in flask.request.headers:
  37. auth = flask.request.headers["Authorization"]
  38. if "Basic" in auth:
  39. auth_token = auth.split("Basic ", 1)[-1]
  40. info = base64.b64decode(auth_token).decode("utf-8")
  41. if ":" in info:
  42. username, token_str = info.split(":")
  43. auth = pagure_config.get("PAGURE_AUTH", None)
  44. if auth == "local":
  45. import pagure.lib.login
  46. try:
  47. pagure.lib.login.check_username_and_password(
  48. flask.g.session, username, token_str
  49. )
  50. except pagure.exceptions.PagureException as ex:
  51. _log.exception(ex)
  52. else:
  53. remote_user = username
  54. # We're doing a second check here, if the user/password
  55. # approach above didn't work, the user may still be
  56. # using an API token, so we want to check that as well.
  57. if not remote_user:
  58. import pagure.lib.query
  59. token = pagure.lib.query.get_api_token(
  60. flask.g.session, token_str
  61. )
  62. if token:
  63. if (
  64. not token.expired
  65. and username == token.user.username
  66. and "commit" in token.acls_list
  67. ):
  68. if (
  69. project
  70. and token.project
  71. and token.project.fullname
  72. != project.fullname
  73. ):
  74. return remote_user
  75. flask.g.authenticated = True
  76. remote_user = token.user.username
  77. return remote_user
  78. def proxy_raw_git(project):
  79. """Proxy a request to Git or gitolite3 via a subprocess.
  80. This should get called after it is determined the requested project
  81. is not on repoSpanner.
  82. """
  83. _log.debug("Raw git clone proxy started")
  84. remote_user = _get_remote_user(project)
  85. # We are going to shell out to gitolite-shell. Prepare the env it needs.
  86. gitenv = {
  87. "PATH": os.environ["PATH"],
  88. # These are the vars git-http-backend needs
  89. "PATH_INFO": flask.request.path,
  90. "REMOTE_USER": remote_user,
  91. "USER": remote_user,
  92. "REMOTE_ADDR": flask.request.remote_addr,
  93. "CONTENT_TYPE": flask.request.content_type,
  94. "QUERY_STRING": flask.request.query_string,
  95. "REQUEST_METHOD": flask.request.method,
  96. "GIT_PROJECT_ROOT": pagure_config["GIT_FOLDER"],
  97. # We perform access checks, so can bypass that of Git
  98. "GIT_HTTP_EXPORT_ALL": "true",
  99. # This might be needed by hooks
  100. "PAGURE_CONFIG": os.environ.get("PAGURE_CONFIG"),
  101. "PYTHONPATH": os.environ.get("PYTHONPATH"),
  102. # Some HTTP headers that we want to pass through because they
  103. # impact the request/response. Only add headers here that are
  104. # "safe", as in they don't allow for other issues.
  105. "HTTP_CONTENT_ENCODING": flask.request.content_encoding,
  106. }
  107. _auth_log.info(
  108. "Serving git to |user: %s|IP: %s|method: %s|repo: %s|query: %s"
  109. % (
  110. remote_user,
  111. flask.request.remote_addr,
  112. flask.request.method,
  113. project.path,
  114. flask.request.query_string,
  115. )
  116. )
  117. gitolite = pagure_config["HTTP_REPO_ACCESS_GITOLITE"]
  118. if gitolite:
  119. gitenv.update(
  120. {
  121. # These are the additional vars gitolite needs
  122. # Fun fact: REQUEST_URI is not even mentioned in RFC3875
  123. "REQUEST_URI": flask.request.full_path,
  124. "GITOLITE_HTTP_HOME": pagure_config["GITOLITE_HOME"],
  125. "HOME": pagure_config["GITOLITE_HOME"],
  126. }
  127. )
  128. elif remote_user:
  129. gitenv.update({"GL_USER": remote_user})
  130. # These keys are optional
  131. for key in (
  132. "REMOTE_USER",
  133. "USER",
  134. "REMOTE_ADDR",
  135. "CONTENT_TYPE",
  136. "QUERY_STRING",
  137. "PYTHONPATH",
  138. "PATH",
  139. "HTTP_CONTENT_ENCODING",
  140. ):
  141. if not gitenv[key]:
  142. del gitenv[key]
  143. for key in gitenv:
  144. if not gitenv[key]:
  145. raise ValueError("Value for key %s unknown" % key)
  146. if gitolite:
  147. _log.debug("Running git via: %s", gitolite)
  148. cmd = [gitolite]
  149. else:
  150. _log.debug("Running git via git directly")
  151. cmd = ["/usr/bin/git", "http-backend"]
  152. # Note: using a temporary files to buffer the input contents
  153. # is non-ideal, but it is a way to make sure we don't need to have
  154. # the full input (which can be very long) in memory.
  155. # Ideally, we'd directly stream, but that's an RFE for the future,
  156. # since that needs to happen in other threads so as to not block.
  157. # (See the warnings in the subprocess module)
  158. with tempfile.SpooledTemporaryFile() as infile:
  159. while True:
  160. block = flask.request.stream.read(4096)
  161. if not block:
  162. break
  163. infile.write(block)
  164. infile.seek(0)
  165. _log.debug("Calling: %s", cmd)
  166. proc = subprocess.Popen(
  167. cmd, stdin=infile, stdout=subprocess.PIPE, stderr=None, env=gitenv
  168. )
  169. out = proc.stdout
  170. # First, gather the response head
  171. headers = {}
  172. while True:
  173. line = out.readline()
  174. if not line:
  175. raise Exception("End of file while reading headers?")
  176. # This strips the \n, meaning end-of-headers
  177. line = line.strip()
  178. if not line:
  179. break
  180. header = line.split(b": ", 1)
  181. header[0] = header[0].decode("utf-8")
  182. headers[str(header[0].lower())] = header[1]
  183. if len(headers) == 0:
  184. raise Exception("No response at all received")
  185. if "status" not in headers:
  186. # If no status provided, assume 200 OK as per RFC3875
  187. headers[str("status")] = "200 OK"
  188. respcode, respmsg = headers.pop("status").split(" ", 1)
  189. wrapout = werkzeug.wsgi.wrap_file(flask.request.environ, out)
  190. return flask.Response(
  191. wrapout,
  192. status=int(respcode),
  193. headers=headers,
  194. direct_passthrough=True,
  195. )
  196. def proxy_repospanner(project, service):
  197. """Proxy a request to repoSpanner.
  198. Args:
  199. project (model.Project): The project being accessed
  200. service (String): The service as indicated by ?Service= in /info/refs
  201. """
  202. oper = os.path.basename(flask.request.path)
  203. if oper == "refs":
  204. oper = "info/refs?service=%s" % service
  205. regionurl, regioninfo = project.repospanner_repo_info("main")
  206. url = "%s/%s" % (regionurl, oper)
  207. # Older flask/werkzeug versions don't support both an input and output
  208. # stream: this results in a blank upload.
  209. # So, we optimize for the direction the majority of the data will likely
  210. # flow.
  211. streamargs = {}
  212. if service == "git-receive-pack":
  213. # This is a Push operation, optimize for data from the client
  214. streamargs["data"] = flask.request.stream
  215. streamargs["stream"] = False
  216. else:
  217. # This is a Pull operation, optimize for data from the server
  218. streamargs["data"] = flask.request.data
  219. streamargs["stream"] = True
  220. resp = requests.request(
  221. flask.request.method,
  222. url,
  223. verify=regioninfo["ca"],
  224. cert=(regioninfo["push_cert"]["cert"], regioninfo["push_cert"]["key"]),
  225. headers={
  226. "Content-Encoding": flask.request.content_encoding,
  227. "Content-Type": flask.request.content_type,
  228. "X-Extra-Username": flask.request.remote_user,
  229. "X-Extra-Repotype": "main",
  230. "X-Extra-project_name": project.name,
  231. "x-Extra-project_user": project.user if project.is_fork else "",
  232. "X-Extra-project_namespace": project.namespace,
  233. },
  234. **streamargs
  235. )
  236. # Strip out any headers that cause problems
  237. for name in ("transfer-encoding",):
  238. if name in resp.headers:
  239. del resp.headers[name]
  240. return flask.Response(
  241. resp.iter_content(chunk_size=128),
  242. status=resp.status_code,
  243. headers=dict(resp.headers),
  244. direct_passthrough=True,
  245. )
  246. def clone_proxy(project, username=None, namespace=None):
  247. """Proxy the /info/refs endpoint for HTTP pull/push.
  248. Note that for the clone endpoints, it's very explicit that <repo> has been
  249. renamed to <project>, to avoid the automatic repo searching from flask_app.
  250. This means that we have a chance to trust REMOTE_USER to verify the users'
  251. access to the attempted repository.
  252. """
  253. if not pagure_config["ALLOW_HTTP_PULL_PUSH"]:
  254. _auth_log.info(
  255. "User tried to access the git repo via http but this is not "
  256. "enabled -- |user: N/A|IP: %s|method: %s|repo: %s|query: %s"
  257. % (
  258. flask.request.remote_addr,
  259. flask.request.method,
  260. project,
  261. flask.request.query_string,
  262. )
  263. )
  264. flask.abort(403, description="HTTP pull/push is not allowed")
  265. service = None
  266. # name it p1 so there is no risk of variable shadowing, we do not want
  267. # this to be used elsewhere since there is no check here if the user
  268. # is allowed to access this project (this is done lower down)
  269. p1 = pagure.lib.query.get_authorized_project(
  270. flask.g.session, project, user=username, namespace=namespace
  271. )
  272. p1_path = "invalid repo: %s/%s/%s" % (username, namespace, project)
  273. if p1:
  274. p1_path = p1.path
  275. remote_user = _get_remote_user(p1)
  276. if flask.request.path.endswith("/info/refs"):
  277. service = flask.request.args.get("service")
  278. if not service:
  279. # This is a Git client older than 1.6.6, and it doesn't work with
  280. # the smart protocol. We do not support the old protocol via HTTP.
  281. _auth_log.info(
  282. "User is using a git client to old (pre-1.6.6) -- "
  283. "|user: %s|IP: %s|method: %s|repo: %s|query: %s"
  284. % (
  285. remote_user,
  286. flask.request.remote_addr,
  287. flask.request.method,
  288. p1_path,
  289. flask.request.query_string,
  290. )
  291. )
  292. flask.abort(400, description="Please switch to newer Git client")
  293. if service not in ("git-upload-pack", "git-receive-pack"):
  294. _auth_log.info(
  295. "User asked for an unknown service "
  296. "|user: %s|IP: %s|method: %s|repo: %s|query: %s"
  297. % (
  298. remote_user,
  299. flask.request.remote_addr,
  300. flask.request.method,
  301. p1_path,
  302. flask.request.query_string,
  303. )
  304. )
  305. flask.abort(400, description="Unknown service requested")
  306. if "git-receive-pack" in flask.request.full_path:
  307. if not pagure_config["ALLOW_HTTP_PUSH"]:
  308. _auth_log.info(
  309. "User tried a git push over http while this is not enabled -- "
  310. "|user: %s|IP: %s|method: %s|repo: %s|query: %s"
  311. % (
  312. remote_user,
  313. flask.request.remote_addr,
  314. flask.request.method,
  315. p1_path,
  316. flask.request.query_string,
  317. )
  318. )
  319. # Pushing (git-receive-pack) over HTTP is not allowed
  320. flask.abort(403, description="HTTP pushing disabled")
  321. if not remote_user:
  322. # Anonymous pushing... nope
  323. realm = "Pagure API token"
  324. if pagure_config.get("PAGURE_AUTH") == "local":
  325. realm = "Pagure password or API token"
  326. headers = {
  327. "WWW-Authenticate": 'Basic realm="%s"' % realm,
  328. "X-Frame-Options": "DENY",
  329. }
  330. _auth_log.info(
  331. "User tried a git push over http but was not authenticated -- "
  332. "|user: %s|IP: %s|method: %s|repo: %s|query: %s"
  333. % (
  334. remote_user,
  335. flask.request.remote_addr,
  336. flask.request.method,
  337. p1_path,
  338. flask.request.query_string,
  339. )
  340. )
  341. response = flask.Response(
  342. response="Authorization Required",
  343. status=401,
  344. headers=headers,
  345. content_type="text/plain",
  346. )
  347. flask.abort(response)
  348. project_obj = pagure.lib.query.get_authorized_project(
  349. flask.g.session,
  350. project,
  351. user=username,
  352. namespace=namespace,
  353. asuser=remote_user,
  354. )
  355. if not project_obj:
  356. _auth_log.info(
  357. "User asked to access a git repo that they are not allowed to "
  358. "access -- |user: %s|IP: %s|method: %s|repo: %s|query: %s"
  359. % (
  360. remote_user,
  361. flask.request.remote_addr,
  362. flask.request.method,
  363. p1_path,
  364. flask.request.query_string,
  365. )
  366. )
  367. _log.info(
  368. "%s could not find project: %s for user %s and namespace %s",
  369. remote_user,
  370. project,
  371. username,
  372. namespace,
  373. )
  374. flask.abort(404, description="Project not found")
  375. if project_obj.is_on_repospanner:
  376. return proxy_repospanner(project_obj, service)
  377. else:
  378. return proxy_raw_git(project_obj)
  379. def add_clone_proxy_cmds():
  380. """This function adds flask routes for all possible clone paths.
  381. This comes down to:
  382. /(fork/<username>/)(<namespace>/)<project>(.git)
  383. with an operation following, where operation is one of:
  384. - /info/refs (generic)
  385. - /git-upload-pack (pull)
  386. - /git-receive-pack (push)
  387. """
  388. for prefix in (
  389. "<project>",
  390. "<namespace>/<project>",
  391. "forks/<username>/<project>",
  392. "forks/<username>/<namespace>/<project>",
  393. ):
  394. for suffix in ("", ".git"):
  395. for oper in ("info/refs", "git-receive-pack", "git-upload-pack"):
  396. route = "/%s%s/%s" % (prefix, suffix, oper)
  397. methods = ("GET",) if oper == "info/refs" else ("POST",)
  398. UI_NS.add_url_rule(
  399. route, view_func=clone_proxy, methods=methods
  400. )