proxyagent.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. # Copyright 2019 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import logging
  15. import re
  16. from typing import Any, Dict, Optional, Tuple
  17. from urllib.parse import urlparse
  18. from urllib.request import ( # type: ignore[attr-defined]
  19. getproxies_environment,
  20. proxy_bypass_environment,
  21. )
  22. from zope.interface import implementer
  23. from twisted.internet import defer
  24. from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
  25. from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
  26. from twisted.python.failure import Failure
  27. from twisted.web.client import (
  28. URI,
  29. BrowserLikePolicyForHTTPS,
  30. HTTPConnectionPool,
  31. _AgentBase,
  32. )
  33. from twisted.web.error import SchemeNotSupported
  34. from twisted.web.http_headers import Headers
  35. from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS
  36. from synapse.http import redact_uri
  37. from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
  38. from synapse.types import ISynapseReactor
  # Module-level logger, named after this module.
  logger = logging.getLogger(__name__)

  # Matches byte strings made up solely of one or more printable, non-space
  # ASCII characters (0x21-0x7e). Used to validate request URIs before parsing.
  _VALID_URI = re.compile(rb"\A[\x21-\x7e]+\Z")


  @implementer(IAgent)
  class ProxyAgent(_AgentBase):
      """An Agent implementation which will use an HTTP proxy if one was requested.

      Args:
          reactor: twisted reactor to place outgoing connections.

          proxy_reactor: twisted reactor to use for connections to the proxy
              server. `reactor` might have some blacklisting applied (i.e. for
              DNS queries), but we need unblocked access to the proxy. If None,
              `reactor` is used for connections to the proxy as well.

          contextFactory: A factory for TLS contexts, to control the
              verification parameters of OpenSSL. The default is to use a
              `BrowserLikePolicyForHTTPS`, so unless you have special
              requirements you can leave this as-is.

          connectTimeout: The amount of time that this Agent will wait
              for the peer to accept a connection, in seconds. If 'None',
              HostnameEndpoint's default (30s) will be used.
              This is used for connections to both proxies and destination
              servers.

          bindAddress: The local address for client sockets to bind to.

          pool: connection pool to be used. If None, a
              non-persistent pool instance will be created.

          use_proxy: Whether proxy settings should be discovered and used
              from conventional environment variables.

      Raises:
          ValueError if use_proxy is set and the environment variables
              contain an invalid proxy specification.
          RuntimeError if no tls_options_factory is given for a https connection
      """

      def __init__(
          self,
          reactor: IReactorCore,
          proxy_reactor: Optional[ISynapseReactor] = None,
          contextFactory: Optional[IPolicyForHTTPS] = None,
          connectTimeout: Optional[float] = None,
          bindAddress: Optional[bytes] = None,
          pool: Optional[HTTPConnectionPool] = None,
          use_proxy: bool = False,
      ):
          contextFactory = contextFactory or BrowserLikePolicyForHTTPS()

          _AgentBase.__init__(self, reactor, pool)

          # Connections to the proxy go through their own reactor when one is
          # given; otherwise they share the reactor used for everything else.
          self.proxy_reactor = reactor if proxy_reactor is None else proxy_reactor

          # Extra keyword arguments handed to every HostnameEndpoint we create
          # (both for proxies and for direct connections).
          self._endpoint_kwargs: Dict[str, Any] = {}
          if connectTimeout is not None:
              self._endpoint_kwargs["timeout"] = connectTimeout
          if bindAddress is not None:
              self._endpoint_kwargs["bindAddress"] = bindAddress

          http_proxy = None
          https_proxy = None
          no_proxy = None
          if use_proxy:
              proxies = getproxies_environment()
              http_proxy = proxies["http"].encode() if "http" in proxies else None
              https_proxy = proxies["https"].encode() if "https" in proxies else None
              no_proxy = proxies.get("no")

          # Each of these is (None, None) when the corresponding proxy is unset.
          self.http_proxy_endpoint, self.http_proxy_creds = http_proxy_endpoint(
              http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
          )

          self.https_proxy_endpoint, self.https_proxy_creds = http_proxy_endpoint(
              https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
          )

          self.no_proxy = no_proxy

          self._policy_for_https = contextFactory
          self._reactor = reactor

      def request(
          self,
          method: bytes,
          uri: bytes,
          headers: Optional[Headers] = None,
          bodyProducer: Optional[IBodyProducer] = None,
      ) -> defer.Deferred:
          """
          Issue a request to the server indicated by the given uri.

          Supports `http` and `https` schemes.

          An existing connection from the connection pool may be used or a new one may be
          created.

          See also: twisted.web.iweb.IAgent.request

          Args:
              method: The request method to use, such as `GET`, `POST`, etc

              uri: The location of the resource to request. Surrounding
                  whitespace is stripped before the URI is validated.

              headers: Extra headers to send with the request. The object passed
                  in is not modified by this method: when a Proxy-Authorization
                  header has to be added, it is added to a copy.

              bodyProducer: An object which can generate bytes to make up the body of
                  this request (for example, the properly encoded contents of a file for
                  a file upload). Or, None if the request is to have no body.

          Returns:
              Deferred[IResponse]: completes when the header of the response has
                   been received (regardless of the response status code).

                   Can fail with:
                      SchemeNotSupported: if the uri is not http or https

                      twisted.internet.error.TimeoutError if the server we are connecting
                          to (proxy or destination) does not accept a connection before
                          connectTimeout.

                      ... other things too.

          Raises:
              ValueError: synchronously, if the stripped uri is empty or contains
                  anything other than printable, non-space ASCII characters.
          """
          uri = uri.strip()
          if not _VALID_URI.match(uri):
              raise ValueError(f"Invalid URI {uri!r}")

          parsed_uri = URI.fromBytes(uri)
          pool_key = f"{parsed_uri.scheme!r}{parsed_uri.host!r}{parsed_uri.port}"
          request_path = parsed_uri.originForm

          # Honour the no_proxy setting (if any) for this destination host.
          should_skip_proxy = False
          if self.no_proxy is not None:
              should_skip_proxy = proxy_bypass_environment(
                  parsed_uri.host.decode(),
                  proxies={"no": self.no_proxy},
              )

          if (
              parsed_uri.scheme == b"http"
              and self.http_proxy_endpoint
              and not should_skip_proxy
          ):
              # Determine whether we need to set Proxy-Authorization headers
              if self.http_proxy_creds:
                  # Add the header to a copy rather than to the caller's object:
                  # a caller that reuses one Headers instance for several
                  # requests would otherwise accumulate duplicate
                  # Proxy-Authorization values on it.
                  headers = Headers() if headers is None else headers.copy()
                  headers.addRawHeader(
                      b"Proxy-Authorization",
                      self.http_proxy_creds.as_proxy_authorization_value(),
                  )
              # Cache *all* connections under the same key, since we are only
              # connecting to a single destination, the proxy:
              pool_key = "http-proxy"
              endpoint = self.http_proxy_endpoint
              # A plain-HTTP proxy is sent the full URI instead of the origin form.
              request_path = uri
          elif (
              parsed_uri.scheme == b"https"
              and self.https_proxy_endpoint
              and not should_skip_proxy
          ):
              # HTTPS goes through the proxy via HTTPConnectProxyEndpoint; the
              # credentials are handed to that endpoint rather than set as a
              # header on the request itself.
              endpoint = HTTPConnectProxyEndpoint(
                  self.proxy_reactor,
                  self.https_proxy_endpoint,
                  parsed_uri.host,
                  parsed_uri.port,
                  self.https_proxy_creds,
              )
          else:
              # not using a proxy
              endpoint = HostnameEndpoint(
                  self._reactor, parsed_uri.host, parsed_uri.port, **self._endpoint_kwargs
              )

          logger.debug(
              "Requesting %s via %s",
              redact_uri(uri.decode("ascii", errors="replace")),
              endpoint,
          )

          if parsed_uri.scheme == b"https":
              # TLS to the destination is layered on top of whichever endpoint
              # was chosen above (direct or via the proxy).
              tls_connection_creator = self._policy_for_https.creatorForNetloc(
                  parsed_uri.host, parsed_uri.port
              )
              endpoint = wrapClientTLS(tls_connection_creator, endpoint)
          elif parsed_uri.scheme == b"http":
              pass
          else:
              return defer.fail(
                  Failure(
                      SchemeNotSupported("Unsupported scheme: %r" % (parsed_uri.scheme,))
                  )
              )

          return self._requestWithEndpoint(
              pool_key, endpoint, method, parsed_uri, headers, bodyProducer, request_path
          )


  def http_proxy_endpoint(
      proxy: Optional[bytes],
      reactor: IReactorCore,
      tls_options_factory: Optional[IPolicyForHTTPS],
      **kwargs: object,
  ) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]:
      """Parses an http proxy setting and returns an endpoint for the proxy

      Args:
          proxy: the proxy setting in the form: [scheme://][<username>:<password>@]<host>[:<port>]
              This currently supports http:// and https:// proxies.
              A hostname without scheme is assumed to be http.

          reactor: reactor to be used to connect to the proxy

          tls_options_factory: the TLS options to use when connecting through a https proxy

          kwargs: other args to be passed to HostnameEndpoint

      Returns:
          a tuple of
              endpoint to use to connect to the proxy, or None if no proxy was given
              ProxyCredentials, or None if no credentials were found

      Raise:
          ValueError if proxy has no hostname or unsupported scheme.
          RuntimeError if no tls_options_factory is given for a https connection
      """
      if proxy is None:
          return None, None

      # Note: urlsplit/urlparse cannot be used here as that does not work (for Python
      # 3.9+) on scheme-less proxies, e.g. host:port.
      scheme, host, port, credentials = parse_proxy(proxy)

      proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)

      if scheme == b"https":
          if not tls_options_factory:
              # Report only host:port. The raw proxy setting may embed
              # `user:password@`, which must not end up in logs or tracebacks.
              proxy_location = f"{host.decode('ascii', errors='replace')}:{port}"
              raise RuntimeError(
                  f"No TLS options for a https connection via proxy {proxy_location}"
              )
          # The connection to the proxy itself is wrapped in TLS.
          tls_options = tls_options_factory.creatorForNetloc(host, port)
          proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)

      return proxy_endpoint, credentials


  def parse_proxy(
      proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
  ) -> Tuple[bytes, bytes, int, Optional[ProxyCredentials]]:
      """
      Parse a proxy connection string.

      Given a HTTP proxy URL, breaks it down into components and checks that it
      has a hostname (otherwise it is not useful to us when trying to find a
      proxy) and asserts that the URL has a scheme we support.

      Args:
          proxy: The proxy connection string. Must be in the form '[scheme://][<username>:<password>@]host[:port]'.
          default_scheme: The default scheme to return if one is not found in `proxy`. Defaults to http
          default_port: The default port to return if one is not found in `proxy`. Defaults to 1080

      Returns:
          A tuple containing the scheme, hostname, port and ProxyCredentials.
              If no credentials were found, the ProxyCredentials instance is replaced with None.
              Credentials are only returned when both a username and a password
              are present and non-empty.

      Raise:
          ValueError if proxy has no hostname or unsupported scheme.
      """
      # First check if we have a scheme present.
      # Note: urlsplit/urlparse cannot be used (for Python 3.9+) on scheme-less
      # proxies, e.g. host:port, so prepend the default scheme before parsing.
      if b"://" not in proxy:
          proxy = b"".join([default_scheme, b"://", proxy])

      url = urlparse(proxy)

      if not url.hostname:
          raise ValueError("Proxy URL did not contain a hostname! Please specify one.")

      if url.scheme not in (b"http", b"https"):
          # Decode for the message so it reads 'ftp' rather than the bytes
          # repr b'ftp' (which would also trigger a BytesWarning under `-b`).
          bad_scheme = url.scheme.decode("ascii", errors="replace")
          raise ValueError(
              f"Unknown proxy scheme {bad_scheme!r}; only 'http' and 'https' are supported."
          )

      credentials = None
      if url.username and url.password:
          credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))

      # A missing port falls back to default_port.
      return url.scheme, url.hostname, url.port or default_port, credentials