opentracing.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2019 The Matrix.org Foundation C.I.C.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # NOTE
  16. # This is a small wrapper around opentracing because opentracing is not currently
  17. # packaged downstream (specifically debian). Since opentracing instrumentation is
  18. # fairly invasive it was awkward to make it optional. As a result we opted to encapsulate
  19. # all opentracing state in these methods which effectively noop if opentracing is
  20. # not present. We should strongly consider encouraging the downstream distributers
  21. # to package opentracing and making opentracing a full dependency. In order to facilitate
  22. # this move the methods have work very similarly to opentracing's and it should only
  23. # be a matter of few regexes to move over to opentracing's access patterns proper.
  24. """
  25. ============================
  26. Using OpenTracing in Synapse
  27. ============================
  28. Python-specific tracing concepts are at https://opentracing.io/guides/python/.
  29. Note that Synapse wraps OpenTracing in a small module (this one) in order to make the
  30. OpenTracing dependency optional. That means that the access patterns are
  31. different to those demonstrated in the OpenTracing guides. However, it is
  32. still useful to know, especially if OpenTracing is included as a full dependency
  33. in the future or if you are modifying this module.
  34. OpenTracing is encapsulated so that
  35. no span objects from OpenTracing are exposed in Synapse's code. This allows
  36. OpenTracing to be easily disabled in Synapse and thereby have OpenTracing as
  37. an optional dependency. This does however limit the number of modifiable spans
  38. at any point in the code to one. From here out references to `opentracing`
  39. in the code snippets refer to the Synapses module.
  40. Tracing
  41. -------
  42. In Synapse it is not possible to start a non-active span. Spans can be started
  43. using the ``start_active_span`` method. This returns a scope (see
  44. OpenTracing docs) which is a context manager that needs to be entered and
  45. exited. This is usually done by using ``with``.
  46. .. code-block:: python
  47. from synapse.logging.opentracing import start_active_span
  48. with start_active_span("operation name"):
  49. # Do something we want to tracer
  50. Forgetting to enter or exit a scope will result in some mysterious and grievous log
  51. context errors.
  52. At anytime where there is an active span ``opentracing.set_tag`` can be used to
  53. set a tag on the current active span.
  54. Tracing functions
  55. -----------------
  56. Functions can be easily traced using decorators. There is a decorator for
  57. 'normal' function and for functions which are actually deferreds. The name of
  58. the function becomes the operation name for the span.
  59. .. code-block:: python
  60. from synapse.logging.opentracing import trace, trace_deferred
  61. # Start a span using 'normal_function' as the operation name
  62. @trace
  63. def normal_function(*args, **kwargs):
  64. # Does all kinds of cool and expected things
  65. return something_usual_and_useful
  66. # Start a span using 'deferred_function' as the operation name
  67. @trace_deferred
  68. @defer.inlineCallbacks
  69. def deferred_function(*args, **kwargs):
  70. # We start
  71. yield we_wait
  72. # we finish
  73. return something_usual_and_useful
  74. Operation names can be explicitly set for functions by using
  75. ``trace_using_operation_name`` and
  76. ``trace_deferred_using_operation_name``
  77. .. code-block:: python
  78. from synapse.logging.opentracing import (
  79. trace_using_operation_name,
  80. trace_deferred_using_operation_name
  81. )
  82. @trace_using_operation_name("A *much* better operation name")
  83. def normal_function(*args, **kwargs):
  84. # Does all kinds of cool and expected things
  85. return something_usual_and_useful
  86. @trace_deferred_using_operation_name("Another exciting operation name!")
  87. @defer.inlineCallbacks
  88. def deferred_function(*args, **kwargs):
  89. # We start
  90. yield we_wait
  91. # we finish
  92. return something_usual_and_useful
  93. Contexts and carriers
  94. ---------------------
  95. There are a selection of wrappers for injecting and extracting contexts from
  96. carriers provided. Unfortunately OpenTracing's three context injection
  97. techniques are not adequate for our inject of OpenTracing span-contexts into
  98. Twisted's http headers, EDU contents and our database tables. Also note that
  99. the binary encoding format mandated by OpenTracing is not actually implemented
  100. by jaeger_client v4.0.0 - it will silently noop.
  101. Please refer to the end of ``logging/opentracing.py`` for the available
  102. injection and extraction methods.
  103. Homeserver whitelisting
  104. -----------------------
  105. Most of the whitelist checks are encapsulated in the modules's injection
  106. and extraction method but be aware that using custom carriers or crossing
  107. unchartered waters will require the enforcement of the whitelist.
  108. ``logging/opentracing.py`` has a ``whitelisted_homeserver`` method which takes
  109. in a destination and compares it to the whitelist.
  110. =======
  111. Gotchas
  112. =======
  113. - Checking whitelists on span propagation
  114. - Inserting pii
  115. - Forgetting to enter or exit a scope
  116. - Span source: make sure that the span you expect to be active across a
  117. function call really will be that one. Does the current function have more
  118. than one caller? Will all of those calling functions have be in a context
  119. with an active span?
  120. """
  121. import contextlib
  122. import inspect
  123. import logging
  124. import re
  125. from functools import wraps
  126. from canonicaljson import json
  127. from twisted.internet import defer
  128. from synapse.config import ConfigError
  129. try:
  130. import opentracing
  131. except ImportError:
  132. opentracing = None
  133. try:
  134. from jaeger_client import Config as JaegerConfig
  135. from synapse.logging.scopecontextmanager import LogContextScopeManager
  136. except ImportError:
  137. JaegerConfig = None
  138. LogContextScopeManager = None
  139. logger = logging.getLogger(__name__)
  140. # Block everything by default
  141. # A regex which matches the server_names to expose traces for.
  142. # None means 'block everything'.
  143. _homeserver_whitelist = None
  144. # Util methods
  145. def only_if_tracing(func):
  146. """Executes the function only if we're tracing. Otherwise return.
  147. Assumes the function wrapped may return None"""
  148. @wraps(func)
  149. def _only_if_tracing_inner(*args, **kwargs):
  150. if opentracing:
  151. return func(*args, **kwargs)
  152. else:
  153. return
  154. return _only_if_tracing_inner
  155. @contextlib.contextmanager
  156. def _noop_context_manager(*args, **kwargs):
  157. """Does exactly what it says on the tin"""
  158. yield
  159. # Setup
  160. def init_tracer(config):
  161. """Set the whitelists and initialise the JaegerClient tracer
  162. Args:
  163. config (HomeserverConfig): The config used by the homeserver
  164. """
  165. global opentracing
  166. if not config.opentracer_enabled:
  167. # We don't have a tracer
  168. opentracing = None
  169. return
  170. if not opentracing or not JaegerConfig:
  171. raise ConfigError(
  172. "The server has been configured to use opentracing but opentracing is not "
  173. "installed."
  174. )
  175. # Include the worker name
  176. name = config.worker_name if config.worker_name else "master"
  177. set_homeserver_whitelist(config.opentracer_whitelist)
  178. jaeger_config = JaegerConfig(
  179. config={"sampler": {"type": "const", "param": 1}, "logging": True},
  180. service_name="{} {}".format(config.server_name, name),
  181. scope_manager=LogContextScopeManager(config),
  182. )
  183. jaeger_config.initialize_tracer()
  184. # Set up tags to be opentracing's tags
  185. global tags
  186. tags = opentracing.tags
  187. # Whitelisting
  188. @only_if_tracing
  189. def set_homeserver_whitelist(homeserver_whitelist):
  190. """Sets the homeserver whitelist
  191. Args:
  192. homeserver_whitelist (Iterable[str]): regex of whitelisted homeservers
  193. """
  194. global _homeserver_whitelist
  195. if homeserver_whitelist:
  196. # Makes a single regex which accepts all passed in regexes in the list
  197. _homeserver_whitelist = re.compile(
  198. "({})".format(")|(".join(homeserver_whitelist))
  199. )
  200. @only_if_tracing
  201. def whitelisted_homeserver(destination):
  202. """Checks if a destination matches the whitelist
  203. Args:
  204. destination (str)
  205. """
  206. _homeserver_whitelist
  207. if _homeserver_whitelist:
  208. return _homeserver_whitelist.match(destination)
  209. return False
  210. # Start spans and scopes
  211. # Could use kwargs but I want these to be explicit
  212. def start_active_span(
  213. operation_name,
  214. child_of=None,
  215. references=None,
  216. tags=None,
  217. start_time=None,
  218. ignore_active_span=False,
  219. finish_on_close=True,
  220. ):
  221. """Starts an active opentracing span. Note, the scope doesn't become active
  222. until it has been entered, however, the span starts from the time this
  223. message is called.
  224. Args:
  225. See opentracing.tracer
  226. Returns:
  227. scope (Scope) or noop_context_manager
  228. """
  229. if opentracing is None:
  230. return _noop_context_manager()
  231. else:
  232. # We need to enter the scope here for the logcontext to become active
  233. return opentracing.tracer.start_active_span(
  234. operation_name,
  235. child_of=child_of,
  236. references=references,
  237. tags=tags,
  238. start_time=start_time,
  239. ignore_active_span=ignore_active_span,
  240. finish_on_close=finish_on_close,
  241. )
  242. def start_active_span_follows_from(operation_name, contexts):
  243. if opentracing is None:
  244. return _noop_context_manager()
  245. else:
  246. references = [opentracing.follows_from(context) for context in contexts]
  247. scope = start_active_span(operation_name, references=references)
  248. return scope
  249. def start_active_span_from_context(
  250. headers,
  251. operation_name,
  252. references=None,
  253. tags=None,
  254. start_time=None,
  255. ignore_active_span=False,
  256. finish_on_close=True,
  257. ):
  258. """
  259. Extracts a span context from Twisted Headers.
  260. args:
  261. headers (twisted.web.http_headers.Headers)
  262. For the other args see opentracing.tracer
  263. returns:
  264. span_context (opentracing.span.SpanContext)
  265. """
  266. # Twisted encodes the values as lists whereas opentracing doesn't.
  267. # So, we take the first item in the list.
  268. # Also, twisted uses byte arrays while opentracing expects strings.
  269. if opentracing is None:
  270. return _noop_context_manager()
  271. header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()}
  272. context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict)
  273. return opentracing.tracer.start_active_span(
  274. operation_name,
  275. child_of=context,
  276. references=references,
  277. tags=tags,
  278. start_time=start_time,
  279. ignore_active_span=ignore_active_span,
  280. finish_on_close=finish_on_close,
  281. )
  282. def start_active_span_from_edu(
  283. edu_content,
  284. operation_name,
  285. references=[],
  286. tags=None,
  287. start_time=None,
  288. ignore_active_span=False,
  289. finish_on_close=True,
  290. ):
  291. """
  292. Extracts a span context from an edu and uses it to start a new active span
  293. Args:
  294. edu_content (dict): and edu_content with a `context` field whose value is
  295. canonical json for a dict which contains opentracing information.
  296. For the other args see opentracing.tracer
  297. """
  298. if opentracing is None:
  299. return _noop_context_manager()
  300. carrier = json.loads(edu_content.get("context", "{}")).get("opentracing", {})
  301. context = opentracing.tracer.extract(opentracing.Format.TEXT_MAP, carrier)
  302. _references = [
  303. opentracing.child_of(span_context_from_string(x))
  304. for x in carrier.get("references", [])
  305. ]
  306. # For some reason jaeger decided not to support the visualization of multiple parent
  307. # spans or explicitely show references. I include the span context as a tag here as
  308. # an aid to people debugging but it's really not an ideal solution.
  309. references += _references
  310. scope = opentracing.tracer.start_active_span(
  311. operation_name,
  312. child_of=context,
  313. references=references,
  314. tags=tags,
  315. start_time=start_time,
  316. ignore_active_span=ignore_active_span,
  317. finish_on_close=finish_on_close,
  318. )
  319. scope.span.set_tag("references", carrier.get("references", []))
  320. return scope
  321. # Opentracing setters for tags, logs, etc
  322. @only_if_tracing
  323. def set_tag(key, value):
  324. """Sets a tag on the active span"""
  325. opentracing.tracer.active_span.set_tag(key, value)
  326. @only_if_tracing
  327. def log_kv(key_values, timestamp=None):
  328. """Log to the active span"""
  329. opentracing.tracer.active_span.log_kv(key_values, timestamp)
  330. @only_if_tracing
  331. def set_operation_name(operation_name):
  332. """Sets the operation name of the active span"""
  333. opentracing.tracer.active_span.set_operation_name(operation_name)
  334. # Injection and extraction
  335. @only_if_tracing
  336. def inject_active_span_twisted_headers(headers, destination):
  337. """
  338. Injects a span context into twisted headers in-place
  339. Args:
  340. headers (twisted.web.http_headers.Headers)
  341. span (opentracing.Span)
  342. Returns:
  343. In-place modification of headers
  344. Note:
  345. The headers set by the tracer are custom to the tracer implementation which
  346. should be unique enough that they don't interfere with any headers set by
  347. synapse or twisted. If we're still using jaeger these headers would be those
  348. here:
  349. https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
  350. """
  351. if not whitelisted_homeserver(destination):
  352. return
  353. span = opentracing.tracer.active_span
  354. carrier = {}
  355. opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
  356. for key, value in carrier.items():
  357. headers.addRawHeaders(key, value)
  358. @only_if_tracing
  359. def inject_active_span_byte_dict(headers, destination):
  360. """
  361. Injects a span context into a dict where the headers are encoded as byte
  362. strings
  363. Args:
  364. headers (dict)
  365. span (opentracing.Span)
  366. Returns:
  367. In-place modification of headers
  368. Note:
  369. The headers set by the tracer are custom to the tracer implementation which
  370. should be unique enough that they don't interfere with any headers set by
  371. synapse or twisted. If we're still using jaeger these headers would be those
  372. here:
  373. https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
  374. """
  375. if not whitelisted_homeserver(destination):
  376. return
  377. span = opentracing.tracer.active_span
  378. carrier = {}
  379. opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
  380. for key, value in carrier.items():
  381. headers[key.encode()] = [value.encode()]
  382. @only_if_tracing
  383. def inject_active_span_text_map(carrier, destination=None):
  384. """
  385. Injects a span context into a dict
  386. Args:
  387. carrier (dict)
  388. destination (str): the name of the remote server. The span context
  389. will only be injected if the destination matches the homeserver_whitelist
  390. or destination is None.
  391. Returns:
  392. In-place modification of carrier
  393. Note:
  394. The headers set by the tracer are custom to the tracer implementation which
  395. should be unique enough that they don't interfere with any headers set by
  396. synapse or twisted. If we're still using jaeger these headers would be those
  397. here:
  398. https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
  399. """
  400. if destination and not whitelisted_homeserver(destination):
  401. return
  402. opentracing.tracer.inject(
  403. opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
  404. )
  405. def active_span_context_as_string():
  406. """
  407. Returns:
  408. The active span context encoded as a string.
  409. """
  410. carrier = {}
  411. if opentracing:
  412. opentracing.tracer.inject(
  413. opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
  414. )
  415. return json.dumps(carrier)
  416. @only_if_tracing
  417. def span_context_from_string(carrier):
  418. """
  419. Returns:
  420. The active span context decoded from a string.
  421. """
  422. carrier = json.loads(carrier)
  423. return opentracing.tracer.extract(opentracing.Format.TEXT_MAP, carrier)
  424. @only_if_tracing
  425. def extract_text_map(carrier):
  426. """
  427. Wrapper method for opentracing's tracer.extract for TEXT_MAP.
  428. Args:
  429. carrier (dict): a dict possibly containing a span context.
  430. Returns:
  431. The active span context extracted from carrier.
  432. """
  433. return opentracing.tracer.extract(opentracing.Format.TEXT_MAP, carrier)
  434. # Tracing decorators
  435. def trace(func):
  436. """
  437. Decorator to trace a function.
  438. Sets the operation name to that of the function's.
  439. """
  440. if opentracing is None:
  441. return func
  442. @wraps(func)
  443. def _trace_inner(self, *args, **kwargs):
  444. if opentracing is None:
  445. return func(self, *args, **kwargs)
  446. scope = start_active_span(func.__name__)
  447. scope.__enter__()
  448. try:
  449. result = func(self, *args, **kwargs)
  450. if isinstance(result, defer.Deferred):
  451. def call_back(result):
  452. scope.__exit__(None, None, None)
  453. return result
  454. def err_back(result):
  455. scope.span.set_tag(tags.ERROR, True)
  456. scope.__exit__(None, None, None)
  457. return result
  458. result.addCallbacks(call_back, err_back)
  459. else:
  460. scope.__exit__(None, None, None)
  461. return result
  462. except Exception as e:
  463. scope.__exit__(type(e), None, e.__traceback__)
  464. raise
  465. return _trace_inner
  466. def trace_using_operation_name(operation_name):
  467. """Decorator to trace a function. Explicitely sets the operation_name."""
  468. def trace(func):
  469. """
  470. Decorator to trace a function.
  471. Sets the operation name to that of the function's.
  472. """
  473. if opentracing is None:
  474. return func
  475. @wraps(func)
  476. def _trace_inner(self, *args, **kwargs):
  477. if opentracing is None:
  478. return func(self, *args, **kwargs)
  479. scope = start_active_span(operation_name)
  480. scope.__enter__()
  481. try:
  482. result = func(self, *args, **kwargs)
  483. if isinstance(result, defer.Deferred):
  484. def call_back(result):
  485. scope.__exit__(None, None, None)
  486. return result
  487. def err_back(result):
  488. scope.span.set_tag(tags.ERROR, True)
  489. scope.__exit__(None, None, None)
  490. return result
  491. result.addCallbacks(call_back, err_back)
  492. else:
  493. scope.__exit__(None, None, None)
  494. return result
  495. except Exception as e:
  496. scope.__exit__(type(e), None, e.__traceback__)
  497. raise
  498. return _trace_inner
  499. return trace
  500. def tag_args(func):
  501. """
  502. Tags all of the args to the active span.
  503. """
  504. if not opentracing:
  505. return func
  506. @wraps(func)
  507. def _tag_args_inner(self, *args, **kwargs):
  508. argspec = inspect.getargspec(func)
  509. for i, arg in enumerate(argspec.args[1:]):
  510. set_tag("ARG_" + arg, args[i])
  511. set_tag("args", args[len(argspec.args) :])
  512. set_tag("kwargs", kwargs)
  513. return func(self, *args, **kwargs)
  514. return _tag_args_inner
  515. def trace_servlet(servlet_name, func):
  516. """Decorator which traces a serlet. It starts a span with some servlet specific
  517. tags such as the servlet_name and request information"""
  518. if not opentracing:
  519. return func
  520. @wraps(func)
  521. @defer.inlineCallbacks
  522. def _trace_servlet_inner(request, *args, **kwargs):
  523. with start_active_span(
  524. "incoming-client-request",
  525. tags={
  526. "request_id": request.get_request_id(),
  527. tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER,
  528. tags.HTTP_METHOD: request.get_method(),
  529. tags.HTTP_URL: request.get_redacted_uri(),
  530. tags.PEER_HOST_IPV6: request.getClientIP(),
  531. "servlet_name": servlet_name,
  532. },
  533. ):
  534. result = yield defer.maybeDeferred(func, request, *args, **kwargs)
  535. return result
  536. return _trace_servlet_inner
  537. # Helper class
  538. class _DummyTagNames(object):
  539. """wrapper of opentracings tags. We need to have them if we
  540. want to reference them without opentracing around. Clearly they
  541. should never actually show up in a trace. `set_tags` overwrites
  542. these with the correct ones."""
  543. INVALID_TAG = "invalid-tag"
  544. COMPONENT = INVALID_TAG
  545. DATABASE_INSTANCE = INVALID_TAG
  546. DATABASE_STATEMENT = INVALID_TAG
  547. DATABASE_TYPE = INVALID_TAG
  548. DATABASE_USER = INVALID_TAG
  549. ERROR = INVALID_TAG
  550. HTTP_METHOD = INVALID_TAG
  551. HTTP_STATUS_CODE = INVALID_TAG
  552. HTTP_URL = INVALID_TAG
  553. MESSAGE_BUS_DESTINATION = INVALID_TAG
  554. PEER_ADDRESS = INVALID_TAG
  555. PEER_HOSTNAME = INVALID_TAG
  556. PEER_HOST_IPV4 = INVALID_TAG
  557. PEER_HOST_IPV6 = INVALID_TAG
  558. PEER_PORT = INVALID_TAG
  559. PEER_SERVICE = INVALID_TAG
  560. SAMPLING_PRIORITY = INVALID_TAG
  561. SERVICE = INVALID_TAG
  562. SPAN_KIND = INVALID_TAG
  563. SPAN_KIND_CONSUMER = INVALID_TAG
  564. SPAN_KIND_PRODUCER = INVALID_TAG
  565. SPAN_KIND_RPC_CLIENT = INVALID_TAG
  566. SPAN_KIND_RPC_SERVER = INVALID_TAG
  567. tags = _DummyTagNames