stats.py

# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from collections import Counter

from twisted.internet import defer

from synapse.api.constants import EventTypes, Membership
from synapse.handlers.state_deltas import StateDeltasHandler
from synapse.metrics import event_processing_positions
from synapse.metrics.background_process_metrics import run_as_background_process

logger = logging.getLogger(__name__)


class StatsHandler(StateDeltasHandler):
    """Handles keeping the *_stats tables updated with a simple time-series of
    information about the users, rooms and media on the server, such that admins
    have some idea of who is consuming their resources.

    Heavily derived from UserDirectoryHandler
    """

    def __init__(self, hs):
        super(StatsHandler, self).__init__(hs)
        self.hs = hs
        self.store = hs.get_datastore()
        self.state = hs.get_state_handler()
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id
        self.stats_bucket_size = hs.config.stats_bucket_size
        self.stats_enabled = hs.config.stats_enabled

        # The current position in the current_state_delta stream
        self.pos = None

        # Guard to ensure we only process deltas one at a time
        self._is_processing = False

        if hs.config.stats_enabled:
            self.notifier.add_replication_callback(self.notify_new_event)

            # We kick this off so that we don't have to wait for a change before
            # we start populating stats
            self.clock.call_later(0, self.notify_new_event)
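
    # Replication notifications invoke notify_new_event(), which kicks off
    # _unsafe_process() as a background process; the _is_processing flag
    # ensures only one run is in flight at a time.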
    def notify_new_event(self):
        """Called when there may be more deltas to process"""
        if not self.stats_enabled or self._is_processing:
            return

        self._is_processing = True

        @defer.inlineCallbacks
        def process():
            try:
                yield self._unsafe_process()
            finally:
                self._is_processing = False

        run_as_background_process("stats.notify_new_event", process)

    @defer.inlineCallbacks
    def _unsafe_process(self):
        # If self.pos is None then we haven't fetched it from the DB yet
        if self.pos is None:
            self.pos = yield self.store.get_stats_positions()

        # Loop round handling deltas until we're up to date
        while True:
            # Be sure to read the max stream_ordering *before* checking if
            # there are any outstanding deltas, since there is otherwise a
            # chance that we could miss updates which arrive after we check
            # the deltas.
            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
            if self.pos == room_max_stream_ordering:
                break

            logger.debug(
                "Processing room stats %s->%s", self.pos, room_max_stream_ordering
            )

            max_pos, deltas = yield self.store.get_current_state_deltas(
                self.pos, room_max_stream_ordering
            )

            if deltas:
                logger.debug("Handling %d state deltas", len(deltas))
                room_deltas, user_deltas = yield self._handle_deltas(deltas)
            else:
                room_deltas = {}
                user_deltas = {}

            # Then count deltas for total_events and total_event_bytes.
            (
                room_count,
                user_count,
            ) = yield self.store.get_changes_room_total_events_and_bytes(
                self.pos, max_pos
            )

            for room_id, fields in room_count.items():
                room_deltas.setdefault(room_id, {}).update(fields)

            for user_id, fields in user_count.items():
                user_deltas.setdefault(user_id, {}).update(fields)

            logger.debug("room_deltas: %s", room_deltas)
            logger.debug("user_deltas: %s", user_deltas)

            # Always call this so that we update the stats position.
            yield self.store.bulk_update_stats_delta(
                self.clock.time_msec(),
                updates={"room": room_deltas, "user": user_deltas},
                stream_id=max_pos,
            )

            logger.debug("Handled room stats to %s -> %s", self.pos, max_pos)

            event_processing_positions.labels("stats").set(max_pos)

            self.pos = max_pos

    @defer.inlineCallbacks
    def _handle_deltas(self, deltas):
        """Called with the state deltas to process

        Returns:
            Deferred[tuple[dict[str, Counter], dict[str, Counter]]]
            Resolves to two dicts, the room deltas and the user deltas,
            mapping from room/user ID to changes in the various fields.
        """
        room_to_stats_deltas = {}
        user_to_stats_deltas = {}

        room_to_state_updates = {}

        for delta in deltas:
            typ = delta["type"]
            state_key = delta["state_key"]
            room_id = delta["room_id"]
            event_id = delta["event_id"]
            stream_id = delta["stream_id"]
            prev_event_id = delta["prev_event_id"]

            logger.debug("Handling: %r, %r %r, %s", room_id, typ, state_key, event_id)

            token = yield self.store.get_earliest_token_for_stats("room", room_id)

            # If the earliest token to begin from is larger than our current
            # stream ID, skip processing this delta.
            if token is not None and token >= stream_id:
                logger.debug(
                    "Ignoring: %s as earlier than this room's initial ingestion event",
                    event_id,
                )
                continue

            if event_id is None and prev_event_id is None:
                logger.error(
                    "event ID is None and so is the previous event ID. stream_id: %s",
                    stream_id,
                )
                continue

            event_content = {}

            sender = None
            if event_id is not None:
                event = yield self.store.get_event(event_id, allow_none=True)
                if event:
                    event_content = event.content or {}
                    sender = event.sender

            # All the values in this dict are deltas (RELATIVE changes)
            room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())

            room_state = room_to_state_updates.setdefault(room_id, {})

            if prev_event_id is None:
                # this state event doesn't overwrite another,
                # so it is a new effective/current state event
                room_stats_delta["current_state_events"] += 1

            if typ == EventTypes.Member:
                # we could use _get_key_change here but it's a bit inefficient
                # given we're not testing for a specific result; might as well
                # just grab the prev_membership and membership strings and
                # compare them.
                # We take None rather than leave as a previous membership
                # in the absence of a previous event because we do not want to
                # reduce the leave count when a new-to-the-room user joins.
                prev_membership = None
                if prev_event_id is not None:
                    prev_event = yield self.store.get_event(
                        prev_event_id, allow_none=True
                    )
                    if prev_event:
                        prev_event_content = prev_event.content
                        prev_membership = prev_event_content.get(
                            "membership", Membership.LEAVE
                        )

                membership = event_content.get("membership", Membership.LEAVE)

                if prev_membership is None:
                    logger.debug("No previous membership for this user.")
                elif membership == prev_membership:
                    pass  # noop
                elif prev_membership == Membership.JOIN:
                    room_stats_delta["joined_members"] -= 1
                elif prev_membership == Membership.INVITE:
                    room_stats_delta["invited_members"] -= 1
                elif prev_membership == Membership.LEAVE:
                    room_stats_delta["left_members"] -= 1
                elif prev_membership == Membership.BAN:
                    room_stats_delta["banned_members"] -= 1
                else:
                    raise ValueError(
                        "%r is not a valid prev_membership" % (prev_membership,)
                    )
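
                # Having decremented the counter for the old membership above,
                # bump the counter for the new membership.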
                if membership == prev_membership:
                    pass  # noop
                elif membership == Membership.JOIN:
                    room_stats_delta["joined_members"] += 1
                elif membership == Membership.INVITE:
                    room_stats_delta["invited_members"] += 1

                    if sender and self.is_mine_id(sender):
                        user_to_stats_deltas.setdefault(sender, Counter())[
                            "invites_sent"
                        ] += 1
                elif membership == Membership.LEAVE:
                    room_stats_delta["left_members"] += 1
                elif membership == Membership.BAN:
                    room_stats_delta["banned_members"] += 1
                else:
                    raise ValueError("%r is not a valid membership" % (membership,))

                user_id = state_key
                if self.is_mine_id(user_id):
                    # this accounts for transitions like leave → ban and so on.
                    has_changed_joinedness = (prev_membership == Membership.JOIN) != (
                        membership == Membership.JOIN
                    )

                    if has_changed_joinedness:
                        delta = +1 if membership == Membership.JOIN else -1

                        user_to_stats_deltas.setdefault(user_id, Counter())[
                            "joined_rooms"
                        ] += delta

                        room_stats_delta["local_users_in_room"] += delta
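
            # The remaining event types carry absolute room state values, which
            # are written out via update_room_state below rather than being
            # treated as deltas.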
            elif typ == EventTypes.Create:
                room_state["is_federatable"] = (
                    event_content.get("m.federate", True) is True
                )
                if sender and self.is_mine_id(sender):
                    user_to_stats_deltas.setdefault(sender, Counter())[
                        "rooms_created"
                    ] += 1
            elif typ == EventTypes.JoinRules:
                room_state["join_rules"] = event_content.get("join_rule")
            elif typ == EventTypes.RoomHistoryVisibility:
                room_state["history_visibility"] = event_content.get(
                    "history_visibility"
                )
            elif typ == EventTypes.Encryption:
                room_state["encryption"] = event_content.get("algorithm")
            elif typ == EventTypes.Name:
                room_state["name"] = event_content.get("name")
            elif typ == EventTypes.Topic:
                room_state["topic"] = event_content.get("topic")
            elif typ == EventTypes.RoomAvatar:
                room_state["avatar"] = event_content.get("url")
            elif typ == EventTypes.CanonicalAlias:
                room_state["canonical_alias"] = event_content.get("alias")
            elif typ == EventTypes.GuestAccess:
                room_state["guest_access"] = event_content.get("guest_access")

        for room_id, state in room_to_state_updates.items():
            logger.info("Updating room_stats_state for %s: %s", room_id, state)
            yield self.store.update_room_state(room_id, state)

        return room_to_stats_deltas, user_to_stats_deltas