phone_stats_home.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # Copyright 2020 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import logging
  15. import math
  16. import resource
  17. import sys
  18. from prometheus_client import Gauge
  19. from synapse.metrics.background_process_metrics import wrap_as_background_process
  20. logger = logging.getLogger("synapse.app.homeserver")
  21. # Contains the list of processes we will be monitoring
  22. # currently either 0 or 1
  23. _stats_process = []
  24. # Gauges to expose monthly active user control metrics
  25. current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU")
  26. current_mau_by_service_gauge = Gauge(
  27. "synapse_admin_mau_current_mau_by_service",
  28. "Current MAU by service",
  29. ["app_service"],
  30. )
  31. max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit")
  32. registered_reserved_users_mau_gauge = Gauge(
  33. "synapse_admin_mau:registered_reserved_users",
  34. "Registered users with reserved threepids",
  35. )
  36. @wrap_as_background_process("phone_stats_home")
  37. async def phone_stats_home(hs, stats, stats_process=_stats_process):
  38. logger.info("Gathering stats for reporting")
  39. now = int(hs.get_clock().time())
  40. uptime = int(now - hs.start_time)
  41. if uptime < 0:
  42. uptime = 0
  43. #
  44. # Performance statistics. Keep this early in the function to maintain reliability of `test_performance_100` test.
  45. #
  46. old = stats_process[0]
  47. new = (now, resource.getrusage(resource.RUSAGE_SELF))
  48. stats_process[0] = new
  49. # Get RSS in bytes
  50. stats["memory_rss"] = new[1].ru_maxrss
  51. # Get CPU time in % of a single core, not % of all cores
  52. used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
  53. old[1].ru_utime + old[1].ru_stime
  54. )
  55. if used_cpu_time == 0 or new[0] == old[0]:
  56. stats["cpu_average"] = 0
  57. else:
  58. stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)
  59. #
  60. # General statistics
  61. #
  62. stats["homeserver"] = hs.config.server_name
  63. stats["server_context"] = hs.config.server_context
  64. stats["timestamp"] = now
  65. stats["uptime_seconds"] = uptime
  66. version = sys.version_info
  67. stats["python_version"] = "{}.{}.{}".format(
  68. version.major, version.minor, version.micro
  69. )
  70. stats["total_users"] = await hs.get_datastore().count_all_users()
  71. total_nonbridged_users = await hs.get_datastore().count_nonbridged_users()
  72. stats["total_nonbridged_users"] = total_nonbridged_users
  73. daily_user_type_results = await hs.get_datastore().count_daily_user_type()
  74. for name, count in daily_user_type_results.items():
  75. stats["daily_user_type_" + name] = count
  76. room_count = await hs.get_datastore().get_room_count()
  77. stats["total_room_count"] = room_count
  78. stats["daily_active_users"] = await hs.get_datastore().count_daily_users()
  79. stats["monthly_active_users"] = await hs.get_datastore().count_monthly_users()
  80. daily_active_e2ee_rooms = await hs.get_datastore().count_daily_active_e2ee_rooms()
  81. stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms
  82. stats["daily_e2ee_messages"] = await hs.get_datastore().count_daily_e2ee_messages()
  83. daily_sent_e2ee_messages = await hs.get_datastore().count_daily_sent_e2ee_messages()
  84. stats["daily_sent_e2ee_messages"] = daily_sent_e2ee_messages
  85. stats["daily_active_rooms"] = await hs.get_datastore().count_daily_active_rooms()
  86. stats["daily_messages"] = await hs.get_datastore().count_daily_messages()
  87. daily_sent_messages = await hs.get_datastore().count_daily_sent_messages()
  88. stats["daily_sent_messages"] = daily_sent_messages
  89. r30_results = await hs.get_datastore().count_r30_users()
  90. for name, count in r30_results.items():
  91. stats["r30_users_" + name] = count
  92. stats["cache_factor"] = hs.config.caches.global_factor
  93. stats["event_cache_size"] = hs.config.caches.event_cache_size
  94. #
  95. # Database version
  96. #
  97. # This only reports info about the *main* database.
  98. stats["database_engine"] = hs.get_datastore().db_pool.engine.module.__name__
  99. stats["database_server_version"] = hs.get_datastore().db_pool.engine.server_version
  100. #
  101. # Logging configuration
  102. #
  103. synapse_logger = logging.getLogger("synapse")
  104. log_level = synapse_logger.getEffectiveLevel()
  105. stats["log_level"] = logging.getLevelName(log_level)
  106. logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats))
  107. try:
  108. await hs.get_proxied_http_client().put_json(
  109. hs.config.report_stats_endpoint, stats
  110. )
  111. except Exception as e:
  112. logger.warning("Error reporting stats: %s", e)
  113. def start_phone_stats_home(hs):
  114. """
  115. Start the background tasks which report phone home stats.
  116. """
  117. clock = hs.get_clock()
  118. stats = {}
  119. def performance_stats_init():
  120. _stats_process.clear()
  121. _stats_process.append(
  122. (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
  123. )
  124. # Rather than update on per session basis, batch up the requests.
  125. # If you increase the loop period, the accuracy of user_daily_visits
  126. # table will decrease
  127. clock.looping_call(hs.get_datastore().generate_user_daily_visits, 5 * 60 * 1000)
  128. # monthly active user limiting functionality
  129. clock.looping_call(hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60)
  130. hs.get_datastore().reap_monthly_active_users()
  131. @wrap_as_background_process("generate_monthly_active_users")
  132. async def generate_monthly_active_users():
  133. current_mau_count = 0
  134. current_mau_count_by_service = {}
  135. reserved_users = ()
  136. store = hs.get_datastore()
  137. if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
  138. current_mau_count = await store.get_monthly_active_count()
  139. current_mau_count_by_service = (
  140. await store.get_monthly_active_count_by_service()
  141. )
  142. reserved_users = await store.get_registered_reserved_users()
  143. current_mau_gauge.set(float(current_mau_count))
  144. for app_service, count in current_mau_count_by_service.items():
  145. current_mau_by_service_gauge.labels(app_service).set(float(count))
  146. registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
  147. max_mau_gauge.set(float(hs.config.max_mau_value))
  148. if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
  149. generate_monthly_active_users()
  150. clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
  151. # End of monthly active user settings
  152. if hs.config.report_stats:
  153. logger.info("Scheduling stats reporting for 3 hour intervals")
  154. clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)
  155. # We need to defer this init for the cases that we daemonize
  156. # otherwise the process ID we get is that of the non-daemon process
  157. clock.call_later(0, performance_stats_init)
  158. # We wait 5 minutes to send the first set of stats as the server can
  159. # be quite busy the first few minutes
  160. clock.call_later(5 * 60, phone_stats_home, hs, stats)