Browse Source

Generate user daily stats

Neil Johnson 6 years ago
parent
commit
617bf40924

+ 6 - 0
synapse/app/homeserver.py

@@ -473,6 +473,9 @@ def run(hs):
                 " changes across releases."
             )
 
+    def generate_user_daily_visit_stats():
+        hs.get_datastore().generate_user_daily_visits()
+
     if hs.config.report_stats:
         logger.info("Scheduling stats reporting for 3 hour intervals")
         clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
@@ -485,6 +488,9 @@ def run(hs):
         # be quite busy the first few minutes
         clock.call_later(5 * 60, phone_stats_home)
 
+    clock.looping_call(generate_user_daily_visit_stats, 60 * 1000)
+    clock.call_later(5 * 60, generate_user_daily_visit_stats)
+
     if hs.config.daemonize and hs.config.print_pidfile:
         print (hs.config.pid_file)
 

+ 56 - 4
synapse/storage/__init__.py

@@ -14,6 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+import time
+import logging
+
 from synapse.storage.devices import DeviceStore
 from .appservice import (
     ApplicationServiceStore, ApplicationServiceTransactionStore
@@ -55,10 +59,6 @@ from .engines import PostgresEngine
 from synapse.api.constants import PresenceState
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-
-import logging
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -347,6 +347,58 @@ class DataStore(RoomMemberStore, RoomStore,
 
         return self.runInteraction("count_r30_users", _count_r30_users)
 
+
+    def generate_user_daily_visits(self):
+        """
+        Generates daily visit data for use in cohort/ retention analysis
+        """
+        def _generate_user_daily_visits(txn):
+            logger.info("Calling _generate_user_daily_visits")
+            # determine timestamp of previous days
+            yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+            yesterday_start = datetime.datetime(yesterday.year,
+                                                yesterday.month,
+                                                yesterday.day, 0, 0, 0, 0)
+            yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000
+
+            # Check that this job has not already been completed
+            sql = """
+                SELECT timestamp
+                FROM user_daily_visits
+                ORDER by timestamp desc limit 1
+            """
+            txn.execute(sql)
+            row = txn.fetchone()
+
+            # Bail if the most recent time is yesterday
+            if row and row[0] == yesterday_start_time:
+                logger.info("Bailing from _generate_user_daily_visits, already completed")
+                return
+            logger.info("inserting into user_daily_visits")
+            # Not specificying an upper bound means that if the update is run at
+            # 10 mins past midnight and the user is active during a 30 min session
+            # that the user is still included in the previous days stats
+            # This does mean that if the update is run hours late, then it is possible
+            # to overstate the cohort, but this seems a reasonable trade off
+            # The alternative is to insert on every request - but prefer to avoid
+            # for performance reasons
+            sql = """
+                    SELECT user_id, user_agent, device_id
+                    FROM user_ips
+                    WHERE last_seen > ?
+            """
+            txn.execute(sql, (yesterday_start_time,))
+
+            sql = """
+                    INSERT INTO user_daily_visits (user_id, user_agent, device_id, timestamp)
+                    VALUES (?, ?, ?, ?)
+            """
+
+            for row in txn:
+                txn.execute(sql, (row + (yesterday_start_time,)))
+
+        return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits)
+
     def get_users(self):
         """Function to reterive a list of users in users table.
 

+ 7 - 0
synapse/storage/client_ips.py

@@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             columns=["user_id", "last_seen"],
         )
 
+        self.register_background_index_update(
+            "user_ips_last_seen_only_index",
+            index_name="user_ips_last_seen_only",
+            table="user_ips",
+            columns=["last_seen"],
+        )
+
         # (user_id, access_token, ip) -> (user_agent, device_id, last_seen)
         self._batch_row_update = {}
 

+ 1 - 1
synapse/storage/prepare_database.py

@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
 
 # Remember to update this number every time a change is made to database
 # schema files, so the users will be informed on server restarts.
-SCHEMA_VERSION = 48
+SCHEMA_VERSION = 49
 
 dir_path = os.path.abspath(os.path.dirname(__file__))
 

+ 25 - 0
synapse/storage/schema/delta/49/add_user_daily_visits.sql

@@ -0,0 +1,25 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* Daily visit records for cohort/retention analysis.
+ * Rows are copied from user_ips by DataStore.generate_user_daily_visits;
+ * timestamp holds the start of the day the visit is recorded against,
+ * in milliseconds.
+ */
+CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL,
+                                 device_id TEXT,
+                                 user_agent TEXT NOT NULL,
+                                 timestamp BIGINT NOT NULL );
+
+/* TODO: decide which indexes to include.
+ * Reads are offline, so the table should be optimised for writes.
+ * We need to check whether an entry already exists, which suggests an
+ * index on (user, day).
+ */

+ 17 - 0
synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql

@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Add a background_updates row for 'user_ips_last_seen_only_index', the
+ * update name registered via register_background_index_update in
+ * ClientIpStore for the user_ips(last_seen) index.
+ */
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('user_ips_last_seen_only_index', '{}');