room.py 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. # Copyright 2019 The Matrix.org Foundation C.I.C.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import collections
  17. import logging
  18. import re
  19. from abc import abstractmethod
  20. from enum import Enum
  21. from typing import Any, Dict, List, Optional, Tuple
  22. from six import integer_types
  23. from canonicaljson import json
  24. from twisted.internet import defer
  25. from synapse.api.constants import EventTypes
  26. from synapse.api.errors import StoreError
  27. from synapse.api.room_versions import RoomVersion, RoomVersions
  28. from synapse.storage._base import SQLBaseStore
  29. from synapse.storage.data_stores.main.search import SearchStore
  30. from synapse.storage.database import Database, LoggingTransaction
  31. from synapse.types import ThirdPartyInstanceID
  32. from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
  33. logger = logging.getLogger(__name__)
  34. OpsLevel = collections.namedtuple(
  35. "OpsLevel", ("ban_level", "kick_level", "redact_level")
  36. )
  37. RatelimitOverride = collections.namedtuple(
  38. "RatelimitOverride", ("messages_per_second", "burst_count")
  39. )
  40. class RoomSortOrder(Enum):
  41. """
  42. Enum to define the sorting method used when returning rooms with get_rooms_paginate
  43. ALPHABETICAL = sort rooms alphabetically by name
  44. SIZE = sort rooms by membership size, highest to lowest
  45. """
  46. ALPHABETICAL = "alphabetical"
  47. SIZE = "size"
  48. class RoomWorkerStore(SQLBaseStore):
  49. def __init__(self, database: Database, db_conn, hs):
  50. super(RoomWorkerStore, self).__init__(database, db_conn, hs)
  51. self.config = hs.config
  52. def get_room(self, room_id):
  53. """Retrieve a room.
  54. Args:
  55. room_id (str): The ID of the room to retrieve.
  56. Returns:
  57. A dict containing the room information, or None if the room is unknown.
  58. """
  59. return self.db.simple_select_one(
  60. table="rooms",
  61. keyvalues={"room_id": room_id},
  62. retcols=("room_id", "is_public", "creator"),
  63. desc="get_room",
  64. allow_none=True,
  65. )
  66. def get_public_room_ids(self):
  67. return self.db.simple_select_onecol(
  68. table="rooms",
  69. keyvalues={"is_public": True},
  70. retcol="room_id",
  71. desc="get_public_room_ids",
  72. )
  73. def count_public_rooms(self, network_tuple, ignore_non_federatable):
  74. """Counts the number of public rooms as tracked in the room_stats_current
  75. and room_stats_state table.
  76. Args:
  77. network_tuple (ThirdPartyInstanceID|None)
  78. ignore_non_federatable (bool): If true filters out non-federatable rooms
  79. """
  80. def _count_public_rooms_txn(txn):
  81. query_args = []
  82. if network_tuple:
  83. if network_tuple.appservice_id:
  84. published_sql = """
  85. SELECT room_id from appservice_room_list
  86. WHERE appservice_id = ? AND network_id = ?
  87. """
  88. query_args.append(network_tuple.appservice_id)
  89. query_args.append(network_tuple.network_id)
  90. else:
  91. published_sql = """
  92. SELECT room_id FROM rooms WHERE is_public
  93. """
  94. else:
  95. published_sql = """
  96. SELECT room_id FROM rooms WHERE is_public
  97. UNION SELECT room_id from appservice_room_list
  98. """
  99. sql = """
  100. SELECT
  101. COALESCE(COUNT(*), 0)
  102. FROM (
  103. %(published_sql)s
  104. ) published
  105. INNER JOIN room_stats_state USING (room_id)
  106. INNER JOIN room_stats_current USING (room_id)
  107. WHERE
  108. (
  109. join_rules = 'public' OR history_visibility = 'world_readable'
  110. )
  111. AND joined_members > 0
  112. """ % {
  113. "published_sql": published_sql
  114. }
  115. txn.execute(sql, query_args)
  116. return txn.fetchone()[0]
  117. return self.db.runInteraction("count_public_rooms", _count_public_rooms_txn)
  118. @defer.inlineCallbacks
  119. def get_largest_public_rooms(
  120. self,
  121. network_tuple: Optional[ThirdPartyInstanceID],
  122. search_filter: Optional[dict],
  123. limit: Optional[int],
  124. bounds: Optional[Tuple[int, str]],
  125. forwards: bool,
  126. ignore_non_federatable: bool = False,
  127. ):
  128. """Gets the largest public rooms (where largest is in terms of joined
  129. members, as tracked in the statistics table).
  130. Args:
  131. network_tuple
  132. search_filter
  133. limit: Maxmimum number of rows to return, unlimited otherwise.
  134. bounds: An uppoer or lower bound to apply to result set if given,
  135. consists of a joined member count and room_id (these are
  136. excluded from result set).
  137. forwards: true iff going forwards, going backwards otherwise
  138. ignore_non_federatable: If true filters out non-federatable rooms.
  139. Returns:
  140. Rooms in order: biggest number of joined users first.
  141. We then arbitrarily use the room_id as a tie breaker.
  142. """
  143. where_clauses = []
  144. query_args = []
  145. if network_tuple:
  146. if network_tuple.appservice_id:
  147. published_sql = """
  148. SELECT room_id from appservice_room_list
  149. WHERE appservice_id = ? AND network_id = ?
  150. """
  151. query_args.append(network_tuple.appservice_id)
  152. query_args.append(network_tuple.network_id)
  153. else:
  154. published_sql = """
  155. SELECT room_id FROM rooms WHERE is_public
  156. """
  157. else:
  158. published_sql = """
  159. SELECT room_id FROM rooms WHERE is_public
  160. UNION SELECT room_id from appservice_room_list
  161. """
  162. # Work out the bounds if we're given them, these bounds look slightly
  163. # odd, but are designed to help query planner use indices by pulling
  164. # out a common bound.
  165. if bounds:
  166. last_joined_members, last_room_id = bounds
  167. if forwards:
  168. where_clauses.append(
  169. """
  170. joined_members <= ? AND (
  171. joined_members < ? OR room_id < ?
  172. )
  173. """
  174. )
  175. else:
  176. where_clauses.append(
  177. """
  178. joined_members >= ? AND (
  179. joined_members > ? OR room_id > ?
  180. )
  181. """
  182. )
  183. query_args += [last_joined_members, last_joined_members, last_room_id]
  184. if ignore_non_federatable:
  185. where_clauses.append("is_federatable")
  186. if search_filter and search_filter.get("generic_search_term", None):
  187. search_term = "%" + search_filter["generic_search_term"] + "%"
  188. where_clauses.append(
  189. """
  190. (
  191. LOWER(name) LIKE ?
  192. OR LOWER(topic) LIKE ?
  193. OR LOWER(canonical_alias) LIKE ?
  194. )
  195. """
  196. )
  197. query_args += [
  198. search_term.lower(),
  199. search_term.lower(),
  200. search_term.lower(),
  201. ]
  202. where_clause = ""
  203. if where_clauses:
  204. where_clause = " AND " + " AND ".join(where_clauses)
  205. sql = """
  206. SELECT
  207. room_id, name, topic, canonical_alias, joined_members,
  208. avatar, history_visibility, joined_members, guest_access
  209. FROM (
  210. %(published_sql)s
  211. ) published
  212. INNER JOIN room_stats_state USING (room_id)
  213. INNER JOIN room_stats_current USING (room_id)
  214. WHERE
  215. (
  216. join_rules = 'public' OR history_visibility = 'world_readable'
  217. )
  218. AND joined_members > 0
  219. %(where_clause)s
  220. ORDER BY joined_members %(dir)s, room_id %(dir)s
  221. """ % {
  222. "published_sql": published_sql,
  223. "where_clause": where_clause,
  224. "dir": "DESC" if forwards else "ASC",
  225. }
  226. if limit is not None:
  227. query_args.append(limit)
  228. sql += """
  229. LIMIT ?
  230. """
  231. def _get_largest_public_rooms_txn(txn):
  232. txn.execute(sql, query_args)
  233. results = self.db.cursor_to_dict(txn)
  234. if not forwards:
  235. results.reverse()
  236. return results
  237. ret_val = yield self.db.runInteraction(
  238. "get_largest_public_rooms", _get_largest_public_rooms_txn
  239. )
  240. defer.returnValue(ret_val)
  241. @cached(max_entries=10000)
  242. def is_room_blocked(self, room_id):
  243. return self.db.simple_select_one_onecol(
  244. table="blocked_rooms",
  245. keyvalues={"room_id": room_id},
  246. retcol="1",
  247. allow_none=True,
  248. desc="is_room_blocked",
  249. )
  250. async def get_rooms_paginate(
  251. self,
  252. start: int,
  253. limit: int,
  254. order_by: RoomSortOrder,
  255. reverse_order: bool,
  256. search_term: Optional[str],
  257. ) -> Tuple[List[Dict[str, Any]], int]:
  258. """Function to retrieve a paginated list of rooms as json.
  259. Args:
  260. start: offset in the list
  261. limit: maximum amount of rooms to retrieve
  262. order_by: the sort order of the returned list
  263. reverse_order: whether to reverse the room list
  264. search_term: a string to filter room names by
  265. Returns:
  266. A list of room dicts and an integer representing the total number of
  267. rooms that exist given this query
  268. """
  269. # Filter room names by a string
  270. where_statement = ""
  271. if search_term:
  272. where_statement = "WHERE state.name LIKE ?"
  273. # Our postgres db driver converts ? -> %s in SQL strings as that's the
  274. # placeholder for postgres.
  275. # HOWEVER, if you put a % into your SQL then everything goes wibbly.
  276. # To get around this, we're going to surround search_term with %'s
  277. # before giving it to the database in python instead
  278. search_term = "%" + search_term + "%"
  279. # Set ordering
  280. if RoomSortOrder(order_by) == RoomSortOrder.SIZE:
  281. order_by_column = "curr.joined_members"
  282. order_by_asc = False
  283. elif RoomSortOrder(order_by) == RoomSortOrder.ALPHABETICAL:
  284. # Sort alphabetically
  285. order_by_column = "state.name"
  286. order_by_asc = True
  287. else:
  288. raise StoreError(
  289. 500, "Incorrect value for order_by provided: %s" % order_by
  290. )
  291. # Whether to return the list in reverse order
  292. if reverse_order:
  293. # Flip the boolean
  294. order_by_asc = not order_by_asc
  295. # Create one query for getting the limited number of events that the user asked
  296. # for, and another query for getting the total number of events that could be
  297. # returned. Thus allowing us to see if there are more events to paginate through
  298. info_sql = """
  299. SELECT state.room_id, state.name, state.canonical_alias, curr.joined_members
  300. FROM room_stats_state state
  301. INNER JOIN room_stats_current curr USING (room_id)
  302. %s
  303. ORDER BY %s %s
  304. LIMIT ?
  305. OFFSET ?
  306. """ % (
  307. where_statement,
  308. order_by_column,
  309. "ASC" if order_by_asc else "DESC",
  310. )
  311. # Use a nested SELECT statement as SQL can't count(*) with an OFFSET
  312. count_sql = """
  313. SELECT count(*) FROM (
  314. SELECT room_id FROM room_stats_state state
  315. %s
  316. ) AS get_room_ids
  317. """ % (
  318. where_statement,
  319. )
  320. def _get_rooms_paginate_txn(txn):
  321. # Execute the data query
  322. sql_values = (limit, start)
  323. if search_term:
  324. # Add the search term into the WHERE clause
  325. sql_values = (search_term,) + sql_values
  326. txn.execute(info_sql, sql_values)
  327. # Refactor room query data into a structured dictionary
  328. rooms = []
  329. for room in txn:
  330. rooms.append(
  331. {
  332. "room_id": room[0],
  333. "name": room[1],
  334. "canonical_alias": room[2],
  335. "joined_members": room[3],
  336. }
  337. )
  338. # Execute the count query
  339. # Add the search term into the WHERE clause if present
  340. sql_values = (search_term,) if search_term else ()
  341. txn.execute(count_sql, sql_values)
  342. room_count = txn.fetchone()
  343. return rooms, room_count[0]
  344. return await self.db.runInteraction(
  345. "get_rooms_paginate", _get_rooms_paginate_txn,
  346. )
  347. @cachedInlineCallbacks(max_entries=10000)
  348. def get_ratelimit_for_user(self, user_id):
  349. """Check if there are any overrides for ratelimiting for the given
  350. user
  351. Args:
  352. user_id (str)
  353. Returns:
  354. RatelimitOverride if there is an override, else None. If the contents
  355. of RatelimitOverride are None or 0 then ratelimitng has been
  356. disabled for that user entirely.
  357. """
  358. row = yield self.db.simple_select_one(
  359. table="ratelimit_override",
  360. keyvalues={"user_id": user_id},
  361. retcols=("messages_per_second", "burst_count"),
  362. allow_none=True,
  363. desc="get_ratelimit_for_user",
  364. )
  365. if row:
  366. return RatelimitOverride(
  367. messages_per_second=row["messages_per_second"],
  368. burst_count=row["burst_count"],
  369. )
  370. else:
  371. return None
  372. @cachedInlineCallbacks()
  373. def get_retention_policy_for_room(self, room_id):
  374. """Get the retention policy for a given room.
  375. If no retention policy has been found for this room, returns a policy defined
  376. by the configured default policy (which has None as both the 'min_lifetime' and
  377. the 'max_lifetime' if no default policy has been defined in the server's
  378. configuration).
  379. Args:
  380. room_id (str): The ID of the room to get the retention policy of.
  381. Returns:
  382. dict[int, int]: "min_lifetime" and "max_lifetime" for this room.
  383. """
  384. def get_retention_policy_for_room_txn(txn):
  385. txn.execute(
  386. """
  387. SELECT min_lifetime, max_lifetime FROM room_retention
  388. INNER JOIN current_state_events USING (event_id, room_id)
  389. WHERE room_id = ?;
  390. """,
  391. (room_id,),
  392. )
  393. return self.db.cursor_to_dict(txn)
  394. ret = yield self.db.runInteraction(
  395. "get_retention_policy_for_room", get_retention_policy_for_room_txn,
  396. )
  397. # If we don't know this room ID, ret will be None, in this case return the default
  398. # policy.
  399. if not ret:
  400. defer.returnValue(
  401. {
  402. "min_lifetime": self.config.retention_default_min_lifetime,
  403. "max_lifetime": self.config.retention_default_max_lifetime,
  404. }
  405. )
  406. row = ret[0]
  407. # If one of the room's policy's attributes isn't defined, use the matching
  408. # attribute from the default policy.
  409. # The default values will be None if no default policy has been defined, or if one
  410. # of the attributes is missing from the default policy.
  411. if row["min_lifetime"] is None:
  412. row["min_lifetime"] = self.config.retention_default_min_lifetime
  413. if row["max_lifetime"] is None:
  414. row["max_lifetime"] = self.config.retention_default_max_lifetime
  415. defer.returnValue(row)
  416. def get_media_mxcs_in_room(self, room_id):
  417. """Retrieves all the local and remote media MXC URIs in a given room
  418. Args:
  419. room_id (str)
  420. Returns:
  421. The local and remote media as a lists of tuples where the key is
  422. the hostname and the value is the media ID.
  423. """
  424. def _get_media_mxcs_in_room_txn(txn):
  425. local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
  426. local_media_mxcs = []
  427. remote_media_mxcs = []
  428. # Convert the IDs to MXC URIs
  429. for media_id in local_mxcs:
  430. local_media_mxcs.append("mxc://%s/%s" % (self.hs.hostname, media_id))
  431. for hostname, media_id in remote_mxcs:
  432. remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id))
  433. return local_media_mxcs, remote_media_mxcs
  434. return self.db.runInteraction(
  435. "get_media_ids_in_room", _get_media_mxcs_in_room_txn
  436. )
  437. def quarantine_media_ids_in_room(self, room_id, quarantined_by):
  438. """For a room loops through all events with media and quarantines
  439. the associated media
  440. """
  441. logger.info("Quarantining media in room: %s", room_id)
  442. def _quarantine_media_in_room_txn(txn):
  443. local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
  444. total_media_quarantined = 0
  445. # Now update all the tables to set the quarantined_by flag
  446. txn.executemany(
  447. """
  448. UPDATE local_media_repository
  449. SET quarantined_by = ?
  450. WHERE media_id = ?
  451. """,
  452. ((quarantined_by, media_id) for media_id in local_mxcs),
  453. )
  454. txn.executemany(
  455. """
  456. UPDATE remote_media_cache
  457. SET quarantined_by = ?
  458. WHERE media_origin = ? AND media_id = ?
  459. """,
  460. (
  461. (quarantined_by, origin, media_id)
  462. for origin, media_id in remote_mxcs
  463. ),
  464. )
  465. total_media_quarantined += len(local_mxcs)
  466. total_media_quarantined += len(remote_mxcs)
  467. return total_media_quarantined
  468. return self.db.runInteraction(
  469. "quarantine_media_in_room", _quarantine_media_in_room_txn
  470. )
  471. def _get_media_mxcs_in_room_txn(self, txn, room_id):
  472. """Retrieves all the local and remote media MXC URIs in a given room
  473. Args:
  474. txn (cursor)
  475. room_id (str)
  476. Returns:
  477. The local and remote media as a lists of tuples where the key is
  478. the hostname and the value is the media ID.
  479. """
  480. mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")
  481. sql = """
  482. SELECT stream_ordering, json FROM events
  483. JOIN event_json USING (room_id, event_id)
  484. WHERE room_id = ?
  485. %(where_clause)s
  486. AND contains_url = ? AND outlier = ?
  487. ORDER BY stream_ordering DESC
  488. LIMIT ?
  489. """
  490. txn.execute(sql % {"where_clause": ""}, (room_id, True, False, 100))
  491. local_media_mxcs = []
  492. remote_media_mxcs = []
  493. while True:
  494. next_token = None
  495. for stream_ordering, content_json in txn:
  496. next_token = stream_ordering
  497. event_json = json.loads(content_json)
  498. content = event_json["content"]
  499. content_url = content.get("url")
  500. thumbnail_url = content.get("info", {}).get("thumbnail_url")
  501. for url in (content_url, thumbnail_url):
  502. if not url:
  503. continue
  504. matches = mxc_re.match(url)
  505. if matches:
  506. hostname = matches.group(1)
  507. media_id = matches.group(2)
  508. if hostname == self.hs.hostname:
  509. local_media_mxcs.append(media_id)
  510. else:
  511. remote_media_mxcs.append((hostname, media_id))
  512. if next_token is None:
  513. # We've gone through the whole room, so we're finished.
  514. break
  515. txn.execute(
  516. sql % {"where_clause": "AND stream_ordering < ?"},
  517. (room_id, next_token, True, False, 100),
  518. )
  519. return local_media_mxcs, remote_media_mxcs
  520. def quarantine_media_by_id(
  521. self, server_name: str, media_id: str, quarantined_by: str,
  522. ):
  523. """quarantines a single local or remote media id
  524. Args:
  525. server_name: The name of the server that holds this media
  526. media_id: The ID of the media to be quarantined
  527. quarantined_by: The user ID that initiated the quarantine request
  528. """
  529. logger.info("Quarantining media: %s/%s", server_name, media_id)
  530. is_local = server_name == self.config.server_name
  531. def _quarantine_media_by_id_txn(txn):
  532. local_mxcs = [media_id] if is_local else []
  533. remote_mxcs = [(server_name, media_id)] if not is_local else []
  534. return self._quarantine_media_txn(
  535. txn, local_mxcs, remote_mxcs, quarantined_by
  536. )
  537. return self.db.runInteraction(
  538. "quarantine_media_by_user", _quarantine_media_by_id_txn
  539. )
  540. def quarantine_media_ids_by_user(self, user_id: str, quarantined_by: str):
  541. """quarantines all local media associated with a single user
  542. Args:
  543. user_id: The ID of the user to quarantine media of
  544. quarantined_by: The ID of the user who made the quarantine request
  545. """
  546. def _quarantine_media_by_user_txn(txn):
  547. local_media_ids = self._get_media_ids_by_user_txn(txn, user_id)
  548. return self._quarantine_media_txn(txn, local_media_ids, [], quarantined_by)
  549. return self.db.runInteraction(
  550. "quarantine_media_by_user", _quarantine_media_by_user_txn
  551. )
  552. def _get_media_ids_by_user_txn(self, txn, user_id: str, filter_quarantined=True):
  553. """Retrieves local media IDs by a given user
  554. Args:
  555. txn (cursor)
  556. user_id: The ID of the user to retrieve media IDs of
  557. Returns:
  558. The local and remote media as a lists of tuples where the key is
  559. the hostname and the value is the media ID.
  560. """
  561. # Local media
  562. sql = """
  563. SELECT media_id
  564. FROM local_media_repository
  565. WHERE user_id = ?
  566. """
  567. if filter_quarantined:
  568. sql += "AND quarantined_by IS NULL"
  569. txn.execute(sql, (user_id,))
  570. local_media_ids = [row[0] for row in txn]
  571. # TODO: Figure out all remote media a user has referenced in a message
  572. return local_media_ids
  573. def _quarantine_media_txn(
  574. self,
  575. txn,
  576. local_mxcs: List[str],
  577. remote_mxcs: List[Tuple[str, str]],
  578. quarantined_by: str,
  579. ) -> int:
  580. """Quarantine local and remote media items
  581. Args:
  582. txn (cursor)
  583. local_mxcs: A list of local mxc URLs
  584. remote_mxcs: A list of (remote server, media id) tuples representing
  585. remote mxc URLs
  586. quarantined_by: The ID of the user who initiated the quarantine request
  587. Returns:
  588. The total number of media items quarantined
  589. """
  590. total_media_quarantined = 0
  591. # Update all the tables to set the quarantined_by flag
  592. txn.executemany(
  593. """
  594. UPDATE local_media_repository
  595. SET quarantined_by = ?
  596. WHERE media_id = ?
  597. """,
  598. ((quarantined_by, media_id) for media_id in local_mxcs),
  599. )
  600. txn.executemany(
  601. """
  602. UPDATE remote_media_cache
  603. SET quarantined_by = ?
  604. WHERE media_origin = ? AND media_id = ?
  605. """,
  606. ((quarantined_by, origin, media_id) for origin, media_id in remote_mxcs),
  607. )
  608. total_media_quarantined += len(local_mxcs)
  609. total_media_quarantined += len(remote_mxcs)
  610. return total_media_quarantined
  611. class RoomBackgroundUpdateStore(SQLBaseStore):
  612. REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
  613. ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column"
  614. def __init__(self, database: Database, db_conn, hs):
  615. super(RoomBackgroundUpdateStore, self).__init__(database, db_conn, hs)
  616. self.config = hs.config
  617. self.db.updates.register_background_update_handler(
  618. "insert_room_retention", self._background_insert_retention,
  619. )
  620. self.db.updates.register_background_update_handler(
  621. self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE,
  622. self._remove_tombstoned_rooms_from_directory,
  623. )
  624. self.db.updates.register_background_update_handler(
  625. self.ADD_ROOMS_ROOM_VERSION_COLUMN,
  626. self._background_add_rooms_room_version_column,
  627. )
  628. @defer.inlineCallbacks
  629. def _background_insert_retention(self, progress, batch_size):
  630. """Retrieves a list of all rooms within a range and inserts an entry for each of
  631. them into the room_retention table.
  632. NULLs the property's columns if missing from the retention event in the room's
  633. state (or NULLs all of them if there's no retention event in the room's state),
  634. so that we fall back to the server's retention policy.
  635. """
  636. last_room = progress.get("room_id", "")
  637. def _background_insert_retention_txn(txn):
  638. txn.execute(
  639. """
  640. SELECT state.room_id, state.event_id, events.json
  641. FROM current_state_events as state
  642. LEFT JOIN event_json AS events ON (state.event_id = events.event_id)
  643. WHERE state.room_id > ? AND state.type = '%s'
  644. ORDER BY state.room_id ASC
  645. LIMIT ?;
  646. """
  647. % EventTypes.Retention,
  648. (last_room, batch_size),
  649. )
  650. rows = self.db.cursor_to_dict(txn)
  651. if not rows:
  652. return True
  653. for row in rows:
  654. if not row["json"]:
  655. retention_policy = {}
  656. else:
  657. ev = json.loads(row["json"])
  658. retention_policy = json.dumps(ev["content"])
  659. self.db.simple_insert_txn(
  660. txn=txn,
  661. table="room_retention",
  662. values={
  663. "room_id": row["room_id"],
  664. "event_id": row["event_id"],
  665. "min_lifetime": retention_policy.get("min_lifetime"),
  666. "max_lifetime": retention_policy.get("max_lifetime"),
  667. },
  668. )
  669. logger.info("Inserted %d rows into room_retention", len(rows))
  670. self.db.updates._background_update_progress_txn(
  671. txn, "insert_room_retention", {"room_id": rows[-1]["room_id"]}
  672. )
  673. if batch_size > len(rows):
  674. return True
  675. else:
  676. return False
  677. end = yield self.db.runInteraction(
  678. "insert_room_retention", _background_insert_retention_txn,
  679. )
  680. if end:
  681. yield self.db.updates._end_background_update("insert_room_retention")
  682. defer.returnValue(batch_size)
  683. async def _background_add_rooms_room_version_column(
  684. self, progress: dict, batch_size: int
  685. ):
  686. """Background update to go and add room version inforamtion to `rooms`
  687. table from `current_state_events` table.
  688. """
  689. last_room_id = progress.get("room_id", "")
  690. def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction):
  691. sql = """
  692. SELECT room_id, json FROM current_state_events
  693. INNER JOIN event_json USING (room_id, event_id)
  694. WHERE room_id > ? AND type = 'm.room.create' AND state_key = ''
  695. ORDER BY room_id
  696. LIMIT ?
  697. """
  698. txn.execute(sql, (last_room_id, batch_size))
  699. updates = []
  700. for room_id, event_json in txn:
  701. event_dict = json.loads(event_json)
  702. room_version_id = event_dict.get("content", {}).get(
  703. "room_version", RoomVersions.V1.identifier
  704. )
  705. creator = event_dict.get("content").get("creator")
  706. updates.append((room_id, creator, room_version_id))
  707. if not updates:
  708. return True
  709. new_last_room_id = ""
  710. for room_id, creator, room_version_id in updates:
  711. # We upsert here just in case we don't already have a row,
  712. # mainly for paranoia as much badness would happen if we don't
  713. # insert the row and then try and get the room version for the
  714. # room.
  715. self.db.simple_upsert_txn(
  716. txn,
  717. table="rooms",
  718. keyvalues={"room_id": room_id},
  719. values={"room_version": room_version_id},
  720. insertion_values={"is_public": False, "creator": creator},
  721. )
  722. new_last_room_id = room_id
  723. self.db.updates._background_update_progress_txn(
  724. txn, self.ADD_ROOMS_ROOM_VERSION_COLUMN, {"room_id": new_last_room_id}
  725. )
  726. return False
  727. end = await self.db.runInteraction(
  728. "_background_add_rooms_room_version_column",
  729. _background_add_rooms_room_version_column_txn,
  730. )
  731. if end:
  732. await self.db.updates._end_background_update(
  733. self.ADD_ROOMS_ROOM_VERSION_COLUMN
  734. )
  735. return batch_size
  736. async def _remove_tombstoned_rooms_from_directory(
  737. self, progress, batch_size
  738. ) -> int:
  739. """Removes any rooms with tombstone events from the room directory
  740. Nowadays this is handled by the room upgrade handler, but we may have some
  741. that got left behind
  742. """
  743. last_room = progress.get("room_id", "")
  744. def _get_rooms(txn):
  745. txn.execute(
  746. """
  747. SELECT room_id
  748. FROM rooms r
  749. INNER JOIN current_state_events cse USING (room_id)
  750. WHERE room_id > ? AND r.is_public
  751. AND cse.type = '%s' AND cse.state_key = ''
  752. ORDER BY room_id ASC
  753. LIMIT ?;
  754. """
  755. % EventTypes.Tombstone,
  756. (last_room, batch_size),
  757. )
  758. return [row[0] for row in txn]
  759. rooms = await self.db.runInteraction(
  760. "get_tombstoned_directory_rooms", _get_rooms
  761. )
  762. if not rooms:
  763. await self.db.updates._end_background_update(
  764. self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE
  765. )
  766. return 0
  767. for room_id in rooms:
  768. logger.info("Removing tombstoned room %s from the directory", room_id)
  769. await self.set_room_is_public(room_id, False)
  770. await self.db.updates._background_update_progress(
  771. self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE, {"room_id": rooms[-1]}
  772. )
  773. return len(rooms)
  774. @abstractmethod
  775. def set_room_is_public(self, room_id, is_public):
  776. # this will need to be implemented if a background update is performed with
  777. # existing (tombstoned, public) rooms in the database.
  778. #
  779. # It's overridden by RoomStore for the synapse master.
  780. raise NotImplementedError()
  781. class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore):
  782. def __init__(self, database: Database, db_conn, hs):
  783. super(RoomStore, self).__init__(database, db_conn, hs)
  784. self.config = hs.config
  785. @defer.inlineCallbacks
  786. def store_room(
  787. self,
  788. room_id: str,
  789. room_creator_user_id: str,
  790. is_public: bool,
  791. room_version: RoomVersion,
  792. ):
  793. """Stores a room.
  794. Args:
  795. room_id: The desired room ID, can be None.
  796. room_creator_user_id: The user ID of the room creator.
  797. is_public: True to indicate that this room should appear in
  798. public room lists.
  799. room_version: The version of the room
  800. Raises:
  801. StoreError if the room could not be stored.
  802. """
  803. try:
  804. def store_room_txn(txn, next_id):
  805. self.db.simple_insert_txn(
  806. txn,
  807. "rooms",
  808. {
  809. "room_id": room_id,
  810. "creator": room_creator_user_id,
  811. "is_public": is_public,
  812. "room_version": room_version.identifier,
  813. },
  814. )
  815. if is_public:
  816. self.db.simple_insert_txn(
  817. txn,
  818. table="public_room_list_stream",
  819. values={
  820. "stream_id": next_id,
  821. "room_id": room_id,
  822. "visibility": is_public,
  823. },
  824. )
  825. with self._public_room_id_gen.get_next() as next_id:
  826. yield self.db.runInteraction("store_room_txn", store_room_txn, next_id)
  827. except Exception as e:
  828. logger.error("store_room with room_id=%s failed: %s", room_id, e)
  829. raise StoreError(500, "Problem creating room.")
  830. @defer.inlineCallbacks
  831. def set_room_is_public(self, room_id, is_public):
  832. def set_room_is_public_txn(txn, next_id):
  833. self.db.simple_update_one_txn(
  834. txn,
  835. table="rooms",
  836. keyvalues={"room_id": room_id},
  837. updatevalues={"is_public": is_public},
  838. )
  839. entries = self.db.simple_select_list_txn(
  840. txn,
  841. table="public_room_list_stream",
  842. keyvalues={
  843. "room_id": room_id,
  844. "appservice_id": None,
  845. "network_id": None,
  846. },
  847. retcols=("stream_id", "visibility"),
  848. )
  849. entries.sort(key=lambda r: r["stream_id"])
  850. add_to_stream = True
  851. if entries:
  852. add_to_stream = bool(entries[-1]["visibility"]) != is_public
  853. if add_to_stream:
  854. self.db.simple_insert_txn(
  855. txn,
  856. table="public_room_list_stream",
  857. values={
  858. "stream_id": next_id,
  859. "room_id": room_id,
  860. "visibility": is_public,
  861. "appservice_id": None,
  862. "network_id": None,
  863. },
  864. )
  865. with self._public_room_id_gen.get_next() as next_id:
  866. yield self.db.runInteraction(
  867. "set_room_is_public", set_room_is_public_txn, next_id
  868. )
  869. self.hs.get_notifier().on_new_replication_data()
  870. @defer.inlineCallbacks
  871. def set_room_is_public_appservice(
  872. self, room_id, appservice_id, network_id, is_public
  873. ):
  874. """Edit the appservice/network specific public room list.
  875. Each appservice can have a number of published room lists associated
  876. with them, keyed off of an appservice defined `network_id`, which
  877. basically represents a single instance of a bridge to a third party
  878. network.
  879. Args:
  880. room_id (str)
  881. appservice_id (str)
  882. network_id (str)
  883. is_public (bool): Whether to publish or unpublish the room from the
  884. list.
  885. """
  886. def set_room_is_public_appservice_txn(txn, next_id):
  887. if is_public:
  888. try:
  889. self.db.simple_insert_txn(
  890. txn,
  891. table="appservice_room_list",
  892. values={
  893. "appservice_id": appservice_id,
  894. "network_id": network_id,
  895. "room_id": room_id,
  896. },
  897. )
  898. except self.database_engine.module.IntegrityError:
  899. # We've already inserted, nothing to do.
  900. return
  901. else:
  902. self.db.simple_delete_txn(
  903. txn,
  904. table="appservice_room_list",
  905. keyvalues={
  906. "appservice_id": appservice_id,
  907. "network_id": network_id,
  908. "room_id": room_id,
  909. },
  910. )
  911. entries = self.db.simple_select_list_txn(
  912. txn,
  913. table="public_room_list_stream",
  914. keyvalues={
  915. "room_id": room_id,
  916. "appservice_id": appservice_id,
  917. "network_id": network_id,
  918. },
  919. retcols=("stream_id", "visibility"),
  920. )
  921. entries.sort(key=lambda r: r["stream_id"])
  922. add_to_stream = True
  923. if entries:
  924. add_to_stream = bool(entries[-1]["visibility"]) != is_public
  925. if add_to_stream:
  926. self.db.simple_insert_txn(
  927. txn,
  928. table="public_room_list_stream",
  929. values={
  930. "stream_id": next_id,
  931. "room_id": room_id,
  932. "visibility": is_public,
  933. "appservice_id": appservice_id,
  934. "network_id": network_id,
  935. },
  936. )
  937. with self._public_room_id_gen.get_next() as next_id:
  938. yield self.db.runInteraction(
  939. "set_room_is_public_appservice",
  940. set_room_is_public_appservice_txn,
  941. next_id,
  942. )
  943. self.hs.get_notifier().on_new_replication_data()
  944. def get_room_count(self):
  945. """Retrieve a list of all rooms
  946. """
  947. def f(txn):
  948. sql = "SELECT count(*) FROM rooms"
  949. txn.execute(sql)
  950. row = txn.fetchone()
  951. return row[0] or 0
  952. return self.db.runInteraction("get_rooms", f)
  953. def _store_room_topic_txn(self, txn, event):
  954. if hasattr(event, "content") and "topic" in event.content:
  955. self.store_event_search_txn(
  956. txn, event, "content.topic", event.content["topic"]
  957. )
  958. def _store_room_name_txn(self, txn, event):
  959. if hasattr(event, "content") and "name" in event.content:
  960. self.store_event_search_txn(
  961. txn, event, "content.name", event.content["name"]
  962. )
  963. def _store_room_message_txn(self, txn, event):
  964. if hasattr(event, "content") and "body" in event.content:
  965. self.store_event_search_txn(
  966. txn, event, "content.body", event.content["body"]
  967. )
  968. def _store_retention_policy_for_room_txn(self, txn, event):
  969. if hasattr(event, "content") and (
  970. "min_lifetime" in event.content or "max_lifetime" in event.content
  971. ):
  972. if (
  973. "min_lifetime" in event.content
  974. and not isinstance(event.content.get("min_lifetime"), integer_types)
  975. ) or (
  976. "max_lifetime" in event.content
  977. and not isinstance(event.content.get("max_lifetime"), integer_types)
  978. ):
  979. # Ignore the event if one of the value isn't an integer.
  980. return
  981. self.db.simple_insert_txn(
  982. txn=txn,
  983. table="room_retention",
  984. values={
  985. "room_id": event.room_id,
  986. "event_id": event.event_id,
  987. "min_lifetime": event.content.get("min_lifetime"),
  988. "max_lifetime": event.content.get("max_lifetime"),
  989. },
  990. )
  991. self._invalidate_cache_and_stream(
  992. txn, self.get_retention_policy_for_room, (event.room_id,)
  993. )
  994. def add_event_report(
  995. self, room_id, event_id, user_id, reason, content, received_ts
  996. ):
  997. next_id = self._event_reports_id_gen.get_next()
  998. return self.db.simple_insert(
  999. table="event_reports",
  1000. values={
  1001. "id": next_id,
  1002. "received_ts": received_ts,
  1003. "room_id": room_id,
  1004. "event_id": event_id,
  1005. "user_id": user_id,
  1006. "reason": reason,
  1007. "content": json.dumps(content),
  1008. },
  1009. desc="add_event_report",
  1010. )
  1011. def get_current_public_room_stream_id(self):
  1012. return self._public_room_id_gen.get_current_token()
  1013. def get_all_new_public_rooms(self, prev_id, current_id, limit):
  1014. def get_all_new_public_rooms(txn):
  1015. sql = """
  1016. SELECT stream_id, room_id, visibility, appservice_id, network_id
  1017. FROM public_room_list_stream
  1018. WHERE stream_id > ? AND stream_id <= ?
  1019. ORDER BY stream_id ASC
  1020. LIMIT ?
  1021. """
  1022. txn.execute(sql, (prev_id, current_id, limit))
  1023. return txn.fetchall()
  1024. if prev_id == current_id:
  1025. return defer.succeed([])
  1026. return self.db.runInteraction(
  1027. "get_all_new_public_rooms", get_all_new_public_rooms
  1028. )
  1029. @defer.inlineCallbacks
  1030. def block_room(self, room_id, user_id):
  1031. """Marks the room as blocked. Can be called multiple times.
  1032. Args:
  1033. room_id (str): Room to block
  1034. user_id (str): Who blocked it
  1035. Returns:
  1036. Deferred
  1037. """
  1038. yield self.db.simple_upsert(
  1039. table="blocked_rooms",
  1040. keyvalues={"room_id": room_id},
  1041. values={},
  1042. insertion_values={"user_id": user_id},
  1043. desc="block_room",
  1044. )
  1045. yield self.db.runInteraction(
  1046. "block_room_invalidation",
  1047. self._invalidate_cache_and_stream,
  1048. self.is_room_blocked,
  1049. (room_id,),
  1050. )
  1051. @defer.inlineCallbacks
  1052. def get_rooms_for_retention_period_in_range(
  1053. self, min_ms, max_ms, include_null=False
  1054. ):
  1055. """Retrieves all of the rooms within the given retention range.
  1056. Optionally includes the rooms which don't have a retention policy.
  1057. Args:
  1058. min_ms (int|None): Duration in milliseconds that define the lower limit of
  1059. the range to handle (exclusive). If None, doesn't set a lower limit.
  1060. max_ms (int|None): Duration in milliseconds that define the upper limit of
  1061. the range to handle (inclusive). If None, doesn't set an upper limit.
  1062. include_null (bool): Whether to include rooms which retention policy is NULL
  1063. in the returned set.
  1064. Returns:
  1065. dict[str, dict]: The rooms within this range, along with their retention
  1066. policy. The key is "room_id", and maps to a dict describing the retention
  1067. policy associated with this room ID. The keys for this nested dict are
  1068. "min_lifetime" (int|None), and "max_lifetime" (int|None).
  1069. """
  1070. def get_rooms_for_retention_period_in_range_txn(txn):
  1071. range_conditions = []
  1072. args = []
  1073. if min_ms is not None:
  1074. range_conditions.append("max_lifetime > ?")
  1075. args.append(min_ms)
  1076. if max_ms is not None:
  1077. range_conditions.append("max_lifetime <= ?")
  1078. args.append(max_ms)
  1079. # Do a first query which will retrieve the rooms that have a retention policy
  1080. # in their current state.
  1081. sql = """
  1082. SELECT room_id, min_lifetime, max_lifetime FROM room_retention
  1083. INNER JOIN current_state_events USING (event_id, room_id)
  1084. """
  1085. if len(range_conditions):
  1086. sql += " WHERE (" + " AND ".join(range_conditions) + ")"
  1087. if include_null:
  1088. sql += " OR max_lifetime IS NULL"
  1089. txn.execute(sql, args)
  1090. rows = self.db.cursor_to_dict(txn)
  1091. rooms_dict = {}
  1092. for row in rows:
  1093. rooms_dict[row["room_id"]] = {
  1094. "min_lifetime": row["min_lifetime"],
  1095. "max_lifetime": row["max_lifetime"],
  1096. }
  1097. if include_null:
  1098. # If required, do a second query that retrieves all of the rooms we know
  1099. # of so we can handle rooms with no retention policy.
  1100. sql = "SELECT DISTINCT room_id FROM current_state_events"
  1101. txn.execute(sql)
  1102. rows = self.db.cursor_to_dict(txn)
  1103. # If a room isn't already in the dict (i.e. it doesn't have a retention
  1104. # policy in its state), add it with a null policy.
  1105. for row in rows:
  1106. if row["room_id"] not in rooms_dict:
  1107. rooms_dict[row["room_id"]] = {
  1108. "min_lifetime": None,
  1109. "max_lifetime": None,
  1110. }
  1111. return rooms_dict
  1112. rooms = yield self.db.runInteraction(
  1113. "get_rooms_for_retention_period_in_range",
  1114. get_rooms_for_retention_period_in_range_txn,
  1115. )
  1116. defer.returnValue(rooms)