123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- # -*- coding: utf-8 -*-
- # Copyright 2014 - 2016 OpenMarket Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- from collections import namedtuple
- from six import iteritems
- from six.moves import range
- import msgpack
- from unpaddedbase64 import decode_base64, encode_base64
- from twisted.internet import defer
- from synapse.api.constants import EventTypes, JoinRules
- from synapse.types import ThirdPartyInstanceID
- from synapse.util.async import concurrently_execute
- from synapse.util.caches.descriptors import cachedInlineCallbacks
- from synapse.util.caches.response_cache import ResponseCache
- from ._base import BaseHandler
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): presumably a polling period in milliseconds (i.e. 60s); it is
# not referenced in the visible part of this file -- confirm against callers.
REMOTE_ROOM_LIST_POLL_INTERVAL = 1000 * 60

# Passed as `network_tuple` to indicate that only rooms published to the main
# list should be returned.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
class RoomListHandler(BaseHandler):
    """Serves the public room directory for local and remote servers.

    Local lists are computed from the datastore and paginated with
    `RoomListNextBatch` tokens; remote lists are fetched through the
    federation client. Non-search requests go through a `ResponseCache`.
    """

    def __init__(self, hs):
        super(RoomListHandler, self).__init__(hs)
        self.response_cache = ResponseCache(hs, "room_list")
        self.remote_response_cache = ResponseCache(hs, "remote_room_list",
                                                   timeout_ms=30 * 1000)

    def get_local_public_room_list(self, limit=None, since_token=None,
                                   search_filter=None,
                                   network_tuple=EMPTY_THIRD_PARTY_ID,):
        """Generate a local public room list.

        There are multiple different lists: the main one plus one per third
        party network. A client can ask for a specific list or to return all.

        Args:
            limit (int)
            since_token (str)
            search_filter (dict)
            network_tuple (ThirdPartyInstanceID): Which public list to use.
                This can be (None, None) to indicate the main list, or a
                particular appservice and network id to use an appservice
                specific one. Setting to None returns all public rooms
                across all lists.

        Returns:
            Whatever `_get_public_room_list` produces, either directly (for
            searches) or via the response cache.
        """
        logger.info(
            "Getting public room list: limit=%r, since=%r, search=%r, network=%r",
            limit, since_token, bool(search_filter), network_tuple,
        )

        if search_filter:
            # We explicitly don't bother caching searches.
            logger.info("Bypassing cache as search request.")
            return self._get_public_room_list(
                limit, since_token, search_filter, network_tuple=network_tuple,
            )

        key = (limit, since_token, network_tuple)
        return self.response_cache.wrap(
            key,
            self._get_public_room_list,
            limit, since_token, network_tuple=network_tuple,
        )

    @defer.inlineCallbacks
    def _get_public_room_list(self, limit=None, since_token=None,
                              search_filter=None,
                              network_tuple=EMPTY_THIRD_PARTY_ID,):
        """Compute one page of the local public room list.

        Rooms are ordered by descending number of joined users, with the room
        id as tie breaker; pagination tokens are indices into that ordering.

        Args:
            limit (int|None): Maximum number of rooms to return. A falsy
                value means "no limit".
            since_token (str|None): Encoded `RoomListNextBatch` to paginate
                from. A falsy value or the literal "END" is treated as no
                token.
            search_filter (dict|None): Optional filter applied to each
                generated room entry.
            network_tuple (ThirdPartyInstanceID): Which public list to use.

        Returns:
            Deferred[dict]: contains "chunk" and "total_room_count_estimate",
            plus "new_rooms" when a since token was given and
            "next_batch"/"prev_batch" tokens where applicable.
        """
        if since_token and since_token != "END":
            since_token = RoomListNextBatch.from_token(since_token)
        else:
            since_token = None

        rooms_to_order_value = {}
        rooms_to_num_joined = {}

        newly_visible = []
        newly_unpublished = []
        if since_token:
            stream_token = since_token.stream_ordering
            current_public_id = yield self.store.get_current_public_room_stream_id()
            public_room_stream_id = since_token.public_room_stream_id
            newly_visible, newly_unpublished = yield self.store.get_public_room_changes(
                public_room_stream_id, current_public_id,
                network_tuple=network_tuple,
            )
        else:
            stream_token = yield self.store.get_room_max_stream_ordering()
            public_room_stream_id = yield self.store.get_current_public_room_stream_id()

        room_ids = yield self.store.get_public_room_ids_at_stream_id(
            public_room_stream_id, network_tuple=network_tuple,
        )

        # We want to return rooms in a particular order: the number of joined
        # users. We then arbitrarily use the room_id as a tie breaker.

        @defer.inlineCallbacks
        def get_order_for_room(room_id):
            # Most of the rooms won't have changed between the since token and
            # now (especially if the since token is "now"). So, we can ask what
            # the current users are in a room (that will hit a cache) and then
            # check if the room has changed since the since token. (We have to
            # do it in that order to avoid races).
            # If things have changed then fall back to getting the current state
            # at the since token.
            joined_users = yield self.store.get_users_in_room(room_id)
            if self.store.has_room_changed_since(room_id, stream_token):
                latest_event_ids = yield self.store.get_forward_extremeties_for_room(
                    room_id, stream_token
                )

                if not latest_event_ids:
                    return

                joined_users = yield self.state_handler.get_current_user_in_room(
                    room_id, latest_event_ids,
                )

            num_joined_users = len(joined_users)
            rooms_to_num_joined[room_id] = num_joined_users

            if num_joined_users == 0:
                return

            # We want larger rooms to be first, hence negating num_joined_users
            rooms_to_order_value[room_id] = (-num_joined_users, room_id)

        logger.info("Getting ordering for %i rooms since %s",
                    len(room_ids), stream_token)
        yield concurrently_execute(get_order_for_room, room_ids, 10)

        sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1])
        sorted_rooms = [room_id for room_id, _ in sorted_entries]

        # `sorted_rooms` should now be a list of all public room ids that is
        # stable across pagination. Therefore, we can use indices into this
        # list as our pagination tokens.

        # Filter out rooms that we don't want to return. The member count
        # must be looked up for the room being tested (`r`): any other name
        # is either undefined here (Python 3) or a variable leaked from the
        # comprehension above (Python 2).
        rooms_to_scan = [
            r for r in sorted_rooms
            if r not in newly_unpublished and rooms_to_num_joined[r] > 0
        ]

        total_room_count = len(rooms_to_scan)

        if since_token:
            # Filter out rooms we've already returned previously
            # `since_token.current_limit` is the index of the last room we
            # sent down, so we exclude it and everything before/after it.
            if since_token.direction_is_forward:
                rooms_to_scan = rooms_to_scan[since_token.current_limit + 1:]
            else:
                rooms_to_scan = rooms_to_scan[:since_token.current_limit]
                rooms_to_scan.reverse()

        logger.info("After sorting and filtering, %i rooms remain",
                    len(rooms_to_scan))

        # _append_room_entry_to_chunk will append to chunk but will stop once
        # the chunk has grown past the limit.
        #
        # Normally we will generate enough results on the first iteration here,
        # but if there is a search filter, _append_room_entry_to_chunk may
        # filter some results out, in which case we loop again.
        #
        # We don't want to scan over the entire range either as that
        # would potentially waste a lot of work.
        #
        # XXX if there is no limit, we may end up DoSing the server with
        # calls to get_current_state_ids for every single room on the
        # server. Surely we should cap this somehow?
        #
        if limit:
            step = limit + 1
        else:
            # step cannot be zero
            step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1

        chunk = []
        for i in range(0, len(rooms_to_scan), step):
            batch = rooms_to_scan[i:i + step]
            logger.info("Processing %i rooms for result", len(batch))
            yield concurrently_execute(
                lambda r: self._append_room_entry_to_chunk(
                    r, rooms_to_num_joined[r],
                    chunk, limit, search_filter
                ),
                batch, 5,
            )
            logger.info("Now %i rooms in result", len(chunk))
            # Only compare against the limit when there is one: with
            # `limit=None` the expression `limit + 1` raises a TypeError, and
            # the whole list is processed in a single batch anyway.
            if limit and len(chunk) >= limit + 1:
                break

        chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"]))

        # Work out the new limit of the batch for pagination, or None if we
        # know there are no more results that would be returned.
        # i.e., [since_token.current_limit..new_limit] is the batch of rooms
        # we've returned (or the reverse if we paginated backwards)
        # We tried to pull out limit + 1 rooms above, so if we have <= limit
        # then we know there are no more results to return
        new_limit = None
        if chunk and (not limit or len(chunk) > limit):

            if not since_token or since_token.direction_is_forward:
                if limit:
                    chunk = chunk[:limit]
                last_room_id = chunk[-1]["room_id"]
            else:
                if limit:
                    chunk = chunk[-limit:]
                last_room_id = chunk[0]["room_id"]

            new_limit = sorted_rooms.index(last_room_id)

        results = {
            "chunk": chunk,
            "total_room_count_estimate": total_room_count,
        }

        if since_token:
            results["new_rooms"] = bool(newly_visible)

        if not since_token or since_token.direction_is_forward:
            if new_limit is not None:
                results["next_batch"] = RoomListNextBatch(
                    stream_ordering=stream_token,
                    public_room_stream_id=public_room_stream_id,
                    current_limit=new_limit,
                    direction_is_forward=True,
                ).to_token()

            if since_token:
                results["prev_batch"] = since_token.copy_and_replace(
                    direction_is_forward=False,
                    current_limit=since_token.current_limit + 1,
                ).to_token()
        else:
            if new_limit is not None:
                results["prev_batch"] = RoomListNextBatch(
                    stream_ordering=stream_token,
                    public_room_stream_id=public_room_stream_id,
                    current_limit=new_limit,
                    direction_is_forward=False,
                ).to_token()

            if since_token:
                results["next_batch"] = since_token.copy_and_replace(
                    direction_is_forward=True,
                    current_limit=since_token.current_limit - 1,
                ).to_token()

        defer.returnValue(results)

    @defer.inlineCallbacks
    def _append_room_entry_to_chunk(self, room_id, num_joined_users, chunk, limit,
                                    search_filter):
        """Generate the entry for a room in the public room list and append it
        to the `chunk` if it matches the search filter

        Args:
            room_id (str): The room to generate an entry for.
            num_joined_users (int): Joined-member count to report for it.
            chunk (list): Result list, appended to in place.
            limit (int|None): Requested page size; once the chunk has grown
                beyond `limit + 1` entries the room is skipped.
            search_filter (dict|None): Filter the entry has to match.
        """
        if limit and len(chunk) > limit + 1:
            # We've already got enough, so lets just drop it.
            return

        result = yield self.generate_room_entry(room_id, num_joined_users)

        if result and _matches_room_entry(result, search_filter):
            chunk.append(result)

    @cachedInlineCallbacks(num_args=1, cache_context=True)
    def generate_room_entry(self, room_id, num_joined_users, cache_context,
                            with_alias=True, allow_private=False):
        """Returns the entry for a room

        Args:
            room_id (str)
            num_joined_users (int): Reported as "num_joined_members".
            cache_context: Used to register cache invalidation callbacks on
                the store lookups made here.
            with_alias (bool): Whether to look up and include "aliases".
            allow_private (bool): If False, rooms whose join rule is set to
                something other than public yield no entry.

        Returns:
            Deferred[dict|None]: the room entry, or None if the room is not
            public and `allow_private` is False.

        NOTE(review): `num_args=1` presumably keys the cache on `room_id`
        only, so the remaining arguments of the first call would be baked
        into the cached entry -- confirm against the cache descriptor.
        """
        result = {
            "room_id": room_id,
            "num_joined_members": num_joined_users,
        }

        current_state_ids = yield self.store.get_current_state_ids(
            room_id, on_invalidate=cache_context.invalidate,
        )

        # Only fetch the state events that contribute to the entry.
        event_map = yield self.store.get_events([
            event_id for key, event_id in iteritems(current_state_ids)
            if key[0] in (
                EventTypes.JoinRules,
                EventTypes.Name,
                EventTypes.Topic,
                EventTypes.CanonicalAlias,
                EventTypes.RoomHistoryVisibility,
                EventTypes.GuestAccess,
                "m.room.avatar",
            )
        ])

        current_state = {
            (ev.type, ev.state_key): ev
            for ev in event_map.values()
        }

        # Double check that this is actually a public room.
        join_rules_event = current_state.get((EventTypes.JoinRules, ""))
        if join_rules_event:
            join_rule = join_rules_event.content.get("join_rule", None)
            if not allow_private and join_rule and join_rule != JoinRules.PUBLIC:
                defer.returnValue(None)

        if with_alias:
            aliases = yield self.store.get_aliases_for_room(
                room_id, on_invalidate=cache_context.invalidate
            )
            if aliases:
                result["aliases"] = aliases

        # Plain dict lookup: nothing to wait for, so no `yield` here.
        name_event = current_state.get((EventTypes.Name, ""))
        if name_event:
            name = name_event.content.get("name", None)
            if name:
                result["name"] = name

        topic_event = current_state.get((EventTypes.Topic, ""))
        if topic_event:
            topic = topic_event.content.get("topic", None)
            if topic:
                result["topic"] = topic

        canonical_event = current_state.get((EventTypes.CanonicalAlias, ""))
        if canonical_event:
            canonical_alias = canonical_event.content.get("alias", None)
            if canonical_alias:
                result["canonical_alias"] = canonical_alias

        visibility_event = current_state.get((EventTypes.RoomHistoryVisibility, ""))
        visibility = None
        if visibility_event:
            visibility = visibility_event.content.get("history_visibility", None)
        result["world_readable"] = visibility == "world_readable"

        guest_event = current_state.get((EventTypes.GuestAccess, ""))
        guest = None
        if guest_event:
            guest = guest_event.content.get("guest_access", None)
        result["guest_can_join"] = guest == "can_join"

        avatar_event = current_state.get(("m.room.avatar", ""))
        if avatar_event:
            avatar_url = avatar_event.content.get("url", None)
            if avatar_url:
                result["avatar_url"] = avatar_url

        defer.returnValue(result)

    @defer.inlineCallbacks
    def get_remote_public_room_list(self, server_name, limit=None, since_token=None,
                                    search_filter=None, include_all_networks=False,
                                    third_party_instance_id=None,):
        """Fetch the public room list of a remote server.

        Args:
            server_name (str): The server to query.
            limit (int|None): Ignored when `search_filter` is given.
            since_token (str|None): Ignored when `search_filter` is given.
            search_filter (dict|None): If given, the full remote list is
                fetched and filtered locally.
            include_all_networks (bool): Passed through to the remote query.
            third_party_instance_id: Passed through to the remote query.

        Returns:
            Deferred[dict]: the remote response, or ``{"chunk": [...]}`` with
            only the matching entries when a search filter was given.
        """
        if search_filter:
            # We currently don't support searching across federation, so we have
            # to do it manually without pagination
            limit = None
            since_token = None

        res = yield self._get_remote_list_cached(
            server_name, limit=limit, since_token=since_token,
            include_all_networks=include_all_networks,
            third_party_instance_id=third_party_instance_id,
        )

        if search_filter:
            res = {"chunk": [
                entry
                for entry in res.get("chunk", [])
                if _matches_room_entry(entry, search_filter)
            ]}

        defer.returnValue(res)

    def _get_remote_list_cached(self, server_name, limit=None, since_token=None,
                                search_filter=None, include_all_networks=False,
                                third_party_instance_id=None,):
        """Query a remote server's public rooms, via the cache where possible.

        Requests with a `search_filter` go straight to the federation client;
        everything else is wrapped in `remote_response_cache`, keyed on the
        remaining arguments.
        """
        repl_layer = self.hs.get_federation_client()
        if search_filter:
            # We can't cache when asking for search
            return repl_layer.get_public_rooms(
                server_name, limit=limit, since_token=since_token,
                search_filter=search_filter, include_all_networks=include_all_networks,
                third_party_instance_id=third_party_instance_id,
            )

        key = (
            server_name, limit, since_token, include_all_networks,
            third_party_instance_id,
        )
        return self.remote_response_cache.wrap(
            key,
            repl_layer.get_public_rooms,
            server_name, limit=limit, since_token=since_token,
            search_filter=search_filter,
            include_all_networks=include_all_networks,
            third_party_instance_id=third_party_instance_id,
        )
class RoomListNextBatch(namedtuple(
    "RoomListNextBatch",
    [
        # stream_ordering of the first public room list
        "stream_ordering",
        # public room stream id for first public room list
        "public_room_stream_id",
        # The number of previous rooms returned
        "current_limit",
        # Bool if this is a next_batch, false if prev_batch
        "direction_is_forward",
    ],
)):
    """Pagination token for the public room list.

    Tokens are serialised as base64-encoded msgpack, with each field name
    replaced by the single-letter key given in `KEY_DICT`.
    """

    # Field name -> short key used in the serialised token.
    KEY_DICT = dict(
        stream_ordering="s",
        public_room_stream_id="p",
        current_limit="n",
        direction_is_forward="d",
    )

    # Short key -> field name, for decoding.
    REVERSE_KEY_DICT = dict(
        (short_key, field) for field, short_key in KEY_DICT.items()
    )

    @classmethod
    def from_token(cls, token):
        """Decode a token string produced by `to_token`."""
        decoded = msgpack.loads(decode_base64(token))
        fields = {}
        for short_key, value in decoded.items():
            fields[cls.REVERSE_KEY_DICT[short_key]] = value
        return RoomListNextBatch(**fields)

    def to_token(self):
        """Encode this batch as a base64 string of a msgpack map."""
        payload = dict(
            (self.KEY_DICT[field], value)
            for field, value in self._asdict().items()
        )
        return encode_base64(msgpack.dumps(payload))

    def copy_and_replace(self, **kwds):
        """Return a copy with the given fields replaced."""
        return self._replace(**kwds)
- def _matches_room_entry(room_entry, search_filter):
- if search_filter and search_filter.get("generic_search_term", None):
- generic_search_term = search_filter["generic_search_term"].upper()
- if generic_search_term in room_entry.get("name", "").upper():
- return True
- elif generic_search_term in room_entry.get("topic", "").upper():
- return True
- elif generic_search_term in room_entry.get("canonical_alias", "").upper():
- return True
- else:
- return True
- return False
|