utils.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import re
  16. from six import string_types
  17. from frozendict import frozendict
  18. from twisted.internet import defer
  19. from synapse.api.constants import EventTypes, RelationTypes
  20. from synapse.util.async_helpers import yieldable_gather_results
  21. from . import EventBase
# Split strings on "." but not on "\.". This uses a negative lookbehind
# assertion for '\': (?<!stuff) matches if the current position in the string
# is not preceded by a match for 'stuff'.
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
  28. def prune_event(event):
  29. """ Returns a pruned version of the given event, which removes all keys we
  30. don't know about or think could potentially be dodgy.
  31. This is used when we "redact" an event. We want to remove all fields that
  32. the user has specified, but we do want to keep necessary information like
  33. type, state_key etc.
  34. Args:
  35. event (FrozenEvent)
  36. Returns:
  37. FrozenEvent
  38. """
  39. pruned_event_dict = prune_event_dict(event.get_dict())
  40. from . import event_type_from_format_version
  41. pruned_event = event_type_from_format_version(event.format_version)(
  42. pruned_event_dict, event.internal_metadata.get_dict()
  43. )
  44. # Mark the event as redacted
  45. pruned_event.internal_metadata.redacted = True
  46. return pruned_event
  47. def prune_event_dict(event_dict):
  48. """Redacts the event_dict in the same way as `prune_event`, except it
  49. operates on dicts rather than event objects
  50. Args:
  51. event_dict (dict)
  52. Returns:
  53. dict: A copy of the pruned event dict
  54. """
  55. allowed_keys = [
  56. "event_id",
  57. "sender",
  58. "room_id",
  59. "hashes",
  60. "signatures",
  61. "content",
  62. "type",
  63. "state_key",
  64. "depth",
  65. "prev_events",
  66. "prev_state",
  67. "auth_events",
  68. "origin",
  69. "origin_server_ts",
  70. "membership",
  71. ]
  72. event_type = event_dict["type"]
  73. new_content = {}
  74. def add_fields(*fields):
  75. for field in fields:
  76. if field in event_dict["content"]:
  77. new_content[field] = event_dict["content"][field]
  78. if event_type == EventTypes.Member:
  79. add_fields("membership")
  80. elif event_type == EventTypes.Create:
  81. add_fields("creator")
  82. elif event_type == EventTypes.JoinRules:
  83. add_fields("join_rule")
  84. elif event_type == EventTypes.PowerLevels:
  85. add_fields(
  86. "users",
  87. "users_default",
  88. "events",
  89. "events_default",
  90. "state_default",
  91. "ban",
  92. "kick",
  93. "redact",
  94. )
  95. elif event_type == EventTypes.Aliases:
  96. add_fields("aliases")
  97. elif event_type == EventTypes.RoomHistoryVisibility:
  98. add_fields("history_visibility")
  99. allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
  100. allowed_fields["content"] = new_content
  101. unsigned = {}
  102. allowed_fields["unsigned"] = unsigned
  103. event_unsigned = event_dict.get("unsigned", {})
  104. if "age_ts" in event_unsigned:
  105. unsigned["age_ts"] = event_unsigned["age_ts"]
  106. if "replaces_state" in event_unsigned:
  107. unsigned["replaces_state"] = event_unsigned["replaces_state"]
  108. return allowed_fields
  109. def _copy_field(src, dst, field):
  110. """Copy the field in 'src' to 'dst'.
  111. For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"]
  112. then dst={"foo":{"bar":5}}.
  113. Args:
  114. src(dict): The dict to read from.
  115. dst(dict): The dict to modify.
  116. field(list<str>): List of keys to drill down to in 'src'.
  117. """
  118. if len(field) == 0: # this should be impossible
  119. return
  120. if len(field) == 1: # common case e.g. 'origin_server_ts'
  121. if field[0] in src:
  122. dst[field[0]] = src[field[0]]
  123. return
  124. # Else is a nested field e.g. 'content.body'
  125. # Pop the last field as that's the key to move across and we need the
  126. # parent dict in order to access the data. Drill down to the right dict.
  127. key_to_move = field.pop(-1)
  128. sub_dict = src
  129. for sub_field in field: # e.g. sub_field => "content"
  130. if sub_field in sub_dict and type(sub_dict[sub_field]) in [dict, frozendict]:
  131. sub_dict = sub_dict[sub_field]
  132. else:
  133. return
  134. if key_to_move not in sub_dict:
  135. return
  136. # Insert the key into the output dictionary, creating nested objects
  137. # as required. We couldn't do this any earlier or else we'd need to delete
  138. # the empty objects if the key didn't exist.
  139. sub_out_dict = dst
  140. for sub_field in field:
  141. sub_out_dict = sub_out_dict.setdefault(sub_field, {})
  142. sub_out_dict[key_to_move] = sub_dict[key_to_move]
  143. def only_fields(dictionary, fields):
  144. """Return a new dict with only the fields in 'dictionary' which are present
  145. in 'fields'.
  146. If there are no event fields specified then all fields are included.
  147. The entries may include '.' charaters to indicate sub-fields.
  148. So ['content.body'] will include the 'body' field of the 'content' object.
  149. A literal '.' character in a field name may be escaped using a '\'.
  150. Args:
  151. dictionary(dict): The dictionary to read from.
  152. fields(list<str>): A list of fields to copy over. Only shallow refs are
  153. taken.
  154. Returns:
  155. dict: A new dictionary with only the given fields. If fields was empty,
  156. the same dictionary is returned.
  157. """
  158. if len(fields) == 0:
  159. return dictionary
  160. # for each field, convert it:
  161. # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
  162. split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
  163. # for each element of the output array of arrays:
  164. # remove escaping so we can use the right key names.
  165. split_fields[:] = [
  166. [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
  167. ]
  168. output = {}
  169. for field_array in split_fields:
  170. _copy_field(dictionary, output, field_array)
  171. return output
  172. def format_event_raw(d):
  173. return d
  174. def format_event_for_client_v1(d):
  175. d = format_event_for_client_v2(d)
  176. sender = d.get("sender")
  177. if sender is not None:
  178. d["user_id"] = sender
  179. copy_keys = (
  180. "age",
  181. "redacted_because",
  182. "replaces_state",
  183. "prev_content",
  184. "invite_room_state",
  185. )
  186. for key in copy_keys:
  187. if key in d["unsigned"]:
  188. d[key] = d["unsigned"][key]
  189. return d
  190. def format_event_for_client_v2(d):
  191. drop_keys = (
  192. "auth_events",
  193. "prev_events",
  194. "hashes",
  195. "signatures",
  196. "depth",
  197. "origin",
  198. "prev_state",
  199. )
  200. for key in drop_keys:
  201. d.pop(key, None)
  202. return d
  203. def format_event_for_client_v2_without_room_id(d):
  204. d = format_event_for_client_v2(d)
  205. d.pop("room_id", None)
  206. return d
  207. def serialize_event(
  208. e,
  209. time_now_ms,
  210. as_client_event=True,
  211. event_format=format_event_for_client_v1,
  212. token_id=None,
  213. only_event_fields=None,
  214. is_invite=False,
  215. ):
  216. """Serialize event for clients
  217. Args:
  218. e (EventBase)
  219. time_now_ms (int)
  220. as_client_event (bool)
  221. event_format
  222. token_id
  223. only_event_fields
  224. is_invite (bool): Whether this is an invite that is being sent to the
  225. invitee
  226. Returns:
  227. dict
  228. """
  229. # FIXME(erikj): To handle the case of presence events and the like
  230. if not isinstance(e, EventBase):
  231. return e
  232. time_now_ms = int(time_now_ms)
  233. # Should this strip out None's?
  234. d = {k: v for k, v in e.get_dict().items()}
  235. d["event_id"] = e.event_id
  236. if "age_ts" in d["unsigned"]:
  237. d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"]
  238. del d["unsigned"]["age_ts"]
  239. if "redacted_because" in e.unsigned:
  240. d["unsigned"]["redacted_because"] = serialize_event(
  241. e.unsigned["redacted_because"], time_now_ms, event_format=event_format
  242. )
  243. if token_id is not None:
  244. if token_id == getattr(e.internal_metadata, "token_id", None):
  245. txn_id = getattr(e.internal_metadata, "txn_id", None)
  246. if txn_id is not None:
  247. d["unsigned"]["transaction_id"] = txn_id
  248. # If this is an invite for somebody else, then we don't care about the
  249. # invite_room_state as that's meant solely for the invitee. Other clients
  250. # will already have the state since they're in the room.
  251. if not is_invite:
  252. d["unsigned"].pop("invite_room_state", None)
  253. if as_client_event:
  254. d = event_format(d)
  255. if only_event_fields:
  256. if not isinstance(only_event_fields, list) or not all(
  257. isinstance(f, string_types) for f in only_event_fields
  258. ):
  259. raise TypeError("only_event_fields must be a list of strings")
  260. d = only_fields(d, only_event_fields)
  261. return d
  262. class EventClientSerializer(object):
  263. """Serializes events that are to be sent to clients.
  264. This is used for bundling extra information with any events to be sent to
  265. clients.
  266. """
  267. def __init__(self, hs):
  268. self.store = hs.get_datastore()
  269. self.experimental_msc1849_support_enabled = (
  270. hs.config.experimental_msc1849_support_enabled
  271. )
  272. @defer.inlineCallbacks
  273. def serialize_event(self, event, time_now, bundle_aggregations=True, **kwargs):
  274. """Serializes a single event.
  275. Args:
  276. event (EventBase)
  277. time_now (int): The current time in milliseconds
  278. bundle_aggregations (bool): Whether to bundle in related events
  279. **kwargs: Arguments to pass to `serialize_event`
  280. Returns:
  281. Deferred[dict]: The serialized event
  282. """
  283. # To handle the case of presence events and the like
  284. if not isinstance(event, EventBase):
  285. return event
  286. event_id = event.event_id
  287. serialized_event = serialize_event(event, time_now, **kwargs)
  288. # If MSC1849 is enabled then we need to look if there are any relations
  289. # we need to bundle in with the event.
  290. # Do not bundle relations if the event has been redacted
  291. if not event.internal_metadata.is_redacted() and (
  292. self.experimental_msc1849_support_enabled and bundle_aggregations
  293. ):
  294. annotations = yield self.store.get_aggregation_groups_for_event(event_id)
  295. references = yield self.store.get_relations_for_event(
  296. event_id, RelationTypes.REFERENCE, direction="f"
  297. )
  298. if annotations.chunk:
  299. r = serialized_event["unsigned"].setdefault("m.relations", {})
  300. r[RelationTypes.ANNOTATION] = annotations.to_dict()
  301. if references.chunk:
  302. r = serialized_event["unsigned"].setdefault("m.relations", {})
  303. r[RelationTypes.REFERENCE] = references.to_dict()
  304. edit = None
  305. if event.type == EventTypes.Message:
  306. edit = yield self.store.get_applicable_edit(event_id)
  307. if edit:
  308. # If there is an edit replace the content, preserving existing
  309. # relations.
  310. relations = event.content.get("m.relates_to")
  311. serialized_event["content"] = edit.content.get("m.new_content", {})
  312. if relations:
  313. serialized_event["content"]["m.relates_to"] = relations
  314. else:
  315. serialized_event["content"].pop("m.relates_to", None)
  316. r = serialized_event["unsigned"].setdefault("m.relations", {})
  317. r[RelationTypes.REPLACE] = {
  318. "event_id": edit.event_id,
  319. "origin_server_ts": edit.origin_server_ts,
  320. "sender": edit.sender,
  321. }
  322. return serialized_event
  323. def serialize_events(self, events, time_now, **kwargs):
  324. """Serializes multiple events.
  325. Args:
  326. event (iter[EventBase])
  327. time_now (int): The current time in milliseconds
  328. **kwargs: Arguments to pass to `serialize_event`
  329. Returns:
  330. Deferred[list[dict]]: The list of serialized events
  331. """
  332. return yieldable_gather_results(
  333. self.serialize_event, events, time_now=time_now, **kwargs
  334. )