feed_manager.rb 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. # frozen_string_literal: true
  2. require 'singleton'
  3. class FeedManager
  4. include Singleton
  5. include Redisable
  6. MAX_ITEMS = 400
  7. # Must be <= MAX_ITEMS or the tracking sets will grow forever
  8. REBLOG_FALLOFF = 40
  # Yields each account whose user has a current_sign_in_at more recent than
  # User::ACTIVE_DURATION ago, iterating with find_each.
  def with_active_accounts(&block)
    sign_in_cutoff  = User::ACTIVE_DURATION.ago
    recently_active = Account.joins(:user).where('users.current_sign_in_at > ?', sign_in_cutoff)

    recently_active.find_each(&block)
  end
  # Builds the Redis key for a feed: "feed:<type>:<id>", with ":<subtype>"
  # appended when a truthy subtype is given.
  def key(type, id, subtype = nil)
    base = "feed:#{type}:#{id}"
    subtype ? "#{base}:#{subtype}" : base
  end
  # Dispatches to the filter matching the timeline type. Home timelines go
  # through filter_from_home? (with crutches built for this single status),
  # mention timelines through filter_from_mentions?; any other type is never
  # filtered.
  def filter?(timeline_type, status, receiver_id)
    case timeline_type
    when :home
      crutches = build_crutches(receiver_id, [status])
      filter_from_home?(status, receiver_id, crutches)
    when :mentions
      filter_from_mentions?(status, receiver_id)
    else
      false
    end
  end
  # Adds the status to the account's home feed. Returns false when
  # add_to_feed declined it; otherwise trims the feed, enqueues a
  # PushUpdateWorker job if push_update_required? says so for the account's
  # timeline channel, and returns true.
  def push_to_home(account, status)
    aggregate = account.user&.aggregates_reblogs?
    return false unless add_to_feed(:home, account.id, status, aggregate)

    trim(:home, account.id)

    channel = "timeline:#{account.id}"
    PushUpdateWorker.perform_async(account.id, status.id, channel) if push_update_required?(channel)

    true
  end
  # Removes the status from the account's home feed. Returns false when
  # remove_from_feed reports nothing was removed; otherwise publishes a
  # delete event on the account's timeline channel and returns true.
  def unpush_from_home(account, status)
    aggregate = account.user&.aggregates_reblogs?
    removed   = remove_from_feed(:home, account.id, status, aggregate)
    return false unless removed

    delete_event = Oj.dump(event: :delete, payload: status.id.to_s)
    redis.publish("timeline:#{account.id}", delete_event)

    true
  end
  # Adds the status to a list feed. A reply to another account is only
  # accepted when that account is the list owner or is on the list.
  # Returns false when the status is rejected or add_to_feed declined it;
  # otherwise trims the feed, enqueues a push update if required, and
  # returns true.
  def push_to_list(list, status)
    if status.reply? && status.in_reply_to_account_id != status.account_id
      replied_to_id      = status.in_reply_to_account_id
      addressed_to_owner = replied_to_id == list.account_id

      return false unless addressed_to_owner || ListAccount.where(list_id: list.id, account_id: replied_to_id).exists?
    end

    aggregate = list.account.user&.aggregates_reblogs?
    return false unless add_to_feed(:list, list.id, status, aggregate)

    trim(:list, list.id)

    channel = "timeline:list:#{list.id}"
    PushUpdateWorker.perform_async(list.account_id, status.id, channel) if push_update_required?(channel)

    true
  end
  # Removes the status from a list feed. Returns false when remove_from_feed
  # reports nothing was removed; otherwise publishes a delete event on the
  # list's timeline channel and returns true.
  def unpush_from_list(list, status)
    aggregate = list.account.user&.aggregates_reblogs?
    removed   = remove_from_feed(:list, list.id, status, aggregate)
    return false unless removed

    delete_event = Oj.dump(event: :delete, payload: status.id.to_s)
    redis.publish("timeline:list:#{list.id}", delete_event)

    true
  end
  # Caps a feed at MAX_ITEMS entries and stops tracking reblogs that have
  # fallen out of the REBLOG_FALLOFF most recent entries.
  def trim(type, account_id)
    feed_key            = key(type, account_id)
    tracked_reblogs_key = key(type, account_id, 'reblogs')

    # Drop everything ranked below the MAX_ITEMS highest-scored entries.
    redis.zremrangebyrank(feed_key, 0, -(FeedManager::MAX_ITEMS + 1))

    # Look up the score of the REBLOG_FALLOFF'th newest entry; reblogs at or
    # below that score no longer take part in deduplication. A feed shorter
    # than REBLOG_FALLOFF yields a cutoff of 0.
    cutoff_rank  = FeedManager::REBLOG_FALLOFF - 1
    cutoff_entry = redis.zrevrange(feed_key, cutoff_rank, cutoff_rank, with_scores: true)&.first
    cutoff_score = cutoff_entry&.last&.to_i || 0

    stale_reblogs = redis.zrangebyscore(tracked_reblogs_key, 0, cutoff_score)

    stale_reblogs.each do |reblogged_id|
      # Untrack the reblog *before* touching its extras set, to avoid races.
      redis.zrem(tracked_reblogs_key, reblogged_id)

      # Discard the set of additional reblogs. If this reblog is later
      # deleted no other reblog is re-inserted automatically, but any new
      # reblog of the same status can enter the feed again.
      redis.del(key(type, account_id, "reblogs:#{reblogged_id}"))
    end
  end
  # Back-fills the home feed of +into_account+ with recent statuses from
  # +from_account+.
  #
  # At most MAX_ITEMS / 4 public, unlisted or private statuses are
  # considered. Once the feed already holds that many entries, only statuses
  # with an id above the feed's lowest score are fetched. Each candidate is
  # run through filter_from_home? before being added, and the feed is
  # trimmed afterwards.
  def merge_into_timeline(from_account, into_account)
    timeline_key = key(:home, into_account.id)
    aggregate    = into_account.user&.aggregates_reblogs?
    limit        = FeedManager::MAX_ITEMS / 4
    query        = from_account.statuses.where(visibility: [:public, :unlisted, :private]).includes(:preloadable_poll, reblog: :account).limit(limit)

    if redis.zcard(timeline_key) >= limit
      oldest_home_score = redis.zrange(timeline_key, 0, 0, with_scores: true).first.last.to_i
      query = query.where('id > ?', oldest_home_score)
    end

    statuses = query.to_a
    crutches = build_crutches(into_account.id, statuses)

    statuses.each do |status|
      # filter_from_home? compares its receiver against account ids, so it
      # must be handed the id, not the account record.
      next if filter_from_home?(status, into_account.id, crutches)

      add_to_feed(:home, into_account.id, status, aggregate)
    end

    trim(:home, into_account.id)
  end
  # Removes the statuses of +from_account+ from the home feed of
  # +into_account+. Only statuses with an id above the feed's lowest score
  # are visited (everything, if the feed is empty).
  def unmerge_from_timeline(from_account, into_account)
    timeline_key = key(:home, into_account.id)
    # Resolve the reblog-aggregation preference once rather than once per
    # removed status.
    aggregate         = into_account.user&.aggregates_reblogs?
    oldest_home_score = redis.zrange(timeline_key, 0, 0, with_scores: true)&.first&.last&.to_i || 0

    from_account.statuses.select('id, reblog_of_id').where('id > ?', oldest_home_score).reorder(nil).find_each do |status|
      remove_from_feed(:home, into_account.id, status, aggregate)
    end
  end
  # Removes every status authored by +target_account+ that is currently in
  # the home feed of +account+, going through unpush_from_home for each.
  def clear_from_timeline(account, target_account)
    feed_status_ids = redis.zrange(key(:home, account.id), 0, -1)

    Status.where(id: feed_status_ids, account: target_account).each { |status| unpush_from_home(account, status) }
  end
  # Rebuilds the home feed of +account+ from scratch: first its own
  # non-direct statuses, then the public, unlisted or private statuses of
  # every account it follows (each capped at MAX_ITEMS / 2, filtered through
  # filter_from_home?, with a trim after each followed account).
  def populate_feed(account)
    limit        = FeedManager::MAX_ITEMS / 2
    aggregate    = account.user&.aggregates_reblogs?
    timeline_key = key(:home, account.id)

    account.statuses.where.not(visibility: :direct).limit(limit).each do |status|
      add_to_feed(:home, account.id, status, aggregate)
    end

    account.following.includes(:account_stat).find_each do |target_account|
      if redis.zcard(timeline_key) >= limit
        oldest_home_score = redis.zrange(timeline_key, 0, 0, with_scores: true).first.last.to_i
        # Compare against the *followed* account's last post, not the feed
        # owner's: the question is whether target_account has anything new
        # enough to stay on the feed.
        last_status_score = Mastodon::Snowflake.id_at(target_account.last_status_at)

        # If the feed is full and this account has not posted more recently
        # than the last item on the feed, then we can skip the whole account
        # because none of its statuses would stay on the feed anyway
        next if last_status_score < oldest_home_score
      end

      statuses = target_account.statuses.where(visibility: [:public, :unlisted, :private]).includes(:preloadable_poll, reblog: :account).limit(limit)
      crutches = build_crutches(account.id, statuses)

      statuses.each do |status|
        next if filter_from_home?(status, account.id, crutches)

        add_to_feed(:home, account.id, status, aggregate)
      end

      trim(:home, account.id)
    end
  end
  129. private
  # Tells whether the "subscribed:<timeline_id>" key exists in Redis.
  #
  # Always returns true or false. Depending on the redis-rb version,
  # `exists` answers with either a boolean or an Integer count of matching
  # keys; since 0 is truthy in Ruby, an Integer reply is compared against
  # zero instead of being used as a condition directly.
  def push_update_required?(timeline_id)
    reply = redis.exists("subscribed:#{timeline_id}")

    reply.is_a?(Integer) ? reply.positive? : !!reply
  end
  # True when the receiver blocks any of the given accounts, or mutes any of
  # them. Outside the :home context only mutes with hide_notifications set
  # are taken into account.
  def blocks_or_mutes?(receiver_id, account_ids, context)
    return true if Block.where(account_id: receiver_id, target_account_id: account_ids).any?

    mute_conditions = { account_id: receiver_id, target_account_id: account_ids }
    mute_conditions[:hide_notifications] = true unless context == :home

    Mute.where(mute_conditions).any?
  end
  # Decides whether +status+ must be kept out of the home feed of the
  # account with id +receiver_id+.
  #
  # +crutches+ is the lookup hash produced by build_crutches. It is shared
  # between all statuses of a batch, so it is only read here, never
  # modified.
  #
  # Returns true when the status should be filtered, false otherwise.
  def filter_from_home?(status, receiver_id, crutches)
    return false if receiver_id == status.account_id
    return true  if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?)
    return true  if phrase_filtered?(status, receiver_id, :home)

    # `+` allocates a fresh array, so the appends below cannot leak into the
    # mention list stored in the shared crutches hash.
    check_for_blocks = (crutches[:active_mentions][status.id] || []) + [status.account_id]

    if status.reblog?
      check_for_blocks << status.reblog.account_id
      check_for_blocks.concat(crutches[:active_mentions][status.reblog_of_id] || [])
    end

    return true if check_for_blocks.any? { |target_account_id| crutches[:blocking][target_account_id] || crutches[:muting][target_account_id] }

    if status.reply? && !status.in_reply_to_account_id.nil?                # Filter out if it's a reply
      should_filter   = !crutches[:following][status.in_reply_to_account_id] # and I'm not following the person it's a reply to
      should_filter &&= receiver_id != status.in_reply_to_account_id         # and it's not a reply to me
      should_filter &&= status.account_id != status.in_reply_to_account_id   # and it's not a self-reply

      return !!should_filter
    elsif status.reblog?                                                     # Filter out a reblog
      should_filter   = crutches[:hiding_reblogs][status.account_id]         # if the reblogger's reblogs are suppressed
      should_filter ||= crutches[:blocked_by][status.reblog.account_id]      # or if the author of the reblogged status is blocking me
      should_filter ||= crutches[:domain_blocking][status.reblog.account.domain] # or the author's domain is blocked

      return !!should_filter
    end

    false
  end
  # Decides whether +status+ must be kept out of the mentions timeline of
  # the account with id +receiver_id+.
  #
  # Filters the receiver's own statuses, phrase-filtered statuses, statuses
  # involving accounts the receiver blocks or mutes, and statuses from
  # silenced accounts the receiver does not follow.
  def filter_from_mentions?(status, receiver_id)
    return true if receiver_id == status.account_id
    return true if phrase_filtered?(status, receiver_id, :notifications)

    # This filter is called from NotifyService, but already after the sender of
    # the notification has been checked for mute/block. Therefore, it's not
    # necessary to check the author of the toot for mute/block again
    check_for_blocks = status.active_mentions.pluck(:account_id)
    # Append the id column itself: the list holds account ids, and reading
    # the association instead would put a record among them.
    check_for_blocks << status.in_reply_to_account_id if status.reply? && !status.in_reply_to_account_id.nil?

    should_filter   = blocks_or_mutes?(receiver_id, check_for_blocks, :mentions) # Filter if it's from someone I blocked, in reply to someone I blocked, or mentioning someone I blocked (or muted)
    should_filter ||= (status.account.silenced? && !Follow.where(account_id: receiver_id, target_account_id: status.account_id).exists?) # or if the account is silenced and I'm not following them

    should_filter
  end
  # Checks +status+ (or, for a reblog, the reblogged status) against the
  # receiver's cached custom filters that apply to +context+ and have not
  # expired.
  #
  # The plain text, the spoiler text and the poll options are all tested.
  # Returns false when no filter applies, a truthy value on a match and a
  # falsy value otherwise.
  def phrase_filtered?(status, receiver_id, context)
    cached_filters = Rails.cache.fetch("filters:#{receiver_id}") { CustomFilter.where(account_id: receiver_id).active_irreversible.to_a }.to_a
    context_name   = context.to_s

    # Use the non-destructive select/map: the array may be the very object
    # handed out by the cache, and must not be rewritten in place.
    applicable = cached_filters.select { |filter| filter.context.include?(context_name) && !filter.expired? }

    patterns = applicable.map do |filter|
      if filter.whole_word
        # Only anchor on a word boundary where the phrase itself starts or
        # ends with a word character.
        sb = filter.phrase =~ /\A[[:word:]]/ ? '\b' : ''
        eb = filter.phrase =~ /[[:word:]]\z/ ? '\b' : ''

        /(?mix:#{sb}#{Regexp.escape(filter.phrase)}#{eb})/
      else
        /#{Regexp.escape(filter.phrase)}/i
      end
    end

    return false if patterns.empty?

    combined_regex = patterns.reduce { |memo, obj| Regexp.union(memo, obj) }
    status         = status.reblog if status.reblog?

    !combined_regex.match(Formatter.instance.plaintext(status)).nil? ||
      (status.spoiler_text.present? && !combined_regex.match(status.spoiler_text).nil?) ||
      (status.preloadable_poll && !combined_regex.match(status.preloadable_poll.options.join("\n\n")).nil?)
  end
  # Inserts a status into a feed. Returns true when the status was written
  # to the feed and false when it was skipped. This is an internal helper:
  # trimming the feed and pushing updates remain the caller's job.
  #
  # Reblogs are deduplicated unless aggregate_reblogs is explicitly false
  # (nil counts as enabled).
  def add_to_feed(timeline_type, account_id, status, aggregate_reblogs = true)
    feed_key            = key(timeline_type, account_id)
    tracked_reblogs_key = key(timeline_type, account_id, 'reblogs')
    deduplicate_reblog  = status.reblog? && (aggregate_reblogs.nil? || aggregate_reblogs)

    unless deduplicate_reblog
      # A reblog may reach the feed before the status it reblogs, because of
      # delays in the worker delivering the original, late additions from
      # merging timelines, and other reasons. If such a reblog is already
      # tracked, do not insert the status again.
      return false unless redis.zrevrank(tracked_reblogs_key, status.id).nil?

      redis.zadd(feed_key, status.id, status.id)
      return true
    end

    # If the original status, or a reblog of it, sits within REBLOG_FALLOFF
    # entries from the top of the feed, do not insert it again.
    feed_rank = redis.zrevrank(feed_key, status.reblog_of_id)
    return false if !feed_rank.nil? && feed_rank < FeedManager::REBLOG_FALLOFF

    unless redis.zrevrank(tracked_reblogs_key, status.reblog_of_id).nil?
      # Another reblog of the same status is already tracked, so record this
      # one as an "extra" reblog in the per-status set and leave the feed
      # itself alone.
      extras_key = key(timeline_type, account_id, "reblogs:#{status.reblog_of_id}")
      redis.sadd(extras_key, status.id)
      return false
    end

    # First reblog of this status we have seen: add it to the feed and note
    # which status it reblogs.
    redis.zadd(feed_key, status.id, status.id)
    redis.zadd(tracked_reblogs_key, status.id, status.reblog_of_id)

    true
  end
  # Takes a single status out of a feed, keeping the reblog bookkeeping
  # consistent, and returns the result of the final removal from the feed
  # (false straight away if a deduplicated reblog is not in the feed at
  # all). Like `add_to_feed`, this never triggers push updates; callers do
  # that themselves when appropriate.
  def remove_from_feed(timeline_type, account_id, status, aggregate_reblogs = true)
    feed_key            = key(timeline_type, account_id)
    tracked_reblogs_key = key(timeline_type, account_id, 'reblogs')

    if status.reblog? && (aggregate_reblogs.nil? || aggregate_reblogs)
      # 1. Nothing to do when the reblogging status is not in the feed.
      return false if redis.zrevrank(feed_key, status.id).nil?

      # 2. Forget this reblog: drop it from the set of extra reblogs and
      #    stop tracking the reblogged status.
      extras_key = key(timeline_type, account_id, "reblogs:#{status.reblog_of_id}")
      redis.srem(extras_key, status.id)
      redis.zrem(tracked_reblogs_key, status.reblog_of_id)

      # 3. If another reblog remains in the set, put the one with the
      #    smallest id back into the feed and track it instead. The set is
      #    expected to be small, and the oldest candidate seems the best
      #    one to show.
      replacement = redis.smembers(extras_key).map(&:to_i).min

      if replacement
        redis.zadd(feed_key, replacement, replacement)
        redis.zadd(tracked_reblogs_key, replacement, status.reblog_of_id)
      end

      # 4. The reblogging status itself is removed below, as for any status.
    else
      # The original is going away, so its reblog references are useless.
      redis.del(key(timeline_type, account_id, "reblogs:#{status.id}"))
      redis.zrem(tracked_reblogs_key, status.id)
    end

    redis.zrem(feed_key, status.id)
  end
  # Pre-loads, in a handful of queries, everything filter_from_home? needs
  # to judge a batch of statuses for one receiver.
  #
  # Returns a hash with these entries:
  #   :active_mentions - status id => array of mentioned account ids
  #   :following, :hiding_reblogs, :blocking, :muting, :blocked_by
  #                    - account id => true
  #   :domain_blocking - domain => true
  def build_crutches(receiver_id, statuses)
    crutches = {}

    # Turns a list of values into a { value => true } membership table.
    flag_all = ->(values) { values.each_with_object({}) { |value, mapping| mapping[value] = true } }

    mentioned_status_ids = statuses.flat_map { |s| [s.id, s.reblog_of_id] }.compact

    crutches[:active_mentions] = Mention.active.where(status_id: mentioned_status_ids).pluck(:status_id, :account_id).each_with_object({}) { |(id, account_id), mapping| (mapping[id] ||= []).push(account_id) }

    check_for_blocks = statuses.flat_map do |s|
      # `+` copies the mention list, so the appends below stay out of the
      # :active_mentions table that is returned to the caller.
      arr = (crutches[:active_mentions][s.id] || []) + [s.account_id]

      if s.reblog?
        arr << s.reblog.account_id
        arr.concat(crutches[:active_mentions][s.reblog_of_id] || [])
      end

      arr
    end

    crutches[:following]       = flag_all.call(Follow.where(account_id: receiver_id, target_account_id: statuses.map(&:in_reply_to_account_id).compact).pluck(:target_account_id))
    crutches[:hiding_reblogs]  = flag_all.call(Follow.where(account_id: receiver_id, target_account_id: statuses.map { |s| s.account_id if s.reblog? }.compact, show_reblogs: false).pluck(:target_account_id))
    crutches[:blocking]        = flag_all.call(Block.where(account_id: receiver_id, target_account_id: check_for_blocks).pluck(:target_account_id))
    crutches[:muting]          = flag_all.call(Mute.where(account_id: receiver_id, target_account_id: check_for_blocks).pluck(:target_account_id))
    crutches[:domain_blocking] = flag_all.call(AccountDomainBlock.where(account_id: receiver_id, domain: statuses.map { |s| s.reblog&.account&.domain }.compact).pluck(:domain))
    crutches[:blocked_by]      = flag_all.call(Block.where(target_account_id: receiver_id, account_id: statuses.map { |s| s.reblog&.account_id }.compact).pluck(:account_id))

    crutches
  end
  282. end