feed_manager.rb 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. # frozen_string_literal: true
  2. require 'singleton'
  3. class FeedManager
  4. include Singleton
  5. include Redisable
  6. # Maximum number of items stored in a single feed
  7. MAX_ITEMS = 400
  8. # Number of items in the feed since last reblog of status
  9. # before the new reblog will be inserted. Must be <= MAX_ITEMS
  10. # or the tracking sets will grow forever
  11. REBLOG_FALLOFF = 40
  # Yield every account whose user signed in more recently than
  # User::ACTIVE_DURATION ago
  # @yield [Account]
  # @return [void]
  def with_active_accounts(&block)
    active_since = User::ACTIVE_DURATION.ago
    recently_active = Account.joins(:user).where('users.current_sign_in_at > ?', active_since)

    recently_active.find_each(&block)
  end
  # Build the Redis key under which a feed (or one of its auxiliary
  # structures, when a subtype is given) is stored
  # @param [Symbol] type
  # @param [Integer] id
  # @param [Symbol] subtype
  # @return [String]
  def key(type, id, subtype = nil)
    base = "feed:#{type}:#{id}"

    subtype ? "#{base}:#{subtype}" : base
  end
  # Decide whether a status must be kept out of a feed of the given type.
  # Unknown timeline types are never filtered.
  # @param [Symbol] timeline_type
  # @param [Status] status
  # @param [Account|List] receiver
  # @return [Boolean]
  def filter?(timeline_type, status, receiver)
    case timeline_type
    when :mentions
      filter_from_mentions?(status, receiver.id)
    when :home
      owner_id = receiver.id
      filter_from_home?(status, owner_id, build_crutches(owner_id, [status]))
    when :list
      # A list feed applies the list's own rules first, then the home feed
      # rules of the account owning the list
      list_verdict = filter_from_list?(status, receiver)
      return list_verdict if list_verdict

      owner_id = receiver.account_id
      filter_from_home?(status, owner_id, build_crutches(owner_id, [status]))
    else
      false
    end
  end
  # Insert a status into an account's home feed, trim the feed, and queue a
  # streaming update when a client is subscribed to that timeline
  # @param [Account] account
  # @param [Status] status
  # @return [Boolean] false when the status was not added to the feed
  def push_to_home(account, status)
    aggregate = account.user&.aggregates_reblogs?
    return false unless add_to_feed(:home, account.id, status, aggregate)

    trim(:home, account.id)

    channel = "timeline:#{account.id}"
    PushUpdateWorker.perform_async(account.id, status.id, channel) if push_update_required?(channel)

    true
  end
  # Take a status out of an account's home feed and publish a delete event
  # on the account's timeline channel
  # @param [Account] account
  # @param [Status] status
  # @return [Boolean] false when nothing was removed from the feed
  def unpush_from_home(account, status)
    removed = remove_from_feed(:home, account.id, status, account.user&.aggregates_reblogs?)
    return false unless removed

    deletion_event = Oj.dump(event: :delete, payload: status.id.to_s)
    redis.publish("timeline:#{account.id}", deletion_event)

    true
  end
  # Insert a status into a list feed, trim the feed, and queue a streaming
  # update when a client is subscribed to that list's timeline
  # @param [List] list
  # @param [Status] status
  # @return [Boolean] false when the status was filtered or not added
  def push_to_list(list, status)
    return false if filter_from_list?(status, list)
    return false unless add_to_feed(:list, list.id, status, list.account.user&.aggregates_reblogs?)

    trim(:list, list.id)

    channel = "timeline:list:#{list.id}"
    PushUpdateWorker.perform_async(list.account_id, status.id, channel) if push_update_required?(channel)

    true
  end
  # Take a status out of a list feed and publish a delete event on the
  # list's timeline channel
  # @param [List] list
  # @param [Status] status
  # @return [Boolean] false when nothing was removed from the feed
  def unpush_from_list(list, status)
    removed = remove_from_feed(:list, list.id, status, list.account.user&.aggregates_reblogs?)
    return false unless removed

    deletion_event = Oj.dump(event: :delete, payload: status.id.to_s)
    redis.publish("timeline:list:#{list.id}", deletion_event)

    true
  end
  # Merge statuses of from_account into the home feed of into_account.
  # At most MAX_ITEMS / 4 statuses are considered; once the feed already
  # holds that many entries, only statuses with an id above the feed's
  # lowest score are looked at.
  # @param [Account] from_account
  # @param [Account] into_account
  # @return [void]
  def merge_into_home(from_account, into_account)
    feed_key = key(:home, into_account.id)
    aggregate = into_account.user&.aggregates_reblogs?
    batch_size = FeedManager::MAX_ITEMS / 4

    candidates = from_account.statuses
                             .where(visibility: [:public, :unlisted, :private])
                             .includes(:preloadable_poll, reblog: :account)
                             .limit(batch_size)

    if redis.zcard(feed_key) >= batch_size
      oldest_score = redis.zrange(feed_key, 0, 0, with_scores: true).first.last.to_i
      candidates = candidates.where('id > ?', oldest_score)
    end

    statuses = candidates.to_a
    crutches = build_crutches(into_account.id, statuses)

    statuses.each do |status|
      add_to_feed(:home, into_account.id, status, aggregate) unless filter_from_home?(status, into_account.id, crutches)
    end

    trim(:home, into_account.id)
  end
  # Merge statuses of from_account into a list feed.
  # At most MAX_ITEMS / 4 statuses are considered; once the feed already
  # holds that many entries, only statuses with an id above the feed's
  # lowest score are looked at. Both the list owner's home rules and the
  # list's own rules are applied.
  # @param [Account] from_account
  # @param [List] list
  # @return [void]
  def merge_into_list(from_account, list)
    feed_key = key(:list, list.id)
    aggregate = list.account.user&.aggregates_reblogs?
    batch_size = FeedManager::MAX_ITEMS / 4

    candidates = from_account.statuses
                             .where(visibility: [:public, :unlisted, :private])
                             .includes(:preloadable_poll, reblog: :account)
                             .limit(batch_size)

    if redis.zcard(feed_key) >= batch_size
      oldest_list_score = redis.zrange(feed_key, 0, 0, with_scores: true).first.last.to_i
      candidates = candidates.where('id > ?', oldest_list_score)
    end

    statuses = candidates.to_a
    crutches = build_crutches(list.account_id, statuses)

    statuses.each do |status|
      rejected = filter_from_home?(status, list.account_id, crutches) || filter_from_list?(status, list)
      add_to_feed(:list, list.id, status, aggregate) unless rejected
    end

    trim(:list, list.id)
  end
  # Remove the statuses of from_account from the home feed of into_account.
  # Only statuses with an id above the feed's lowest score are visited
  # (every status when the feed is empty).
  # @param [Account] from_account
  # @param [Account] into_account
  # @return [void]
  def unmerge_from_home(from_account, into_account)
    feed_key = key(:home, into_account.id)
    oldest_entry = redis.zrange(feed_key, 0, 0, with_scores: true)&.first
    oldest_score = oldest_entry&.last&.to_i || 0

    removable = from_account.statuses.select('id, reblog_of_id').where('id > ?', oldest_score).reorder(nil)

    removable.find_each do |status|
      remove_from_feed(:home, into_account.id, status, into_account.user&.aggregates_reblogs?)
    end
  end
  # Remove the statuses of from_account from a list feed.
  # Only statuses with an id above the feed's lowest score are visited
  # (every status when the feed is empty).
  # @param [Account] from_account
  # @param [List] list
  # @return [void]
  def unmerge_from_list(from_account, list)
    feed_key = key(:list, list.id)
    oldest_entry = redis.zrange(feed_key, 0, 0, with_scores: true)&.first
    oldest_score = oldest_entry&.last&.to_i || 0

    removable = from_account.statuses.select('id, reblog_of_id').where('id > ?', oldest_score).reorder(nil)

    removable.find_each do |status|
      remove_from_feed(:list, list.id, status, list.account.user&.aggregates_reblogs?)
    end
  end
  # Remove from an account's home feed every status that was written by
  # target_account, reblogs one of its statuses, or (directly or through
  # the reblogged status) actively mentions it
  # @param [Account] account
  # @param [Account] target_account
  # @return [void]
  def clear_from_home(account, target_account)
    feed_key = key(:home, account.id)
    feed_status_ids = redis.zrange(feed_key, 0, -1)
    statuses = Status.where(id: feed_status_ids).select(:id, :reblog_of_id, :account_id).to_a

    reblog_source_ids = statuses.map(&:reblog_of_id).compact
    reblogged_ids = Status.where(id: reblog_source_ids, account: target_account).pluck(:id)

    mention_lookup_ids = statuses.flat_map { |s| [s.id, s.reblog_of_id] }.compact
    with_mentions_ids = Mention.active.where(status_id: mention_lookup_ids, account: target_account).pluck(:status_id)

    offending = statuses.select do |status|
      next true if status.account_id == target_account.id
      next true if reblogged_ids.include?(status.reblog_of_id)

      with_mentions_ids.include?(status.id) || with_mentions_ids.include?(status.reblog_of_id)
    end

    offending.each { |status| unpush_from_home(account, status) }
  end
  # Populate home feed of account from scratch.
  #
  # The account's own statuses are inserted first (up to MAX_ITEMS / 2),
  # then the statuses of every followed account are merged in, trimming the
  # feed after each followed account.
  # @param [Account] account
  # @return [void]
  def populate_home(account)
    limit = FeedManager::MAX_ITEMS / 2
    aggregate = account.user&.aggregates_reblogs?
    timeline_key = key(:home, account.id)

    account.statuses.limit(limit).each do |status|
      add_to_feed(:home, account.id, status, aggregate)
    end

    account.following.includes(:account_stat).find_each do |target_account|
      if redis.zcard(timeline_key) >= limit
        oldest_home_score = redis.zrange(timeline_key, 0, 0, with_scores: true).first.last.to_i

        # The comparison must look at the followed account being processed
        # (target_account), not at the owner of the feed.
        last_status_score = Mastodon::Snowflake.id_at(target_account.last_status_at)

        # If the feed is full and this account has not posted more recently
        # than the last item on the feed, then we can skip the whole account
        # because none of its statuses would stay on the feed anyway
        next if last_status_score < oldest_home_score
      end

      statuses = target_account.statuses.where(visibility: [:public, :unlisted, :private]).includes(:preloadable_poll, reblog: :account).limit(limit)
      crutches = build_crutches(account.id, statuses)

      statuses.each do |status|
        next if filter_from_home?(status, account.id, crutches)

        add_to_feed(:home, account.id, status, aggregate)
      end

      trim(:home, account.id)
    end
  end
  189. private
  # Cut a feed down to MAX_ITEMS entries and forget reblog tracking data
  # for everything at or below the score of the REBLOG_FALLOFF'th newest
  # entry
  # @param [Symbol] type
  # @param [Integer] timeline_id
  # @return [void]
  def trim(type, timeline_id)
    feed_key = key(type, timeline_id)
    tracking_key = key(type, timeline_id, 'reblogs')

    # Drop everything ranked after the MAX_ITEMS newest entries
    redis.zremrangebyrank(feed_key, 0, -(FeedManager::MAX_ITEMS + 1))

    # Look up the score of the entry sitting at the falloff position;
    # a feed shorter than that has nothing to clean up
    position = FeedManager::REBLOG_FALLOFF
    falloff_entry = redis.zrevrange(feed_key, position, position, with_scores: true)
    falloff_score = falloff_entry&.first&.last&.to_i
    return if falloff_score.nil?

    stale_reblogs = redis.zrangebyscore(tracking_key, 0, falloff_score)

    stale_reblogs.each do |reblogged_id|
      # Untrack the reblog *before* deleting its helper set to avoid races
      redis.zrem(tracking_key, reblogged_id)

      # Dropping the set of extra reblogs means a deleted reblog will not be
      # replaced automatically, but any new reblog can enter the feed again
      redis.del(key(type, timeline_id, "reblogs:#{reblogged_id}"))
    end
  end
  # Tell whether the "subscribed:<timeline_key>" marker exists in Redis,
  # i.e. whether a streaming API client is connected for the given feed
  # @param [String] timeline_key
  # @return [Boolean]
  def push_update_required?(timeline_key)
    subscription_key = "subscribed:#{timeline_key}"

    redis.exists?(subscription_key)
  end
  # Check whether the receiver blocks or mutes any of the given accounts.
  # In the :home context every mute counts; in any other context only mutes
  # that also hide notifications count.
  # @param [Integer] receiver_id
  # @param [Array<Integer>] account_ids
  # @param [Symbol] context
  # @return [Boolean]
  def blocks_or_mutes?(receiver_id, account_ids, context)
    return true if Block.where(account_id: receiver_id, target_account_id: account_ids).any?

    mute_conditions = { account_id: receiver_id, target_account_id: account_ids }
    mute_conditions[:hide_notifications] = true unless context == :home

    Mute.where(mute_conditions).any?
  end
  # Check if status should not be added to the home feed.
  #
  # The crutches hash is only read: the list of accounts to check against
  # blocks and mutes is built as a fresh array, so repeated calls with the
  # same crutches leave them unchanged.
  # @param [Status] status
  # @param [Integer] receiver_id
  # @param [Hash] crutches as returned by build_crutches
  # @return [Boolean]
  def filter_from_home?(status, receiver_id, crutches)
    # Own statuses are always shown
    return false if receiver_id == status.account_id
    # Replies whose parent status or parent author is unknown are dropped
    return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?)
    return true if phrase_filtered?(status, receiver_id, :home)

    # `+` allocates a new array instead of appending to the one stored in
    # crutches[:active_mentions]
    check_for_blocks = (crutches[:active_mentions][status.id] || []) + [status.account_id]

    if status.reblog?
      check_for_blocks.push(status.reblog.account_id)
      check_for_blocks.concat(crutches[:active_mentions][status.reblog_of_id] || [])
    end

    return true if check_for_blocks.any? { |target_account_id| crutches[:blocking][target_account_id] || crutches[:muting][target_account_id] }

    if status.reply? && !status.in_reply_to_account_id.nil? # Filter out if it's a reply
      should_filter = !crutches[:following][status.in_reply_to_account_id] # and I'm not following the person it's a reply to
      should_filter &&= receiver_id != status.in_reply_to_account_id # and it's not a reply to me
      should_filter &&= status.account_id != status.in_reply_to_account_id # and it's not a self-reply

      return !!should_filter
    elsif status.reblog? # Filter out a reblog
      should_filter = crutches[:hiding_reblogs][status.account_id] # if the reblogger's reblogs are suppressed
      should_filter ||= crutches[:blocked_by][status.reblog.account_id] # or if the author of the reblogged status is blocking me
      should_filter ||= crutches[:domain_blocking][status.reblog.account.domain] # or the author's domain is blocked

      return !!should_filter
    end

    false
  end
  # Check if status should not be added to the mentions feed
  # @see NotifyService
  # @param [Status] status
  # @param [Integer] receiver_id
  # @return [Boolean]
  def filter_from_mentions?(status, receiver_id)
    return true if receiver_id == status.account_id
    return true if phrase_filtered?(status, receiver_id, :notifications)

    # This filter is called from NotifyService, but already after the sender of
    # the notification has been checked for mute/block. Therefore, it's not
    # necessary to check the author of the toot for mute/block again
    check_for_blocks = status.active_mentions.pluck(:account_id)

    # Keep the list homogeneous: blocks_or_mutes? takes account ids, so add
    # the id of the replied-to account rather than the Account record
    check_for_blocks.push(status.in_reply_to_account_id) if status.reply? && !status.in_reply_to_account_id.nil?

    should_filter = blocks_or_mutes?(receiver_id, check_for_blocks, :mentions) # Filter if it's from someone I blocked, in reply to someone I blocked, or mentioning someone I blocked (or muted)
    should_filter ||= (status.account.silenced? && !Follow.where(account_id: receiver_id, target_account_id: status.account_id).exists?) # or if the account is silenced and I'm not following them

    should_filter
  end
  # Check if status should not be added to the list feed.
  # Only replies to somebody other than the author can be filtered: they
  # are kept when addressed to the list owner, when the list shows all
  # replies, or when it shows replies to list members and the replied-to
  # account is on the list.
  # @param [Status] status
  # @param [List] list
  # @return [Boolean]
  def filter_from_list?(status, list)
    return false unless status.reply? && status.in_reply_to_account_id != status.account_id
    return false if status.in_reply_to_account_id == list.account_id
    return false if list.show_all_replies?
    return true unless list.show_list_replies?

    !ListAccount.where(list_id: list.id, account_id: status.in_reply_to_account_id).exists?
  end
  # Check if the status hits a phrase filter.
  #
  # The receiver's irreversible filters are read through Rails.cache,
  # narrowed to the given context and to non-expired filters, compiled into
  # one regular expression and matched against the status text, spoiler
  # text, poll options and media descriptions. For a reblog the reblogged
  # status is inspected.
  # @param [Status] status
  # @param [Integer] receiver_id
  # @param [Symbol] context
  # @return [Boolean]
  def phrase_filtered?(status, receiver_id, context)
    cached_filters = Rails.cache.fetch("filters:#{receiver_id}") { CustomFilter.where(account_id: receiver_id).active_irreversible.to_a }.to_a

    # Non-destructive select/map: `to_a` on an Array returns the very same
    # object, so the bang variants would modify the value handed out by the
    # cache
    applicable_filters = cached_filters.select { |filter| filter.context.include?(context.to_s) && !filter.expired? }
    return false if applicable_filters.empty?

    patterns = applicable_filters.map do |filter|
      if filter.whole_word
        # Only anchor on a word boundary where the phrase itself starts or
        # ends with a word character
        sb = filter.phrase.match?(/\A[[:word:]]/) ? '\b' : ''
        eb = filter.phrase.match?(/[[:word:]]\z/) ? '\b' : ''

        /(?mix:#{sb}#{Regexp.escape(filter.phrase)}#{eb})/
      else
        /#{Regexp.escape(filter.phrase)}/i
      end
    end

    combined_regex = Regexp.union(patterns)
    status = status.reblog if status.reblog?

    combined_text = [
      Formatter.instance.plaintext(status),
      status.spoiler_text,
      status.preloadable_poll ? status.preloadable_poll.options.join("\n\n") : nil,
      status.media_attachments.map(&:description).join("\n\n"),
    ].compact.join("\n\n")

    combined_regex.match?(combined_text)
  end
  # Insert a status into a feed and report whether it was actually added.
  # This is an internal helper: trimming the feed and pushing streaming
  # updates are left to the caller.
  # @param [Symbol] timeline_type
  # @param [Integer] account_id
  # @param [Status] status
  # @param [Boolean] aggregate_reblogs nil is treated like true
  # @return [Boolean]
  def add_to_feed(timeline_type, account_id, status, aggregate_reblogs = true)
    timeline_key = key(timeline_type, account_id)
    reblog_key = key(timeline_type, account_id, 'reblogs')

    unless status.reblog? && aggregate_reblogs != false
      # A reblog may have reached the feed before the original status
      # (delayed delivery of the original, late addition through a timeline
      # merge, ...). When a reblog of this status is already tracked, do not
      # insert the status again.
      return false unless redis.zscore(reblog_key, status.id).nil?

      redis.zadd(timeline_key, status.id, status.id)
      return true
    end

    # Skip the reblog when the original status, or a reblog of it, still
    # sits within the REBLOG_FALLOFF newest entries of the feed
    rank = redis.zrevrank(timeline_key, status.reblog_of_id)
    return false if rank && rank < FeedManager::REBLOG_FALLOFF

    # The sorted set at `reblog_key` maps reblogged statuses to the reblog
    # currently representing them in the feed; `nx` only succeeds when the
    # reblogged status is not tracked yet
    first_reblog = redis.zadd(reblog_key, status.id, status.reblog_of_id, nx: true)

    unless first_reblog
      # Another reblog of the same status is already in the feed: remember
      # this one as an "extra" reblog without inserting it
      reblog_set_key = key(timeline_type, account_id, "reblogs:#{status.reblog_of_id}")
      redis.sadd(reblog_set_key, status.id)
      return false
    end

    redis.zadd(timeline_key, status.id, status.id)
    true
  end
  # Take a single status out of a feed, keeping the reblog tracking data
  # consistent. Like `add_to_feed`, this does not trigger push updates;
  # callers do so if appropriate.
  # @param [Symbol] timeline_type
  # @param [Integer] account_id
  # @param [Status] status
  # @param [Boolean] aggregate_reblogs nil is treated like true
  # @return [Boolean] false when an aggregated reblog is not in the feed,
  #   otherwise the result of removing the status from the feed's sorted set
  def remove_from_feed(timeline_type, account_id, status, aggregate_reblogs = true)
    timeline_key = key(timeline_type, account_id)
    reblog_key = key(timeline_type, account_id, 'reblogs')

    if status.reblog? && aggregate_reblogs != false
      # Nothing to do when the reblog itself is not in the feed
      return false if redis.zrevrank(timeline_key, status.id).nil?

      # Forget this reblog, both as an "extra" reblog and as the tracked one
      reblog_set_key = key(timeline_type, account_id, "reblogs:#{status.reblog_of_id}")
      redis.srem(reblog_set_key, status.id)
      redis.zrem(reblog_key, status.reblog_of_id)

      # Promote the oldest remaining reblog (smallest id), if any, so the
      # reblogged status stays represented in the feed
      replacement = redis.smembers(reblog_set_key).map(&:to_i).min

      if replacement
        redis.zadd(timeline_key, replacement, replacement)
        redis.zadd(reblog_key, replacement, status.reblog_of_id)
      end
    else
      # The status is handled as an original: its reblog references are of
      # no further use
      redis.del(key(timeline_type, account_id, "reblogs:#{status.id}"))
      redis.zrem(reblog_key, status.id)
    end

    redis.zrem(timeline_key, status.id)
  end
  # Pre-fetch various objects and relationships for given statuses that
  # are going to be checked by the filtering methods.
  #
  # The returned hash contains:
  # - :active_mentions  status id => ids of the accounts actively mentioned
  # - :following        replied-to account ids the receiver follows
  # - :hiding_reblogs   reblogging account ids followed with show_reblogs: false
  # - :blocking         account ids the receiver blocks
  # - :muting           account ids the receiver mutes
  # - :domain_blocking  domains of reblogged authors blocked by the receiver
  # - :blocked_by       reblogged authors' ids that block the receiver
  # Every entry except :active_mentions maps the matching key to true.
  # @param [Integer] receiver_id
  # @param [Array<Status>] statuses
  # @return [Hash]
  def build_crutches(receiver_id, statuses)
    crutches = {}

    mention_lookup_ids = statuses.flat_map { |s| [s.id, s.reblog_of_id] }.compact
    crutches[:active_mentions] = Mention.active.where(status_id: mention_lookup_ids).pluck(:status_id, :account_id).each_with_object({}) { |(id, account_id), mapping| (mapping[id] ||= []).push(account_id) }

    check_for_blocks = statuses.flat_map do |s|
      # `+` allocates a new array, so the lists stored in
      # crutches[:active_mentions] keep containing mentioned accounts only
      arr = (crutches[:active_mentions][s.id] || []) + [s.account_id]

      if s.reblog?
        arr.push(s.reblog.account_id)
        arr.concat(crutches[:active_mentions][s.reblog_of_id] || [])
      end

      arr
    end

    crutches[:following] = Follow.where(account_id: receiver_id, target_account_id: statuses.map(&:in_reply_to_account_id).compact).pluck(:target_account_id).each_with_object({}) { |id, mapping| mapping[id] = true }
    crutches[:hiding_reblogs] = Follow.where(account_id: receiver_id, target_account_id: statuses.map { |s| s.account_id if s.reblog? }.compact, show_reblogs: false).pluck(:target_account_id).each_with_object({}) { |id, mapping| mapping[id] = true }
    crutches[:blocking] = Block.where(account_id: receiver_id, target_account_id: check_for_blocks).pluck(:target_account_id).each_with_object({}) { |id, mapping| mapping[id] = true }
    crutches[:muting] = Mute.where(account_id: receiver_id, target_account_id: check_for_blocks).pluck(:target_account_id).each_with_object({}) { |id, mapping| mapping[id] = true }
    crutches[:domain_blocking] = AccountDomainBlock.where(account_id: receiver_id, domain: statuses.map { |s| s.reblog&.account&.domain }.compact).pluck(:domain).each_with_object({}) { |domain, mapping| mapping[domain] = true }
    crutches[:blocked_by] = Block.where(target_account_id: receiver_id, account_id: statuses.map { |s| s.reblog&.account_id }.compact).pluck(:account_id).each_with_object({}) { |id, mapping| mapping[id] = true }

    crutches
  end
  421. end