account_statuses_cleanup_policy.rb 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. # frozen_string_literal: true
  2. # == Schema Information
  3. #
  4. # Table name: account_statuses_cleanup_policies
  5. #
  6. # id :bigint(8) not null, primary key
  7. # account_id :bigint(8) not null
  8. # enabled :boolean default(TRUE), not null
  9. # min_status_age :integer default(1209600), not null
  10. # keep_direct :boolean default(TRUE), not null
  11. # keep_pinned :boolean default(TRUE), not null
  12. # keep_polls :boolean default(FALSE), not null
  13. # keep_media :boolean default(FALSE), not null
  14. # keep_self_fav :boolean default(TRUE), not null
  15. # keep_self_bookmark :boolean default(TRUE), not null
  16. # min_favs :integer
  17. # min_reblogs :integer
  18. # created_at :datetime not null
  19. # updated_at :datetime not null
  20. #
  # Per-account policy describing which of an account's own old statuses are
  # candidates for automatic deletion and which are exempt from it.
  class AccountStatusesCleanupPolicy < ApplicationRecord
    include Redisable

    # Values accepted for `min_status_age` (enforced by the inclusion
    # validation below).
    ALLOWED_MIN_STATUS_AGE = [
      1.week.seconds,
      2.weeks.seconds,
      1.month.seconds,
      2.months.seconds,
      3.months.seconds,
      6.months.seconds,
      1.year.seconds,
      2.years.seconds,
    ].freeze

    # Boolean attributes that, when true, exclude a category of statuses from
    # deletion (see `statuses_to_delete`).
    EXCEPTION_BOOLS = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze

    # Integer attributes: statuses whose favourite/reblog count reaches the
    # configured value are excluded from deletion (see `without_popular_scope`).
    EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze

    # Depending on the cleanup policy, the query to discover the next
    # statuses to delete may get expensive if the account has a lot of old
    # statuses otherwise excluded from deletion by the other exceptions.
    #
    # Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
    # old statuses to be considered for deletion prior to checking exceptions.
    #
    # This is used in `compute_cutoff_id` to provide a `max_id` to
    # `statuses_to_delete`.
    EARLY_SEARCH_CUTOFF = 5_000

    belongs_to :account

    validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
    validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
    validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
    validate :validate_local_account

    before_save :update_last_inspected

    # Builds the relation of this account's statuses that the policy allows
    # deleting, ordered by ascending id.
    #
    # @param limit [Integer] maximum number of rows in the relation
    # @param max_id [Integer, nil] optional upper bound on the status id; the
    #   bound actually applied is never above the id derived from
    #   `min_status_age` (see `old_enough_scope`)
    # @param min_id [Integer, nil] optional inclusive lower bound on the status id
    # @return [ActiveRecord::Relation] relation over `Status`
    def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
      scope = account_statuses
      scope.merge!(old_enough_scope(max_id))
      scope = scope.where(id: min_id..) if min_id.present?

      # Each enabled exception narrows the candidate set further.
      scope.merge!(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
      scope.merge!(without_direct_scope) if keep_direct?
      scope.merge!(without_pinned_scope) if keep_pinned?
      scope.merge!(without_poll_scope) if keep_polls?
      scope.merge!(without_media_scope) if keep_media?
      scope.merge!(without_self_fav_scope) if keep_self_fav?
      scope.merge!(without_self_bookmark_scope) if keep_self_bookmark?

      scope.reorder(id: :asc).limit(limit)
    end

    # This computes a status id such that:
    # - the status would be old enough to be a candidate for deletion
    # - there are at most EARLY_SEARCH_CUTOFF statuses between the last
    #   inspected status and this one
    #
    # The idea is to limit expensive SQL queries when an account has lots of
    # statuses excluded from deletion, while not starting anew on each run.
    #
    # @return [Integer, nil] the highest id within that window; presumably nil
    #   when the window contains no status (SQL `MAX` over no rows)
    def compute_cutoff_id
      min_id = last_inspected || 0
      max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
      subquery = account_statuses.where(id: min_id..max_id)
      subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)

      # We're textually interpolating a subquery here as ActiveRecord seems to
      # not provide a way to apply the limit to the subquery
      Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
    end

    # Stores `last_id` in Redis as this account's last-inspected marker, with
    # a two-week expiry. `compute_cutoff_id` starts its search window at this
    # marker, so statuses with lower ids are not looked at again until the
    # marker is moved back or removed.
    #
    # @param last_id [Integer] id of the last status that was inspected
    def record_last_inspected(last_id)
      redis.set(last_inspected_key, last_id, ex: 2.weeks.seconds)
    end

    # Reads the last-inspected marker from Redis.
    #
    # @return [Integer, nil] the stored id, or nil when no marker is stored
    def last_inspected
      redis.get(last_inspected_key)&.to_i
    end

    # Moves the last-inspected marker back to `status.id` when `status`
    # belongs to this policy's account, is not newer than the current marker,
    # and the given action concerns an exception this policy has enabled.
    # Actions other than the three listed below always move the marker.
    #
    # @param status [Status] the status that was acted upon
    # @param action [Symbol] e.g. :unbookmark, :unfav or :unpin
    def invalidate_last_inspected(status, action)
      last_value = last_inspected
      return if last_value.nil? || status.id > last_value || status.account_id != account_id

      case action
      when :unbookmark
        return unless keep_self_bookmark?
      when :unfav
        return unless keep_self_fav?
      when :unpin
        return unless keep_pinned?
      end

      record_last_inspected(status.id)
    end

    private

    # Redis key under which this account's last-inspected status id is kept.
    def last_inspected_key
      "account_cleanup:#{account_id}"
    end

    # `before_save` callback. Drops the last-inspected marker when the pending
    # changes widen the policy, since any previously-inspected status may then
    # need to be deleted and inspection has to start again.
    def update_last_inspected
      redis.del(last_inspected_key) if exception_switched_off? || threshold_raised_or_removed?
    end

    # True when any boolean exception is about to be saved as changing from
    # true to false.
    def exception_switched_off?
      EXCEPTION_BOOLS.any? { |name| attribute_change_to_be_saved(name) == [true, false] }
    end

    # True when any threshold that previously had a value is about to be
    # saved as nil or as a larger value.
    def threshold_raised_or_removed?
      EXCEPTION_THRESHOLDS.any? do |name|
        # A nil change (attribute untouched) destructures to two nils.
        was, now = attribute_change_to_be_saved(name)
        was.present? && (now.nil? || now > was)
      end
    end

    # Validation: adds an error on `account` unless the associated account
    # exists and reports itself as local.
    def validate_local_account
      errors.add(:account, :invalid) unless account&.local?
    end

    # Delegates to `Status.not_direct_visibility`, which is defined outside
    # this file (presumably it filters out direct-visibility statuses).
    def without_direct_scope
      Status.not_direct_visibility
    end

    # Statuses whose id is at most the lower of `max_id` and the id computed
    # for `min_status_age` ago.
    def old_enough_scope(max_id = nil)
      # Filtering on `id` rather than `min_status_age` ago will treat
      # non-snowflake statuses as older than they really are, but Mastodon
      # has switched to snowflake IDs significantly over 2 years ago anyway.
      snowflake_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
      max_id = snowflake_id if max_id.nil? || snowflake_id < max_id

      Status.where(id: ..max_id)
    end

    # Statuses with no Favourite row by the status's own account.
    def without_self_fav_scope
      Status.where.not(self_status_reference_exists(Favourite))
    end

    # Statuses with no Bookmark row by the status's own account.
    def without_self_bookmark_scope
      Status.where.not(self_status_reference_exists(Bookmark))
    end

    # Statuses with no StatusPin row by the status's own account.
    def without_pinned_scope
      Status.where.not(self_status_reference_exists(StatusPin))
    end

    # Statuses with no MediaAttachment row pointing at them.
    def without_media_scope
      Status.where.not(status_media_reference_exists)
    end

    # Statuses without a poll.
    def without_poll_scope
      Status.where(poll_id: nil)
    end

    # Statuses whose reblog/favourite counts are strictly below the configured
    # thresholds. A missing `status_stats` row counts as zero thanks to the
    # LEFT JOIN combined with COALESCE.
    def without_popular_scope
      scope = Status.left_joins(:status_stat)
      scope = scope.where('COALESCE(status_stats.reblogs_count, 0) < ?', min_reblogs) unless min_reblogs.nil?
      scope = scope.where('COALESCE(status_stats.favourites_count, 0) < ?', min_favs) unless min_favs.nil?
      scope
    end

    # All statuses authored by this policy's account.
    def account_statuses
      Status.where(account_id: account_id)
    end

    # Arel EXISTS node for a correlated subquery selecting media attachments
    # whose `status_id` equals the outer status id.
    def status_media_reference_exists
      MediaAttachment
        .where(MediaAttachment.arel_table[:status_id].eq Status.arel_table[:id])
        .select(1)
        .arel
        .exists
    end

    # Arel EXISTS node for a correlated subquery selecting rows of `model`
    # whose `account_id` and `status_id` equal the outer status's `account_id`
    # and `id`.
    def self_status_reference_exists(model)
      model
        .where(model.arel_table[:account_id].eq Status.arel_table[:account_id])
        .where(model.arel_table[:status_id].eq Status.arel_table[:id])
        .select(1)
        .arel
        .exists
    end
  end