123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- # frozen_string_literal: true
- # == Schema Information
- #
- # Table name: account_statuses_cleanup_policies
- #
- # id :bigint(8) not null, primary key
- # account_id :bigint(8) not null
- # enabled :boolean default(TRUE), not null
- # min_status_age :integer default(1209600), not null
- # keep_direct :boolean default(TRUE), not null
- # keep_pinned :boolean default(TRUE), not null
- # keep_polls :boolean default(FALSE), not null
- # keep_media :boolean default(FALSE), not null
- # keep_self_fav :boolean default(TRUE), not null
- # keep_self_bookmark :boolean default(TRUE), not null
- # min_favs :integer
- # min_reblogs :integer
- # created_at :datetime not null
- # updated_at :datetime not null
- #
- class AccountStatusesCleanupPolicy < ApplicationRecord
- include Redisable
- ALLOWED_MIN_STATUS_AGE = [
- 1.week.seconds,
- 2.weeks.seconds,
- 1.month.seconds,
- 2.months.seconds,
- 3.months.seconds,
- 6.months.seconds,
- 1.year.seconds,
- 2.years.seconds,
- ].freeze
- EXCEPTION_BOOLS = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze
- EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze
- # Depending on the cleanup policy, the query to discover the next
- # statuses to delete my get expensive if the account has a lot of old
- # statuses otherwise excluded from deletion by the other exceptions.
- #
- # Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
- # old statuses to be considered for deletion prior to checking exceptions.
- #
- # This is used in `compute_cutoff_id` to provide a `max_id` to
- # `statuses_to_delete`.
- EARLY_SEARCH_CUTOFF = 5_000
- belongs_to :account
- validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
- validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
- validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
- validate :validate_local_account
- before_save :update_last_inspected
- def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
- scope = account_statuses
- scope.merge!(old_enough_scope(max_id))
- scope = scope.where(id: min_id..) if min_id.present?
- scope.merge!(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
- scope.merge!(without_direct_scope) if keep_direct?
- scope.merge!(without_pinned_scope) if keep_pinned?
- scope.merge!(without_poll_scope) if keep_polls?
- scope.merge!(without_media_scope) if keep_media?
- scope.merge!(without_self_fav_scope) if keep_self_fav?
- scope.merge!(without_self_bookmark_scope) if keep_self_bookmark?
- scope.reorder(id: :asc).limit(limit)
- end
- # This computes a toot id such that:
- # - the toot would be old enough to be candidate for deletion
- # - there are at most EARLY_SEARCH_CUTOFF toots between the last inspected toot and this one
- #
- # The idea is to limit expensive SQL queries when an account has lots of toots excluded from
- # deletion, while not starting anew on each run.
- def compute_cutoff_id
- min_id = last_inspected || 0
- max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
- subquery = account_statuses.where(id: min_id..max_id)
- subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)
- # We're textually interpolating a subquery here as ActiveRecord seem to not provide
- # a way to apply the limit to the subquery
- Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
- end
- # The most important thing about `last_inspected` is that any toot older than it is guaranteed
- # not to be kept by the policy regardless of its age.
- def record_last_inspected(last_id)
- redis.set("account_cleanup:#{account_id}", last_id, ex: 2.weeks.seconds)
- end
- def last_inspected
- redis.get("account_cleanup:#{account_id}")&.to_i
- end
- def invalidate_last_inspected(status, action)
- last_value = last_inspected
- return if last_value.nil? || status.id > last_value || status.account_id != account_id
- case action
- when :unbookmark
- return unless keep_self_bookmark?
- when :unfav
- return unless keep_self_fav?
- when :unpin
- return unless keep_pinned?
- end
- record_last_inspected(status.id)
- end
- private
- def update_last_inspected
- if EXCEPTION_BOOLS.filter_map { |name| attribute_change_to_be_saved(name) }.include?([true, false])
- # Policy has been widened in such a way that any previously-inspected status
- # may need to be deleted, so we'll have to start again.
- redis.del("account_cleanup:#{account_id}")
- end
- redis.del("account_cleanup:#{account_id}") if EXCEPTION_THRESHOLDS.filter_map { |name| attribute_change_to_be_saved(name) }.any? { |old, new| old.present? && (new.nil? || new > old) }
- end
- def validate_local_account
- errors.add(:account, :invalid) unless account&.local?
- end
- def without_direct_scope
- Status.where.not(visibility: :direct)
- end
- def old_enough_scope(max_id = nil)
- # Filtering on `id` rather than `min_status_age` ago will treat
- # non-snowflake statuses as older than they really are, but Mastodon
- # has switched to snowflake IDs significantly over 2 years ago anyway.
- snowflake_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
- max_id = snowflake_id if max_id.nil? || snowflake_id < max_id
- Status.where(id: ..max_id)
- end
- def without_self_fav_scope
- Status.where('NOT EXISTS (SELECT 1 FROM favourites fav WHERE fav.account_id = statuses.account_id AND fav.status_id = statuses.id)')
- end
- def without_self_bookmark_scope
- Status.where('NOT EXISTS (SELECT 1 FROM bookmarks bookmark WHERE bookmark.account_id = statuses.account_id AND bookmark.status_id = statuses.id)')
- end
- def without_pinned_scope
- Status.where('NOT EXISTS (SELECT 1 FROM status_pins pin WHERE pin.account_id = statuses.account_id AND pin.status_id = statuses.id)')
- end
- def without_media_scope
- Status.where('NOT EXISTS (SELECT 1 FROM media_attachments media WHERE media.status_id = statuses.id)')
- end
- def without_poll_scope
- Status.where(poll_id: nil)
- end
- def without_popular_scope
- scope = Status.left_joins(:status_stat)
- scope = scope.where('COALESCE(status_stats.reblogs_count, 0) < ?', min_reblogs) unless min_reblogs.nil?
- scope = scope.where('COALESCE(status_stats.favourites_count, 0) < ?', min_favs) unless min_favs.nil?
- scope
- end
- def account_statuses
- Status.where(account_id: account_id)
- end
- end
|