Browse Source

Add feature to automatically delete old toots (#16529)

* Add account statuses cleanup policy model

* Record last inspected toot to delete to speed up successive calls to statuses_to_delete

* Add service to cleanup a given account's statuses within a budget

* Add worker to go through account policies and delete old toots

* Fix last inspected status id logic

All existing statuses older or equal to last inspected status id must be
kept by the current policy. This is an invariant that must be kept so that
resuming deletion from the last inspected status remains sound.

* Add tests

* Refactor scheduler and add tests

* Add user interface

* Add support for discriminating based on boosts/favs

* Add UI support for min_reblogs and min_favs, rework UI

* Address first round of review comments

* Replace Snowflake#id_at_start with with_random parameter

* Add tests

* Add tests for StatusesCleanupController

* Rework settings page

* Adjust load-avoiding mechanisms

* Please CodeClimate
Claire 2 years ago
parent
commit
4ac78e2a06

+ 35 - 0
app/controllers/statuses_cleanup_controller.rb

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+class StatusesCleanupController < ApplicationController
+  layout 'admin'
+
+  before_action :authenticate_user!
+  before_action :set_policy
+  before_action :set_body_classes
+
+  def show; end
+
+  def update
+    if @policy.update(resource_params)
+      redirect_to statuses_cleanup_path, notice: I18n.t('generic.changes_saved_msg')
+    else
+      render action: :show
+    end
+  rescue ActionController::ParameterMissing
+    # Do nothing
+  end
+
+  private
+
+  def set_policy
+    @policy = current_account.statuses_cleanup_policy || current_account.build_statuses_cleanup_policy(enabled: false)
+  end
+
+  def resource_params
+    params.require(:account_statuses_cleanup_policy).permit(:enabled, :min_status_age, :keep_direct, :keep_pinned, :keep_polls, :keep_media, :keep_self_fav, :keep_self_bookmark, :min_favs, :min_reblogs)
+  end
+
+  def set_body_classes
+    @body_classes = 'admin'
+  end
+end

+ 171 - 0
app/models/account_statuses_cleanup_policy.rb

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+# == Schema Information
+#
+# Table name: account_statuses_cleanup_policies
+#
+#  id                 :bigint           not null, primary key
+#  account_id         :bigint           not null
+#  enabled            :boolean          default(TRUE), not null
+#  min_status_age     :integer          default(1209600), not null
+#  keep_direct        :boolean          default(TRUE), not null
+#  keep_pinned        :boolean          default(TRUE), not null
+#  keep_polls         :boolean          default(FALSE), not null
+#  keep_media         :boolean          default(FALSE), not null
+#  keep_self_fav      :boolean          default(TRUE), not null
+#  keep_self_bookmark :boolean          default(TRUE), not null
+#  min_favs           :integer
+#  min_reblogs        :integer
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
+class AccountStatusesCleanupPolicy < ApplicationRecord
+  include Redisable
+
+  ALLOWED_MIN_STATUS_AGE = [
+    2.weeks.seconds,
+    1.month.seconds,
+    2.months.seconds,
+    3.months.seconds,
+    6.months.seconds,
+    1.year.seconds,
+    2.years.seconds,
+  ].freeze
+
+  EXCEPTION_BOOLS      = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze
+  EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze
+
+  # Depending on the cleanup policy, the query to discover the next
+  # statuses to delete may get expensive if the account has a lot of old
+  # statuses otherwise excluded from deletion by the other exceptions.
+  #
+  # Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
+  # old statuses to be considered for deletion prior to checking exceptions.
+  #
+  # This is used in `compute_cutoff_id` to provide a `max_id` to
+  # `statuses_to_delete`.
+  EARLY_SEARCH_CUTOFF = 5_000
+
+  belongs_to :account
+
+  validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
+  validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
+  validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
+  validate :validate_local_account
+
+  before_save :update_last_inspected
+
+  # Builds the relation of this account's statuses that the policy allows
+  # deleting: statuses whose id is at most the age cutoff (and at most `max_id`
+  # when given), at least `min_id` when given, and that match none of the
+  # enabled exceptions. The result is ordered by ascending id and capped at
+  # `limit` rows.
+  def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
+    scope = account.statuses
+    scope.merge!(old_enough_scope(max_id))
+    scope = scope.where(Status.arel_table[:id].gteq(min_id)) if min_id.present?
+    scope.merge!(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
+    scope.merge!(without_direct_scope) if keep_direct?
+    scope.merge!(without_pinned_scope) if keep_pinned?
+    scope.merge!(without_poll_scope) if keep_polls?
+    scope.merge!(without_media_scope) if keep_media?
+    scope.merge!(without_self_fav_scope) if keep_self_fav?
+    scope.merge!(without_self_bookmark_scope) if keep_self_bookmark?
+
+    scope.reorder(id: :asc).limit(limit)
+  end
+
+  # This computes a toot id such that:
+  # - the toot would be old enough to be candidate for deletion
+  # - there are at most EARLY_SEARCH_CUTOFF toots between the last inspected toot and this one
+  #
+  # The idea is to limit expensive SQL queries when an account has lots of toots excluded from
+  # deletion, while not starting anew on each run.
+  #
+  # The result is the MAX(id) of that window, so it is nil when the account has
+  # no status in the range.
+  def compute_cutoff_id
+    min_id = last_inspected || 0
+    max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
+    subquery = account.statuses.where(Status.arel_table[:id].gteq(min_id)).where(Status.arel_table[:id].lteq(max_id))
+    subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)
+
+    # We're textually interpolating a subquery here as ActiveRecord seem to not provide
+    # a way to apply the limit to the subquery
+    Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
+  end
+
+  # The most important thing about `last_inspected` is that any toot older than it is guaranteed
+  # to be kept by the policy regardless of its age.
+  #
+  # Stores `last_id` in Redis under a per-account key that expires after a week.
+  def record_last_inspected(last_id)
+    redis.set("account_cleanup:#{account.id}", last_id, ex: 1.week.seconds)
+  end
+
+  # Returns the stored last inspected status id as an Integer, or nil when
+  # nothing is stored for this account.
+  def last_inspected
+    redis.get("account_cleanup:#{account.id}")&.to_i
+  end
+
+  # Moves the stored cursor back to `status` when `action` may have made an
+  # already-inspected status deletable. Nothing happens when there is no
+  # cursor, when the status is newer than the cursor or belongs to another
+  # account, or when the exception matching `action` (:unbookmark, :unfav,
+  # :unpin) is not enabled in this policy.
+  def invalidate_last_inspected(status, action)
+    last_value = last_inspected
+    return if last_value.nil? || status.id > last_value || status.account_id != account_id
+
+    case action
+    when :unbookmark
+      return unless keep_self_bookmark?
+    when :unfav
+      return unless keep_self_fav?
+    when :unpin
+      return unless keep_pinned?
+    end
+
+    record_last_inspected(status.id)
+  end
+
+  private
+
+  # before_save hook: drops the stored cursor whenever the pending changes make
+  # more statuses eligible for deletion than before.
+  def update_last_inspected
+    if EXCEPTION_BOOLS.map { |name| attribute_change_to_be_saved(name) }.compact.include?([true, false])
+      # Policy has been widened in such a way that any previously-inspected status
+      # may need to be deleted, so we'll have to start again.
+      redis.del("account_cleanup:#{account.id}")
+    end
+    # Same reasoning for thresholds: removing one or raising it makes statuses
+    # that used to be kept eligible for deletion.
+    if EXCEPTION_THRESHOLDS.map { |name| attribute_change_to_be_saved(name) }.compact.any? { |old, new| old.present? && (new.nil? || new > old) }
+      redis.del("account_cleanup:#{account.id}")
+    end
+  end
+
+  # Adds an error on `account` unless the policy belongs to a local account.
+  def validate_local_account
+    errors.add(:account, :invalid) unless account&.local?
+  end
+
+  def without_direct_scope
+    Status.where.not(visibility: :direct)
+  end
+
+  def old_enough_scope(max_id = nil)
+    # Filtering on `id` rather than `min_status_age` ago will treat
+    # non-snowflake statuses as older than they really are, but Mastodon
+    # has switched to snowflake IDs significantly over 2 years ago anyway.
+    max_id = [max_id, Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)].compact.min
+    Status.where(Status.arel_table[:id].lteq(max_id))
+  end
+
+  # Excludes statuses favourited by their own author.
+  def without_self_fav_scope
+    Status.where('NOT EXISTS (SELECT * FROM favourites fav WHERE fav.account_id = statuses.account_id AND fav.status_id = statuses.id)')
+  end
+
+  # Excludes statuses bookmarked by their own author.
+  def without_self_bookmark_scope
+    Status.where('NOT EXISTS (SELECT * FROM bookmarks bookmark WHERE bookmark.account_id = statuses.account_id AND bookmark.status_id = statuses.id)')
+  end
+
+  # Excludes statuses pinned by their own author.
+  def without_pinned_scope
+    Status.where('NOT EXISTS (SELECT * FROM status_pins pin WHERE pin.account_id = statuses.account_id AND pin.status_id = statuses.id)')
+  end
+
+  # Excludes statuses that have at least one media attachment.
+  def without_media_scope
+    Status.where('NOT EXISTS (SELECT * FROM media_attachments media WHERE media.status_id = statuses.id)')
+  end
+
+  def without_poll_scope
+    Status.where(poll_id: nil)
+  end
+
+  # Keeps only statuses whose boost and favourite counts are at or below the
+  # configured thresholds; a status without a stats row counts as 0. Only the
+  # thresholds that are set are applied.
+  def without_popular_scope
+    scope = Status.left_joins(:status_stat)
+    scope = scope.where('COALESCE(status_stats.reblogs_count, 0) <= ?', min_reblogs) unless min_reblogs.nil?
+    scope = scope.where('COALESCE(status_stats.favourites_count, 0) <= ?', min_favs) unless min_favs.nil?
+    scope
+  end
+end

+ 8 - 0
app/models/bookmark.rb

@@ -23,4 +23,12 @@ class Bookmark < ApplicationRecord
   before_validation do
     self.status = status.reblog if status&.reblog?
   end
+
+  after_destroy :invalidate_cleanup_info
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unbookmark)
+  end
 end

+ 3 - 0
app/models/concerns/account_associations.rb

@@ -66,5 +66,8 @@ module AccountAssociations
 
     # Follow recommendations
     has_one :follow_recommendation_suppression, inverse_of: :account, dependent: :destroy
+
+    # Account statuses cleanup policy
+    has_one :statuses_cleanup_policy, class_name: 'AccountStatusesCleanupPolicy', inverse_of: :account, dependent: :destroy
   end
 end

+ 7 - 0
app/models/favourite.rb

@@ -28,6 +28,7 @@ class Favourite < ApplicationRecord
 
   after_create :increment_cache_counters
   after_destroy :decrement_cache_counters
+  after_destroy :invalidate_cleanup_info
 
   private
 
@@ -39,4 +40,10 @@ class Favourite < ApplicationRecord
     return if association(:status).loaded? && status.marked_for_destruction?
     status&.decrement_count!(:favourites_count)
   end
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unfav)
+  end
 end

+ 8 - 0
app/models/status_pin.rb

@@ -15,4 +15,12 @@ class StatusPin < ApplicationRecord
   belongs_to :status
 
   validates_with StatusPinValidator
+
+  after_destroy :invalidate_cleanup_info
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unpin)
+  end
 end

+ 27 - 0
app/services/account_statuses_cleanup_service.rb

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class AccountStatusesCleanupService < BaseService
+  # @param [AccountStatusesCleanupPolicy] account_policy
+  # @param [Integer] budget
+  # @return [Integer]
+  def call(account_policy, budget = 50)
+    return 0 unless account_policy.enabled?
+
+    cutoff_id = account_policy.compute_cutoff_id
+    return 0 if cutoff_id.blank?
+
+    num_deleted = 0
+    last_deleted = nil
+
+    account_policy.statuses_to_delete(budget, cutoff_id, account_policy.last_inspected).reorder(nil).find_each(order: :asc) do |status|
+      status.discard
+      RemovalWorker.perform_async(status.id, redraft: false)
+      num_deleted += 1
+      last_deleted = status.id
+    end
+
+    account_policy.record_last_inspected(last_deleted.presence || cutoff_id)
+
+    num_deleted
+  end
+end

+ 45 - 0
app/views/statuses_cleanup/show.html.haml

@@ -0,0 +1,45 @@
+- content_for :page_title do
+  = t('settings.statuses_cleanup')
+
+- content_for :heading_actions do
+  = button_tag t('generic.save_changes'), class: 'button', form: 'edit_policy'
+
+= simple_form_for @policy, url: statuses_cleanup_path, method: :put, html: { id: 'edit_policy' } do |f|
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :enabled, as: :boolean, wrapper: :with_label, label: t('statuses_cleanup.enabled'), hint: t('statuses_cleanup.enabled_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_status_age, wrapper: :with_label, label: t('statuses_cleanup.min_age_label'), collection: AccountStatusesCleanupPolicy::ALLOWED_MIN_STATUS_AGE.map(&:to_i), label_method: lambda { |i| t("statuses_cleanup.min_age.#{i}") }, include_blank: false, hint: false
+
+  .flash-message= t('statuses_cleanup.explanation')
+
+  %h4= t('statuses_cleanup.exceptions')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_pinned, wrapper: :with_label, label: t('statuses_cleanup.keep_pinned'), hint: t('statuses_cleanup.keep_pinned_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_direct, wrapper: :with_label, label: t('statuses_cleanup.keep_direct'), hint: t('statuses_cleanup.keep_direct_hint')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_self_fav, wrapper: :with_label, label: t('statuses_cleanup.keep_self_fav'), hint: t('statuses_cleanup.keep_self_fav_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_self_bookmark, wrapper: :with_label, label: t('statuses_cleanup.keep_self_bookmark'), hint: t('statuses_cleanup.keep_self_bookmark_hint')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_polls, wrapper: :with_label, label: t('statuses_cleanup.keep_polls'), hint: t('statuses_cleanup.keep_polls_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_media, wrapper: :with_label, label: t('statuses_cleanup.keep_media'), hint: t('statuses_cleanup.keep_media_hint')
+
+  %h4= t('statuses_cleanup.interaction_exceptions')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_favs, wrapper: :with_label, label: t('statuses_cleanup.min_favs'), hint: t('statuses_cleanup.min_favs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_favs') }
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_reblogs, wrapper: :with_label, label: t('statuses_cleanup.min_reblogs'), hint: t('statuses_cleanup.min_reblogs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_reblogs') }
+
+  .flash-message= t('statuses_cleanup.interaction_exceptions_explanation')

+ 96 - 0
app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+class Scheduler::AccountsStatusesCleanupScheduler
+  include Sidekiq::Worker
+
+  # This limit is mostly to be nice to the fediverse at large and not
+  # generate too much traffic.
+  # This also helps limiting the running time of the scheduler itself.
+  MAX_BUDGET         = 50
+
+  # This is an attempt to spread the load across instances, as various
+  # accounts are likely to have various followers.
+  PER_ACCOUNT_BUDGET = 5
+
+  # This is an attempt to limit the workload generated by status removal
+  # jobs to something the particular instance can handle.
+  PER_THREAD_BUDGET  = 5
+
+  # Those avoid loading an instance that is already under load
+  MAX_DEFAULT_SIZE    = 2
+  MAX_DEFAULT_LATENCY = 5
+  MAX_PUSH_SIZE       = 5
+  MAX_PUSH_LATENCY    = 10
+  # 'pull' queue has lower priority jobs, and it's unlikely that pushing
+  # deletes would cause much issues with this queue if it didn't cause issues
+  # with default and push. Yet, do not enqueue deletes if the instance is
+  # lagging behind too much.
+  MAX_PULL_SIZE       = 500
+  MAX_PULL_LATENCY    = 300
+
+  # This is less of an issue in general, but deleting old statuses is likely
+  # to cause delivery errors, and thus increase the number of jobs to be retried.
+  # This doesn't directly translate to load, but connection errors and a high
+  # number of dead instances may lead to this spiraling out of control if
+  # unchecked.
+  MAX_RETRY_SIZE = 50_000
+
+  sidekiq_options retry: 0, lock: :until_executed
+
+  def perform
+    return if under_load?
+
+    budget = compute_budget
+    first_policy_id = last_processed_id
+
+    loop do
+      num_processed_accounts = 0
+
+      scope = AccountStatusesCleanupPolicy.where(enabled: true)
+      scope.where(Account.arel_table[:id].gt(first_policy_id)) if first_policy_id.present?
+      scope.find_each(order: :asc) do |policy|
+        num_deleted = AccountStatusesCleanupService.new.call(policy, [budget, PER_ACCOUNT_BUDGET].min)
+        num_processed_accounts += 1 unless num_deleted.zero?
+        budget -= num_deleted
+        if budget.zero?
+          save_last_processed_id(policy.id)
+          break
+        end
+      end
+
+      # The idea here is to loop through all policies at least once until the budget is exhausted
+      # and start back after the last processed account otherwise
+      break if budget.zero? || (num_processed_accounts.zero? && first_policy_id.nil?)
+      first_policy_id = nil
+    end
+  end
+
+  def compute_budget
+    threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
+    [PER_THREAD_BUDGET * threads, MAX_BUDGET].min
+  end
+
+  def under_load?
+    return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE
+    queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
+  end
+
+  private
+
+  def queue_under_load?(name, max_size, max_latency)
+    queue = Sidekiq::Queue.new(name)
+    queue.size > max_size || queue.latency > max_latency
+  end
+
+  def last_processed_id
+    Redis.current.get('account_statuses_cleanup_scheduler:last_account_id')
+  end
+
+  def save_last_processed_id(id)
+    if id.nil?
+      Redis.current.del('account_statuses_cleanup_scheduler:last_account_id')
+    else
+      Redis.current.set('account_statuses_cleanup_scheduler:last_account_id', id, ex: 1.hour.seconds)
+    end
+  end
+end

+ 35 - 0
config/locales/en.yml

@@ -1254,6 +1254,7 @@ en:
     preferences: Preferences
     profile: Profile
     relationships: Follows and followers
+    statuses_cleanup: Automated post deletion
     two_factor_authentication: Two-factor Auth
     webauthn_authentication: Security keys
   statuses:
@@ -1305,6 +1306,40 @@ en:
       public_long: Everyone can see
       unlisted: Unlisted
       unlisted_long: Everyone can see, but not listed on public timelines
+  statuses_cleanup:
+    enabled: Automatically delete old posts
+    enabled_hint: Automatically deletes your posts once they reach a specified age threshold, unless they match one of the exceptions below
+    exceptions: Exceptions
+    explanation: Because deleting posts is an expensive operation, this is done slowly over time when the server is not otherwise busy. For this reason, your posts may be deleted a while after they reach the age threshold.
+    ignore_favs: Ignore favourites
+    ignore_reblogs: Ignore boosts
+    interaction_exceptions: Exceptions based on interactions
+    interaction_exceptions_explanation: Note that there is no guarantee for posts to be deleted if they go below the favourite or boost threshold after having once gone over them.
+    keep_direct: Keep direct messages
+    keep_direct_hint: Doesn't delete any of your direct messages
+    keep_media: Keep posts with media attachments
+    keep_media_hint: Doesn't delete any of your posts that have media attachments
+    keep_pinned: Keep pinned posts
+    keep_pinned_hint: Doesn't delete any of your pinned posts
+    keep_polls: Keep polls
+    keep_polls_hint: Doesn't delete any of your polls
+    keep_self_bookmark: Keep posts you bookmarked
+    keep_self_bookmark_hint: Doesn't delete your own posts if you have bookmarked them
+    keep_self_fav: Keep posts you favourited
+    keep_self_fav_hint: Doesn't delete your own posts if you have favourited them
+    min_age:
+      '1209600': 2 weeks
+      '15778476': 6 months
+      '2629746': 1 month
+      '31556952': 1 year
+      '5259492': 2 months
+      '63113904': 2 years
+      '7889238': 3 months
+    min_age_label: Age threshold
+    min_favs: Keep posts favourited more than
+    min_favs_hint: Doesn't delete any of your posts that has received more than this amount of favourites. Leave blank to delete posts regardless of their number of favourites
+    min_reblogs: Keep posts boosted more than
+    min_reblogs_hint: Doesn't delete any of your posts that has been boosted more than this number of times. Leave blank to delete posts regardless of their number of boosts
   stream_entries:
     pinned: Pinned post
     reblogged: boosted

+ 1 - 0
config/navigation.rb

@@ -18,6 +18,7 @@ SimpleNavigation::Configuration.run do |navigation|
 
     n.item :relationships, safe_join([fa_icon('users fw'), t('settings.relationships')]), relationships_url, if: -> { current_user.functional? }
     n.item :filters, safe_join([fa_icon('filter fw'), t('filters.index.title')]), filters_path, highlights_on: %r{/filters}, if: -> { current_user.functional? }
+    n.item :statuses_cleanup, safe_join([fa_icon('history fw'), t('settings.statuses_cleanup')]), statuses_cleanup_url, if: -> { current_user.functional? }
 
     n.item :security, safe_join([fa_icon('lock fw'), t('settings.account')]), edit_user_registration_url do |s|
       s.item :password, safe_join([fa_icon('lock fw'), t('settings.account_settings')]), edit_user_registration_url, highlights_on: %r{/auth/edit|/settings/delete|/settings/migration|/settings/aliases|/settings/login_activities}

+ 1 - 0
config/routes.rb

@@ -176,6 +176,7 @@ Rails.application.routes.draw do
   resources :invites, only: [:index, :create, :destroy]
   resources :filters, except: [:show]
   resource :relationships, only: [:show, :update]
+  resource :statuses_cleanup, controller: :statuses_cleanup, only: [:show, :update]
 
   get '/public', to: 'public_timelines#show', as: :public_timeline
   get '/media_proxy/:id/(*any)', to: 'media_proxy#show', as: :media_proxy

+ 4 - 0
config/sidekiq.yml

@@ -57,3 +57,7 @@
     cron: '0 * * * *'
     class: Scheduler::InstanceRefreshScheduler
     queue: scheduler
+  accounts_statuses_cleanup_scheduler:
+    interval: 1 minute
+    class: Scheduler::AccountsStatusesCleanupScheduler
+    queue: scheduler

+ 20 - 0
db/migrate/20210722120340_create_account_statuses_cleanup_policies.rb

@@ -0,0 +1,20 @@
+class CreateAccountStatusesCleanupPolicies < ActiveRecord::Migration[6.1]
+  def change
+    create_table :account_statuses_cleanup_policies do |t|
+      t.belongs_to :account, null: false, foreign_key: { on_delete: :cascade }
+      t.boolean :enabled, null: false, default: true
+      t.integer :min_status_age, null: false, default: 2.weeks.seconds
+      t.boolean :keep_direct, null: false, default: true
+      t.boolean :keep_pinned, null: false, default: true
+      t.boolean :keep_polls, null: false, default: false
+      t.boolean :keep_media, null: false, default: false
+      t.boolean :keep_self_fav, null: false, default: true
+      t.boolean :keep_self_bookmark, null: false, default: true
+      t.integer :min_favs, null: true
+      t.integer :min_reblogs, null: true
+
+      t.timestamps
+    end
+  end
+end
+

+ 18 - 0
db/schema.rb

@@ -114,6 +114,23 @@ ActiveRecord::Schema.define(version: 2021_08_08_071221) do
     t.index ["account_id"], name: "index_account_stats_on_account_id", unique: true
   end
 
+  create_table "account_statuses_cleanup_policies", force: :cascade do |t|
+    t.bigint "account_id", null: false
+    t.boolean "enabled", default: true, null: false
+    t.integer "min_status_age", default: 1209600, null: false
+    t.boolean "keep_direct", default: true, null: false
+    t.boolean "keep_pinned", default: true, null: false
+    t.boolean "keep_polls", default: false, null: false
+    t.boolean "keep_media", default: false, null: false
+    t.boolean "keep_self_fav", default: true, null: false
+    t.boolean "keep_self_bookmark", default: true, null: false
+    t.integer "min_favs"
+    t.integer "min_reblogs"
+    t.datetime "created_at", precision: 6, null: false
+    t.datetime "updated_at", precision: 6, null: false
+    t.index ["account_id"], name: "index_account_statuses_cleanup_policies_on_account_id"
+  end
+
   create_table "account_warning_presets", force: :cascade do |t|
     t.text "text", default: "", null: false
     t.datetime "created_at", null: false
@@ -984,6 +1001,7 @@ ActiveRecord::Schema.define(version: 2021_08_08_071221) do
   add_foreign_key "account_pins", "accounts", column: "target_account_id", on_delete: :cascade
   add_foreign_key "account_pins", "accounts", on_delete: :cascade
   add_foreign_key "account_stats", "accounts", on_delete: :cascade
+  add_foreign_key "account_statuses_cleanup_policies", "accounts", on_delete: :cascade
   add_foreign_key "account_warnings", "accounts", column: "target_account_id", on_delete: :cascade
   add_foreign_key "account_warnings", "accounts", on_delete: :nullify
   add_foreign_key "accounts", "accounts", column: "moved_to_account_id", on_delete: :nullify

+ 4 - 3
lib/mastodon/snowflake.rb

@@ -138,10 +138,11 @@ module Mastodon::Snowflake
       end
     end
 
-    def id_at(timestamp)
-      id  = timestamp.to_i * 1000 + rand(1000)
+    def id_at(timestamp, with_random: true)
+      id  = timestamp.to_i * 1000
+      id += rand(1000) if with_random
       id  = id << 16
-      id += rand(2**16)
+      id += rand(2**16) if with_random
       id
     end
 

+ 27 - 0
spec/controllers/statuses_cleanup_controller_spec.rb

@@ -0,0 +1,27 @@
+require 'rails_helper'
+
+RSpec.describe StatusesCleanupController, type: :controller do
+  render_views
+
+  before do
+    @user = Fabricate(:user)
+    sign_in @user, scope: :user
+  end
+
+  describe "GET #show" do
+    it "returns http success" do
+      get :show
+      expect(response).to have_http_status(200)
+    end
+  end
+
+  describe 'PUT #update' do
+    it 'updates the account status cleanup policy' do
+      put :update, params: { account_statuses_cleanup_policy: { enabled: true, min_status_age: 2.weeks.seconds, keep_direct: false, keep_polls: true } }
+      expect(response).to redirect_to(statuses_cleanup_path)
+      expect(@user.account.statuses_cleanup_policy.enabled).to eq true
+      expect(@user.account.statuses_cleanup_policy.keep_direct).to eq false
+      expect(@user.account.statuses_cleanup_policy.keep_polls).to eq true
+    end
+  end
+end

+ 3 - 0
spec/fabricators/account_statuses_cleanup_policy_fabricator.rb

@@ -0,0 +1,3 @@
+Fabricator(:account_statuses_cleanup_policy) do
+  account
+end

+ 546 - 0
spec/models/account_statuses_cleanup_policy_spec.rb

@@ -0,0 +1,546 @@
+require 'rails_helper'
+
+RSpec.describe AccountStatusesCleanupPolicy, type: :model do
+  let(:account) { Fabricate(:account, username: 'alice', domain: nil) }
+
+  describe 'validation' do
+    it 'disallow remote accounts' do
+      account.update(domain: 'example.com')
+      account_statuses_cleanup_policy = Fabricate.build(:account_statuses_cleanup_policy, account: account)
+      account_statuses_cleanup_policy.valid?
+      expect(account_statuses_cleanup_policy).to model_have_error_on_field(:account)
+    end
+  end
+
+  describe 'save hooks' do
+    context 'when widening a policy' do
+      let!(:account_statuses_cleanup_policy) do
+        Fabricate(:account_statuses_cleanup_policy,
+          account: account,
+          keep_direct: true,
+          keep_pinned: true,
+          keep_polls: true,
+          keep_media: true,
+          keep_self_fav: true,
+          keep_self_bookmark: true,
+          min_favs: 1,
+          min_reblogs: 1
+        )
+      end
+
+      before do
+        account_statuses_cleanup_policy.record_last_inspected(42)
+      end
+
+      it 'invalidates last_inspected when widened because of keep_direct' do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of keep_pinned' do
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of keep_polls' do
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of keep_media' do
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of keep_self_fav' do
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of keep_self_bookmark' do
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of higher min_favs' do
+        account_statuses_cleanup_policy.min_favs = 5
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of disabled min_favs' do
+        account_statuses_cleanup_policy.min_favs = nil
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of higher min_reblogs' do
+        account_statuses_cleanup_policy.min_reblogs = 5
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+
+      it 'invalidates last_inspected when widened because of disable min_reblogs' do
+        account_statuses_cleanup_policy.min_reblogs = nil
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to be nil
+      end
+    end
+
+    context 'when narrowing a policy' do
+      let!(:account_statuses_cleanup_policy) do
+        Fabricate(:account_statuses_cleanup_policy,
+          account: account,
+          keep_direct: false,
+          keep_pinned: false,
+          keep_polls: false,
+          keep_media: false,
+          keep_self_fav: false,
+          keep_self_bookmark: false,
+          min_favs: nil,
+          min_reblogs: nil
+        )
+      end
+
+      it 'does not unnecessarily invalidate last_inspected' do
+        account_statuses_cleanup_policy.record_last_inspected(42)
+        account_statuses_cleanup_policy.keep_direct = true
+        account_statuses_cleanup_policy.keep_pinned = true
+        account_statuses_cleanup_policy.keep_polls = true
+        account_statuses_cleanup_policy.keep_media = true
+        account_statuses_cleanup_policy.keep_self_fav = true
+        account_statuses_cleanup_policy.keep_self_bookmark = true
+        account_statuses_cleanup_policy.min_favs = 5
+        account_statuses_cleanup_policy.min_reblogs = 5
+        account_statuses_cleanup_policy.save
+        expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+      end
+    end
+  end
+
+  describe '#record_last_inspected' do
+    let(:account_statuses_cleanup_policy) { Fabricate(:account_statuses_cleanup_policy, account: account) }
+
+    it 'records the given id' do
+      account_statuses_cleanup_policy.record_last_inspected(42)
+      expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+    end
+  end
+
+  describe '#invalidate_last_inspected' do
+    let(:account_statuses_cleanup_policy) { Fabricate(:account_statuses_cleanup_policy, account: account) }
+    let(:status) { Fabricate(:status, id: 10, account: account) }
+    subject { account_statuses_cleanup_policy.invalidate_last_inspected(status, action) }
+
+    before do
+      account_statuses_cleanup_policy.record_last_inspected(42)
+    end
+
+    context 'when the action is :unbookmark' do
+      let(:action) { :unbookmark }
+
+      context 'when the policy is not to keep self-bookmarked toots' do
+        before do
+          account_statuses_cleanup_policy.keep_self_bookmark = false
+        end
+
+        it 'does not change the recorded id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+        end
+      end
+
+      context 'when the policy is to keep self-bookmarked toots' do
+        before do
+          account_statuses_cleanup_policy.keep_self_bookmark = true
+        end
+
+        it 'records the older id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 10
+        end
+      end
+    end
+
+    context 'when the action is :unfav' do
+      let(:action) { :unfav }
+
+      context 'when the policy is not to keep self-favourited toots' do
+        before do
+          account_statuses_cleanup_policy.keep_self_fav = false
+        end
+
+        it 'does not change the recorded id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+        end
+      end
+
+      context 'when the policy is to keep self-favourited toots' do
+        before do
+          account_statuses_cleanup_policy.keep_self_fav = true
+        end
+
+        it 'records the older id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 10
+        end
+      end
+    end
+
+    context 'when the action is :unpin' do
+      let(:action) { :unpin }
+
+      context 'when the policy is not to keep pinned toots' do
+        before do
+          account_statuses_cleanup_policy.keep_pinned = false
+        end
+
+        it 'does not change the recorded id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+        end
+      end
+
+      context 'when the policy is to keep pinned toots' do
+        before do
+          account_statuses_cleanup_policy.keep_pinned = true
+        end
+
+        it 'records the older id' do
+          subject
+          expect(account_statuses_cleanup_policy.last_inspected).to eq 10
+        end
+      end
+    end
+
+    context 'when the status is more recent than the recorded inspected id' do
+      let(:action) { :unfav }
+      let(:status) { Fabricate(:status, account: account) }
+
+      it 'does not change the recorded id' do
+        subject
+        expect(account_statuses_cleanup_policy.last_inspected).to eq 42
+      end
+    end
+  end
+
+  describe '#compute_cutoff_id' do
+    let!(:unrelated_status)  { Fabricate(:status, created_at: 3.years.ago) }
+    let(:account_statuses_cleanup_policy) { Fabricate(:account_statuses_cleanup_policy, account: account) }
+
+    subject { account_statuses_cleanup_policy.compute_cutoff_id }
+
+    context 'when the account has posted multiple toots' do
+      let!(:very_old_status)   { Fabricate(:status, created_at: 3.years.ago, account: account) }
+      let!(:old_status)        { Fabricate(:status, created_at: 3.weeks.ago, account: account) }
+      let!(:recent_status)     { Fabricate(:status, created_at: 2.days.ago, account: account) }
+
+      it 'returns the most recent id that is still below policy age' do
+        expect(subject).to eq old_status.id
+      end
+    end
+
+    context 'when the account has not posted anything' do
+      it 'returns nil' do
+        expect(subject).to be_nil
+      end
+    end
+  end
+
+  describe '#statuses_to_delete' do
+    let!(:unrelated_status)  { Fabricate(:status, created_at: 3.years.ago) }
+    let!(:very_old_status)   { Fabricate(:status, created_at: 3.years.ago, account: account) }
+    let!(:pinned_status)     { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:direct_message)    { Fabricate(:status, created_at: 1.year.ago, account: account, visibility: :direct) }
+    let!(:self_faved)        { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:self_bookmarked)   { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:status_with_poll)  { Fabricate(:status, created_at: 1.year.ago, account: account, poll_attributes: { account: account, voters_count: 0, options: ['a', 'b'], expires_in: 2.days }) }
+    let!(:status_with_media) { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:faved4)            { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:faved5)            { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:reblogged4)        { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:reblogged5)        { Fabricate(:status, created_at: 1.year.ago, account: account) }
+    let!(:recent_status)     { Fabricate(:status, created_at: 2.days.ago, account: account) }
+
+    let!(:media_attachment)  { Fabricate(:media_attachment, account: account, status: status_with_media) }
+    let!(:status_pin)        { Fabricate(:status_pin, account: account, status: pinned_status) }
+    let!(:favourite)         { Fabricate(:favourite, account: account, status: self_faved) }
+    let!(:bookmark)          { Fabricate(:bookmark, account: account, status: self_bookmarked) }
+
+    let(:account_statuses_cleanup_policy) { Fabricate(:account_statuses_cleanup_policy, account: account) }
+
+    subject { account_statuses_cleanup_policy.statuses_to_delete }
+
+    before do
+      4.times { faved4.increment_count!(:favourites_count) }
+      5.times { faved5.increment_count!(:favourites_count) }
+      4.times { reblogged4.increment_count!(:reblogs_count) }
+      5.times { reblogged5.increment_count!(:reblogs_count) }
+    end
+
+    context 'when passed a max_id' do
+      let!(:old_status)               { Fabricate(:status, created_at: 1.year.ago, account: account) }
+      let!(:slightly_less_old_status) { Fabricate(:status, created_at: 6.months.ago, account: account) }
+
+      subject { account_statuses_cleanup_policy.statuses_to_delete(50, old_status.id).pluck(:id) }
+
+      it 'returns statuses including max_id' do
+        expect(subject).to include(old_status.id)
+      end
+
+      it 'returns statuses including older than max_id' do
+        expect(subject).to include(very_old_status.id)
+      end
+
+      it 'does not return statuses newer than max_id' do
+        expect(subject).to_not include(slightly_less_old_status.id)
+      end
+    end
+
+    context 'when passed a min_id' do
+      let!(:old_status)               { Fabricate(:status, created_at: 1.year.ago, account: account) }
+      let!(:slightly_less_old_status) { Fabricate(:status, created_at: 6.months.ago, account: account) }
+
+      subject { account_statuses_cleanup_policy.statuses_to_delete(50, recent_status.id, old_status.id).pluck(:id) }
+
+      it 'returns statuses including min_id' do
+        expect(subject).to include(old_status.id)
+      end
+
+      it 'returns statuses including newer than max_id' do
+        expect(subject).to include(slightly_less_old_status.id)
+      end
+
+      it 'does not return statuses older than min_id' do
+        expect(subject).to_not include(very_old_status.id)
+      end
+    end
+
+    context 'when passed a low limit' do
+      it 'only returns the limited number of items' do
+        expect(account_statuses_cleanup_policy.statuses_to_delete(1).count).to eq 1
+      end
+    end
+
+    context 'when policy is set to keep statuses more recent than 2 years' do
+      before do
+        account_statuses_cleanup_policy.min_status_age = 2.years.seconds
+      end
+
+      it 'does not return unrelated old status' do
+        expect(subject.pluck(:id)).to_not include(unrelated_status.id)
+      end
+
+      it 'returns only oldest status for deletion' do
+        expect(subject.pluck(:id)).to eq [very_old_status.id]
+      end
+    end
+
+    context 'when policy is set to keep DMs and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = true
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the old direct message for deletion' do
+        expect(subject.pluck(:id)).to_not include(direct_message.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(very_old_status.id, pinned_status.id, self_faved.id, self_bookmarked.id, status_with_poll.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep self-bookmarked toots and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = true
+      end
+
+      it 'does not return the old self-bookmarked message for deletion' do
+        expect(subject.pluck(:id)).to_not include(self_bookmarked.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, pinned_status.id, self_faved.id, status_with_poll.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep self-faved toots and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = true
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the old self-bookmarked message for deletion' do
+        expect(subject.pluck(:id)).to_not include(self_faved.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, pinned_status.id, self_bookmarked.id, status_with_poll.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep toots with media and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = true
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the old message with media for deletion' do
+        expect(subject.pluck(:id)).to_not include(status_with_media.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, pinned_status.id, self_faved.id, self_bookmarked.id, status_with_poll.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep toots with polls and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = true
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the old poll message for deletion' do
+        expect(subject.pluck(:id)).to_not include(status_with_poll.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, pinned_status.id, self_faved.id, self_bookmarked.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep pinned toots and reject everything else' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = true
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the old pinned message for deletion' do
+        expect(subject.pluck(:id)).to_not include(pinned_status.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, self_faved.id, self_bookmarked.id, status_with_poll.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is to not keep any special messages' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = false
+        account_statuses_cleanup_policy.keep_pinned = false
+        account_statuses_cleanup_policy.keep_polls = false
+        account_statuses_cleanup_policy.keep_media = false
+        account_statuses_cleanup_policy.keep_self_fav = false
+        account_statuses_cleanup_policy.keep_self_bookmark = false
+      end
+
+      it 'does not return the recent toot' do
+        expect(subject.pluck(:id)).to_not include(recent_status.id)
+      end
+
+      it 'does not return the unrelated toot' do
+        expect(subject.pluck(:id)).to_not include(unrelated_status.id)
+      end
+
+      it 'returns every other old status for deletion' do
+        expect(subject.pluck(:id)).to include(direct_message.id, very_old_status.id, pinned_status.id, self_faved.id, self_bookmarked.id, status_with_poll.id, status_with_media.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id)
+      end
+    end
+
+    context 'when policy is set to keep every category of toots' do
+      before do
+        account_statuses_cleanup_policy.keep_direct = true
+        account_statuses_cleanup_policy.keep_pinned = true
+        account_statuses_cleanup_policy.keep_polls = true
+        account_statuses_cleanup_policy.keep_media = true
+        account_statuses_cleanup_policy.keep_self_fav = true
+        account_statuses_cleanup_policy.keep_self_bookmark = true
+      end
+
+      it 'does not return unrelated old status' do
+        expect(subject.pluck(:id)).to_not include(unrelated_status.id)
+      end
+
+      it 'returns only normal statuses for deletion' do
+        expect(subject.pluck(:id).sort).to eq [very_old_status.id, faved4.id, faved5.id, reblogged4.id, reblogged5.id].sort
+      end
+    end
+
+    context 'when policy is to keep statuses with more than 4 boosts' do
+      before do
+        account_statuses_cleanup_policy.min_reblogs = 4
+      end
+
+      it 'does not return the recent toot' do
+        expect(subject.pluck(:id)).to_not include(recent_status.id)
+      end
+
+      it 'does not return the toot reblogged 5 times' do
+        expect(subject.pluck(:id)).to_not include(reblogged5.id)
+      end
+
+      it 'does not return the unrelated toot' do
+        expect(subject.pluck(:id)).to_not include(unrelated_status.id)
+      end
+
+      it 'returns old statuses not reblogged as much' do
+        expect(subject.pluck(:id)).to include(very_old_status.id, faved4.id, faved5.id, reblogged4.id)
+      end
+    end
+
+    context 'when policy is to keep statuses with more than 4 favs' do
+      before do
+        account_statuses_cleanup_policy.min_favs = 4
+      end
+
+      it 'does not return the recent toot' do
+        expect(subject.pluck(:id)).to_not include(recent_status.id)
+      end
+
+      it 'does not return the toot faved 5 times' do
+        expect(subject.pluck(:id)).to_not include(faved5.id)
+      end
+
+      it 'does not return the unrelated toot' do
+        expect(subject.pluck(:id)).to_not include(unrelated_status.id)
+      end
+
+      it 'returns old statuses not faved as much' do
+        expect(subject.pluck(:id)).to include(very_old_status.id, faved4.id, reblogged4.id, reblogged5.id)
+      end
+    end
+  end
+end

+ 101 - 0
spec/services/account_statuses_cleanup_service_spec.rb

@@ -0,0 +1,101 @@
+require 'rails_helper'
+
+describe AccountStatusesCleanupService, type: :service do
+  let(:account)           { Fabricate(:account, username: 'alice', domain: nil) }
+  let(:account_policy)    { Fabricate(:account_statuses_cleanup_policy, account: account) }
+  let!(:unrelated_status) { Fabricate(:status, created_at: 3.years.ago) }
+
+  describe '#call' do
+    context 'when the account has not posted anything' do
+      it 'returns 0 deleted toots' do
+        expect(subject.call(account_policy)).to eq 0
+      end
+    end
+
+    context 'when the account has posted several old statuses' do
+      let!(:very_old_status)    { Fabricate(:status, created_at: 3.years.ago, account: account) }
+      let!(:old_status)         { Fabricate(:status, created_at: 1.year.ago, account: account) }
+      let!(:another_old_status) { Fabricate(:status, created_at: 1.year.ago, account: account) }
+      let!(:recent_status)      { Fabricate(:status, created_at: 1.day.ago, account: account) }
+
+      context 'given a budget of 1' do
+        it 'reports 1 deleted toot' do
+          expect(subject.call(account_policy, 1)).to eq 1
+        end
+      end
+
+      context 'given a normal budget of 10' do
+        it 'reports 3 deleted statuses' do
+          expect(subject.call(account_policy, 10)).to eq 3
+        end
+
+        it 'records the last deleted id' do
+          subject.call(account_policy, 10)
+          expect(account_policy.last_inspected).to eq [old_status.id, another_old_status.id].max
+        end
+
+        it 'actually deletes the statuses' do
+          subject.call(account_policy, 10)
+          expect(Status.find_by(id: [very_old_status.id, old_status.id, another_old_status.id])).to be_nil
+        end
+      end
+
+      context 'when called repeatedly with a budget of 2' do
+        it 'reports 2 then 1 deleted statuses' do
+         expect(subject.call(account_policy, 2)).to eq 2
+         expect(subject.call(account_policy, 2)).to eq 1
+        end
+
+        it 'actually deletes the statuses in the expected order' do
+          subject.call(account_policy, 2)
+          expect(Status.find_by(id: very_old_status.id)).to be_nil
+          subject.call(account_policy, 2)
+          expect(Status.find_by(id: [very_old_status.id, old_status.id, another_old_status.id])).to be_nil
+        end
+      end
+
+      context 'when a self-faved toot is unfaved' do
+        let!(:self_faved) { Fabricate(:status, created_at: 6.months.ago, account: account) }
+        let!(:favourite)  { Fabricate(:favourite, account: account, status: self_faved) }
+
+        it 'deletes it once unfaved' do
+          expect(subject.call(account_policy, 20)).to eq 3
+          expect(Status.find_by(id: self_faved.id)).to_not be_nil
+          expect(subject.call(account_policy, 20)).to eq 0
+          favourite.destroy!
+          expect(subject.call(account_policy, 20)).to eq 1
+          expect(Status.find_by(id: self_faved.id)).to be_nil
+        end
+      end
+
+      context 'when there are more un-deletable old toots than the early search cutoff' do
+        before do
+          stub_const 'AccountStatusesCleanupPolicy::EARLY_SEARCH_CUTOFF', 5
+          # Old statuses that should be cut-off
+          10.times do
+            Fabricate(:status, created_at: 4.years.ago, visibility: :direct, account: account)
+          end
+          # New statuses that prevent cut-off id to reach the last status
+          10.times do
+            Fabricate(:status, created_at: 4.seconds.ago, visibility: :direct, account: account)
+          end
+        end
+
+        it 'reports 0 deleted statuses then 0 then 3 then 0 again' do
+          expect(subject.call(account_policy, 10)).to eq 0
+          expect(subject.call(account_policy, 10)).to eq 0
+          expect(subject.call(account_policy, 10)).to eq 3
+          expect(subject.call(account_policy, 10)).to eq 0
+        end
+
+        it 'never causes the recorded id to get higher than oldest deletable toot' do
+          subject.call(account_policy, 10)
+          subject.call(account_policy, 10)
+          subject.call(account_policy, 10)
+          subject.call(account_policy, 10)
+          expect(account_policy.last_inspected).to be < Mastodon::Snowflake.id_at(account_policy.min_status_age.seconds.ago, with_random: false)
+        end
+      end
+    end
+  end
+end

+ 127 - 0
spec/workers/scheduler/accounts_statuses_cleanup_scheduler_spec.rb

@@ -0,0 +1,127 @@
+require 'rails_helper'
+
+describe Scheduler::AccountsStatusesCleanupScheduler do
+  subject { described_class.new }
+
+  let!(:account1)  { Fabricate(:account, domain: nil) }
+  let!(:account2)  { Fabricate(:account, domain: nil) }
+  let!(:account3)  { Fabricate(:account, domain: nil) }
+  let!(:account4)  { Fabricate(:account, domain: nil) }
+  let!(:remote)    { Fabricate(:account) }
+
+  let!(:policy1)   { Fabricate(:account_statuses_cleanup_policy, account: account1) }
+  let!(:policy2)   { Fabricate(:account_statuses_cleanup_policy, account: account3) }
+  let!(:policy3)   { Fabricate(:account_statuses_cleanup_policy, account: account4, enabled: false) }
+
+  let(:queue_size)       { 0 }
+  let(:queue_latency)    { 0 }
+  let(:process_set_stub) do
+    [
+      {
+        'concurrency' => 2,
+        'queues' => ['push', 'default'],
+      },
+    ]
+  end
+  let(:retry_size) { 0 }
+
+  before do
+    queue_stub = double
+    allow(queue_stub).to receive(:size).and_return(queue_size)
+    allow(queue_stub).to receive(:latency).and_return(queue_latency)
+    allow(Sidekiq::Queue).to receive(:new).and_return(queue_stub)
+    allow(Sidekiq::ProcessSet).to receive(:new).and_return(process_set_stub)
+
+    sidekiq_stats_stub = double
+    allow(sidekiq_stats_stub).to receive(:retry_size).and_return(retry_size)
+    allow(Sidekiq::Stats).to receive(:new).and_return(sidekiq_stats_stub)
+
+    # Create a bunch of old statuses
+    10.times do
+      Fabricate(:status, account: account1, created_at: 3.years.ago)
+      Fabricate(:status, account: account2, created_at: 3.years.ago)
+      Fabricate(:status, account: account3, created_at: 3.years.ago)
+      Fabricate(:status, account: account4, created_at: 3.years.ago)
+      Fabricate(:status, account: remote, created_at: 3.years.ago)
+    end
+
+    # Create a bunch of newer statuses
+    5.times do
+      Fabricate(:status, account: account1, created_at: 3.minutes.ago)
+      Fabricate(:status, account: account2, created_at: 3.minutes.ago)
+      Fabricate(:status, account: account3, created_at: 3.minutes.ago)
+      Fabricate(:status, account: account4, created_at: 3.minutes.ago)
+      Fabricate(:status, account: remote, created_at: 3.minutes.ago)
+    end
+  end
+
+  describe '#under_load?' do
+    context 'when nothing is queued' do
+      it 'returns false' do
+        expect(subject.under_load?).to be false
+      end
+    end
+
+    context 'when numerous jobs are queued' do
+      let(:queue_size)    { 5 }
+      let(:queue_latency) { 120 }
+
+      it 'returns true' do
+        expect(subject.under_load?).to be true
+      end
+    end
+
+    context 'when there is a huge amount of jobs to retry' do
+      let(:retry_size) { 1_000_000 }
+
+      it 'returns true' do
+        expect(subject.under_load?).to be true
+      end
+    end
+  end
+
+  describe '#get_budget' do
+    context 'on a single thread' do
+      let(:process_set_stub) { [ { 'concurrency' => 1, 'queues' => ['push', 'default'] } ] }
+
+      it 'returns a low value' do
+        expect(subject.compute_budget).to be < 10
+      end
+    end
+
+    context 'on a lot of threads' do
+      let(:process_set_stub) do
+        [
+          { 'concurrency' => 2, 'queues' => ['push', 'default'] },
+          { 'concurrency' => 2, 'queues' => ['push'] },
+          { 'concurrency' => 2, 'queues' => ['push'] },
+          { 'concurrency' => 2, 'queues' => ['push'] },
+        ]
+      end
+
+      it 'returns a larger value' do
+        expect(subject.compute_budget).to be > 10
+      end
+    end
+  end
+
+  describe '#perform' do
+    context 'when the budget is lower than the number of toots to delete' do
+      it 'deletes as many statuses as the given budget' do
+        expect { subject.perform }.to change { Status.count }.by(-subject.compute_budget)
+      end
+
+      it 'does not delete from accounts with no cleanup policy' do
+        expect { subject.perform }.to_not change { account2.statuses.count }
+      end
+
+      it 'does not delete from accounts with disabled cleanup policies' do
+        expect { subject.perform }.to_not change { account4.statuses.count }
+      end
+
+      it 'eventually deletes every deletable toot' do
+        expect { subject.perform; subject.perform; subject.perform; subject.perform }.to change { Status.count }.by(-20)
+      end
+    end
+  end
+end