# domains_cli.rb
  1. # frozen_string_literal: true
  2. require 'concurrent'
  3. require_relative '../../config/boot'
  4. require_relative '../../config/environment'
  5. require_relative 'cli_helper'
  6. module Mastodon
  7. class DomainsCLI < Thor
  class << self
    # Hook consulted by Thor: answering true asks Thor to terminate the
    # process with a failure status when a command errors out.
    def exit_on_failure?
      true
    end
  end
  option :dry_run, type: :boolean
  option :whitelist_mode, type: :boolean
  desc 'purge [DOMAIN]', 'Remove accounts from a DOMAIN without a trace'
  long_desc <<-LONG_DESC
    Remove all accounts from a given DOMAIN without leaving behind any
    records. Unlike a suspension, if the DOMAIN still exists in the wild,
    it means the accounts could return if they are resolved again.

    When the --whitelist-mode option is given, instead of purging accounts
    from a single domain, all accounts from domains that are not whitelisted
    are removed from the database.
  LONG_DESC
  # Destroys remote accounts (plus related domain blocks and custom emojis)
  # for one DOMAIN, or for every non-whitelisted domain in --whitelist-mode.
  #
  # domain - domain name to purge; ignored when --whitelist-mode is given.
  #
  # With --dry-run nothing is destroyed; records are only counted and the
  # summary lines are suffixed with " (DRY RUN)".
  #
  # Exits the process with status 1 when neither a domain nor
  # --whitelist-mode is supplied.
  def purge(domain = nil)
    dry_run        = options[:dry_run]
    dry_run_suffix = dry_run ? ' (DRY RUN)' : ''

    if options[:whitelist_mode]
      allowed_domains = DomainAllow.pluck(:domain)
      accounts        = Account.remote.where.not(domain: allowed_domains)

      # `domain` is nil in this mode. Filtering emojis with `domain: nil`
      # would select the records that have no domain at all (in Mastodon
      # those are the local emojis), so select remote emojis from
      # non-whitelisted domains instead.
      custom_emojis = CustomEmoji.where.not(domain: nil).where.not(domain: allowed_domains)

      # No single domain is being purged, so no domain block is lifted.
      domain_blocks = DomainBlock.none
    elsif domain.present?
      accounts      = Account.remote.where(domain: domain)
      custom_emojis = CustomEmoji.where(domain: domain)
      domain_blocks = DomainBlock.where(domain: domain)
    else
      say('No domain given', :red)
      exit(1)
    end

    removed = 0

    accounts.find_each do |account|
      SuspendAccountService.new.call(account, destroy: true) unless dry_run
      removed += 1
      say('.', :green, false) # progress dot, no trailing newline
    end

    domain_blocks.destroy_all unless dry_run

    say
    say("Removed #{removed} accounts#{dry_run_suffix}", :green)

    custom_emojis_count = custom_emojis.count
    custom_emojis.destroy_all unless dry_run

    say("Removed #{custom_emojis_count} custom emojis#{dry_run_suffix}", :green)
  end
  option :concurrency, type: :numeric, default: 50, aliases: [:c]
  option :silent, type: :boolean, default: false, aliases: [:s]
  option :format, type: :string, default: 'summary', aliases: [:f], enum: %w(summary domains json)
  desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
  long_desc <<-LONG_DESC
    Crawl the fediverse by using the Mastodon REST API endpoints that expose
    all known peers, and collect statistics from those peers, as long as those
    peers support those API endpoints. When no START is given, the command uses
    this server's own database of known peers to seed the crawl.

    The --concurrency (-c) option controls the number of threads performing HTTP
    requests at the same time. More threads means the crawl may complete faster.

    The --silent (-s) option controls progress output.

    The --format (-f) option controls how the data is displayed at the end. By
    default (`summary`), a summary of the statistics is returned. The other options
    are `domains`, which returns a newline-delimited list of all discovered peers,
    and `json`, which dumps all the aggregated data raw.
  LONG_DESC
  # Crawls instances starting from START (or from the domains of all known
  # remote accounts), following each instance's peer list, and prints the
  # collected data in the format selected by --format.
  #
  # start - optional domain name used as the only seed of the crawl.
  #
  # The report is printed from the `ensure` clause, so whatever was collected
  # is still shown when the crawl is interrupted or fails part-way.
  def crawl(start = nil)
    stats     = Concurrent::Hash.new
    processed = Concurrent::AtomicFixnum.new(0)
    failed    = Concurrent::AtomicFixnum.new(0)
    start_at  = Time.now.to_f
    claim     = Mutex.new
    seed      = start ? [start] : Account.remote.domains

    pool = Concurrent::ThreadPoolExecutor.new(
      min_threads: 0,
      max_threads: options[:concurrency],
      idletime: 10,
      auto_terminate: true,
      max_queue: 0
    )

    work_unit = ->(domain) do
      # The check and the placeholder write must happen as one step.
      # Otherwise two workers can both see the domain as unvisited, crawl it
      # twice, and one can overwrite the other's result with nil.
      first_visit = claim.synchronize do
        next false if stats.key?(domain)

        stats[domain] = nil
        true
      end

      next unless first_visit

      processed.increment

      begin
        Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
          next unless res.code == 200

          # Remote servers are untrusted: parse as plain JSON only.
          stats[domain] = Oj.load(res.to_s, mode: :strict)
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
          next unless res.code == 200

          peers = Oj.load(res.to_s, mode: :strict)
          next unless peers.is_a?(Array)

          peers.each do |peer|
            # Only domain strings can be crawled; skip anything else.
            next unless peer.is_a?(String)
            next if stats.key?(peer)

            pool.post(peer, &work_unit)
          end
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
          next unless res.code == 200

          # Raises when the instance request above stored nothing for this
          # domain; the rescue below then counts the domain as failed.
          stats[domain]['activity'] = Oj.load(res.to_s, mode: :strict)
        end

        say('.', :green, false) unless options[:silent]
      rescue StandardError
        failed.increment
        say('.', :red, false) unless options[:silent]
      end
    end

    seed.each { |domain| pool.post(domain, &work_unit) }

    # Give the workers time to discover peers before polling the queue.
    sleep 20
    sleep 20 until pool.queue_length.zero?

    pool.shutdown
    pool.wait_for_termination(20)
  ensure
    # `pool` is still nil when setup raised before the executor was built.
    pool&.shutdown

    say unless options[:silent]

    case options[:format]
    when 'summary'
      stats_to_summary(stats, processed, failed, start_at)
    when 'domains'
      stats_to_domains(stats)
    when 'json'
      stats_to_json(stats)
    end
  end
  # Everything below is an output helper. Thor turns public instance methods
  # into commands, so these are kept private.
  private

  # Prints aggregate crawl statistics.
  #
  # stats     - Hash of domain => instance data (or nil); nil entries are
  #             removed from the hash in place.
  # processed - object responding to #value: number of domains visited.
  # failed    - object responding to #value: number of failed domains.
  # start_at  - crawl start time as a Float (seconds since the epoch).
  def stats_to_summary(stats, processed, failed, start_at)
    stats.compact!

    # Returns the second entry of an instance's activity list, provided the
    # list has more than two entries and that entry is a Hash; nil otherwise.
    last_week = lambda do |info|
      next unless info.is_a?(Hash)

      weeks = info['activity']
      next unless weeks.is_a?(Array) && weeks.size > 2

      weeks[1] if weeks[1].is_a?(Hash)
    end

    server_count = stats.size

    user_count = stats.each_value.sum do |info|
      if info.is_a?(Hash) && info['stats'].is_a?(Hash)
        info['stats']['user_count'].to_i
      else
        0
      end
    end

    active_count = stats.each_value.sum do |info|
      week = last_week.call(info)
      week ? week['logins'].to_i : 0
    end

    joined_count = stats.each_value.sum do |info|
      week = last_week.call(info)
      week ? week['registrations'].to_i : 0
    end

    elapsed = (Time.now.to_f - start_at).round

    say("Visited #{processed.value} domains, #{failed.value} failed (#{elapsed}s elapsed)", :green)
    say("Total servers: #{server_count}", :green)
    say("Total registered: #{user_count}", :green)
    say("Total active last week: #{active_count}", :green)
    say("Total joined last week: #{joined_count}", :green)
  end
  # Prints every domain key of the stats hash, one per line.
  #
  # stats - Hash keyed by domain name; values are not inspected.
  def stats_to_domains(stats)
    domain_list = stats.keys
    say(domain_list.join("\n"))
  end
  # Prints the collected stats serialized with Oj.
  #
  # stats - Hash of domain => data; entries whose value is nil are removed
  #         from the hash in place before dumping.
  def stats_to_json(stats)
    stats.compact!
    payload = Oj.dump(stats)
    say(payload)
  end
  136. end
  137. end