media_cli.rb 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  # Thor hook: answering true asks Thor to terminate the process with a
  # failing exit status when a command errors out, rather than only
  # printing the error.
  #
  # @return [Boolean] always true
  def self.exit_on_failure?
    true
  end
  option :days, type: :numeric, default: 7, aliases: [:d]
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  desc 'remove', 'Remove remote media files'
  long_desc <<-DESC
    Removes locally cached copies of media attachments from other servers.
    The --days option specifies how old media attachments have to be before
    they are removed. It defaults to 7 days.
  DESC
  # Deletes the stored file of every cached media attachment that has a
  # non-empty remote_url and was created more than --days days ago, then
  # prints how many attachments were processed and their approximate size.
  #
  # With --dry-run nothing is destroyed or saved; sizes are still collected
  # so the summary shows what a real run would reclaim.
  def remove
    time_ago = options[:days].days.ago
    # Same suffix format as the other commands in this class: the leading
    # space lives in the suffix, so a normal run prints no trailing space.
    dry_run  = options[:dry_run] ? ' (DRY RUN)' : ''
    scope    = MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)

    # NOTE(review): parallelize_with_progress is not defined in this file
    # (presumably it comes from CLIHelper); it appears to return the number
    # of processed records and an aggregate of the block's return values.
    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.file.blank?

      # Read the size before destroying the file so it can still be reported.
      size = media_attachment.file_file_size

      unless options[:dry_run]
        media_attachment.file.destroy
        media_attachment.save
      end

      size
    end

    say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  option :start_after
  option :prefix
  option :dry_run, type: :boolean, default: false
  desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  long_desc <<~LONG_DESC
    Scans file storage for files that do not belong to existing media attachments. Because this operation
    requires iterating over every single file individually, it will be slow.
    Please mind that some storage providers charge for the necessary API requests to list objects.
  LONG_DESC
  # Walks the configured Paperclip storage (:s3 or :filesystem) and deletes
  # every file whose path does not resolve to an existing record's attachment
  # (or whose file name the attachment does not recognise as a variant).
  #
  # Paths are parsed as <model>/<attachment>/<id segments...>/<file name>.
  # Paths with fewer than two segments cannot be parsed that way; they are
  # reported and left untouched instead of aborting the whole scan.
  #
  # Options read: --prefix (limits the scan), --start-after (S3 only, resumes
  # listing after the given key), --dry-run (nothing is deleted, sizes are
  # still counted). The :fog driver is rejected with exit status 1.
  def remove_orphans
    progress        = create_progress_bar(nil)
    reclaimed_bytes = 0
    removed         = 0
    dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
    prefix          = options[:prefix]

    case Paperclip::Attachment.default_options[:storage]
    when :s3
      paperclip_instance = MediaAttachment.new.file
      s3_interface       = paperclip_instance.s3_interface
      bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
      last_key           = options[:start_after]

      loop do
        # Fetch the next page of up to 1000 objects. On a listing error, tell
        # the operator how to resume and stop scanning.
        objects = begin
          bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
        rescue => e
          progress.log(pastel.red("Error fetching list of files: #{e}"))
          progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
          break
        end

        break if objects.empty?

        last_key   = objects.last.key
        record_map = preload_records_from_mixed_objects(objects)

        objects.each do |object|
          path_segments = object.key.split('/')

          # Without at least <model>/<attachment> the lookups below would
          # raise on nil; skip the object rather than crash mid-scan.
          if path_segments.size < 2
            progress.increment
            progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
            next
          end

          model_name      = path_segments.first.classify
          attachment_name = path_segments[1].singularize
          record_id       = path_segments[2..-2].join.to_i
          file_name       = path_segments.last
          record          = record_map.dig(model_name, record_id)
          attachment      = record&.public_send(attachment_name)

          progress.increment

          # Keep the file when the record still has this attachment and the
          # attachment reports the file name as one of its variants.
          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            object.delete unless options[:dry_run]

            reclaimed_bytes += object.size
            removed         += 1

            progress.log("Found and removed orphan: #{object.key}")
          rescue => e
            progress.log(pastel.red("Error processing #{object.key}: #{e}"))
          end
        end
      end
    when :fog
      say('The fog storage driver is not supported for this operation at this time', :red)
      exit(1)
    when :filesystem
      # Loaded lazily: only this branch needs Find.
      require 'find'

      root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

      Find.find(File.join(*[root_path, prefix].compact)) do |path|
        next if File.directory?(path)

        # Strip only the leading root; a later occurrence of the same text
        # inside the path is part of the key and must be kept.
        key           = path.sub("#{root_path}#{File::SEPARATOR}", '')
        path_segments = key.split(File::SEPARATOR)

        # E.g. a stray file sitting directly in the storage root.
        if path_segments.size < 2
          progress.log(pastel.yellow("Unrecognized file found: #{key}"))
          next
        end

        model_name      = path_segments.first.classify
        record_id       = path_segments[2..-2].join.to_i
        attachment_name = path_segments[1].singularize
        file_name       = path_segments.last

        # Only known models are ever constantized; anything else is ignored.
        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = model_name.constantize.find_by(id: record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          # Measure before deleting; the size is unavailable afterwards.
          size = File.size(path)

          File.delete(path) unless options[:dry_run]

          reclaimed_bytes += size
          removed         += 1

          progress.log("Found and removed orphan: #{key}")
        rescue => e
          progress.log(pastel.red("Error processing #{key}: #{e}"))
        end
      end
    end

    # The bar was created without a total; set it to the final count so the
    # bar can finish cleanly.
    progress.total = progress.progress
    progress.finish

    say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  end
  option :account, type: :string
  option :domain, type: :string
  option :status, type: :numeric
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  option :force, type: :boolean, default: false
  desc 'refresh', 'Fetch remote media files'
  long_desc <<-DESC
    Re-downloads media attachments from other servers. You must specify the
    source of media attachments with one of the following options:
    Use the --status option to download attachments from a specific status,
    using the status local numeric ID.
    Use the --account option to download attachments from a specific account,
    using username@domain handle of the account.
    Use the --domain option to download attachments from a specific domain.
    By default, attachments that are believed to be already downloaded will
    not be re-downloaded. To force re-download of every URL, use --force.
  DESC
  # Re-downloads remote media attachments selected by exactly one of
  # --status, --account or --domain (checked in that order), then prints how
  # many attachments were processed and their approximate size.
  #
  # Attachments without a remote_url are skipped. Attachments that already
  # have a file name are skipped unless --force is given. With --dry-run no
  # attachment is reset or saved.
  #
  # Exits with status 1 when no source option is given or the account
  # cannot be found.
  def refresh
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if options[:status]
      scope = MediaAttachment.where(status_id: options[:status])
    elsif options[:account]
      # The handle comes from the --account option. Splitting the not-yet-
      # assigned local `username` (nil) would raise NoMethodError.
      username, domain = options[:account].split('@')
      account          = Account.find_remote(username, domain)

      if account.nil?
        say('No such account', :red)
        exit(1)
      end

      scope = MediaAttachment.where(account_id: account.id)
    elsif options[:domain]
      scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
    else
      # Explain the failure instead of exiting without any output.
      say('Specify the source of media attachments', :red)
      exit(1)
    end

    # NOTE(review): parallelize_with_progress is not defined in this file
    # (presumably it comes from CLIHelper); it appears to return the number
    # of processed records and an aggregate of the block's return values.
    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)

      unless options[:dry_run]
        media_attachment.reset_file!
        media_attachment.save
      end

      media_attachment.file_file_size
    end

    say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  desc 'usage', 'Calculate disk space consumed by Mastodon'
  # Prints one line per kind of stored file with the summed size column of
  # the backing model, formatted as a human-readable size. Where the model
  # distinguishes local records, the local share is shown in parentheses.
  def usage
    attachments_total = MediaAttachment.sum(:file_file_size)
    attachments_local = MediaAttachment.where(account: Account.local).sum(:file_file_size)
    say("Attachments:\t#{number_to_human_size(attachments_total)} (#{number_to_human_size(attachments_local)} local)")

    emoji_total = CustomEmoji.sum(:image_file_size)
    emoji_local = CustomEmoji.local.sum(:image_file_size)
    say("Custom emoji:\t#{number_to_human_size(emoji_total)} (#{number_to_human_size(emoji_local)} local)")

    preview_cards_total = PreviewCard.sum(:image_file_size)
    say("Preview cards:\t#{number_to_human_size(preview_cards_total)}")

    avatars_total = Account.sum(:avatar_file_size)
    avatars_local = Account.local.sum(:avatar_file_size)
    say("Avatars:\t#{number_to_human_size(avatars_total)} (#{number_to_human_size(avatars_local)} local)")

    headers_total = Account.sum(:header_file_size)
    headers_local = Account.local.sum(:header_file_size)
    say("Headers:\t#{number_to_human_size(headers_total)} (#{number_to_human_size(headers_local)} local)")

    backups_total = Backup.sum(:dump_file_size)
    say("Backups:\t#{number_to_human_size(backups_total)}")

    imports_total = Import.sum(:data_file_size)
    say("Imports:\t#{number_to_human_size(imports_total)}")

    settings_total = SiteUpload.sum(:file_file_size)
    say("Settings:\t#{number_to_human_size(settings_total)}")
  end
  desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  # Resolves a media file URL to the public URL of the record it belongs to
  # and prints it.
  #
  # The URL path is parsed as
  # /<first component>/<model>/<attachment>/<id segments...>/<file name>;
  # the first component is discarded (presumably the storage mount point,
  # e.g. "system" -- confirm against the deployment).
  #
  # Exits with status 1 and a red message when the URL is invalid, is too
  # short to be a media path, names a model outside PRELOAD_MODEL_WHITELIST,
  # matches no record, or the record has no public URL.
  #
  # @param url [String] URL of a stored media file
  def lookup(url)
    path          = Addressable::URI.parse(url).path
    # Index 0 is the empty string before the leading slash, index 1 the
    # discarded first component.
    path_segments = path.split('/')[2..-1]

    # A too-short path leaves nil or fewer than <model>/<attachment>; fail
    # with a readable message rather than a NoMethodError on nil.
    if path_segments.nil? || path_segments.size < 2
      say('Not a media URL', :red)
      exit(1)
    end

    model_name = path_segments.first.classify
    record_id  = path_segments[2..-2].join.to_i

    # Only known models are ever constantized.
    unless PRELOAD_MODEL_WHITELIST.include?(model_name)
      say("Cannot find corresponding model: #{model_name}", :red)
      exit(1)
    end

    record = model_name.constantize.find_by(id: record_id)
    # For records that expose a status (nil does not), report the status.
    record = record.status if record.respond_to?(:status)

    unless record
      say('Cannot find corresponding record', :red)
      exit(1)
    end

    display_url = ActivityPub::TagManager.instance.url_for(record)

    if display_url.blank?
      say('No public URL for this type of record', :red)
      exit(1)
    end

    say(display_url, :blue)
  rescue Addressable::URI::InvalidURIError
    say('Invalid URL', :red)
    exit(1)
  end
  private

  # Class names that the first segment of a storage path or media URL may
  # resolve to. Code in this class checks a derived model name against this
  # list before calling `constantize` on it. (`private` above affects
  # methods only; the constant itself stays publicly readable.)
  PRELOAD_MODEL_WHITELIST = %w[
    Account Backup CustomEmoji Import MediaAttachment PreviewCard SiteUpload
  ].freeze
  # Loads, with one query per model, the records referenced by a batch of
  # storage objects.
  #
  # Each object's key is parsed the same way remove_orphans parses it:
  # <model>/<attachment>/<id segments...>/<file name>. Objects whose model is
  # not in PRELOAD_MODEL_WHITELIST are ignored.
  #
  # @param objects [Enumerable] items responding to `key` with a
  #   slash-separated storage path
  # @return [Hash{String => Hash{Integer => Object}}] model name => record id
  #   => record, containing only records that were found
  def preload_records_from_mixed_objects(objects)
    preload_map = Hash.new { |hash, key| hash[key] = [] }

    objects.each do |object|
      # Keep the full list of segments: the first one names the model and
      # the ones between the attachment name and the file name hold the id.
      segments   = object.key.split('/')
      model_name = segments.first&.classify

      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      # Array() tolerates keys too short to carry any id segments.
      preload_map[model_name] << Array(segments[2..-2]).join.to_i
    end

    preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
      # Several files usually belong to the same record, so ids repeat.
      records = model_name.constantize.where(id: record_ids.uniq)

      model_map[model_name] = records.each_with_object({}) { |record, record_map| record_map[record.id] = record }
    end
  end
  230. end
  231. end