  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  9. def self.exit_on_failure?
  10. true
  11. end
  12. option :days, type: :numeric, default: 7, aliases: [:d]
  13. option :concurrency, type: :numeric, default: 5, aliases: [:c]
  14. option :verbose, type: :boolean, default: false, aliases: [:v]
  15. option :dry_run, type: :boolean, default: false
  16. desc 'remove', 'Remove remote media files'
  17. long_desc <<-DESC
  18. Removes locally cached copies of media attachments from other servers.
  19. The --days option specifies how old media attachments have to be before
  20. they are removed. It defaults to 7 days.
  21. DESC
  22. def remove
  23. time_ago = options[:days].days.ago
  24. dry_run = options[:dry_run] ? '(DRY RUN)' : ''
  25. processed, aggregate = parallelize_with_progress(MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)) do |media_attachment|
  26. next if media_attachment.file.blank?
  27. size = (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
  28. unless options[:dry_run]
  29. media_attachment.file.destroy
  30. media_attachment.thumbnail.destroy
  31. media_attachment.save
  32. end
  33. size
  34. end
  35. say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
  36. end
  37. option :start_after
  38. option :prefix
  39. option :fix_permissions, type: :boolean, default: false
  40. option :dry_run, type: :boolean, default: false
  41. desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  42. long_desc <<~LONG_DESC
  43. Scans file storage for files that do not belong to existing media attachments. Because this operation
  44. requires iterating over every single file individually, it will be slow.
  45. Please mind that some storage providers charge for the necessary API requests to list objects.
  46. LONG_DESC
  47. def remove_orphans
  48. progress = create_progress_bar(nil)
  49. reclaimed_bytes = 0
  50. removed = 0
  51. dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
  52. prefix = options[:prefix]
  53. case Paperclip::Attachment.default_options[:storage]
  54. when :s3
  55. paperclip_instance = MediaAttachment.new.file
  56. s3_interface = paperclip_instance.s3_interface
  57. s3_permissions = Paperclip::Attachment.default_options[:s3_permissions]
  58. bucket = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
  59. last_key = options[:start_after]
  60. loop do
  61. objects = begin
  62. begin
  63. bucket.objects(start_after: last_key, prefix: prefix).limit(1000).map { |x| x }
  64. rescue => e
  65. progress.log(pastel.red("Error fetching list of files: #{e}"))
  66. progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
  67. break
  68. end
  69. end
  70. break if objects.empty?
  71. last_key = objects.last.key
  72. record_map = preload_records_from_mixed_objects(objects)
  73. objects.each do |object|
  74. object.acl.put(acl: s3_permissions) if options[:fix_permissions] && !options[:dry_run]
  75. path_segments = object.key.split('/')
  76. path_segments.delete('cache')
  77. unless [7, 10].include?(path_segments.size)
  78. progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
  79. next
  80. end
  81. model_name = path_segments.first.classify
  82. attachment_name = path_segments[1].singularize
  83. record_id = path_segments[2..-2].join.to_i
  84. file_name = path_segments.last
  85. record = record_map.dig(model_name, record_id)
  86. attachment = record&.public_send(attachment_name)
  87. progress.increment
  88. next unless attachment.blank? || !attachment.variant?(file_name)
  89. begin
  90. object.delete unless options[:dry_run]
  91. reclaimed_bytes += object.size
  92. removed += 1
  93. progress.log("Found and removed orphan: #{object.key}")
  94. rescue => e
  95. progress.log(pastel.red("Error processing #{object.key}: #{e}"))
  96. end
  97. end
  98. end
  99. when :fog
  100. say('The fog storage driver is not supported for this operation at this time', :red)
  101. exit(1)
  102. when :filesystem
  103. require 'find'
  104. root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
  105. Find.find(File.join(*[root_path, prefix].compact)) do |path|
  106. next if File.directory?(path)
  107. key = path.gsub("#{root_path}#{File::SEPARATOR}", '')
  108. path_segments = key.split(File::SEPARATOR)
  109. path_segments.delete('cache')
  110. unless [7, 10].include?(path_segments.size)
  111. progress.log(pastel.yellow("Unrecognized file found: #{key}"))
  112. next
  113. end
  114. model_name = path_segments.first.classify
  115. record_id = path_segments[2..-2].join.to_i
  116. attachment_name = path_segments[1].singularize
  117. file_name = path_segments.last
  118. next unless PRELOAD_MODEL_WHITELIST.include?(model_name)
  119. record = model_name.constantize.find_by(id: record_id)
  120. attachment = record&.public_send(attachment_name)
  121. progress.increment
  122. next unless attachment.blank? || !attachment.variant?(file_name)
  123. begin
  124. size = File.size(path)
  125. unless options[:dry_run]
  126. File.delete(path)
  127. begin
  128. FileUtils.rmdir(File.dirname(path), parents: true)
  129. rescue Errno::ENOTEMPTY
  130. # OK
  131. end
  132. end
  133. reclaimed_bytes += size
  134. removed += 1
  135. progress.log("Found and removed orphan: #{key}")
  136. rescue => e
  137. progress.log(pastel.red("Error processing #{key}: #{e}"))
  138. end
  139. end
  140. end
  141. progress.total = progress.progress
  142. progress.finish
  143. say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  144. end
  145. option :account, type: :string
  146. option :domain, type: :string
  147. option :status, type: :numeric
  148. option :days, type: :numeric
  149. option :concurrency, type: :numeric, default: 5, aliases: [:c]
  150. option :verbose, type: :boolean, default: false, aliases: [:v]
  151. option :dry_run, type: :boolean, default: false
  152. option :force, type: :boolean, default: false
  153. desc 'refresh', 'Fetch remote media files'
  154. long_desc <<-DESC
  155. Re-downloads media attachments from other servers. You must specify the
  156. source of media attachments with one of the following options:
  157. Use the --status option to download attachments from a specific status,
  158. using the status local numeric ID.
  159. Use the --account option to download attachments from a specific account,
  160. using username@domain handle of the account.
  161. Use the --domain option to download attachments from a specific domain.
  162. Use the --days option to limit attachments created within days.
  163. By default, attachments that are believed to be already downloaded will
  164. not be re-downloaded. To force re-download of every URL, use --force.
  165. DESC
  166. def refresh
  167. dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
  168. if options[:status]
  169. scope = MediaAttachment.where(status_id: options[:status])
  170. elsif options[:account]
  171. username, domain = options[:account].split('@')
  172. account = Account.find_remote(username, domain)
  173. if account.nil?
  174. say('No such account', :red)
  175. exit(1)
  176. end
  177. scope = MediaAttachment.where(account_id: account.id)
  178. elsif options[:domain]
  179. scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
  180. elsif options[:days].present?
  181. scope = MediaAttachment.remote
  182. else
  183. exit(1)
  184. end
  185. if options[:days].present?
  186. scope = scope.where('media_attachments.id > ?', Mastodon::Snowflake.id_at(options[:days].days.ago, with_random: false))
  187. end
  188. processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
  189. next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)
  190. next if DomainBlock.reject_media?(media_attachment.account.domain)
  191. unless options[:dry_run]
  192. media_attachment.reset_file!
  193. media_attachment.reset_thumbnail!
  194. media_attachment.save
  195. end
  196. media_attachment.file_file_size + (media_attachment.thumbnail_file_size || 0)
  197. end
  198. say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  199. end
  200. desc 'usage', 'Calculate disk space consumed by Mastodon'
  201. def usage
  202. say("Attachments:\t#{number_to_human_size(MediaAttachment.sum(Arel.sql('COALESCE(file_file_size, 0) + COALESCE(thumbnail_file_size, 0)')))} (#{number_to_human_size(MediaAttachment.where(account: Account.local).sum(Arel.sql('COALESCE(file_file_size, 0) + COALESCE(thumbnail_file_size, 0)')))} local)")
  203. say("Custom emoji:\t#{number_to_human_size(CustomEmoji.sum(:image_file_size))} (#{number_to_human_size(CustomEmoji.local.sum(:image_file_size))} local)")
  204. say("Preview cards:\t#{number_to_human_size(PreviewCard.sum(:image_file_size))}")
  205. say("Avatars:\t#{number_to_human_size(Account.sum(:avatar_file_size))} (#{number_to_human_size(Account.local.sum(:avatar_file_size))} local)")
  206. say("Headers:\t#{number_to_human_size(Account.sum(:header_file_size))} (#{number_to_human_size(Account.local.sum(:header_file_size))} local)")
  207. say("Backups:\t#{number_to_human_size(Backup.sum(:dump_file_size))}")
  208. say("Imports:\t#{number_to_human_size(Import.sum(:data_file_size))}")
  209. say("Settings:\t#{number_to_human_size(SiteUpload.sum(:file_file_size))}")
  210. end
  211. desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  212. def lookup(url)
  213. path = Addressable::URI.parse(url).path
  214. path_segments = path.split('/')[2..-1]
  215. path_segments.delete('cache')
  216. unless [7, 10].include?(path_segments.size)
  217. say('Not a media URL', :red)
  218. exit(1)
  219. end
  220. model_name = path_segments.first.classify
  221. record_id = path_segments[2..-2].join.to_i
  222. unless PRELOAD_MODEL_WHITELIST.include?(model_name)
  223. say("Cannot find corresponding model: #{model_name}", :red)
  224. exit(1)
  225. end
  226. record = model_name.constantize.find_by(id: record_id)
  227. record = record.status if record.respond_to?(:status)
  228. unless record
  229. say('Cannot find corresponding record', :red)
  230. exit(1)
  231. end
  232. display_url = ActivityPub::TagManager.instance.url_for(record)
  233. if display_url.blank?
  234. say('No public URL for this type of record', :red)
  235. exit(1)
  236. end
  237. say(display_url, :blue)
  238. rescue Addressable::URI::InvalidURIError
  239. say('Invalid URL', :red)
  240. exit(1)
  241. end
  242. private
  243. PRELOAD_MODEL_WHITELIST = %w(
  244. Account
  245. Backup
  246. CustomEmoji
  247. Import
  248. MediaAttachment
  249. PreviewCard
  250. SiteUpload
  251. ).freeze
  252. def preload_records_from_mixed_objects(objects)
  253. preload_map = Hash.new { |hash, key| hash[key] = [] }
  254. objects.map do |object|
  255. segments = object.key.split('/')
  256. segments.delete('cache')
  257. next unless [7, 10].include?(segments.size)
  258. model_name = segments.first.classify
  259. record_id = segments[2..-2].join.to_i
  260. next unless PRELOAD_MODEL_WHITELIST.include?(model_name)
  261. preload_map[model_name] << record_id
  262. end
  263. preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
  264. model_map[model_name] = model_name.constantize.where(id: record_ids).index_by(&:id)
  265. end
  266. end
  267. end
  268. end