Browse Source

Refactor fetching of remote resources (#11251)

Eugen Rochko 4 years ago
parent
commit
5d3feed191

+ 26 - 21
app/helpers/jsonld_helper.rb

@@ -16,13 +16,15 @@ module JsonLdHelper
   # The url attribute can be a string, an array of strings, or an array of objects.
   # The objects could include a mimeType. Not-included mimeType means it's text/html.
   def url_to_href(value, preferred_type = nil)
-    single_value = if value.is_a?(Array) && !value.first.is_a?(String)
-                     value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) }
-                   elsif value.is_a?(Array)
-                     value.first
-                   else
-                     value
-                   end
+    single_value = begin
+      if value.is_a?(Array) && !value.first.is_a?(String)
+        value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) }
+      elsif value.is_a?(Array)
+        value.first
+      else
+        value
+      end
+    end
 
     if single_value.nil? || single_value.is_a?(String)
       single_value
@@ -64,7 +66,9 @@ module JsonLdHelper
   def fetch_resource(uri, id, on_behalf_of = nil)
     unless id
       json = fetch_resource_without_id_validation(uri, on_behalf_of)
+
       return unless json
+
       uri = json['id']
     end
 
@@ -74,24 +78,26 @@ module JsonLdHelper
 
   def fetch_resource_without_id_validation(uri, on_behalf_of = nil, raise_on_temporary_error = false)
     build_request(uri, on_behalf_of).perform do |response|
-      unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
-        raise Mastodon::UnexpectedResponseError, response
-      end
+      raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
+
       return body_to_json(response.body_with_limit) if response.code == 200
     end
+
     # If request failed, retry without doing it on behalf of a user
     return if on_behalf_of.nil?
+
     build_request(uri).perform do |response|
-      unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
-        raise Mastodon::UnexpectedResponseError, response
-      end
+      raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
+
       response.code == 200 ? body_to_json(response.body_with_limit) : nil
     end
   end
 
   def body_to_json(body, compare_id: nil)
     json = body.is_a?(String) ? Oj.load(body, mode: :strict) : body
+
     return if compare_id.present? && json['id'] != compare_id
+
     json
   rescue Oj::ParseError
     nil
@@ -105,35 +111,34 @@ module JsonLdHelper
     end
   end
 
-  private
-
   def response_successful?(response)
     (200...300).cover?(response.code)
   end
 
   def response_error_unsalvageable?(response)
-    (400...500).cover?(response.code) && response.code != 429
+    response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code))
   end
 
   def build_request(uri, on_behalf_of = nil)
-    request = Request.new(:get, uri)
-    request.on_behalf_of(on_behalf_of) if on_behalf_of
-    request.add_headers('Accept' => 'application/activity+json, application/ld+json')
-    request
+    Request.new(:get, uri).tap do |request|
+      request.on_behalf_of(on_behalf_of) if on_behalf_of
+      request.add_headers('Accept' => 'application/activity+json, application/ld+json')
+    end
   end
 
   def load_jsonld_context(url, _options = {}, &_block)
     json = Rails.cache.fetch("jsonld:context:#{url}", expires_in: 30.days, raw: true) do
       request = Request.new(:get, url)
       request.add_headers('Accept' => 'application/ld+json')
-
       request.perform do |res|
         raise JSON::LD::JsonLdError::LoadingDocumentFailed unless res.code == 200 && res.mime_type == 'application/ld+json'
+
         res.body_with_limit
       end
     end
 
     doc = JSON::LD::API::RemoteDocument.new(url, json)
+
     block_given? ? yield(doc) : doc
   end
 end

+ 1 - 1
app/lib/request.rb

@@ -41,7 +41,7 @@ class Request
   end
 
   def on_behalf_of(account, key_id_format = :acct, sign_with: nil)
-    raise ArgumentError unless account.local?
+    raise ArgumentError, 'account must be local' unless account&.local?
 
     @account       = account
     @keypair       = sign_with.present? ? OpenSSL::PKey::RSA.new(sign_with) : @account.keypair

+ 10 - 10
app/services/activitypub/fetch_remote_status_service.rb

@@ -5,18 +5,18 @@ class ActivityPub::FetchRemoteStatusService < BaseService
 
   # Should be called when uri has already been checked for locality
   def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil)
-    @json = if prefetched_body.nil?
-              fetch_resource(uri, id, on_behalf_of)
-            else
-              body_to_json(prefetched_body, compare_id: id ? uri : nil)
-            end
+    @json = begin
+      if prefetched_body.nil?
+        fetch_resource(uri, id, on_behalf_of)
+      else
+        body_to_json(prefetched_body, compare_id: id ? uri : nil)
+      end
+    end
 
-    return unless supported_context? && expected_type?
-
-    return if actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id)
+    return if !(supported_context? && expected_type?) || actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id)
 
     actor = ActivityPub::TagManager.instance.uri_to_resource(actor_id, Account)
-    actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update(actor)
+    actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update?(actor)
 
     return if actor.nil? || actor.suspended?
 
@@ -46,7 +46,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService
     equals_or_includes_any?(@json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
   end
 
-  def needs_update(actor)
+  def needs_update?(actor)
     actor.possibly_stale?
   end
 end

+ 0 - 93
app/services/fetch_atom_service.rb

@@ -1,93 +0,0 @@
-# frozen_string_literal: true
-
-class FetchAtomService < BaseService
-  include JsonLdHelper
-
-  def call(url)
-    return if url.blank?
-
-    result = process(url)
-
-    # retry without ActivityPub
-    result ||= process(url) if @unsupported_activity
-
-    result
-  rescue OpenSSL::SSL::SSLError => e
-    Rails.logger.debug "SSL error: #{e}"
-    nil
-  rescue HTTP::ConnectionError => e
-    Rails.logger.debug "HTTP ConnectionError: #{e}"
-    nil
-  end
-
-  private
-
-  def process(url, terminal = false)
-    @url = url
-    perform_request { |response| process_response(response, terminal) }
-  end
-
-  def perform_request(&block)
-    accept = 'text/html'
-    accept = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", application/atom+xml, ' + accept unless @unsupported_activity
-
-    Request.new(:get, @url).add_headers('Accept' => accept).perform(&block)
-  end
-
-  def process_response(response, terminal = false)
-    return nil if response.code != 200
-
-    if response.mime_type == 'application/atom+xml'
-      [@url, { prefetched_body: response.body_with_limit }, :ostatus]
-    elsif ['application/activity+json', 'application/ld+json'].include?(response.mime_type)
-      body = response.body_with_limit
-      json = body_to_json(body)
-      if supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) && json['inbox'].present?
-        [json['id'], { prefetched_body: body, id: true }, :activitypub]
-      elsif supported_context?(json) && expected_type?(json)
-        [json['id'], { prefetched_body: body, id: true }, :activitypub]
-      else
-        @unsupported_activity = true
-        nil
-      end
-    elsif !terminal
-      link_header = response['Link'] && parse_link_header(response)
-
-      if link_header&.find_link(%w(rel alternate))
-        process_link_headers(link_header)
-      elsif response.mime_type == 'text/html'
-        process_html(response)
-      end
-    end
-  end
-
-  def expected_type?(json)
-    equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
-  end
-
-  def process_html(response)
-    page = Nokogiri::HTML(response.body_with_limit)
-
-    json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) }
-    atom_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' }
-
-    result ||= process(json_link['href'], terminal: true) unless json_link.nil? || @unsupported_activity
-    result ||= process(atom_link['href'], terminal: true) unless atom_link.nil?
-
-    result
-  end
-
-  def process_link_headers(link_header)
-    json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'])
-    atom_link = link_header.find_link(%w(rel alternate), %w(type application/atom+xml))
-
-    result ||= process(json_link.href, terminal: true) unless json_link.nil? || @unsupported_activity
-    result ||= process(atom_link.href, terminal: true) unless atom_link.nil?
-
-    result
-  end
-
-  def parse_link_header(response)
-    LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link'])
-  end
-end

+ 1 - 1
app/services/fetch_link_card_service.rb

@@ -29,7 +29,7 @@ class FetchLinkCardService < BaseService
     end
 
     attach_card if @card&.persisted?
-  rescue HTTP::Error, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
+  rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
     Rails.logger.debug "Error fetching link #{@url}: #{e}"
     nil
   end

+ 1 - 1
app/services/fetch_remote_account_service.rb

@@ -3,7 +3,7 @@
 class FetchRemoteAccountService < BaseService
   def call(url, prefetched_body = nil, protocol = :ostatus)
     if prefetched_body.nil?
-      resource_url, resource_options, protocol = FetchAtomService.new.call(url)
+      resource_url, resource_options, protocol = FetchResourceService.new.call(url)
     else
       resource_url     = url
       resource_options = { prefetched_body: prefetched_body }

+ 1 - 1
app/services/fetch_remote_status_service.rb

@@ -3,7 +3,7 @@
 class FetchRemoteStatusService < BaseService
   def call(url, prefetched_body = nil, protocol = :ostatus)
     if prefetched_body.nil?
-      resource_url, resource_options, protocol = FetchAtomService.new.call(url)
+      resource_url, resource_options, protocol = FetchResourceService.new.call(url)
     else
       resource_url     = url
       resource_options = { prefetched_body: prefetched_body }

+ 68 - 0
app/services/fetch_resource_service.rb

@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+class FetchResourceService < BaseService
+  include JsonLdHelper
+
+  ACCEPT_HEADER = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", text/html'
+
+  def call(url)
+    return if url.blank?
+
+    process(url)
+  rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
+    Rails.logger.debug "Error fetching resource #{@url}: #{e}"
+    nil
+  end
+
+  private
+
+  def process(url, terminal = false)
+    @url = url
+
+    perform_request { |response| process_response(response, terminal) }
+  end
+
+  def perform_request(&block)
+    Request.new(:get, @url).add_headers('Accept' => ACCEPT_HEADER).perform(&block)
+  end
+
+  def process_response(response, terminal = false)
+    return nil if response.code != 200
+
+    if ['application/activity+json', 'application/ld+json'].include?(response.mime_type)
+      body = response.body_with_limit
+      json = body_to_json(body)
+
+      [json['id'], { prefetched_body: body, id: true }, :activitypub] if supported_context?(json) && (equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) || expected_type?(json))
+    elsif !terminal
+      link_header = response['Link'] && parse_link_header(response)
+
+      if link_header&.find_link(%w(rel alternate))
+        process_link_headers(link_header)
+      elsif response.mime_type == 'text/html'
+        process_html(response)
+      end
+    end
+  end
+
+  def expected_type?(json)
+    equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
+  end
+
+  def process_html(response)
+    page      = Nokogiri::HTML(response.body_with_limit)
+    json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) }
+
+    process(json_link['href'], terminal: true) unless json_link.nil?
+  end
+
+  def process_link_headers(link_header)
+    json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'])
+
+    process(json_link.href, terminal: true) unless json_link.nil?
+  end
+
+  def parse_link_header(response)
+    LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link'])
+  end
+end

+ 16 - 31
app/services/resolve_url_service.rb

@@ -4,64 +4,49 @@ class ResolveURLService < BaseService
   include JsonLdHelper
   include Authorization
 
-  attr_reader :url
-
   def call(url, on_behalf_of: nil)
-    @url = url
+    @url          = url
     @on_behalf_of = on_behalf_of
 
-    return process_local_url if local_url?
-
-    process_url unless fetched_atom_feed.nil?
+    if local_url?
+      process_local_url
+    elsif !fetched_resource.nil?
+      process_url
+    end
   end
 
   private
 
   def process_url
     if equals_or_includes_any?(type, ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES)
-      FetchRemoteAccountService.new.call(atom_url, body, protocol)
+      FetchRemoteAccountService.new.call(resource_url, body, protocol)
     elsif equals_or_includes_any?(type, ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
-      FetchRemoteStatusService.new.call(atom_url, body, protocol)
+      FetchRemoteStatusService.new.call(resource_url, body, protocol)
     end
   end
 
-  def fetched_atom_feed
-    @_fetched_atom_feed ||= FetchAtomService.new.call(url)
+  def fetched_resource
+    @fetched_resource ||= FetchResourceService.new.call(@url)
   end
 
-  def atom_url
-    fetched_atom_feed.first
+  def resource_url
+    fetched_resource.first
   end
 
   def body
-    fetched_atom_feed.second[:prefetched_body]
+    fetched_resource.second[:prefetched_body]
   end
 
   def protocol
-    fetched_atom_feed.third
+    fetched_resource.third
   end
 
   def type
     return json_data['type'] if protocol == :activitypub
-
-    case xml_root
-    when 'feed'
-      'Person'
-    when 'entry'
-      'Note'
-    end
   end
 
   def json_data
-    @_json_data ||= body_to_json(body)
-  end
-
-  def xml_root
-    xml_data.root.name
-  end
-
-  def xml_data
-    @_xml_data ||= Nokogiri::XML(body, nil, 'utf-8')
+    @json_data ||= body_to_json(body)
   end
 
   def local_url?
@@ -83,10 +68,10 @@ class ResolveURLService < BaseService
 
   def check_local_status(status)
     return if status.nil?
+
     authorize_with @on_behalf_of, status, :show?
     status
   rescue Mastodon::NotPermittedError
-    # Do not disclose the existence of status the user is not authorized to see
     nil
   end
 end

+ 5 - 11
app/workers/activitypub/delivery_worker.rb

@@ -2,6 +2,7 @@
 
 class ActivityPub::DeliveryWorker
   include Sidekiq::Worker
+  include JsonLdHelper
 
   STOPLIGHT_FAILURE_THRESHOLD = 10
   STOPLIGHT_COOLDOWN = 60
@@ -32,9 +33,10 @@ class ActivityPub::DeliveryWorker
   private
 
   def build_request(http_client)
-    request = Request.new(:post, @inbox_url, body: @json, http_client: http_client)
-    request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with])
-    request.add_headers(HEADERS)
+    Request.new(:post, @inbox_url, body: @json, http_client: http_client).tap do |request|
+      request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with])
+      request.add_headers(HEADERS)
+    end
   end
 
   def perform_request
@@ -53,14 +55,6 @@ class ActivityPub::DeliveryWorker
          .run
   end
 
-  def response_successful?(response)
-    (200...300).cover?(response.code)
-  end
-
-  def response_error_unsalvageable?(response)
-    response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code))
-  end
-
   def failure_tracker
     @failure_tracker ||= DeliveryFailureTracker.new(@inbox_url)
   end

+ 1 - 0
spec/services/fetch_remote_account_service_spec.rb

@@ -4,6 +4,7 @@ RSpec.describe FetchRemoteAccountService, type: :service do
   let(:url) { 'https://example.com/alice' }
   let(:prefetched_body) { nil }
   let(:protocol) { :ostatus }
+
   subject { FetchRemoteAccountService.new.call(url, prefetched_body, protocol) }
 
   let(:actor) do

+ 7 - 7
spec/services/fetch_atom_service_spec.rb → spec/services/fetch_resource_service_spec.rb

@@ -1,9 +1,11 @@
 require 'rails_helper'
 
-RSpec.describe FetchAtomService, type: :service do
+RSpec.describe FetchResourceService, type: :service do
+  let!(:representative) { Fabricate(:account) }
+
   describe '#call' do
     let(:url) { 'http://example.com' }
-    subject { FetchAtomService.new.call(url) }
+    subject { described_class.new.call(url) }
 
     context 'url is blank' do
       let(:url) { '' }
@@ -23,8 +25,7 @@ RSpec.describe FetchAtomService, type: :service do
         allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(OpenSSL::SSL::SSLError)
       end
 
-      it 'output log and return nil' do
-        expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('SSL error: OpenSSL::SSL::SSLError')
+      it 'return nil' do
         is_expected.to be_nil
       end
     end
@@ -34,8 +35,7 @@ RSpec.describe FetchAtomService, type: :service do
         allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(HTTP::ConnectionError)
       end
 
-      it 'output log and return nil' do
-        expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('HTTP ConnectionError: HTTP::ConnectionError')
+      it 'return nil' do
         is_expected.to be_nil
       end
     end
@@ -57,7 +57,7 @@ RSpec.describe FetchAtomService, type: :service do
       context 'content type is application/atom+xml' do
         let(:content_type) { 'application/atom+xml' }
 
-        it { is_expected.to eq [url, { :prefetched_body => "" }, :ostatus] }
+        it { is_expected.to eq nil }
       end
 
       context 'content_type is activity+json' do

+ 5 - 39
spec/services/resolve_url_service_spec.rb

@@ -6,48 +6,14 @@ describe ResolveURLService, type: :service do
   subject { described_class.new }
 
   describe '#call' do
-    it 'returns nil when there is no atom url' do
-      url = 'http://example.com/missing-atom'
+    it 'returns nil when there is no resource url' do
+      url     = 'http://example.com/missing-resource'
       service = double
-      allow(FetchAtomService).to receive(:new).and_return service
-      allow(service).to receive(:call).with(url).and_return(nil)
-
-      result = subject.call(url)
-      expect(result).to be_nil
-    end
-
-    it 'fetches remote accounts for feed types' do
-      url = 'http://example.com/atom-feed'
-      service = double
-      allow(FetchAtomService).to receive(:new).and_return service
-      feed_url = 'http://feed-url'
-      feed_content = '<feed>contents</feed>'
-      allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }])
-
-      account_service = double
-      allow(FetchRemoteAccountService).to receive(:new).and_return(account_service)
-      allow(account_service).to receive(:call)
-
-      _result = subject.call(url)
 
-      expect(account_service).to have_received(:call).with(feed_url, feed_content, nil)
-    end
-
-    it 'fetches remote statuses for entry types' do
-      url = 'http://example.com/atom-entry'
-      service = double
-      allow(FetchAtomService).to receive(:new).and_return service
-      feed_url = 'http://feed-url'
-      feed_content = '<entry>contents</entry>'
-      allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }])
-
-      account_service = double
-      allow(FetchRemoteStatusService).to receive(:new).and_return(account_service)
-      allow(account_service).to receive(:call)
-
-      _result = subject.call(url)
+      allow(FetchResourceService).to receive(:new).and_return service
+      allow(service).to receive(:call).with(url).and_return(nil)
 
-      expect(account_service).to have_received(:call).with(feed_url, feed_content, nil)
+      expect(subject.call(url)).to be_nil
     end
   end
 end