fetch_oembed_service.rb 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. # frozen_string_literal: true
  # Looks up the oEmbed document for a web page.
  #
  # The oEmbed endpoint is either expanded from a previously cached URI
  # template (options[:cached_endpoint]) or discovered from the
  # `<link ... type="...+oembed">` tags of the page's HTML. The endpoint is
  # then requested and its response parsed according to the detected format.
  class FetchOEmbedService
    # Expiry passed to Rails.cache when storing a discovered endpoint template.
    ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze

    # Matches a query-string value that is an http(s) URL, either plain or
    # percent-encoded ("=https://..." / "=https%3A%2F%2F..."), up to the next
    # "&". Used to replace the page URL inside an endpoint with "{url}".
    URL_REGEX = %r{(=(https?(%3A|:)(//|%2F%2F)))([^&]*)}i

    attr_reader :url, :options, :format, :endpoint_url

    # Resolves the oEmbed endpoint for `url` and fetches the oEmbed document.
    #
    # @param url [String] address of the page to look up
    # @param options [Hash] settings read by this service:
    #   :cached_endpoint - Hash with :endpoint (URI template containing
    #                      "{url}") and :format; when present, discovery
    #                      from HTML is skipped
    #   :format          - restricts discovery to :json or :xml
    #   :html            - page markup to use instead of requesting `url`
    # @return [Hash, nil] the parsed oEmbed payload (indifferent access), or
    #   nil when no endpoint was resolved, the endpoint did not answer with
    #   status 200 and a body, or the payload is unparsable or invalid
    def call(url, options = {})
      @url = url
      @options = options

      # Start every call from a clean slate so a reused instance never serves
      # the endpoint, format or HTML remembered from a previous URL.
      reset_state!

      if @options[:cached_endpoint]
        parse_cached_endpoint!
      else
        discover_endpoint!
      end

      fetch!
    end

    private

    # Clears everything a previous #call may have left behind. The HTML memo
    # is removed (not set to nil) because #html treats "defined" as "cached".
    def reset_state!
      @endpoint_url = nil
      @format = nil
      remove_instance_variable(:@html) if defined?(@html)
    end

    # Finds the oEmbed endpoint advertised by the page's <link> tags, makes
    # it absolute relative to the page URL and stores it in @endpoint_url
    # (with @format set to :json or :xml). JSON is preferred over XML unless
    # options[:format] asks for one of them explicitly. Leaves @endpoint_url
    # blank when nothing usable is found, and nil when a URL is invalid.
    def discover_endpoint!
      return if html.nil?

      @format = @options[:format]
      page = Nokogiri::HTML(html)

      if @format.nil? || @format == :json
        @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value
        @format ||= :json if @endpoint_url
      end

      if @format.nil? || @format == :xml
        @endpoint_url ||= page.at_xpath('//link[@type="text/xml+oembed"]')&.attribute('href')&.value
        @format ||= :xml if @endpoint_url
      end

      return if @endpoint_url.blank?

      base_url = Addressable::URI.parse(@url)
      absolute_url = base_url + @endpoint_url

      # If the OEmbed endpoint is given as http but the URL we opened
      # was served over https, we can assume OEmbed will be available
      # through https as well
      absolute_url.scheme = base_url.scheme if base_url.scheme == 'https'

      @endpoint_url = absolute_url.to_s

      cache_endpoint!
    rescue Addressable::URI::InvalidURIError
      @endpoint_url = nil
    end

    # Builds @endpoint_url and @format from options[:cached_endpoint] by
    # expanding the cached URI template with the current page URL. Does
    # nothing when the cached entry lacks an endpoint or a format, in which
    # case #fetch! finds no endpoint and returns nil.
    def parse_cached_endpoint!
      cached = @options[:cached_endpoint]

      return if cached[:endpoint].nil? || cached[:format].nil?

      @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s
      @format = cached[:format]
    end

    # Stores the discovered endpoint in Rails.cache as a template, keyed by
    # the normalized host of the page URL. Only endpoints that carry an
    # http(s) URL as a query value (URL_REGEX) are cached, since only those
    # can be turned into a "{url}" template.
    def cache_endpoint!
      return unless URL_REGEX.match?(@endpoint_url)

      url_domain = Addressable::URI.parse(@url).normalized_host

      endpoint_hash = {
        endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
        format: @format,
      }

      Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
    end

    # Requests @endpoint_url and returns the validated oEmbed payload.
    #
    # @return [Hash, nil] nil when there is no endpoint, the response status
    #   is not 200, the body is blank, parsing fails, or validation fails
    def fetch!
      return if @endpoint_url.blank?

      body = Request.new(:get, @endpoint_url).perform do |res|
        res.body_with_limit if res.code == 200
      end

      validate(parse_for_format(body)) if body.present?
    rescue Oj::ParseError, Ox::ParseError
      nil
    end

    # Parses the response body according to @format.
    #
    # The body comes from a remote server, so anything that does not parse
    # to a Hash (JSON array, scalar, null) is discarded instead of being
    # sent #with_indifferent_access, which only Hash responds to.
    #
    # @param body [String] raw response body
    # @return [Object, nil] for :json the payload Hash; for :xml whatever is
    #   stored under the :oembed key of the parsed document; nil for
    #   non-Hash documents and for any other format
    def parse_for_format(body)
      case @format
      when :json
        indifferent_hash(Oj.load(body, mode: :strict))
      when :xml
        indifferent_hash(Ox.load(body, mode: :hash_no_attrs))&.dig(:oembed)
      end
    end

    # Returns `value` with indifferent access when it is a Hash, nil otherwise.
    def indifferent_hash(value)
      value.with_indifferent_access if value.is_a?(Hash)
    end

    # Accepts a payload only when it is a Hash whose version is "1.0" and
    # whose type is present.
    #
    # @param oembed [Object] parsed payload; may be nil or a non-Hash value
    #   (e.g. a String when the XML root contains only text)
    # @return [Hash, nil] the payload itself when valid, nil otherwise
    def validate(oembed)
      return unless oembed.is_a?(Hash)

      oembed if oembed[:version].to_s == '1.0' && oembed[:type].present?
    end

    # Returns the page markup: options[:html] when supplied, otherwise the
    # body of a GET request to the page URL, accepted only for a 200
    # response whose MIME type is text/html. The result is memoized for the
    # current call, including a nil result, so a failed request is not
    # repeated.
    def html
      return @html if defined?(@html)

      @html = @options[:html] || Request.new(:get, @url).add_headers('Accept' => 'text/html').perform do |res|
        res.body_with_limit if res.code == 200 && res.mime_type == 'text/html'
      end
    end
  end