# frozen_string_literal: true

require 'rails_helper'

# Spec for FetchLinkCardService.
#
# Every example runs against WebMock stubs registered in the top-level
# `before` hook. That hook also invokes the service on the `status` defined by
# the enclosing context, so each `it` block only inspects the outcome: either
# which HTTP requests were made, or what `status.preview_card` looks like.
RSpec.describe FetchLinkCardService do
  subject { described_class.new }

  # Body served for http://example.com/html (and /not-found); contexts override
  # it to change what the "regular" page contains.
  let(:html) { '<!doctype html><title>Hello world</title>' }

  # When non-nil, written to the Rails cache under the oEmbed endpoint key for
  # example.com before the service runs.
  let(:oembed_cache) { nil }

  before do
    # Inline HTML / plain-text responses and redirects.
    stub_request(:get, 'http://example.com/html').to_return(headers: { 'Content-Type' => 'text/html' }, body: html)
    stub_request(:get, 'http://example.com/not-found').to_return(status: 404, headers: { 'Content-Type' => 'text/html' }, body: html)
    # NOTE(review): this stub answers 404 *and* text/plain, so the "plain-text
    # page" example below may pass because of the status code alone — confirm
    # whether a 200 response was intended here.
    stub_request(:get, 'http://example.com/text').to_return(status: 404, headers: { 'Content-Type' => 'text/plain' }, body: 'Hello')
    stub_request(:get, 'http://example.com/redirect').to_return(status: 302, headers: { 'Location' => 'http://example.com/html' })
    stub_request(:get, 'http://example.com/redirect-to-404').to_return(status: 302, headers: { 'Location' => 'http://example.com/not-found' })

    # oEmbed endpoints: one as advertised by the page's <link> tag, one as
    # produced from the cached endpoint template (note the extra format=json).
    stub_request(:get, 'http://example.com/oembed?url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }')
    stub_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }')

    # URLs that only need to be requestable; no explicit response is configured.
    stub_request(:get, 'http://example.xn--fiqs8s')
    stub_request(:get, 'http://example.com/日本語')
    stub_request(:get, 'http://example.com/test?data=file.gpx%5E1')
    stub_request(:get, 'http://example.com/test-')

    # Responses loaded through the request_fixture helper (defined outside this file).
    stub_request(:get, 'http://example.com/sjis').to_return(request_fixture('sjis.txt'))
    stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt'))
    stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt'))
    stub_request(:get, 'http://example.com/windows-1251').to_return(request_fixture('windows-1251.txt'))
    stub_request(:get, 'http://example.com/low_confidence_latin1').to_return(request_fixture('low_confidence_latin1.txt'))
    stub_request(:get, 'http://example.com/latin1_posing_as_utf8_broken').to_return(request_fixture('latin1_posing_as_utf8_broken.txt'))
    stub_request(:get, 'http://example.com/latin1_posing_as_utf8_recoverable').to_return(request_fixture('latin1_posing_as_utf8_recoverable.txt'))
    stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
    stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
    stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt'))
    stub_request(:get, 'http://example.com/alternative_utf8_spelling_in_header').to_return(request_fixture('alternative_utf8_spelling_in_header.txt'))

    Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache

    # Run the service once per example; `status` comes from the enclosing context.
    subject.call(status)
  end

  context 'with a local status' do
    context 'with URL of a regular HTML page' do
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'Hello world'
      end
    end

    context 'with URL of a page with no title' do
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }
      let(:html) { '<!doctype html><title></title>' }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a URL of a plain-text page' do
      let(:status) { Fabricate(:status, text: 'http://example.com/text') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with multiple URLs' do
      let(:status) { Fabricate(:status, text: 'ftp://example.com http://example.com/html http://example.com/text') }

      it 'fetches the first valid URL' do
        expect(a_request(:get, 'http://example.com/html')).to have_been_made
      end

      it 'does not fetch the second valid URL' do
        # Must match the URL in the status text exactly (no trailing slash);
        # otherwise this negative assertion could never fail.
        expect(a_request(:get, 'http://example.com/text')).to_not have_been_made
      end
    end

    context 'with a redirect URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/redirect') }

      it 'follows redirect' do
        expect(a_request(:get, 'http://example.com/redirect')).to have_been_made.once
        expect(a_request(:get, 'http://example.com/html')).to have_been_made.once
      end

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'Hello world'
      end
    end

    context 'with a broken redirect URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/redirect-to-404') }

      it 'follows redirect' do
        expect(a_request(:get, 'http://example.com/redirect-to-404')).to have_been_made.once
        expect(a_request(:get, 'http://example.com/not-found')).to have_been_made.once
      end

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a redirect URL with faulty encoding' do
      let(:status) { Fabricate(:status, text: 'http://example.com/aergerliche-umlaute') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a page that has no title' do
      let(:status) { Fabricate(:status, text: 'http://example.com/page_without_title') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a 404 URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/not-found') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with an IDN URL' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.中国') }

      it 'fetches the URL' do
        # The request is expected under the punycode form of the host.
        expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.once
      end
    end

    context 'with a URL of a page in Shift JIS encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/sjis') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('SJISのページ')
      end
    end

    context 'with a URL of a page in Shift JIS encoding labeled as UTF-8' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/sjis_with_wrong_charset') }

      it 'decodes the HTML despite the wrong charset header' do
        expect(status.preview_card.title).to eq('SJISのページ')
      end
    end

    context 'with a URL of a page in KOI8-R encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/koi8-r') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.')
      end
    end

    context 'with a URL of a page in Windows-1251 encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/windows-1251') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('сэмпл текст')
      end
    end

    context 'with a URL of a page in ISO-8859-1 encoding, that charlock_holmes cannot detect' do
      context 'when encoding in http header is correct' do
        let(:status) { Fabricate(:status, text: 'Check out http://example.com/low_confidence_latin1') }

        it 'decodes the HTML' do
          expect(status.preview_card.title).to eq("Tofu á l'orange")
        end
      end

      context 'when encoding in http header is incorrect' do
        context 'when encoding problems appear in unrelated tags' do
          let(:status) { Fabricate(:status, text: 'Check out http://example.com/latin1_posing_as_utf8_recoverable') }

          it 'decodes the HTML' do
            expect(status.preview_card.title).to eq('Tofu with orange sauce')
          end
        end

        context 'when encoding problems appear in title tag' do
          let(:status) { Fabricate(:status, text: 'Check out http://example.com/latin1_posing_as_utf8_broken') }

          it 'creates a preview card anyway that replaces invalid bytes with U+FFFD (replacement char)' do
            expect(status.preview_card.title).to eq("Tofu � l'orange")
          end
        end
      end
    end

    context 'with a Japanese path URL' do
      let(:status) { Fabricate(:status, text: 'テストhttp://example.com/日本語') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.com/日本語')).to have_been_made.once
      end
    end

    context 'with a hyphen-suffixed URL' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/test-') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.com/test-')).to have_been_made.once
      end
    end

    context 'with a caret-suffixed URL' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/test?data=file.gpx^1') }

      it 'fetches the URL' do
        # The caret is expected percent-encoded (%5E) in the outgoing request.
        expect(a_request(:get, 'http://example.com/test?data=file.gpx%5E1')).to have_been_made.once
      end

      it 'does not strip the caret before fetching' do
        expect(a_request(:get, 'http://example.com/test?data=file.gpx')).to_not have_been_made
      end
    end

    context 'with a non-isolated URL' do
      let(:status) { Fabricate(:status, text: 'testhttp://example.com/sjis') }

      it 'does not fetch URLs not isolated from their surroundings' do
        expect(a_request(:get, 'http://example.com/sjis')).to_not have_been_made
      end
    end

    context 'with a URL of a page with oEmbed support' do
      let(:html) { '<!doctype html><title>Hello world</title><link rel="alternate" type="application/json+oembed" href="http://example.com/oembed?url=http://example.com/html">' }
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }

      it 'fetches the oEmbed URL' do
        expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to have_been_made.once
      end

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'oEmbed title'
      end

      context 'when oEmbed endpoint cache populated' do
        let(:oembed_cache) { { endpoint: 'http://example.com/oembed?format=json&url={url}', format: :json } }

        it 'uses the cached oEmbed response' do
          # Only the URL built from the cached endpoint template should be
          # hit, not the one advertised in the page's <link> tag.
          expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to_not have_been_made
          expect(a_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html')).to have_been_made
        end

        it 'creates preview card' do
          expect(status.preview_card).to_not be_nil
          expect(status.preview_card.url).to eq 'http://example.com/html'
          expect(status.preview_card.title).to eq 'oEmbed title'
        end
      end

      # If the original HTML URL for whatever reason (e.g. DOS protection) redirects to
      # an error page, we can still use the cached oEmbed but should not use the
      # redirect URL on the card.
      context 'when oEmbed endpoint cache populated but page returns 404' do
        let(:status) { Fabricate(:status, text: 'http://example.com/redirect-to-404') }
        let(:oembed_cache) { { endpoint: 'http://example.com/oembed?url=http://example.com/html', format: :json } }

        it 'uses the cached oEmbed response' do
          expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to have_been_made
        end

        it 'creates preview card' do
          expect(status.preview_card).to_not be_nil
          expect(status.preview_card.title).to eq 'oEmbed title'
        end

        it 'uses the original URL' do
          expect(status.preview_card&.url).to eq 'http://example.com/redirect-to-404'
        end
      end
    end

    context 'with a URL of a page that includes a canonical URL too long for PostgreSQL unique indexes' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/long_canonical_url') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a URL where the `Content-Type` header uses `utf8` instead of `utf-8`' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/alternative_utf8_spelling_in_header') }

      it 'creates a preview card with the expected title' do
        expect(status.preview_card.title).to eq 'Webserver Configs R Us'
      end
    end
  end

  context 'with a remote status' do
    # The status text is HTML containing a hashtag link, a plain link to
    # /not-found, and a group link.
    let(:status) do
      Fabricate(:status, account: Fabricate(:account, domain: 'example.com'), text: <<~TEXT)
        Habt ihr ein paar gute Links zu <a>foo</a>
        #<span class="tag"><a href="https://quitter.se/tag/wannacry" target="_blank" rel="tag noopener noreferrer" title="https://quitter.se/tag/wannacry">Wannacry</a></span> herumfliegen?
        Ich will mal unter <br> <a href="http://example.com/not-found" target="_blank" rel="noopener noreferrer" title="http://example.com/not-found">http://example.com/not-found</a> was sammeln. !
        <a href="http://sn.jonkman.ca/group/416/id" target="_blank" rel="noopener noreferrer" title="http://sn.jonkman.ca/group/416/id">security</a>&nbsp;
      TEXT
    end

    it 'parses out URLs' do
      expect(a_request(:get, 'http://example.com/not-found')).to have_been_made.once
    end

    it 'ignores URLs to hashtags' do
      expect(a_request(:get, 'https://quitter.se/tag/wannacry')).to_not have_been_made
    end
  end
end