#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                              \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
import difflib
import filecmp
import logging
import os
from datetime import timedelta

import pytest

from testenv import Env, CurlClient, LocalClient


log = logging.getLogger(__name__)

class TestDownload:

    @pytest.fixture(autouse=True, scope='class')
    def _class_scope(self, env, httpd, nghttpx):
        if env.have_h3():
            nghttpx.start_if_needed()
        httpd.clear_extra_configs()
        httpd.reload()
        indir = httpd.docs_dir
        env.make_data_file(indir=indir, fname="data-10k", fsize=10*1024)
        env.make_data_file(indir=indir, fname="data-100k", fsize=100*1024)
        env.make_data_file(indir=indir, fname="data-1m", fsize=1024*1024)
        env.make_data_file(indir=indir, fname="data-10m", fsize=10*1024*1024)
        env.make_data_file(indir=indir, fname="data-50m", fsize=50*1024*1024)

    # download 1 file
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_01_download_1(self, env: Env, httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/data.json'
        r = curl.http_download(urls=[url], alpn_proto=proto)
        r.check_response(http_status=200)

    # download 2 files
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_02_download_2(self, env: Env, httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        curl = CurlClient(env=env)
        url = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-1]'
        r = curl.http_download(urls=[url], alpn_proto=proto)
        r.check_response(http_status=200, count=2)

    # download 100 files sequentially
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_03_download_100_sequential(self, env: Env,
                                           httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-99]'
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        r.check_response(http_status=200, count=100, connect_count=1)

    # download 100 files in parallel
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_04_download_100_parallel(self, env: Env,
                                         httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        max_parallel = 50
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-99]'
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel', '--parallel-max', f'{max_parallel}'
        ])
        r.check_response(http_status=200, count=100)
        if proto == 'http/1.1':
            # http/1.1 parallel transfers will open multiple connections
            assert r.total_connects > 1, r.dump_logs()
        else:
            # http2 parallel transfers will use one connection (common limit is 100)
            assert r.total_connects == 1, r.dump_logs()

    # download 500 files sequentially
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_05_download_500_sequential(self, env: Env,
                                           httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        if proto == 'h3' and env.curl_uses_lib('msh3'):
            pytest.skip("msh3 shaky here")
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-499]'
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        r.check_response(http_status=200, count=500)
        if proto == 'http/1.1':
            # http/1.1 will open multiple connections, since the test
            # servers limit the requests served per connection
            assert r.total_connects > 1, r.dump_logs()
        else:
            # h2/h3 reuse a single connection for all transfers
            assert r.total_connects == 1, r.dump_logs()

    # download 500 files in parallel
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_06_download_500_parallel(self, env: Env,
                                         httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 500
        max_parallel = 50
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[000-{count-1}]'
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel', '--parallel-max', f'{max_parallel}'
        ])
        r.check_response(http_status=200, count=count, connect_count=1)

    # download files in parallel, check connection reuse/multiplexing
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_07_download_reuse(self, env: Env,
                                  httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 200
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-{count-1}]'
        r = curl.http_download(urls=[urln], alpn_proto=proto,
                               with_stats=True, extra_args=[
            '--parallel', '--parallel-max', '200'
        ])
        r.check_response(http_status=200, count=count)
        # should have used at most 2 connections (test servers allow 100 req/conn).
        # It may be just 1 on slow systems where requests are answered faster
        # than curl can exhaust the capacity, or when curl runs at
        # address-sanitizer speed.
        assert r.total_connects <= 2, "h2 should use fewer connections here"

    # download files in parallel with http/1.1, check connections are not reused
    @pytest.mark.parametrize("proto", ['http/1.1'])
    def test_02_07b_download_reuse(self, env: Env,
                                   httpd, nghttpx, repeat, proto):
        if env.curl_uses_lib('wolfssl'):
            pytest.skip("wolfssl session reuse borked")
        count = 6
        curl = CurlClient(env=env)
        urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-{count-1}]'
        r = curl.http_download(urls=[urln], alpn_proto=proto,
                               with_stats=True, extra_args=[
            '--parallel'
        ])
        r.check_response(count=count, http_status=200)
        # http/1.1 should have used `count` connections
        assert r.total_connects == count, "http/1.1 should use this many connections"
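
    # download 20 files of 1MB each, sequentially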
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_08_1MB_serial(self, env: Env,
                              httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 20
        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        r.check_response(count=count, http_status=200)
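
    # download 20 files of 1MB each, in parallel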
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_09_1MB_parallel(self, env: Env,
                                httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 20
        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel'
        ])
        r.check_response(count=count, http_status=200)
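
    # download 20 files of 10MB each, sequentially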
    @pytest.mark.skipif(condition=Env().slow_network, reason="not suitable for slow network tests")
    @pytest.mark.skipif(condition=Env().ci_run, reason="not suitable for CI runs")
    @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
    def test_02_10_10MB_serial(self, env: Env,
                               httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 20
        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto)
        r.check_response(count=count, http_status=200)
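
    # download 20 files of 10MB each, in parallel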
    @pytest.mark.skipif(condition=Env().slow_network, reason="not suitable for slow network tests")
    @pytest.mark.skipif(condition=Env().ci_run, reason="not suitable for CI runs")
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_11_10MB_parallel(self, env: Env,
                                 httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        if proto == 'h3' and env.curl_uses_lib('msh3'):
            pytest.skip("msh3 stalls here")
        count = 20
        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--parallel'
        ])
        r.check_response(count=count, http_status=200)
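
    # 100 sequential HEAD requests over https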
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_12_head_serial_https(self, env: Env,
                                     httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 100
        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--head'
        ])
        r.check_response(count=count, http_status=200)
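
    # 100 sequential HEAD requests over h2c (cleartext, prior knowledge)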
    @pytest.mark.parametrize("proto", ['h2'])
    def test_02_13_head_serial_h2c(self, env: Env,
                                   httpd, nghttpx, repeat, proto):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 100
        urln = f'http://{env.domain1}:{env.http_port}/data-10m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
            '--head', '--http2-prior-knowledge', '--fail-early'
        ])
        r.check_response(count=count, http_status=200)

    @pytest.mark.skipif(condition=Env().slow_network, reason="not suitable for slow network tests")
    @pytest.mark.skipif(condition=Env().ci_run, reason="not suitable for CI runs")
    def test_02_20_h2_small_frames(self, env: Env, httpd, repeat):
        # Test case to reproduce content corruption as observed in
        # https://github.com/curl/curl/issues/10525
        # To reliably reproduce, we need an Apache httpd that supports
        # setting smaller frame sizes. That is not released yet, so we
        # check whether it works and back out if not.
        httpd.set_extra_config(env.domain1, lines=[
            'H2MaxDataFrameLen 1024',
        ])
        assert httpd.stop()
        if not httpd.start():
            # no, not supported, bail out
            httpd.set_extra_config(env.domain1, lines=None)
            assert httpd.start()
            pytest.skip('H2MaxDataFrameLen not supported')
        # ok, make 100 downloads with 2 running in parallel; they are
        # expected to stumble into the issue when using `lib/http2.c`
        # from curl 7.88.0
        count = 100
        urln = f'https://{env.authority_for(env.domain1, "h2")}/data-1m?[0-{count-1}]'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[urln], alpn_proto="h2", extra_args=[
            '--parallel', '--parallel-max', '2'
        ])
        r.check_response(count=count, http_status=200)
        srcfile = os.path.join(httpd.docs_dir, 'data-1m')
        self.check_downloads(curl, srcfile, count)
        # restore httpd defaults
        httpd.set_extra_config(env.domain1, lines=None)
        assert httpd.stop()
        assert httpd.start()

    # download via lib client, 1 at a time, pause/resume at different offsets
    @pytest.mark.parametrize("pause_offset", [0, 10*1024, 100*1023, 640000])
    def test_02_21_h2_lib_serial(self, env: Env, httpd, nghttpx, pause_offset, repeat):
        count = 10
        docname = 'data-10m'
        url = f'https://localhost:{env.https_port}/{docname}'
        client = LocalClient(name='h2-download', env=env)
        if not client.exists():
            pytest.skip(f'example client not built: {client.name}')
        r = client.run(args=[
            '-n', f'{count}', '-P', f'{pause_offset}', url
        ])
        r.check_exit_code(0)
        srcfile = os.path.join(httpd.docs_dir, docname)
        self.check_downloads(client, srcfile, count)

    # download via lib client, several at a time, pause/resume
    @pytest.mark.parametrize("pause_offset", [100*1023])
    def test_02_22_h2_lib_parallel_resume(self, env: Env, httpd, nghttpx, pause_offset, repeat):
        count = 10
        max_parallel = 5
        docname = 'data-10m'
        url = f'https://localhost:{env.https_port}/{docname}'
        client = LocalClient(name='h2-download', env=env)
        if not client.exists():
            pytest.skip(f'example client not built: {client.name}')
        r = client.run(args=[
            '-n', f'{count}', '-m', f'{max_parallel}',
            '-P', f'{pause_offset}', url
        ])
        r.check_exit_code(0)
        srcfile = os.path.join(httpd.docs_dir, docname)
        self.check_downloads(client, srcfile, count)

    # download, several at a time, pause and abort while paused
    @pytest.mark.parametrize("pause_offset", [100*1023])
    def test_02_23_h2_lib_parallel_abort(self, env: Env, httpd, nghttpx, pause_offset, repeat):
        count = 200
        max_parallel = 100
        docname = 'data-10m'
        url = f'https://localhost:{env.https_port}/{docname}'
        client = LocalClient(name='h2-download', env=env)
        if not client.exists():
            pytest.skip(f'example client not built: {client.name}')
        r = client.run(args=[
            '-n', f'{count}', '-m', f'{max_parallel}', '-a',
            '-P', f'{pause_offset}', url
        ])
        r.check_exit_code(0)
        srcfile = os.path.join(httpd.docs_dir, docname)
        # downloads should be there, but not necessarily complete
        self.check_downloads(client, srcfile, count, complete=False)

    # speed limited download
    @pytest.mark.parametrize("proto", ['h2', 'h3'])
    def test_02_24_speed_limit(self, env: Env, httpd, nghttpx, proto, repeat):
        if proto == 'h3' and not env.have_h3():
            pytest.skip("h3 not supported")
        count = 1
        url = f'https://{env.authority_for(env.domain1, proto)}/data-1m'
        curl = CurlClient(env=env)
        r = curl.http_download(urls=[url], alpn_proto=proto, extra_args=[
            '--limit-rate', f'{196 * 1024}'
        ])
        r.check_response(count=count, http_status=200)
        assert r.duration > timedelta(seconds=4), \
            f'rate limited transfer should take more than 4s, not {r.duration}'

    # make extreme numbers of parallel h2 upgrades, check that connections
    # are not reused before the protocol switch has happened
    def test_02_25_h2_upgrade_x(self, env: Env, httpd, repeat):
        # Certain SSL libs show timeouts here in CI that are not reproducible
        # locally. Since this test is about connection reuse handling, we
        # skip it on those builds, although we would certainly like to
        # understand why this happens.
        if env.curl_uses_lib('bearssl'):
            pytest.skip('CI workflows timeout on bearssl build')
        url = f'http://localhost:{env.http_port}/data-100k'
        client = LocalClient(name='h2-upgrade-extreme', env=env, timeout=15)
        if not client.exists():
            pytest.skip(f'example client not built: {client.name}')
        r = client.run(args=[url])
        assert r.exit_code == 0, f'{client.dump_logs()}'

    # special client that tests TLS session reuse in parallel transfers
    def test_02_26_session_shared_reuse(self, env: Env, httpd, repeat):
        curl = CurlClient(env=env)
        url = f'https://{env.domain1}:{env.https_port}/data-100k'
        client = LocalClient(name='tls-session-reuse', env=env)
        if not client.exists():
            pytest.skip(f'example client not built: {client.name}')
        r = client.run(args=[url])
        r.check_exit_code(0)
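
    # helper: assert that all downloaded files exist and, when `complete`,
    # are identical to the source file; show a unified diff otherwise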
    def check_downloads(self, client, srcfile: str, count: int,
                        complete: bool = True):
        for i in range(count):
            dfile = client.download_file(i)
            assert os.path.exists(dfile)
            if complete and not filecmp.cmp(srcfile, dfile, shallow=False):
                diff = "".join(difflib.unified_diff(a=open(srcfile).readlines(),
                                                    b=open(dfile).readlines(),
                                                    fromfile=srcfile,
                                                    tofile=dfile,
                                                    n=1))
                assert False, f'download {dfile} differs:\n{diff}'