#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                              \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
import json
import logging
import os
import re
import shutil
import subprocess
from datetime import timedelta, datetime
from typing import List, Optional, Dict, Union
from urllib.parse import urlparse

from .env import Env


log = logging.getLogger(__name__)


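# ExecResult captures the outcome of one curl invocation: the command line
# arguments, the exit code, the stdout/stderr lines and, when the run used
# `-w '%{json}\n'`, the per-transfer statistics parsed from stdout. The
# check_*() methods assert on these fields and include the collected logs
# in their failure messages.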
class ExecResult:

    def __init__(self, args: List[str], exit_code: int,
                 stdout: List[str], stderr: List[str],
                 duration: Optional[timedelta] = None,
                 with_stats: bool = False,
                 exception: Optional[str] = None):
        self._args = args
        self._exit_code = exit_code
        self._exception = exception
        self._stdout = stdout
        self._stderr = stderr
        self._duration = duration if duration is not None else timedelta()
        self._response = None
        self._responses = []
        self._results = {}
        self._assets = []
        self._stats = []
        self._json_out = None
        self._with_stats = with_stats
        if with_stats:
            self._parse_stats()
        else:
            # noinspection PyBroadException
            try:
                out = ''.join(self._stdout)
                self._json_out = json.loads(out)
            except:
                pass

    def __repr__(self):
        return f"ExecResult[code={self.exit_code}, exception={self._exception}, "\
               f"args={self._args}, stdout={self._stdout}, stderr={self._stderr}]"

    def _parse_stats(self):
        self._stats = []
        for l in self._stdout:
            try:
                self._stats.append(json.loads(l))
            except:
                log.error(f'not a JSON stat: {l}')
                break

    @property
    def exit_code(self) -> int:
        return self._exit_code

    @property
    def args(self) -> List[str]:
        return self._args

    @property
    def outraw(self) -> bytes:
        return ''.join(self._stdout).encode()

    @property
    def stdout(self) -> str:
        return ''.join(self._stdout)

    @property
    def json(self) -> Optional[Dict]:
        """Output as JSON dictionary or None if not parseable."""
        return self._json_out

    @property
    def stderr(self) -> str:
        return ''.join(self._stderr)

    @property
    def trace_lines(self) -> List[str]:
        return self._stderr

    @property
    def duration(self) -> timedelta:
        return self._duration

    @property
    def response(self) -> Optional[Dict]:
        return self._response

    @property
    def responses(self) -> List[Dict]:
        return self._responses

    @property
    def results(self) -> Dict:
        return self._results

    @property
    def assets(self) -> List:
        return self._assets

    @property
    def with_stats(self) -> bool:
        return self._with_stats

    @property
    def stats(self) -> List:
        return self._stats

    @property
    def total_connects(self) -> Optional[int]:
        if len(self.stats):
            n = 0
            for stat in self.stats:
                n += stat['num_connects']
            return n
        return None

    def add_response(self, resp: Dict):
        self._response = resp
        self._responses.append(resp)

    def add_results(self, results: Dict):
        self._results.update(results)
        if 'response' in results:
            self.add_response(results['response'])

    def add_assets(self, assets: List):
        self._assets.extend(assets)

    def check_exit_code(self, code: Union[int, bool]):
        if code is True:
            assert self.exit_code == 0, f'expected exit code {code}, '\
                f'got {self.exit_code}\n{self.dump_logs()}'
        elif code is False:
            assert self.exit_code != 0, f'expected exit code {code}, '\
                f'got {self.exit_code}\n{self.dump_logs()}'
        else:
            assert self.exit_code == code, f'expected exit code {code}, '\
                f'got {self.exit_code}\n{self.dump_logs()}'

    def check_response(self, http_status: Optional[int] = 200,
                       count: Optional[int] = 1,
                       protocol: Optional[str] = None,
                       exitcode: Optional[int] = 0,
                       connect_count: Optional[int] = None):
        if exitcode:
            self.check_exit_code(exitcode)
            if self.with_stats and isinstance(exitcode, int):
                for idx, x in enumerate(self.stats):
                    if 'exitcode' in x:
                        assert int(x['exitcode']) == exitcode, \
                            f'response #{idx} exitcode: expected {exitcode}, '\
                            f'got {x["exitcode"]}\n{self.dump_logs()}'

        if self.with_stats:
            assert len(self.stats) == count, \
                f'response count: expected {count}, ' \
                f'got {len(self.stats)}\n{self.dump_logs()}'
        else:
            assert len(self.responses) == count, \
                f'response count: expected {count}, ' \
                f'got {len(self.responses)}\n{self.dump_logs()}'

        if http_status is not None:
            if self.with_stats:
                for idx, x in enumerate(self.stats):
                    assert 'http_code' in x, \
                        f'response #{idx} reports no http_code\n{self.dump_stat(x)}'
                    assert x['http_code'] == http_status, \
                        f'response #{idx} http_code: expected {http_status}, '\
                        f'got {x["http_code"]}\n{self.dump_stat(x)}'
            else:
                for idx, x in enumerate(self.responses):
                    assert x['status'] == http_status, \
                        f'response #{idx} status: expected {http_status}, '\
                        f'got {x["status"]}\n{self.dump_stat(x)}'

        if protocol is not None:
            if self.with_stats:
                http_version = None
                if protocol == 'HTTP/1.1':
                    http_version = '1.1'
                elif protocol == 'HTTP/2':
                    http_version = '2'
                elif protocol == 'HTTP/3':
                    http_version = '3'
                if http_version is not None:
                    for idx, x in enumerate(self.stats):
                        assert x['http_version'] == http_version, \
                            f'response #{idx} protocol: expected http/{http_version}, ' \
                            f'got version {x["http_version"]}\n{self.dump_stat(x)}'
            else:
                for idx, x in enumerate(self.responses):
                    assert x['protocol'] == protocol, \
                        f'response #{idx} protocol: expected {protocol}, '\
                        f'got {x["protocol"]}\n{self.dump_logs()}'

        if connect_count is not None:
            assert self.total_connects == connect_count, \
                f'expected {connect_count} connects, but {self.total_connects} '\
                f'were made\n{self.dump_logs()}'

    def check_stats(self, count: int, http_status: Optional[int] = None,
                    exitcode: Optional[int] = None):
        if exitcode is None:
            self.check_exit_code(0)
        assert len(self.stats) == count, \
            f'stats count: expected {count}, got {len(self.stats)}\n{self.dump_logs()}'
        if http_status is not None:
            for idx, x in enumerate(self.stats):
                assert 'http_code' in x, \
                    f'status #{idx} reports no http_code\n{self.dump_stat(x)}'
                assert x['http_code'] == http_status, \
                    f'status #{idx} http_code: expected {http_status}, '\
                    f'got {x["http_code"]}\n{self.dump_stat(x)}'
        if exitcode is not None:
            for idx, x in enumerate(self.stats):
                if 'exitcode' in x:
                    assert x['exitcode'] == exitcode, \
                        f'status #{idx} exitcode: expected {exitcode}, '\
                        f'got {x["exitcode"]}\n{self.dump_stat(x)}'

    def dump_logs(self):
        lines = ['>>--stdout ----------------------------------------------\n']
        lines.extend(self._stdout)
        lines.append('>>--stderr ----------------------------------------------\n')
        lines.extend(self._stderr)
        lines.append('<<-------------------------------------------------------\n')
        return ''.join(lines)

    def dump_stat(self, x):
        lines = [
            'json stat from curl:',
            json.JSONEncoder(indent=2).encode(x),
        ]
        if 'xfer_id' in x:
            xfer_id = x['xfer_id']
            lines.append(f'>>--xfer {xfer_id} trace:\n')
            lines.extend(self.xfer_trace_for(xfer_id))
        else:
            lines.append('>>--full trace-------------------------------------------\n')
            lines.extend(self._stderr)
        lines.append('<<-------------------------------------------------------\n')
        return ''.join(lines)

    def xfer_trace_for(self, xfer_id) -> List[str]:
        pat = re.compile(f'^[^[]* \\[{xfer_id}-.*$')
        return [line for line in self._stderr if pat.match(line)]


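# CurlClient wraps invocations of the curl binary under test. A typical use in
# a test case looks roughly like this (a sketch; the URL and the Env attributes
# shown are illustrative and depend on the test environment configuration):
#
#   client = CurlClient(env=env)
#   r = client.http_download(urls=[f'https://{env.domain1}:{env.https_port}/data.json'],
#                            alpn_proto='h2')
#   r.check_response(count=1, http_status=200)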
class CurlClient:

    ALPN_ARG = {
        'http/0.9': '--http0.9',
        'http/1.0': '--http1.0',
        'http/1.1': '--http1.1',
        'h2': '--http2',
        'h2c': '--http2',
        'h3': '--http3-only',
    }

    def __init__(self, env: Env, run_dir: Optional[str] = None,
                 timeout: Optional[float] = None, silent: bool = False):
        self.env = env
        self._timeout = timeout if timeout else env.test_timeout
        self._curl = os.environ['CURL'] if 'CURL' in os.environ else env.curl
        self._run_dir = run_dir if run_dir else os.path.join(env.gen_dir, 'curl')
        self._stdoutfile = f'{self._run_dir}/curl.stdout'
        self._stderrfile = f'{self._run_dir}/curl.stderr'
        self._headerfile = f'{self._run_dir}/curl.headers'
        self._log_path = f'{self._run_dir}/curl.log'
        self._silent = silent
        self._rmrf(self._run_dir)
        self._mkpath(self._run_dir)

    @property
    def run_dir(self) -> str:
        return self._run_dir

    def download_file(self, i: int) -> str:
        return os.path.join(self.run_dir, f'download_{i}.data')

    def _rmf(self, path):
        if os.path.exists(path):
            return os.remove(path)

    def _rmrf(self, path):
        if os.path.exists(path):
            return shutil.rmtree(path)

    def _mkpath(self, path):
        if not os.path.exists(path):
            return os.makedirs(path)

    def get_proxy_args(self, proto: str = 'http/1.1',
                       proxys: bool = True, tunnel: bool = False):
        if proxys:
            pport = self.env.pts_port(proto) if tunnel else self.env.proxys_port
            xargs = [
                '--proxy', f'https://{self.env.proxy_domain}:{pport}/',
                '--resolve', f'{self.env.proxy_domain}:{pport}:127.0.0.1',
                '--proxy-cacert', self.env.ca.cert_file,
            ]
            if proto == 'h2':
                xargs.append('--proxy-http2')
        else:
            xargs = [
                '--proxy', f'http://{self.env.proxy_domain}:{self.env.proxy_port}/',
                '--resolve', f'{self.env.proxy_domain}:{self.env.proxy_port}:127.0.0.1',
            ]
        if tunnel:
            xargs.append('--proxytunnel')
        return xargs

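    # get_proxy_args() only assembles the option list; callers pass it on via
    # the extra_args parameter of the request helpers, e.g. (a sketch, exact
    # values depend on the test setup):
    #
    #   xargs = client.get_proxy_args(proto='h2', tunnel=True)
    #   r = client.http_get(url=some_url, extra_args=xargs)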
    def http_get(self, url: str, extra_args: Optional[List[str]] = None,
                 def_tracing: bool = True):
        return self._raw(url, options=extra_args, with_stats=False,
                         def_tracing=def_tracing)

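    # The helpers below add `-w '%{json}\n'` when with_stats is True, so curl
    # prints one JSON statistics object per transfer on stdout; ExecResult
    # parses these into .stats, which check_response()/check_stats() inspect.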
    def http_download(self, urls: List[str],
                      alpn_proto: Optional[str] = None,
                      with_stats: bool = True,
                      with_headers: bool = False,
                      no_save: bool = False,
                      extra_args: Optional[List[str]] = None):
        if extra_args is None:
            extra_args = []
        if no_save:
            extra_args.extend([
                '-o', '/dev/null',
            ])
        else:
            extra_args.extend([
                '-o', 'download_#1.data',
            ])
        # remove any existing ones
        for i in range(100):
            self._rmf(self.download_file(i))
        if with_stats:
            extra_args.extend([
                '-w', '%{json}\\n'
            ])
        return self._raw(urls, alpn_proto=alpn_proto, options=extra_args,
                         with_stats=with_stats,
                         with_headers=with_headers)

    def http_upload(self, urls: List[str], data: str,
                    alpn_proto: Optional[str] = None,
                    with_stats: bool = True,
                    with_headers: bool = False,
                    extra_args: Optional[List[str]] = None):
        if extra_args is None:
            extra_args = []
        extra_args.extend([
            '--data-binary', data, '-o', 'download_#1.data',
        ])
        if with_stats:
            extra_args.extend([
                '-w', '%{json}\\n'
            ])
        return self._raw(urls, alpn_proto=alpn_proto, options=extra_args,
                         with_stats=with_stats,
                         with_headers=with_headers)

    def http_put(self, urls: List[str], data=None, fdata=None,
                 alpn_proto: Optional[str] = None,
                 with_stats: bool = True,
                 with_headers: bool = False,
                 extra_args: Optional[List[str]] = None):
        if extra_args is None:
            extra_args = []
        if fdata is not None:
            extra_args.extend(['-T', fdata])
        elif data is not None:
            extra_args.extend(['-T', '-'])
        extra_args.extend([
            '-o', 'download_#1.data',
        ])
        if with_stats:
            extra_args.extend([
                '-w', '%{json}\\n'
            ])
        return self._raw(urls, intext=data,
                         alpn_proto=alpn_proto, options=extra_args,
                         with_stats=with_stats,
                         with_headers=with_headers)

    def http_form(self, urls: List[str], form: Dict[str, str],
                  alpn_proto: Optional[str] = None,
                  with_stats: bool = True,
                  with_headers: bool = False,
                  extra_args: Optional[List[str]] = None):
        if extra_args is None:
            extra_args = []
        for key, val in form.items():
            extra_args.extend(['-F', f'{key}={val}'])
        extra_args.extend([
            '-o', 'download_#1.data',
        ])
        if with_stats:
            extra_args.extend([
                '-w', '%{json}\\n'
            ])
        return self._raw(urls, alpn_proto=alpn_proto, options=extra_args,
                         with_stats=with_stats,
                         with_headers=with_headers)

    def response_file(self, idx: int):
        return os.path.join(self._run_dir, f'download_{idx}.data')

    def run_direct(self, args, with_stats: bool = False):
        my_args = [self._curl]
        if with_stats:
            my_args.extend([
                '-w', '%{json}\\n'
            ])
        my_args.extend([
            '-o', 'download.data',
        ])
        my_args.extend(args)
        return self._run(args=my_args, with_stats=with_stats)

    def _run(self, args, intext='', with_stats: bool = False):
        self._rmf(self._stdoutfile)
        self._rmf(self._stderrfile)
        self._rmf(self._headerfile)
        start = datetime.now()
        exception = None
        try:
            with open(self._stdoutfile, 'w') as cout:
                with open(self._stderrfile, 'w') as cerr:
                    p = subprocess.run(args, stderr=cerr, stdout=cout,
                                       cwd=self._run_dir, shell=False,
                                       input=intext.encode() if intext else None,
                                       timeout=self._timeout)
                    exitcode = p.returncode
        except subprocess.TimeoutExpired:
            log.warning(f'Timeout after {self._timeout}s: {args}')
            exitcode = -1
            exception = 'TimeoutExpired'
        coutput = open(self._stdoutfile).readlines()
        cerrput = open(self._stderrfile).readlines()
        return ExecResult(args=args, exit_code=exitcode, exception=exception,
                          stdout=coutput, stderr=cerrput,
                          duration=datetime.now() - start,
                          with_stats=with_stats)

    def _raw(self, urls, intext='', timeout=None, options=None, insecure=False,
             alpn_proto: Optional[str] = None,
             force_resolve=True,
             with_stats=False,
             with_headers=True,
             def_tracing=True):
        args = self._complete_args(
            urls=urls, timeout=timeout, options=options, insecure=insecure,
            alpn_proto=alpn_proto, force_resolve=force_resolve,
            with_headers=with_headers, def_tracing=def_tracing)
        r = self._run(args, intext=intext, with_stats=with_stats)
        if r.exit_code == 0 and with_headers:
            self._parse_headerfile(self._headerfile, r=r)
            if r.json:
                r.response["json"] = r.json
        return r

    def _complete_args(self, urls, timeout=None, options=None,
                       insecure=False, force_resolve=True,
                       alpn_proto: Optional[str] = None,
                       with_headers: bool = True,
                       def_tracing: bool = True):
        if not isinstance(urls, list):
            urls = [urls]

        args = [self._curl, "-s", "--path-as-is"]
        if with_headers:
            args.extend(["-D", self._headerfile])
        if def_tracing is not False:
            args.extend(['-v', '--trace-config', 'ids,time'])
            if self.env.verbose > 1:
                args.extend(['--trace-config', 'http/2,http/3,h2-proxy,h1-proxy'])
                pass

        for url in urls:
            u = urlparse(urls[0])
            if alpn_proto is not None:
                if alpn_proto not in self.ALPN_ARG:
                    raise Exception(f'unknown ALPN protocol: "{alpn_proto}"')
                args.append(self.ALPN_ARG[alpn_proto])

            if u.scheme == 'http':
                pass
            elif insecure:
                args.append('--insecure')
            elif options and "--cacert" in options:
                pass
            elif u.hostname:
                args.extend(["--cacert", self.env.ca.cert_file])

            if force_resolve and u.hostname and u.hostname != 'localhost' \
                    and not re.match(r'^(\d+|\[|:).*', u.hostname):
                port = u.port if u.port else 443
                args.extend(["--resolve", f"{u.hostname}:{port}:127.0.0.1"])
            if timeout is not None and int(timeout) > 0:
                args.extend(["--connect-timeout", str(int(timeout))])
            if options:
                args.extend(options)
            args.append(url)
        return args

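    # _parse_headerfile() walks the `-D` header dump as a small state machine:
    # a status line opens a response, header lines follow until a blank line,
    # after which either another status line (an interim or follow-up response)
    # or trailers may appear; each completed response is added to the result.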
    def _parse_headerfile(self, headerfile: str, r: ExecResult = None) -> ExecResult:
        lines = open(headerfile).readlines()
        if r is None:
            r = ExecResult(args=[], exit_code=0, stdout=[], stderr=[])

        response = None

        def fin_response(resp):
            if resp:
                r.add_response(resp)

        expected = ['status']
        for line in lines:
            line = line.strip()
            if re.match(r'^$', line):
                if 'trailer' in expected:
                    # end of trailers
                    fin_response(response)
                    response = None
                    expected = ['status']
                elif 'header' in expected:
                    # end of header, another status or trailers might follow
                    expected = ['status', 'trailer']
                else:
                    assert False, f"unexpected line: '{line}'"
                continue
            if 'status' in expected:
                # log.debug("reading 1st response line: %s", line)
                m = re.match(r'^(\S+) (\d+)( .*)?$', line)
                if m:
                    fin_response(response)
                    response = {
                        "protocol": m.group(1),
                        "status": int(m.group(2)),
                        "description": m.group(3),
                        "header": {},
                        "trailer": {},
                        "body": r.outraw
                    }
                    expected = ['header']
                    continue
            if 'trailer' in expected:
                m = re.match(r'^([^:]+):\s*(.*)$', line)
                if m:
                    response['trailer'][m.group(1).lower()] = m.group(2)
                    continue
            if 'header' in expected:
                m = re.match(r'^([^:]+):\s*(.*)$', line)
                if m:
                    response['header'][m.group(1).lower()] = m.group(2)
                    continue
            assert False, f"unexpected line: '{line}, expected: {expected}'"
        fin_response(response)
        return r