curl.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #***************************************************************************
  4. #                                  _   _ ____  _
  5. #  Project                     ___| | | |  _ \| |
  6. #                             / __| | | | |_) | |
  7. #                            | (__| |_| |  _ <| |___
  8. #                             \___|\___/|_| \_\_____|
  9. #
  10. # Copyright (C) 2008 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
  11. #
  12. # This software is licensed as described in the file COPYING, which
  13. # you should have received as part of this distribution. The terms
  14. # are also available at https://curl.se/docs/copyright.html.
  15. #
  16. # You may opt to use, copy, modify, merge, publish, distribute and/or sell
  17. # copies of the Software, and permit persons to whom the Software is
  18. # furnished to do so, under the terms of the COPYING file.
  19. #
  20. # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  21. # KIND, either express or implied.
  22. #
  23. # SPDX-License-Identifier: curl
  24. #
  25. ###########################################################################
  26. #
  27. import json
  28. import logging
  29. import os
  30. import re
  31. import shutil
  32. import subprocess
  33. from datetime import timedelta, datetime
  34. from typing import List, Optional, Dict
  35. from urllib.parse import urlparse
  36. from .env import Env
  37. log = logging.getLogger(__name__)
  38. class ExecResult:
  39. def __init__(self, args: List[str], exit_code: int,
  40. stdout: List[str], stderr: List[str],
  41. duration: Optional[timedelta] = None,
  42. with_stats: bool = False):
  43. self._args = args
  44. self._exit_code = exit_code
  45. self._stdout = stdout
  46. self._stderr = stderr
  47. self._duration = duration if duration is not None else timedelta()
  48. self._response = None
  49. self._responses = []
  50. self._results = {}
  51. self._assets = []
  52. self._stats = []
  53. self._json_out = None
  54. self._with_stats = with_stats
  55. if with_stats:
  56. self._parse_stats()
  57. else:
  58. # noinspection PyBroadException
  59. try:
  60. out = ''.join(self._stdout)
  61. self._json_out = json.loads(out)
  62. except:
  63. pass
  64. def __repr__(self):
  65. return f"ExecResult[code={self.exit_code}, args={self._args}, stdout={self._stdout}, stderr={self._stderr}]"
  66. def _parse_stats(self):
  67. self._stats = []
  68. for l in self._stdout:
  69. try:
  70. self._stats.append(json.loads(l))
  71. except:
  72. log.error(f'not a JSON stat: {l}')
  73. log.error(f'stdout is: {"".join(self._stdout)}')
  74. break
  75. @property
  76. def exit_code(self) -> int:
  77. return self._exit_code
  78. @property
  79. def args(self) -> List[str]:
  80. return self._args
  81. @property
  82. def outraw(self) -> bytes:
  83. return ''.join(self._stdout).encode()
  84. @property
  85. def stdout(self) -> str:
  86. return ''.join(self._stdout)
  87. @property
  88. def json(self) -> Optional[Dict]:
  89. """Output as JSON dictionary or None if not parseable."""
  90. return self._json_out
  91. @property
  92. def stderr(self) -> str:
  93. return ''.join(self._stderr)
  94. @property
  95. def duration(self) -> timedelta:
  96. return self._duration
  97. @property
  98. def response(self) -> Optional[Dict]:
  99. return self._response
  100. @property
  101. def responses(self) -> List[Dict]:
  102. return self._responses
  103. @property
  104. def results(self) -> Dict:
  105. return self._results
  106. @property
  107. def assets(self) -> List:
  108. return self._assets
  109. @property
  110. def with_stats(self) -> bool:
  111. return self._with_stats
  112. @property
  113. def stats(self) -> List:
  114. return self._stats
  115. @property
  116. def total_connects(self) -> Optional[int]:
  117. if len(self.stats):
  118. n = 0
  119. for stat in self.stats:
  120. n += stat['num_connects']
  121. return n
  122. return None
  123. def add_response(self, resp: Dict):
  124. self._response = resp
  125. self._responses.append(resp)
  126. def add_results(self, results: Dict):
  127. self._results.update(results)
  128. if 'response' in results:
  129. self.add_response(results['response'])
  130. def add_assets(self, assets: List):
  131. self._assets.extend(assets)
  132. def check_responses(self, count: int, exp_status: Optional[int] = None):
  133. if len(self.responses) != count:
  134. seen_queries = []
  135. for idx, resp in enumerate(self.responses):
  136. assert resp['status'] == 200, f'response #{idx} status: {resp["status"]}'
  137. if 'rquery' not in resp['header']:
  138. log.error(f'response #{idx} missing "rquery": {resp["header"]}')
  139. seen_queries.append(int(resp['header']['rquery']))
  140. for i in range(0, count-1):
  141. if i not in seen_queries:
  142. log.error(f'response for query {i} missing')
  143. if self.with_stats and len(self.stats) == count:
  144. log.error(f'got all {count} stats, though')
  145. assert len(self.responses) == count, \
  146. f'response count: expected {count}, got {len(self.responses)}'
  147. if exp_status is not None:
  148. for idx, x in enumerate(self.responses):
  149. assert x['status'] == exp_status, \
  150. f'response #{idx} unexpectedstatus: {x["status"]}'
  151. if self.with_stats:
  152. assert len(self.stats) == count, f'{self}'
  153. class CurlClient:
  154. ALPN_ARG = {
  155. 'http/0.9': '--http0.9',
  156. 'http/1.0': '--http1.0',
  157. 'http/1.1': '--http1.1',
  158. 'h2': '--http2',
  159. 'h2c': '--http2',
  160. 'h3': '--http3',
  161. }
  162. def __init__(self, env: Env, run_dir: Optional[str] = None):
  163. self.env = env
  164. self._curl = os.environ['CURL'] if 'CURL' in os.environ else env.curl
  165. self._run_dir = run_dir if run_dir else os.path.join(env.gen_dir, 'curl')
  166. self._stdoutfile = f'{self._run_dir}/curl.stdout'
  167. self._stderrfile = f'{self._run_dir}/curl.stderr'
  168. self._headerfile = f'{self._run_dir}/curl.headers'
  169. self._tracefile = f'{self._run_dir}/curl.trace'
  170. self._log_path = f'{self._run_dir}/curl.log'
  171. self._rmrf(self._run_dir)
  172. self._mkpath(self._run_dir)
  173. def _rmf(self, path):
  174. if os.path.exists(path):
  175. return os.remove(path)
  176. def _rmrf(self, path):
  177. if os.path.exists(path):
  178. return shutil.rmtree(path)
  179. def _mkpath(self, path):
  180. if not os.path.exists(path):
  181. return os.makedirs(path)
  182. def http_get(self, url: str, extra_args: Optional[List[str]] = None):
  183. return self._raw(url, options=extra_args, with_stats=False)
  184. def http_download(self, urls: List[str],
  185. alpn_proto: Optional[str] = None,
  186. with_stats: bool = True,
  187. extra_args: List[str] = None):
  188. if extra_args is None:
  189. extra_args = []
  190. extra_args.extend([
  191. '-o', 'download.data',
  192. ])
  193. if with_stats:
  194. extra_args.extend([
  195. '-w', '%{json}\\n'
  196. ])
  197. return self._raw(urls, alpn_proto=alpn_proto, options=extra_args,
  198. with_stats=with_stats)
  199. def _run(self, args, intext='', with_stats: bool = False):
  200. self._rmf(self._stdoutfile)
  201. self._rmf(self._stderrfile)
  202. self._rmf(self._headerfile)
  203. self._rmf(self._tracefile)
  204. start = datetime.now()
  205. with open(self._stdoutfile, 'w') as cout:
  206. with open(self._stderrfile, 'w') as cerr:
  207. p = subprocess.run(args, stderr=cerr, stdout=cout,
  208. cwd=self._run_dir, shell=False,
  209. input=intext.encode() if intext else None)
  210. coutput = open(self._stdoutfile).readlines()
  211. cerrput = open(self._stderrfile).readlines()
  212. return ExecResult(args=args, exit_code=p.returncode,
  213. stdout=coutput, stderr=cerrput,
  214. duration=datetime.now() - start,
  215. with_stats=with_stats)
  216. def _raw(self, urls, timeout=10, options=None, insecure=False,
  217. alpn_proto: Optional[str] = None,
  218. force_resolve=True, with_stats=False):
  219. args = self._complete_args(
  220. urls=urls, timeout=timeout, options=options, insecure=insecure,
  221. alpn_proto=alpn_proto, force_resolve=force_resolve)
  222. r = self._run(args, with_stats=with_stats)
  223. if r.exit_code == 0:
  224. self._parse_headerfile(self._headerfile, r=r)
  225. if r.json:
  226. r.response["json"] = r.json
  227. return r
  228. def _complete_args(self, urls, timeout=None, options=None,
  229. insecure=False, force_resolve=True,
  230. alpn_proto: Optional[str] = None):
  231. if not isinstance(urls, list):
  232. urls = [urls]
  233. args = [
  234. self._curl, "-s", "--path-as-is", "-D", self._headerfile,
  235. ]
  236. if self.env.verbose > 2:
  237. args.extend(['--trace', self._tracefile, '--trace-time'])
  238. for url in urls:
  239. u = urlparse(urls[0])
  240. if alpn_proto is not None:
  241. if alpn_proto not in self.ALPN_ARG:
  242. raise Exception(f'unknown ALPN protocol: "{alpn_proto}"')
  243. args.append(self.ALPN_ARG[alpn_proto])
  244. if u.scheme == 'http':
  245. pass
  246. elif insecure:
  247. args.append('--insecure')
  248. elif options and "--cacert" in options:
  249. pass
  250. elif u.hostname:
  251. args.extend(["--cacert", self.env.ca.cert_file])
  252. if force_resolve and u.hostname and u.hostname != 'localhost' \
  253. and not re.match(r'^(\d+|\[|:).*', u.hostname):
  254. port = u.port if u.port else 443
  255. args.extend(["--resolve", f"{u.hostname}:{port}:127.0.0.1"])
  256. if timeout is not None and int(timeout) > 0:
  257. args.extend(["--connect-timeout", str(int(timeout))])
  258. if options:
  259. args.extend(options)
  260. args.append(url)
  261. return args
  262. def _parse_headerfile(self, headerfile: str, r: ExecResult = None) -> ExecResult:
  263. lines = open(headerfile).readlines()
  264. if r is None:
  265. r = ExecResult(args=[], exit_code=0, stdout=[], stderr=[])
  266. response = None
  267. def fin_response(resp):
  268. if resp:
  269. r.add_response(resp)
  270. expected = ['status']
  271. for line in lines:
  272. line = line.strip()
  273. if re.match(r'^$', line):
  274. if 'trailer' in expected:
  275. # end of trailers
  276. fin_response(response)
  277. response = None
  278. expected = ['status']
  279. elif 'header' in expected:
  280. # end of header, another status or trailers might follow
  281. expected = ['status', 'trailer']
  282. else:
  283. assert False, f"unexpected line: '{line}'"
  284. continue
  285. if 'status' in expected:
  286. # log.debug("reading 1st response line: %s", line)
  287. m = re.match(r'^(\S+) (\d+)( .*)?$', line)
  288. if m:
  289. fin_response(response)
  290. response = {
  291. "protocol": m.group(1),
  292. "status": int(m.group(2)),
  293. "description": m.group(3),
  294. "header": {},
  295. "trailer": {},
  296. "body": r.outraw
  297. }
  298. expected = ['header']
  299. continue
  300. if 'trailer' in expected:
  301. m = re.match(r'^([^:]+):\s*(.*)$', line)
  302. if m:
  303. response['trailer'][m.group(1).lower()] = m.group(2)
  304. continue
  305. if 'header' in expected:
  306. m = re.match(r'^([^:]+):\s*(.*)$', line)
  307. if m:
  308. response['header'][m.group(1).lower()] = m.group(2)
  309. continue
  310. assert False, f"unexpected line: '{line}, expected: {expected}'"
  311. fin_response(response)
  312. return r