2
0

http1.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. * SPDX-License-Identifier: curl
  22. *
  23. ***************************************************************************/
  24. #include "curl_setup.h"
  25. #ifndef CURL_DISABLE_HTTP
  26. #include "urldata.h"
  27. #include <curl/curl.h>
  28. #include "http.h"
  29. #include "http1.h"
  30. #include "urlapi-int.h"
  31. /* The last 3 #include files should be in this order */
  32. #include "curl_printf.h"
  33. #include "curl_memory.h"
  34. #include "memdebug.h"
  35. #define H1_MAX_URL_LEN (4*1024)
  36. void Curl_h1_req_parse_init(struct h1_req_parser *parser, size_t max_line_len)
  37. {
  38. memset(parser, 0, sizeof(*parser));
  39. parser->max_line_len = max_line_len;
  40. Curl_bufq_init(&parser->scratch, max_line_len, 1);
  41. }
  42. void Curl_h1_req_parse_free(struct h1_req_parser *parser)
  43. {
  44. if(parser) {
  45. Curl_http_req_free(parser->req);
  46. Curl_bufq_free(&parser->scratch);
  47. parser->req = NULL;
  48. parser->done = FALSE;
  49. }
  50. }
  51. static ssize_t detect_line(struct h1_req_parser *parser,
  52. const char *buf, const size_t buflen, int options,
  53. CURLcode *err)
  54. {
  55. const char *line_end;
  56. size_t len;
  57. DEBUGASSERT(!parser->line);
  58. line_end = memchr(buf, '\n', buflen);
  59. if(!line_end) {
  60. *err = (buflen > parser->max_line_len)? CURLE_URL_MALFORMAT : CURLE_AGAIN;
  61. return -1;
  62. }
  63. len = line_end - buf + 1;
  64. if(len > parser->max_line_len) {
  65. *err = CURLE_URL_MALFORMAT;
  66. return -1;
  67. }
  68. if(options & H1_PARSE_OPT_STRICT) {
  69. if((len == 1) || (buf[len - 2] != '\r')) {
  70. *err = CURLE_URL_MALFORMAT;
  71. return -1;
  72. }
  73. parser->line = buf;
  74. parser->line_len = len - 2;
  75. }
  76. else {
  77. parser->line = buf;
  78. parser->line_len = len - (((len == 1) || (buf[len - 2] != '\r'))? 1 : 2);
  79. }
  80. *err = CURLE_OK;
  81. return (ssize_t)len;
  82. }
  83. static ssize_t next_line(struct h1_req_parser *parser,
  84. const char *buf, const size_t buflen, int options,
  85. CURLcode *err)
  86. {
  87. ssize_t nread = 0, n;
  88. if(parser->line) {
  89. if(parser->scratch_skip) {
  90. /* last line was from scratch. Remove it now, since we are done
  91. * with it and look for the next one. */
  92. Curl_bufq_skip_and_shift(&parser->scratch, parser->scratch_skip);
  93. parser->scratch_skip = 0;
  94. }
  95. parser->line = NULL;
  96. parser->line_len = 0;
  97. }
  98. if(Curl_bufq_is_empty(&parser->scratch)) {
  99. nread = detect_line(parser, buf, buflen, options, err);
  100. if(nread < 0) {
  101. if(*err != CURLE_AGAIN)
  102. return -1;
  103. /* not a complete line, add to scratch for later revisit */
  104. nread = Curl_bufq_write(&parser->scratch,
  105. (const unsigned char *)buf, buflen, err);
  106. return nread;
  107. }
  108. /* found one */
  109. }
  110. else {
  111. const char *sbuf;
  112. size_t sbuflen;
  113. /* scratch contains bytes from last attempt, add more to it */
  114. if(buflen) {
  115. const char *line_end;
  116. size_t add_len;
  117. ssize_t pos;
  118. line_end = memchr(buf, '\n', buflen);
  119. pos = line_end? (line_end - buf + 1) : -1;
  120. add_len = (pos >= 0)? (size_t)pos : buflen;
  121. nread = Curl_bufq_write(&parser->scratch,
  122. (const unsigned char *)buf, add_len, err);
  123. if(nread < 0) {
  124. /* Unable to add anything to scratch is an error, since we should
  125. * have seen a line there then before. */
  126. if(*err == CURLE_AGAIN)
  127. *err = CURLE_URL_MALFORMAT;
  128. return -1;
  129. }
  130. }
  131. if(Curl_bufq_peek(&parser->scratch,
  132. (const unsigned char **)&sbuf, &sbuflen)) {
  133. n = detect_line(parser, sbuf, sbuflen, options, err);
  134. if(n < 0 && *err != CURLE_AGAIN)
  135. return -1; /* real error */
  136. parser->scratch_skip = (size_t)n;
  137. }
  138. else {
  139. /* we SHOULD be able to peek at scratch data */
  140. DEBUGASSERT(0);
  141. }
  142. }
  143. return nread;
  144. }
  145. static CURLcode start_req(struct h1_req_parser *parser,
  146. const char *scheme_default, int options)
  147. {
  148. const char *p, *m, *target, *hv, *scheme, *authority, *path;
  149. size_t m_len, target_len, hv_len, scheme_len, authority_len, path_len;
  150. size_t i;
  151. CURLU *url = NULL;
  152. CURLcode result = CURLE_URL_MALFORMAT; /* Use this as default fail */
  153. DEBUGASSERT(!parser->req);
  154. /* line must match: "METHOD TARGET HTTP_VERSION" */
  155. p = memchr(parser->line, ' ', parser->line_len);
  156. if(!p || p == parser->line)
  157. goto out;
  158. m = parser->line;
  159. m_len = p - parser->line;
  160. target = p + 1;
  161. target_len = hv_len = 0;
  162. hv = NULL;
  163. /* URL may contain spaces so scan backwards */
  164. for(i = parser->line_len; i > m_len; --i) {
  165. if(parser->line[i] == ' ') {
  166. hv = &parser->line[i + 1];
  167. hv_len = parser->line_len - i;
  168. target_len = (hv - target) - 1;
  169. break;
  170. }
  171. }
  172. /* no SPACE found or empty TARGET or empy HTTP_VERSION */
  173. if(!target_len || !hv_len)
  174. goto out;
  175. /* TODO: we do not check HTTP_VERSION for conformity, should
  176. + do that when STRICT option is supplied. */
  177. (void)hv;
  178. /* The TARGET can be (rfc 9112, ch. 3.2):
  179. * origin-form: path + optional query
  180. * absolute-form: absolute URI
  181. * authority-form: host+port for CONNECT
  182. * asterisk-form: '*' for OPTIONS
  183. *
  184. * from TARGET, we derive `scheme` `authority` `path`
  185. * origin-form -- -- TARGET
  186. * absolute-form URL* URL* URL*
  187. * authority-form -- TARGET --
  188. * asterisk-form -- -- TARGET
  189. */
  190. scheme = authority = path = NULL;
  191. scheme_len = authority_len = path_len = 0;
  192. if(target_len == 1 && target[0] == '*') {
  193. /* asterisk-form */
  194. path = target;
  195. path_len = target_len;
  196. }
  197. else if(!strncmp("CONNECT", m, m_len)) {
  198. /* authority-form */
  199. authority = target;
  200. authority_len = target_len;
  201. }
  202. else if(target[0] == '/') {
  203. /* origin-form */
  204. path = target;
  205. path_len = target_len;
  206. }
  207. else {
  208. /* origin-form OR absolute-form */
  209. CURLUcode uc;
  210. char tmp[H1_MAX_URL_LEN];
  211. /* default, unless we see an absolute URL */
  212. path = target;
  213. path_len = target_len;
  214. /* URL parser wants 0-termination */
  215. if(target_len >= sizeof(tmp))
  216. goto out;
  217. memcpy(tmp, target, target_len);
  218. tmp[target_len] = '\0';
  219. /* See if treating TARGET as an absolute URL makes sense */
  220. if(Curl_is_absolute_url(tmp, NULL, 0, FALSE)) {
  221. int url_options;
  222. url = curl_url();
  223. if(!url) {
  224. result = CURLE_OUT_OF_MEMORY;
  225. goto out;
  226. }
  227. url_options = (CURLU_NON_SUPPORT_SCHEME|
  228. CURLU_PATH_AS_IS|
  229. CURLU_NO_DEFAULT_PORT);
  230. if(!(options & H1_PARSE_OPT_STRICT))
  231. url_options |= CURLU_ALLOW_SPACE;
  232. uc = curl_url_set(url, CURLUPART_URL, tmp, url_options);
  233. if(uc) {
  234. goto out;
  235. }
  236. }
  237. if(!url && (options & H1_PARSE_OPT_STRICT)) {
  238. /* we should have an absolute URL or have seen `/` earlier */
  239. goto out;
  240. }
  241. }
  242. if(url) {
  243. result = Curl_http_req_make2(&parser->req, m, m_len, url, scheme_default);
  244. }
  245. else {
  246. if(!scheme && scheme_default) {
  247. scheme = scheme_default;
  248. scheme_len = strlen(scheme_default);
  249. }
  250. result = Curl_http_req_make(&parser->req, m, m_len, scheme, scheme_len,
  251. authority, authority_len, path, path_len);
  252. }
  253. out:
  254. curl_url_cleanup(url);
  255. return result;
  256. }
  257. ssize_t Curl_h1_req_parse_read(struct h1_req_parser *parser,
  258. const char *buf, size_t buflen,
  259. const char *scheme_default, int options,
  260. CURLcode *err)
  261. {
  262. ssize_t nread = 0, n;
  263. *err = CURLE_OK;
  264. while(!parser->done) {
  265. n = next_line(parser, buf, buflen, options, err);
  266. if(n < 0) {
  267. if(*err != CURLE_AGAIN) {
  268. nread = -1;
  269. }
  270. *err = CURLE_OK;
  271. goto out;
  272. }
  273. /* Consume this line */
  274. nread += (size_t)n;
  275. buf += (size_t)n;
  276. buflen -= (size_t)n;
  277. if(!parser->line) {
  278. /* consumed bytes, but line not complete */
  279. if(!buflen)
  280. goto out;
  281. }
  282. else if(!parser->req) {
  283. *err = start_req(parser, scheme_default, options);
  284. if(*err) {
  285. nread = -1;
  286. goto out;
  287. }
  288. }
  289. else if(parser->line_len == 0) {
  290. /* last, empty line, we are finished */
  291. if(!parser->req) {
  292. *err = CURLE_URL_MALFORMAT;
  293. nread = -1;
  294. goto out;
  295. }
  296. parser->done = TRUE;
  297. Curl_bufq_free(&parser->scratch);
  298. /* last chance adjustments */
  299. }
  300. else {
  301. *err = Curl_dynhds_h1_add_line(&parser->req->headers,
  302. parser->line, parser->line_len);
  303. if(*err) {
  304. nread = -1;
  305. goto out;
  306. }
  307. }
  308. }
  309. out:
  310. return nread;
  311. }
  312. #endif /* !CURL_DISABLE_HTTP */