wget.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * wget - retrieve a file using HTTP or FTP
  4. *
  5. * Chip Rosenthal Covad Communications <chip@laserlink.net>
  6. *
  7. * Licensed under GPLv2, see file LICENSE in this tarball for details.
  8. */
  9. #include "libbb.h"
  10. struct host_info {
  11. // May be used if we ever will want to free() all xstrdup()s...
  12. /* char *allocated; */
  13. const char *path;
  14. const char *user;
  15. char *host;
  16. int port;
  17. smallint is_ftp;
  18. };
  19. /* Globals (can be accessed from signal handlers) */
  20. struct globals {
  21. off_t content_len; /* Content-length of the file */
  22. off_t beg_range; /* Range at which continue begins */
  23. #if ENABLE_FEATURE_WGET_STATUSBAR
  24. off_t lastsize;
  25. off_t totalsize;
  26. off_t transferred; /* Number of bytes transferred so far */
  27. const char *curfile; /* Name of current file being transferred */
  28. unsigned lastupdate_sec;
  29. unsigned start_sec;
  30. #endif
  31. smallint chunked; /* chunked transfer encoding */
  32. };
  33. #define G (*(struct globals*)&bb_common_bufsiz1)
  34. struct BUG_G_too_big {
  35. char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
  36. };
  37. #define content_len (G.content_len )
  38. #define beg_range (G.beg_range )
  39. #define lastsize (G.lastsize )
  40. #define totalsize (G.totalsize )
  41. #define transferred (G.transferred )
  42. #define curfile (G.curfile )
  43. #define lastupdate_sec (G.lastupdate_sec )
  44. #define start_sec (G.start_sec )
  45. #define chunked (G.chunked )
  46. #define INIT_G() do { } while (0)
  47. #if ENABLE_FEATURE_WGET_STATUSBAR
  48. enum {
  49. STALLTIME = 5 /* Seconds when xfer considered "stalled" */
  50. };
  51. static unsigned int get_tty2_width(void)
  52. {
  53. unsigned width;
  54. get_terminal_width_height(2, &width, NULL);
  55. return width;
  56. }
  57. static void progress_meter(int flag)
  58. {
  59. /* We can be called from signal handler */
  60. int save_errno = errno;
  61. off_t abbrevsize;
  62. unsigned since_last_update, elapsed;
  63. unsigned ratio;
  64. int barlength, i;
  65. if (flag == -1) { /* first call to progress_meter */
  66. start_sec = monotonic_sec();
  67. lastupdate_sec = start_sec;
  68. lastsize = 0;
  69. totalsize = content_len + beg_range; /* as content_len changes.. */
  70. }
  71. ratio = 100;
  72. if (totalsize != 0 && !chunked) {
  73. /* long long helps to have it working even if !LFS */
  74. ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
  75. if (ratio > 100) ratio = 100;
  76. }
  77. fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
  78. barlength = get_tty2_width() - 49;
  79. if (barlength > 0) {
  80. /* god bless gcc for variable arrays :) */
  81. i = barlength * ratio / 100;
  82. {
  83. char buf[i+1];
  84. memset(buf, '*', i);
  85. buf[i] = '\0';
  86. fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
  87. }
  88. }
  89. i = 0;
  90. abbrevsize = transferred + beg_range;
  91. while (abbrevsize >= 100000) {
  92. i++;
  93. abbrevsize >>= 10;
  94. }
  95. /* see http://en.wikipedia.org/wiki/Tera */
  96. fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
  97. // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
  98. elapsed = monotonic_sec();
  99. since_last_update = elapsed - lastupdate_sec;
  100. if (transferred > lastsize) {
  101. lastupdate_sec = elapsed;
  102. lastsize = transferred;
  103. if (since_last_update >= STALLTIME) {
  104. /* We "cut off" these seconds from elapsed time
  105. * by adjusting start time */
  106. start_sec += since_last_update;
  107. }
  108. since_last_update = 0; /* we are un-stalled now */
  109. }
  110. elapsed -= start_sec; /* now it's "elapsed since start" */
  111. if (since_last_update >= STALLTIME) {
  112. fprintf(stderr, " - stalled -");
  113. } else {
  114. off_t to_download = totalsize - beg_range;
  115. if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
  116. fprintf(stderr, "--:--:-- ETA");
  117. } else {
  118. /* to_download / (transferred/elapsed) - elapsed: */
  119. int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
  120. /* (long long helps to have working ETA even if !LFS) */
  121. i = eta % 3600;
  122. fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
  123. }
  124. }
  125. if (flag == 0) {
  126. /* last call to progress_meter */
  127. alarm(0);
  128. transferred = 0;
  129. fputc('\n', stderr);
  130. } else {
  131. if (flag == -1) { /* first call to progress_meter */
  132. signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
  133. }
  134. alarm(1);
  135. }
  136. errno = save_errno;
  137. }
  138. /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
  139. * much of which was blatantly stolen from openssh. */
  140. /*-
  141. * Copyright (c) 1992, 1993
  142. * The Regents of the University of California. All rights reserved.
  143. *
  144. * Redistribution and use in source and binary forms, with or without
  145. * modification, are permitted provided that the following conditions
  146. * are met:
  147. * 1. Redistributions of source code must retain the above copyright
  148. * notice, this list of conditions and the following disclaimer.
  149. * 2. Redistributions in binary form must reproduce the above copyright
  150. * notice, this list of conditions and the following disclaimer in the
  151. * documentation and/or other materials provided with the distribution.
  152. *
  153. * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
  154. * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
  155. *
  156. * 4. Neither the name of the University nor the names of its contributors
  157. * may be used to endorse or promote products derived from this software
  158. * without specific prior written permission.
  159. *
  160. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  161. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  162. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  163. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  164. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  165. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  166. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  167. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  168. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  169. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  170. * SUCH DAMAGE.
  171. *
  172. */
  173. #else /* FEATURE_WGET_STATUSBAR */
  174. static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
  175. #endif
  176. /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
  177. * and a short count if an eof or non-interrupt error is encountered. */
  178. static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
  179. {
  180. size_t ret;
  181. char *p = (char*)ptr;
  182. do {
  183. clearerr(stream);
  184. errno = 0;
  185. ret = fread(p, 1, nmemb, stream);
  186. p += ret;
  187. nmemb -= ret;
  188. } while (nmemb && ferror(stream) && errno == EINTR);
  189. return p - (char*)ptr;
  190. }
  191. /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
  192. * Returns S, or NULL if an eof or non-interrupt error is encountered. */
  193. static char *safe_fgets(char *s, int size, FILE *stream)
  194. {
  195. char *ret;
  196. do {
  197. clearerr(stream);
  198. errno = 0;
  199. ret = fgets(s, size, stream);
  200. } while (ret == NULL && ferror(stream) && errno == EINTR);
  201. return ret;
  202. }
  203. #if ENABLE_FEATURE_WGET_AUTHENTICATION
  204. /* Base64-encode character string. buf is assumed to be char buf[512]. */
  205. static char *base64enc_512(char buf[512], const char *str)
  206. {
  207. unsigned len = strlen(str);
  208. if (len > 512/4*3 - 10) /* paranoia */
  209. len = 512/4*3 - 10;
  210. bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
  211. return buf;
  212. }
  213. #endif
  214. static FILE *open_socket(len_and_sockaddr *lsa)
  215. {
  216. FILE *fp;
  217. /* glibc 2.4 seems to try seeking on it - ??! */
  218. /* hopefully it understands what ESPIPE means... */
  219. fp = fdopen(xconnect_stream(lsa), "r+");
  220. if (fp == NULL)
  221. bb_perror_msg_and_die("fdopen");
  222. return fp;
  223. }
  224. static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
  225. {
  226. int result;
  227. if (s1) {
  228. if (!s2) s2 = "";
  229. fprintf(fp, "%s%s\r\n", s1, s2);
  230. fflush(fp);
  231. }
  232. do {
  233. char *buf_ptr;
  234. if (fgets(buf, 510, fp) == NULL) {
  235. bb_perror_msg_and_die("error getting response");
  236. }
  237. buf_ptr = strstr(buf, "\r\n");
  238. if (buf_ptr) {
  239. *buf_ptr = '\0';
  240. }
  241. } while (!isdigit(buf[0]) || buf[3] != ' ');
  242. buf[3] = '\0';
  243. result = xatoi_u(buf);
  244. buf[3] = ' ';
  245. return result;
  246. }
  247. static void parse_url(char *src_url, struct host_info *h)
  248. {
  249. char *url, *p, *sp;
  250. /* h->allocated = */ url = xstrdup(src_url);
  251. if (strncmp(url, "http://", 7) == 0) {
  252. h->port = bb_lookup_port("http", "tcp", 80);
  253. h->host = url + 7;
  254. h->is_ftp = 0;
  255. } else if (strncmp(url, "ftp://", 6) == 0) {
  256. h->port = bb_lookup_port("ftp", "tcp", 21);
  257. h->host = url + 6;
  258. h->is_ftp = 1;
  259. } else
  260. bb_error_msg_and_die("not an http or ftp url: %s", url);
  261. // FYI:
  262. // "Real" wget 'http://busybox.net?var=a/b' sends this request:
  263. // 'GET /?var=a/b HTTP 1.0'
  264. // and saves 'index.html?var=a%2Fb' (we save 'b')
  265. // wget 'http://busybox.net?login=john@doe':
  266. // request: 'GET /?login=john@doe HTTP/1.0'
  267. // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
  268. // wget 'http://busybox.net#test/test':
  269. // request: 'GET / HTTP/1.0'
  270. // saves: 'index.html' (we save 'test')
  271. //
  272. // We also don't add unique .N suffix if file exists...
  273. sp = strchr(h->host, '/');
  274. p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
  275. p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
  276. if (!sp) {
  277. h->path = "";
  278. } else if (*sp == '/') {
  279. *sp = '\0';
  280. h->path = sp + 1;
  281. } else { // '#' or '?'
  282. // http://busybox.net?login=john@doe is a valid URL
  283. // memmove converts to:
  284. // http:/busybox.nett?login=john@doe...
  285. memmove(h->host - 1, h->host, sp - h->host);
  286. h->host--;
  287. sp[-1] = '\0';
  288. h->path = sp;
  289. }
  290. sp = strrchr(h->host, '@');
  291. h->user = NULL;
  292. if (sp != NULL) {
  293. h->user = h->host;
  294. *sp = '\0';
  295. h->host = sp + 1;
  296. }
  297. sp = h->host;
  298. }
  299. static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
  300. {
  301. char *s, *hdrval;
  302. int c;
  303. /* *istrunc = 0; */
  304. /* retrieve header line */
  305. if (fgets(buf, bufsiz, fp) == NULL)
  306. return NULL;
  307. /* see if we are at the end of the headers */
  308. for (s = buf; *s == '\r'; ++s)
  309. continue;
  310. if (*s == '\n')
  311. return NULL;
  312. /* convert the header name to lower case */
  313. for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
  314. *s = tolower(*s);
  315. /* verify we are at the end of the header name */
  316. if (*s != ':')
  317. bb_error_msg_and_die("bad header line: %s", buf);
  318. /* locate the start of the header value */
  319. *s++ = '\0';
  320. hdrval = skip_whitespace(s);
  321. /* locate the end of header */
  322. while (*s && *s != '\r' && *s != '\n')
  323. ++s;
  324. /* end of header found */
  325. if (*s) {
  326. *s = '\0';
  327. return hdrval;
  328. }
  329. /* Rats! The buffer isn't big enough to hold the entire header value. */
  330. while (c = getc(fp), c != EOF && c != '\n')
  331. continue;
  332. /* *istrunc = 1; */
  333. return hdrval;
  334. }
  335. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  336. static char *URL_escape(const char *str)
  337. {
  338. /* URL encode, see RFC 2396 */
  339. char *dst;
  340. char *res = dst = xmalloc(strlen(str) * 3 + 1);
  341. unsigned char c;
  342. while (1) {
  343. c = *str++;
  344. if (c == '\0'
  345. /* || strchr("!&'()*-.=_~", c) - more code */
  346. || c == '!'
  347. || c == '&'
  348. || c == '\''
  349. || c == '('
  350. || c == ')'
  351. || c == '*'
  352. || c == '-'
  353. || c == '.'
  354. || c == '='
  355. || c == '_'
  356. || c == '~'
  357. || (c >= '0' && c <= '9')
  358. || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
  359. ) {
  360. *dst++ = c;
  361. if (c == '\0')
  362. return res;
  363. } else {
  364. *dst++ = '%';
  365. *dst++ = bb_hexdigits_upcase[c >> 4];
  366. *dst++ = bb_hexdigits_upcase[c & 0xf];
  367. }
  368. }
  369. }
  370. #endif
  371. int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  372. int wget_main(int argc UNUSED_PARAM, char **argv)
  373. {
  374. char buf[512];
  375. struct host_info server, target;
  376. len_and_sockaddr *lsa;
  377. int status;
  378. int port;
  379. int try = 5;
  380. unsigned opt;
  381. char *str;
  382. char *proxy = 0;
  383. char *dir_prefix = NULL;
  384. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  385. char *post_data;
  386. char *extra_headers = NULL;
  387. llist_t *headers_llist = NULL;
  388. #endif
  389. FILE *sfp = NULL; /* socket to web/ftp server */
  390. FILE *dfp; /* socket to ftp server (data) */
  391. char *fname_out; /* where to direct output (-O) */
  392. bool got_clen = 0; /* got content-length: from server */
  393. int output_fd = -1;
  394. bool use_proxy = 1; /* Use proxies if env vars are set */
  395. const char *proxy_flag = "on"; /* Use proxies if env vars are set */
  396. const char *user_agent = "Wget";/* "User-Agent" header field */
  397. static const char keywords[] ALIGN1 =
  398. "content-length\0""transfer-encoding\0""chunked\0""location\0";
  399. enum {
  400. KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
  401. };
  402. enum {
  403. WGET_OPT_CONTINUE = (1 << 0),
  404. WGET_OPT_SPIDER = (1 << 1),
  405. WGET_OPT_QUIET = (1 << 2),
  406. WGET_OPT_OUTNAME = (1 << 3),
  407. WGET_OPT_PREFIX = (1 << 4),
  408. WGET_OPT_PROXY = (1 << 5),
  409. WGET_OPT_USER_AGENT = (1 << 6),
  410. WGET_OPT_RETRIES = (1 << 7),
  411. WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
  412. WGET_OPT_PASSIVE = (1 << 9),
  413. WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  414. WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  415. };
  416. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  417. static const char wget_longopts[] ALIGN1 =
  418. /* name, has_arg, val */
  419. "continue\0" No_argument "c"
  420. "spider\0" No_argument "s"
  421. "quiet\0" No_argument "q"
  422. "output-document\0" Required_argument "O"
  423. "directory-prefix\0" Required_argument "P"
  424. "proxy\0" Required_argument "Y"
  425. "user-agent\0" Required_argument "U"
  426. /* Ignored: */
  427. // "tries\0" Required_argument "t"
  428. // "timeout\0" Required_argument "T"
  429. /* Ignored (we always use PASV): */
  430. "passive-ftp\0" No_argument "\xff"
  431. "header\0" Required_argument "\xfe"
  432. "post-data\0" Required_argument "\xfd"
  433. ;
  434. #endif
  435. INIT_G();
  436. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  437. applet_long_options = wget_longopts;
  438. #endif
  439. /* server.allocated = target.allocated = NULL; */
  440. opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
  441. opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
  442. &fname_out, &dir_prefix,
  443. &proxy_flag, &user_agent,
  444. NULL, /* -t RETRIES */
  445. NULL /* -T NETWORK_READ_TIMEOUT */
  446. USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
  447. USE_FEATURE_WGET_LONG_OPTIONS(, &post_data)
  448. );
  449. if (strcmp(proxy_flag, "off") == 0) {
  450. /* Use the proxy if necessary */
  451. use_proxy = 0;
  452. }
  453. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  454. if (headers_llist) {
  455. int size = 1;
  456. char *cp;
  457. llist_t *ll = headers_llist;
  458. while (ll) {
  459. size += strlen(ll->data) + 2;
  460. ll = ll->link;
  461. }
  462. extra_headers = cp = xmalloc(size);
  463. while (headers_llist) {
  464. cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
  465. }
  466. }
  467. #endif
  468. parse_url(argv[optind], &target);
  469. server.host = target.host;
  470. server.port = target.port;
  471. /* Use the proxy if necessary */
  472. if (use_proxy) {
  473. proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
  474. if (proxy && *proxy) {
  475. parse_url(proxy, &server);
  476. } else {
  477. use_proxy = 0;
  478. }
  479. }
  480. /* Guess an output filename, if there was no -O FILE */
  481. if (!(opt & WGET_OPT_OUTNAME)) {
  482. fname_out = bb_get_last_path_component_nostrip(target.path);
  483. /* handle "wget http://kernel.org//" */
  484. if (fname_out[0] == '/' || !fname_out[0])
  485. fname_out = (char*)"index.html";
  486. /* -P DIR is considered only if there was no -O FILE */
  487. if (dir_prefix)
  488. fname_out = concat_path_file(dir_prefix, fname_out);
  489. } else {
  490. if (LONE_DASH(fname_out)) {
  491. /* -O - */
  492. output_fd = 1;
  493. opt &= ~WGET_OPT_CONTINUE;
  494. }
  495. }
  496. #if ENABLE_FEATURE_WGET_STATUSBAR
  497. curfile = bb_get_last_path_component_nostrip(fname_out);
  498. #endif
  499. /* Impossible?
  500. if ((opt & WGET_OPT_CONTINUE) && !fname_out)
  501. bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
  502. /* Determine where to start transfer */
  503. if (opt & WGET_OPT_CONTINUE) {
  504. output_fd = open(fname_out, O_WRONLY);
  505. if (output_fd >= 0) {
  506. beg_range = xlseek(output_fd, 0, SEEK_END);
  507. }
  508. /* File doesn't exist. We do not create file here yet.
  509. We are not sure it exists on remove side */
  510. }
  511. /* We want to do exactly _one_ DNS lookup, since some
  512. * sites (i.e. ftp.us.debian.org) use round-robin DNS
  513. * and we want to connect to only one IP... */
  514. lsa = xhost2sockaddr(server.host, server.port);
  515. if (!(opt & WGET_OPT_QUIET)) {
  516. fprintf(stderr, "Connecting to %s (%s)\n", server.host,
  517. xmalloc_sockaddr2dotted(&lsa->u.sa));
  518. /* We leak result of xmalloc_sockaddr2dotted */
  519. }
  520. if (use_proxy || !target.is_ftp) {
  521. /*
  522. * HTTP session
  523. */
  524. do {
  525. got_clen = 0;
  526. chunked = 0;
  527. if (!--try)
  528. bb_error_msg_and_die("too many redirections");
  529. /* Open socket to http server */
  530. if (sfp) fclose(sfp);
  531. sfp = open_socket(lsa);
  532. /* Send HTTP request. */
  533. if (use_proxy) {
  534. fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
  535. target.is_ftp ? "f" : "ht", target.host,
  536. target.path);
  537. } else {
  538. if (opt & WGET_OPT_POST_DATA)
  539. fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
  540. else
  541. fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
  542. }
  543. fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
  544. target.host, user_agent);
  545. #if ENABLE_FEATURE_WGET_AUTHENTICATION
  546. if (target.user) {
  547. fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
  548. base64enc_512(buf, target.user));
  549. }
  550. if (use_proxy && server.user) {
  551. fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
  552. base64enc_512(buf, server.user));
  553. }
  554. #endif
  555. if (beg_range)
  556. fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
  557. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  558. if (extra_headers)
  559. fputs(extra_headers, sfp);
  560. if (opt & WGET_OPT_POST_DATA) {
  561. char *estr = URL_escape(post_data);
  562. fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
  563. fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
  564. (int) strlen(estr), estr);
  565. /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
  566. /*fprintf(sfp, "%s\r\n", estr);*/
  567. free(estr);
  568. } else
  569. #endif
  570. { /* If "Connection:" is needed, document why */
  571. fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
  572. }
  573. /*
  574. * Retrieve HTTP response line and check for "200" status code.
  575. */
  576. read_response:
  577. if (fgets(buf, sizeof(buf), sfp) == NULL)
  578. bb_error_msg_and_die("no response from server");
  579. str = buf;
  580. str = skip_non_whitespace(str);
  581. str = skip_whitespace(str);
  582. // FIXME: no error check
  583. // xatou wouldn't work: "200 OK"
  584. status = atoi(str);
  585. switch (status) {
  586. case 0:
  587. case 100:
  588. while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
  589. /* eat all remaining headers */;
  590. goto read_response;
  591. case 200:
  592. /*
  593. Response 204 doesn't say "null file", it says "metadata
  594. has changed but data didn't":
  595. "10.2.5 204 No Content
  596. The server has fulfilled the request but does not need to return
  597. an entity-body, and might want to return updated metainformation.
  598. The response MAY include new or updated metainformation in the form
  599. of entity-headers, which if present SHOULD be associated with
  600. the requested variant.
  601. If the client is a user agent, it SHOULD NOT change its document
  602. view from that which caused the request to be sent. This response
  603. is primarily intended to allow input for actions to take place
  604. without causing a change to the user agent's active document view,
  605. although any new or updated metainformation SHOULD be applied
  606. to the document currently in the user agent's active view.
  607. The 204 response MUST NOT include a message-body, and thus
  608. is always terminated by the first empty line after the header fields."
  609. However, in real world it was observed that some web servers
  610. (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
  611. */
  612. case 204:
  613. break;
  614. case 300: /* redirection */
  615. case 301:
  616. case 302:
  617. case 303:
  618. break;
  619. case 206:
  620. if (beg_range)
  621. break;
  622. /* fall through */
  623. default:
  624. /* Show first line only and kill any ESC tricks */
  625. buf[strcspn(buf, "\n\r\x1b")] = '\0';
  626. bb_error_msg_and_die("server returned error: %s", buf);
  627. }
  628. /*
  629. * Retrieve HTTP headers.
  630. */
  631. while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
  632. /* gethdr did already convert the "FOO:" string to lowercase */
  633. smalluint key = index_in_strings(keywords, *&buf) + 1;
  634. if (key == KEY_content_length) {
  635. content_len = BB_STRTOOFF(str, NULL, 10);
  636. if (errno || content_len < 0) {
  637. bb_error_msg_and_die("content-length %s is garbage", str);
  638. }
  639. got_clen = 1;
  640. continue;
  641. }
  642. if (key == KEY_transfer_encoding) {
  643. if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
  644. bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
  645. chunked = got_clen = 1;
  646. }
  647. if (key == KEY_location) {
  648. if (str[0] == '/')
  649. /* free(target.allocated); */
  650. target.path = /* target.allocated = */ xstrdup(str+1);
  651. else {
  652. parse_url(str, &target);
  653. if (use_proxy == 0) {
  654. server.host = target.host;
  655. server.port = target.port;
  656. }
  657. free(lsa);
  658. lsa = xhost2sockaddr(server.host, server.port);
  659. break;
  660. }
  661. }
  662. }
  663. } while (status >= 300);
  664. dfp = sfp;
  665. } else {
  666. /*
  667. * FTP session
  668. */
  669. if (!target.user)
  670. target.user = xstrdup("anonymous:busybox@");
  671. sfp = open_socket(lsa);
  672. if (ftpcmd(NULL, NULL, sfp, buf) != 220)
  673. bb_error_msg_and_die("%s", buf+4);
  674. /*
  675. * Splitting username:password pair,
  676. * trying to log in
  677. */
  678. str = strchr(target.user, ':');
  679. if (str)
  680. *(str++) = '\0';
  681. switch (ftpcmd("USER ", target.user, sfp, buf)) {
  682. case 230:
  683. break;
  684. case 331:
  685. if (ftpcmd("PASS ", str, sfp, buf) == 230)
  686. break;
  687. /* fall through (failed login) */
  688. default:
  689. bb_error_msg_and_die("ftp login: %s", buf+4);
  690. }
  691. ftpcmd("TYPE I", NULL, sfp, buf);
  692. /*
  693. * Querying file size
  694. */
  695. if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
  696. content_len = BB_STRTOOFF(buf+4, NULL, 10);
  697. if (errno || content_len < 0) {
  698. bb_error_msg_and_die("SIZE value is garbage");
  699. }
  700. got_clen = 1;
  701. }
  702. /*
  703. * Entering passive mode
  704. */
  705. if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
  706. pasv_error:
  707. bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
  708. }
  709. // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
  710. // Server's IP is N1.N2.N3.N4 (we ignore it)
  711. // Server's port for data connection is P1*256+P2
  712. str = strrchr(buf, ')');
  713. if (str) str[0] = '\0';
  714. str = strrchr(buf, ',');
  715. if (!str) goto pasv_error;
  716. port = xatou_range(str+1, 0, 255);
  717. *str = '\0';
  718. str = strrchr(buf, ',');
  719. if (!str) goto pasv_error;
  720. port += xatou_range(str+1, 0, 255) * 256;
  721. set_nport(lsa, htons(port));
  722. dfp = open_socket(lsa);
  723. if (beg_range) {
  724. sprintf(buf, "REST %"OFF_FMT"d", beg_range);
  725. if (ftpcmd(buf, NULL, sfp, buf) == 350)
  726. content_len -= beg_range;
  727. }
  728. if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
  729. bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
  730. }
  731. if (opt & WGET_OPT_SPIDER) {
  732. if (ENABLE_FEATURE_CLEAN_UP)
  733. fclose(sfp);
  734. return EXIT_SUCCESS;
  735. }
  736. /*
  737. * Retrieve file
  738. */
  739. /* Do it before progress_meter (want to have nice error message) */
  740. if (output_fd < 0) {
  741. int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
  742. /* compat with wget: -O FILE can overwrite */
  743. if (opt & WGET_OPT_OUTNAME)
  744. o_flags = O_WRONLY | O_CREAT | O_TRUNC;
  745. output_fd = xopen(fname_out, o_flags);
  746. }
  747. if (!(opt & WGET_OPT_QUIET))
  748. progress_meter(-1);
  749. if (chunked)
  750. goto get_clen;
  751. /* Loops only if chunked */
  752. while (1) {
  753. while (content_len > 0 || !got_clen) {
  754. int n;
  755. unsigned rdsz = sizeof(buf);
  756. if (content_len < sizeof(buf) && (chunked || got_clen))
  757. rdsz = (unsigned)content_len;
  758. n = safe_fread(buf, rdsz, dfp);
  759. if (n <= 0) {
  760. if (ferror(dfp)) {
  761. /* perror will not work: ferror doesn't set errno */
  762. bb_error_msg_and_die(bb_msg_read_error);
  763. }
  764. break;
  765. }
  766. xwrite(output_fd, buf, n);
  767. #if ENABLE_FEATURE_WGET_STATUSBAR
  768. transferred += n;
  769. #endif
  770. if (got_clen)
  771. content_len -= n;
  772. }
  773. if (!chunked)
  774. break;
  775. safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
  776. get_clen:
  777. safe_fgets(buf, sizeof(buf), dfp);
  778. content_len = STRTOOFF(buf, NULL, 16);
  779. /* FIXME: error check? */
  780. if (content_len == 0)
  781. break; /* all done! */
  782. }
  783. if (!(opt & WGET_OPT_QUIET))
  784. progress_meter(0);
  785. if ((use_proxy == 0) && target.is_ftp) {
  786. fclose(dfp);
  787. if (ftpcmd(NULL, NULL, sfp, buf) != 226)
  788. bb_error_msg_and_die("ftp error: %s", buf+4);
  789. ftpcmd("QUIT", NULL, sfp, buf);
  790. }
  791. return EXIT_SUCCESS;
  792. }