wget.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
  11. #include "libbb.h"
  12. struct host_info {
  13. // May be used if we ever will want to free() all xstrdup()s...
  14. /* char *allocated; */
  15. const char *path;
  16. const char *user;
  17. char *host;
  18. int port;
  19. smallint is_ftp;
  20. };
  21. /* Globals */
  22. struct globals {
  23. off_t content_len; /* Content-length of the file */
  24. off_t beg_range; /* Range at which continue begins */
  25. #if ENABLE_FEATURE_WGET_STATUSBAR
  26. off_t transferred; /* Number of bytes transferred so far */
  27. const char *curfile; /* Name of current file being transferred */
  28. bb_progress_t pmt;
  29. #endif
  30. #if ENABLE_FEATURE_WGET_TIMEOUT
  31. unsigned timeout_seconds;
  32. #endif
  33. smallint chunked; /* chunked transfer encoding */
  34. smallint got_clen; /* got content-length: from server */
  35. } FIX_ALIASING;
  36. #define G (*(struct globals*)&bb_common_bufsiz1)
  37. struct BUG_G_too_big {
  38. char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
  39. };
  40. #define INIT_G() do { \
  41. IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
  42. } while (0)
  43. /* Must match option string! */
  44. enum {
  45. WGET_OPT_CONTINUE = (1 << 0),
  46. WGET_OPT_SPIDER = (1 << 1),
  47. WGET_OPT_QUIET = (1 << 2),
  48. WGET_OPT_OUTNAME = (1 << 3),
  49. WGET_OPT_PREFIX = (1 << 4),
  50. WGET_OPT_PROXY = (1 << 5),
  51. WGET_OPT_USER_AGENT = (1 << 6),
  52. WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
  53. WGET_OPT_RETRIES = (1 << 8),
  54. WGET_OPT_PASSIVE = (1 << 9),
  55. WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  56. WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  57. };
  58. enum {
  59. PROGRESS_START = -1,
  60. PROGRESS_END = 0,
  61. PROGRESS_BUMP = 1,
  62. };
  63. #if ENABLE_FEATURE_WGET_STATUSBAR
  64. static void progress_meter(int flag)
  65. {
  66. if (option_mask32 & WGET_OPT_QUIET)
  67. return;
  68. if (flag == PROGRESS_START)
  69. bb_progress_init(&G.pmt);
  70. bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
  71. G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
  72. if (flag == PROGRESS_END) {
  73. bb_putchar_stderr('\n');
  74. G.transferred = 0;
  75. }
  76. }
  77. #else
  78. static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
  79. #endif
  80. /* IPv6 knows scoped address types i.e. link and site local addresses. Link
  81. * local addresses can have a scope identifier to specify the
  82. * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
  83. * identifier is only valid on a single node.
  84. *
  85. * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
  86. * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
  87. * in the Host header as invalid requests, see
  88. * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
  89. */
  90. static void strip_ipv6_scope_id(char *host)
  91. {
  92. char *scope, *cp;
  93. /* bbox wget actually handles IPv6 addresses without [], like
  94. * wget "http://::1/xxx", but this is not standard.
  95. * To save code, _here_ we do not support it. */
  96. if (host[0] != '[')
  97. return; /* not IPv6 */
  98. scope = strchr(host, '%');
  99. if (!scope)
  100. return;
  101. /* Remove the IPv6 zone identifier from the host address */
  102. cp = strchr(host, ']');
  103. if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
  104. /* malformed address (not "[xx]:nn" or "[xx]") */
  105. return;
  106. }
  107. /* cp points to "]...", scope points to "%eth0]..." */
  108. overlapping_strcpy(scope, cp);
  109. }
  110. /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
  111. * and a short count if an eof or non-interrupt error is encountered. */
  112. static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
  113. {
  114. size_t ret;
  115. char *p = (char*)ptr;
  116. do {
  117. clearerr(stream);
  118. errno = 0;
  119. ret = fread(p, 1, nmemb, stream);
  120. p += ret;
  121. nmemb -= ret;
  122. } while (nmemb && ferror(stream) && errno == EINTR);
  123. return p - (char*)ptr;
  124. }
  125. /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
  126. * Returns S, or NULL if an eof or non-interrupt error is encountered. */
  127. static char *safe_fgets(char *s, int size, FILE *stream)
  128. {
  129. char *ret;
  130. do {
  131. clearerr(stream);
  132. errno = 0;
  133. ret = fgets(s, size, stream);
  134. } while (ret == NULL && ferror(stream) && errno == EINTR);
  135. return ret;
  136. }
  137. #if ENABLE_FEATURE_WGET_AUTHENTICATION
  138. /* Base64-encode character string. buf is assumed to be char buf[512]. */
  139. static char *base64enc_512(char buf[512], const char *str)
  140. {
  141. unsigned len = strlen(str);
  142. if (len > 512/4*3 - 10) /* paranoia */
  143. len = 512/4*3 - 10;
  144. bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
  145. return buf;
  146. }
  147. #endif
  148. static char* sanitize_string(char *s)
  149. {
  150. unsigned char *p = (void *) s;
  151. while (*p >= ' ')
  152. p++;
  153. *p = '\0';
  154. return s;
  155. }
  156. static FILE *open_socket(len_and_sockaddr *lsa)
  157. {
  158. FILE *fp;
  159. /* glibc 2.4 seems to try seeking on it - ??! */
  160. /* hopefully it understands what ESPIPE means... */
  161. fp = fdopen(xconnect_stream(lsa), "r+");
  162. if (fp == NULL)
  163. bb_perror_msg_and_die("fdopen");
  164. return fp;
  165. }
  166. static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
  167. {
  168. int result;
  169. if (s1) {
  170. if (!s2) s2 = "";
  171. fprintf(fp, "%s%s\r\n", s1, s2);
  172. fflush(fp);
  173. }
  174. do {
  175. char *buf_ptr;
  176. if (fgets(buf, 510, fp) == NULL) {
  177. bb_perror_msg_and_die("error getting response");
  178. }
  179. buf_ptr = strstr(buf, "\r\n");
  180. if (buf_ptr) {
  181. *buf_ptr = '\0';
  182. }
  183. } while (!isdigit(buf[0]) || buf[3] != ' ');
  184. buf[3] = '\0';
  185. result = xatoi_positive(buf);
  186. buf[3] = ' ';
  187. return result;
  188. }
  189. static void parse_url(char *src_url, struct host_info *h)
  190. {
  191. char *url, *p, *sp;
  192. /* h->allocated = */ url = xstrdup(src_url);
  193. if (strncmp(url, "http://", 7) == 0) {
  194. h->port = bb_lookup_port("http", "tcp", 80);
  195. h->host = url + 7;
  196. h->is_ftp = 0;
  197. } else if (strncmp(url, "ftp://", 6) == 0) {
  198. h->port = bb_lookup_port("ftp", "tcp", 21);
  199. h->host = url + 6;
  200. h->is_ftp = 1;
  201. } else
  202. bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
  203. // FYI:
  204. // "Real" wget 'http://busybox.net?var=a/b' sends this request:
  205. // 'GET /?var=a/b HTTP 1.0'
  206. // and saves 'index.html?var=a%2Fb' (we save 'b')
  207. // wget 'http://busybox.net?login=john@doe':
  208. // request: 'GET /?login=john@doe HTTP/1.0'
  209. // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
  210. // wget 'http://busybox.net#test/test':
  211. // request: 'GET / HTTP/1.0'
  212. // saves: 'index.html' (we save 'test')
  213. //
  214. // We also don't add unique .N suffix if file exists...
  215. sp = strchr(h->host, '/');
  216. p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
  217. p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
  218. if (!sp) {
  219. h->path = "";
  220. } else if (*sp == '/') {
  221. *sp = '\0';
  222. h->path = sp + 1;
  223. } else { // '#' or '?'
  224. // http://busybox.net?login=john@doe is a valid URL
  225. // memmove converts to:
  226. // http:/busybox.nett?login=john@doe...
  227. memmove(h->host - 1, h->host, sp - h->host);
  228. h->host--;
  229. sp[-1] = '\0';
  230. h->path = sp;
  231. }
  232. // We used to set h->user to NULL here, but this interferes
  233. // with handling of code 302 ("object was moved")
  234. sp = strrchr(h->host, '@');
  235. if (sp != NULL) {
  236. h->user = h->host;
  237. *sp = '\0';
  238. h->host = sp + 1;
  239. }
  240. sp = h->host;
  241. }
  242. static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
  243. {
  244. char *s, *hdrval;
  245. int c;
  246. /* *istrunc = 0; */
  247. /* retrieve header line */
  248. if (fgets(buf, bufsiz, fp) == NULL)
  249. return NULL;
  250. /* see if we are at the end of the headers */
  251. for (s = buf; *s == '\r'; ++s)
  252. continue;
  253. if (*s == '\n')
  254. return NULL;
  255. /* convert the header name to lower case */
  256. for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
  257. /* tolower for "A-Z", no-op for "0-9a-z-." */
  258. *s = (*s | 0x20);
  259. }
  260. /* verify we are at the end of the header name */
  261. if (*s != ':')
  262. bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
  263. /* locate the start of the header value */
  264. *s++ = '\0';
  265. hdrval = skip_whitespace(s);
  266. /* locate the end of header */
  267. while (*s && *s != '\r' && *s != '\n')
  268. ++s;
  269. /* end of header found */
  270. if (*s) {
  271. *s = '\0';
  272. return hdrval;
  273. }
  274. /* Rats! The buffer isn't big enough to hold the entire header value */
  275. while (c = getc(fp), c != EOF && c != '\n')
  276. continue;
  277. /* *istrunc = 1; */
  278. return hdrval;
  279. }
  280. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  281. static char *URL_escape(const char *str)
  282. {
  283. /* URL encode, see RFC 2396 */
  284. char *dst;
  285. char *res = dst = xmalloc(strlen(str) * 3 + 1);
  286. unsigned char c;
  287. while (1) {
  288. c = *str++;
  289. if (c == '\0'
  290. /* || strchr("!&'()*-.=_~", c) - more code */
  291. || c == '!'
  292. || c == '&'
  293. || c == '\''
  294. || c == '('
  295. || c == ')'
  296. || c == '*'
  297. || c == '-'
  298. || c == '.'
  299. || c == '='
  300. || c == '_'
  301. || c == '~'
  302. || (c >= '0' && c <= '9')
  303. || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
  304. ) {
  305. *dst++ = c;
  306. if (c == '\0')
  307. return res;
  308. } else {
  309. *dst++ = '%';
  310. *dst++ = bb_hexdigits_upcase[c >> 4];
  311. *dst++ = bb_hexdigits_upcase[c & 0xf];
  312. }
  313. }
  314. }
  315. #endif
  316. static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
  317. {
  318. char buf[512];
  319. FILE *sfp;
  320. char *str;
  321. int port;
  322. if (!target->user)
  323. target->user = xstrdup("anonymous:busybox@");
  324. sfp = open_socket(lsa);
  325. if (ftpcmd(NULL, NULL, sfp, buf) != 220)
  326. bb_error_msg_and_die("%s", sanitize_string(buf+4));
  327. /*
  328. * Splitting username:password pair,
  329. * trying to log in
  330. */
  331. str = strchr(target->user, ':');
  332. if (str)
  333. *str++ = '\0';
  334. switch (ftpcmd("USER ", target->user, sfp, buf)) {
  335. case 230:
  336. break;
  337. case 331:
  338. if (ftpcmd("PASS ", str, sfp, buf) == 230)
  339. break;
  340. /* fall through (failed login) */
  341. default:
  342. bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
  343. }
  344. ftpcmd("TYPE I", NULL, sfp, buf);
  345. /*
  346. * Querying file size
  347. */
  348. if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
  349. G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
  350. if (G.content_len < 0 || errno) {
  351. bb_error_msg_and_die("SIZE value is garbage");
  352. }
  353. G.got_clen = 1;
  354. }
  355. /*
  356. * Entering passive mode
  357. */
  358. if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
  359. pasv_error:
  360. bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
  361. }
  362. // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
  363. // Server's IP is N1.N2.N3.N4 (we ignore it)
  364. // Server's port for data connection is P1*256+P2
  365. str = strrchr(buf, ')');
  366. if (str) str[0] = '\0';
  367. str = strrchr(buf, ',');
  368. if (!str) goto pasv_error;
  369. port = xatou_range(str+1, 0, 255);
  370. *str = '\0';
  371. str = strrchr(buf, ',');
  372. if (!str) goto pasv_error;
  373. port += xatou_range(str+1, 0, 255) * 256;
  374. set_nport(lsa, htons(port));
  375. *dfpp = open_socket(lsa);
  376. if (G.beg_range) {
  377. sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
  378. if (ftpcmd(buf, NULL, sfp, buf) == 350)
  379. G.content_len -= G.beg_range;
  380. }
  381. if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
  382. bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
  383. return sfp;
  384. }
  385. static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
  386. {
  387. char buf[4*1024]; /* made bigger to speed up local xfers */
  388. #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
  389. # if ENABLE_FEATURE_WGET_TIMEOUT
  390. unsigned second_cnt;
  391. # endif
  392. struct pollfd polldata;
  393. polldata.fd = fileno(dfp);
  394. polldata.events = POLLIN | POLLPRI;
  395. #endif
  396. progress_meter(PROGRESS_START);
  397. if (G.chunked)
  398. goto get_clen;
  399. /* Loops only if chunked */
  400. while (1) {
  401. #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
  402. ndelay_on(polldata.fd);
  403. #endif
  404. while (1) {
  405. int n;
  406. unsigned rdsz;
  407. rdsz = sizeof(buf);
  408. if (G.got_clen) {
  409. if (G.content_len < (off_t)sizeof(buf)) {
  410. if ((int)G.content_len <= 0)
  411. break;
  412. rdsz = (unsigned)G.content_len;
  413. }
  414. }
  415. #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
  416. # if ENABLE_FEATURE_WGET_TIMEOUT
  417. second_cnt = G.timeout_seconds;
  418. # endif
  419. while (1) {
  420. if (safe_poll(&polldata, 1, 1000) != 0)
  421. break; /* error, EOF, or data is available */
  422. # if ENABLE_FEATURE_WGET_TIMEOUT
  423. if (second_cnt != 0 && --second_cnt == 0) {
  424. progress_meter(PROGRESS_END);
  425. bb_perror_msg_and_die("download timed out");
  426. }
  427. # endif
  428. /* Needed for "stalled" indicator */
  429. progress_meter(PROGRESS_BUMP);
  430. }
  431. #endif
  432. /* fread internally uses read loop, which in our case
  433. * is usually exited when we get EAGAIN.
  434. * In this case, libc sets error marker on the stream.
  435. * Need to clear it before next fread to avoid possible
  436. * rare false positive ferror below. Rare because usually
  437. * fread gets more than zero bytes, and we don't fall
  438. * into if (n <= 0) ...
  439. */
  440. clearerr(dfp);
  441. errno = 0;
  442. n = safe_fread(buf, rdsz, dfp);
  443. /* man fread:
  444. * If error occurs, or EOF is reached, the return value
  445. * is a short item count (or zero).
  446. * fread does not distinguish between EOF and error.
  447. */
  448. if (n <= 0) {
  449. #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
  450. if (errno == EAGAIN) /* poll lied, there is no data? */
  451. continue; /* yes */
  452. #endif
  453. if (ferror(dfp))
  454. bb_perror_msg_and_die(bb_msg_read_error);
  455. break; /* EOF, not error */
  456. }
  457. xwrite(output_fd, buf, n);
  458. #if ENABLE_FEATURE_WGET_STATUSBAR
  459. G.transferred += n;
  460. progress_meter(PROGRESS_BUMP);
  461. #endif
  462. if (G.got_clen) {
  463. G.content_len -= n;
  464. if (G.content_len == 0)
  465. break;
  466. }
  467. }
  468. #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
  469. ndelay_off(polldata.fd);
  470. #endif
  471. if (!G.chunked)
  472. break;
  473. safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
  474. get_clen:
  475. safe_fgets(buf, sizeof(buf), dfp);
  476. G.content_len = STRTOOFF(buf, NULL, 16);
  477. /* FIXME: error check? */
  478. if (G.content_len == 0)
  479. break; /* all done! */
  480. G.got_clen = 1;
  481. }
  482. progress_meter(PROGRESS_END);
  483. }
  484. int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  485. int wget_main(int argc UNUSED_PARAM, char **argv)
  486. {
  487. char buf[512];
  488. struct host_info server, target;
  489. len_and_sockaddr *lsa;
  490. unsigned opt;
  491. int redir_limit;
  492. char *proxy = NULL;
  493. char *dir_prefix = NULL;
  494. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  495. char *post_data;
  496. char *extra_headers = NULL;
  497. llist_t *headers_llist = NULL;
  498. #endif
  499. FILE *sfp; /* socket to web/ftp server */
  500. FILE *dfp; /* socket to ftp server (data) */
  501. char *fname_out; /* where to direct output (-O) */
  502. int output_fd = -1;
  503. bool use_proxy; /* Use proxies if env vars are set */
  504. const char *proxy_flag = "on"; /* Use proxies if env vars are set */
  505. const char *user_agent = "Wget";/* "User-Agent" header field */
  506. static const char keywords[] ALIGN1 =
  507. "content-length\0""transfer-encoding\0""chunked\0""location\0";
  508. enum {
  509. KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
  510. };
  511. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  512. static const char wget_longopts[] ALIGN1 =
  513. /* name, has_arg, val */
  514. "continue\0" No_argument "c"
  515. "spider\0" No_argument "s"
  516. "quiet\0" No_argument "q"
  517. "output-document\0" Required_argument "O"
  518. "directory-prefix\0" Required_argument "P"
  519. "proxy\0" Required_argument "Y"
  520. "user-agent\0" Required_argument "U"
  521. #if ENABLE_FEATURE_WGET_TIMEOUT
  522. "timeout\0" Required_argument "T"
  523. #endif
  524. /* Ignored: */
  525. // "tries\0" Required_argument "t"
  526. /* Ignored (we always use PASV): */
  527. "passive-ftp\0" No_argument "\xff"
  528. "header\0" Required_argument "\xfe"
  529. "post-data\0" Required_argument "\xfd"
  530. /* Ignored (we don't do ssl) */
  531. "no-check-certificate\0" No_argument "\xfc"
  532. ;
  533. #endif
  534. INIT_G();
  535. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  536. applet_long_options = wget_longopts;
  537. #endif
  538. /* server.allocated = target.allocated = NULL; */
  539. opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
  540. opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
  541. &fname_out, &dir_prefix,
  542. &proxy_flag, &user_agent,
  543. IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
  544. NULL /* -t RETRIES */
  545. IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
  546. IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
  547. );
  548. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  549. if (headers_llist) {
  550. int size = 1;
  551. char *cp;
  552. llist_t *ll = headers_llist;
  553. while (ll) {
  554. size += strlen(ll->data) + 2;
  555. ll = ll->link;
  556. }
  557. extra_headers = cp = xmalloc(size);
  558. while (headers_llist) {
  559. cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
  560. }
  561. }
  562. #endif
  563. /* TODO: compat issue: should handle "wget URL1 URL2..." */
  564. target.user = NULL;
  565. parse_url(argv[optind], &target);
  566. /* Use the proxy if necessary */
  567. use_proxy = (strcmp(proxy_flag, "off") != 0);
  568. if (use_proxy) {
  569. proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
  570. if (proxy && proxy[0]) {
  571. server.user = NULL;
  572. parse_url(proxy, &server);
  573. } else {
  574. use_proxy = 0;
  575. }
  576. }
  577. if (!use_proxy) {
  578. server.port = target.port;
  579. if (ENABLE_FEATURE_IPV6) {
  580. server.host = xstrdup(target.host);
  581. } else {
  582. server.host = target.host;
  583. }
  584. }
  585. if (ENABLE_FEATURE_IPV6)
  586. strip_ipv6_scope_id(target.host);
  587. /* Guess an output filename, if there was no -O FILE */
  588. if (!(opt & WGET_OPT_OUTNAME)) {
  589. fname_out = bb_get_last_path_component_nostrip(target.path);
  590. /* handle "wget http://kernel.org//" */
  591. if (fname_out[0] == '/' || !fname_out[0])
  592. fname_out = (char*)"index.html";
  593. /* -P DIR is considered only if there was no -O FILE */
  594. if (dir_prefix)
  595. fname_out = concat_path_file(dir_prefix, fname_out);
  596. } else {
  597. if (LONE_DASH(fname_out)) {
  598. /* -O - */
  599. output_fd = 1;
  600. opt &= ~WGET_OPT_CONTINUE;
  601. }
  602. }
  603. #if ENABLE_FEATURE_WGET_STATUSBAR
  604. G.curfile = bb_get_last_path_component_nostrip(fname_out);
  605. #endif
  606. /* Impossible?
  607. if ((opt & WGET_OPT_CONTINUE) && !fname_out)
  608. bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
  609. */
  610. /* Determine where to start transfer */
  611. if (opt & WGET_OPT_CONTINUE) {
  612. output_fd = open(fname_out, O_WRONLY);
  613. if (output_fd >= 0) {
  614. G.beg_range = xlseek(output_fd, 0, SEEK_END);
  615. }
  616. /* File doesn't exist. We do not create file here yet.
  617. * We are not sure it exists on remove side */
  618. }
  619. redir_limit = 5;
  620. resolve_lsa:
  621. lsa = xhost2sockaddr(server.host, server.port);
  622. if (!(opt & WGET_OPT_QUIET)) {
  623. char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
  624. fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
  625. free(s);
  626. }
  627. establish_session:
  628. if (use_proxy || !target.is_ftp) {
  629. /*
  630. * HTTP session
  631. */
  632. char *str;
  633. int status;
  634. /* Open socket to http server */
  635. sfp = open_socket(lsa);
  636. /* Send HTTP request */
  637. if (use_proxy) {
  638. fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
  639. target.is_ftp ? "f" : "ht", target.host,
  640. target.path);
  641. } else {
  642. if (opt & WGET_OPT_POST_DATA)
  643. fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
  644. else
  645. fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
  646. }
  647. fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
  648. target.host, user_agent);
  649. /* Ask server to close the connection as soon as we are done
  650. * (IOW: we do not intend to send more requests)
  651. */
  652. fprintf(sfp, "Connection: close\r\n");
  653. #if ENABLE_FEATURE_WGET_AUTHENTICATION
  654. if (target.user) {
  655. fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
  656. base64enc_512(buf, target.user));
  657. }
  658. if (use_proxy && server.user) {
  659. fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
  660. base64enc_512(buf, server.user));
  661. }
  662. #endif
  663. if (G.beg_range)
  664. fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
  665. #if ENABLE_FEATURE_WGET_LONG_OPTIONS
  666. if (extra_headers)
  667. fputs(extra_headers, sfp);
  668. if (opt & WGET_OPT_POST_DATA) {
  669. char *estr = URL_escape(post_data);
  670. fprintf(sfp,
  671. "Content-Type: application/x-www-form-urlencoded\r\n"
  672. "Content-Length: %u\r\n"
  673. "\r\n"
  674. "%s",
  675. (int) strlen(estr), estr
  676. );
  677. free(estr);
  678. } else
  679. #endif
  680. {
  681. fprintf(sfp, "\r\n");
  682. }
  683. fflush(sfp);
  684. /*
  685. * Retrieve HTTP response line and check for "200" status code.
  686. */
  687. read_response:
  688. if (fgets(buf, sizeof(buf), sfp) == NULL)
  689. bb_error_msg_and_die("no response from server");
  690. str = buf;
  691. str = skip_non_whitespace(str);
  692. str = skip_whitespace(str);
  693. // FIXME: no error check
  694. // xatou wouldn't work: "200 OK"
  695. status = atoi(str);
  696. switch (status) {
  697. case 0:
  698. case 100:
  699. while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
  700. /* eat all remaining headers */;
  701. goto read_response;
  702. case 200:
  703. /*
  704. Response 204 doesn't say "null file", it says "metadata
  705. has changed but data didn't":
  706. "10.2.5 204 No Content
  707. The server has fulfilled the request but does not need to return
  708. an entity-body, and might want to return updated metainformation.
  709. The response MAY include new or updated metainformation in the form
  710. of entity-headers, which if present SHOULD be associated with
  711. the requested variant.
  712. If the client is a user agent, it SHOULD NOT change its document
  713. view from that which caused the request to be sent. This response
  714. is primarily intended to allow input for actions to take place
  715. without causing a change to the user agent's active document view,
  716. although any new or updated metainformation SHOULD be applied
  717. to the document currently in the user agent's active view.
  718. The 204 response MUST NOT include a message-body, and thus
  719. is always terminated by the first empty line after the header fields."
  720. However, in real world it was observed that some web servers
  721. (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
  722. */
  723. case 204:
  724. break;
  725. case 300: /* redirection */
  726. case 301:
  727. case 302:
  728. case 303:
  729. break;
  730. case 206:
  731. if (G.beg_range)
  732. break;
  733. /* fall through */
  734. default:
  735. bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
  736. }
  737. /*
  738. * Retrieve HTTP headers.
  739. */
  740. while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
  741. /* gethdr converted "FOO:" string to lowercase */
  742. smalluint key;
  743. /* strip trailing whitespace */
  744. char *s = strchrnul(str, '\0') - 1;
  745. while (s >= str && (*s == ' ' || *s == '\t')) {
  746. *s = '\0';
  747. s--;
  748. }
  749. key = index_in_strings(keywords, buf) + 1;
  750. if (key == KEY_content_length) {
  751. G.content_len = BB_STRTOOFF(str, NULL, 10);
  752. if (G.content_len < 0 || errno) {
  753. bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
  754. }
  755. G.got_clen = 1;
  756. continue;
  757. }
  758. if (key == KEY_transfer_encoding) {
  759. if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
  760. bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
  761. G.chunked = G.got_clen = 1;
  762. }
  763. if (key == KEY_location && status >= 300) {
  764. if (--redir_limit == 0)
  765. bb_error_msg_and_die("too many redirections");
  766. fclose(sfp);
  767. G.got_clen = 0;
  768. G.chunked = 0;
  769. if (str[0] == '/')
  770. /* free(target.allocated); */
  771. target.path = /* target.allocated = */ xstrdup(str+1);
  772. /* lsa stays the same: it's on the same server */
  773. else {
  774. parse_url(str, &target);
  775. if (!use_proxy) {
  776. server.host = target.host;
  777. /* strip_ipv6_scope_id(target.host); - no! */
  778. /* we assume remote never gives us IPv6 addr with scope id */
  779. server.port = target.port;
  780. free(lsa);
  781. goto resolve_lsa;
  782. } /* else: lsa stays the same: we use proxy */
  783. }
  784. goto establish_session;
  785. }
  786. }
  787. // if (status >= 300)
  788. // bb_error_msg_and_die("bad redirection (no Location: header from server)");
  789. /* For HTTP, data is pumped over the same connection */
  790. dfp = sfp;
  791. } else {
  792. /*
  793. * FTP session
  794. */
  795. sfp = prepare_ftp_session(&dfp, &target, lsa);
  796. }
  797. if (opt & WGET_OPT_SPIDER) {
  798. if (ENABLE_FEATURE_CLEAN_UP)
  799. fclose(sfp);
  800. return EXIT_SUCCESS;
  801. }
  802. if (output_fd < 0) {
  803. int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
  804. /* compat with wget: -O FILE can overwrite */
  805. if (opt & WGET_OPT_OUTNAME)
  806. o_flags = O_WRONLY | O_CREAT | O_TRUNC;
  807. output_fd = xopen(fname_out, o_flags);
  808. }
  809. retrieve_file_data(dfp, output_fd);
  810. xclose(output_fd);
  811. if (dfp != sfp) {
  812. /* It's ftp. Close it properly */
  813. fclose(dfp);
  814. if (ftpcmd(NULL, NULL, sfp, buf) != 226)
  815. bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
  816. /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
  817. }
  818. return EXIT_SUCCESS;
  819. }