/*
 * hget.c
 */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <ctype.h>
  4. #include <bio.h>
  5. #include <ip.h>
  6. #include <libsec.h>
  7. #include <auth.h>
  typedef struct URL URL;

  /*
   * A cracked URL plus the per-resource state collected from
   * response headers while fetching it.
   */
  struct URL
  {
      int method;        /* index into method[]: Other, Http, Https or Ftp */
      char *host;        /* host name, strdup'ed by crackurl */
      char *port;        /* port/service; crackurl points this into host or at a method[] name */
      char *page;        /* path part of the URL, strdup'ed by crackurl */
      char *etag;        /* value of an etag: header, or nil */
      char *redirect;    /* target taken from a location: or uri: header, or nil */
      char *postbody;    /* body of a POST request; nil selects GET */
      char *cred;        /* base64 user:password for Basic authorization, or nil */
      long mtime;        /* time from last-modified: in epoch seconds, 0 if none seen */
  };
  typedef struct Range Range;

  /*
   * Byte range of the transfer.  main starts with start = 0 (or the
   * length of an existing output file) and end = -1; end is filled in
   * from content-length:/content-range: headers or an FTP SIZE reply.
   */
  struct Range
  {
      long start;    /* only 2 gig supported, tdb */
      long end;
  };
  27. typedef struct Out Out;
  28. struct Out
  29. {
  30. int fd;
  31. int offset; /* notional current offset in output */
  32. int written; /* number of bytes successfully transferred to output */
  33. DigestState *curr; /* digest state up to offset (if known) */
  34. DigestState *hiwat; /* digest state of all bytes written */
  35. };
  /*
   * URL methods; the values index the method[] table.
   * Other (0) is what a zeroed URL holds before crackurl succeeds.
   */
  enum
  {
      Other,
      Http,
      Https,
      Ftp,
  };
  /*
   * Results of a transfer function.  A positive value is a byte
   * count; main treats the others as follows.
   */
  enum
  {
      Eof = 0,         /* nothing more to fetch: main exits successfully */
      Error = -1,      /* transient failure: main retries, up to a limit */
      Server = -2,     /* fatal: main reports "server returned: ..." */
      Changed = -3,
  };
  50. int debug;
  51. char *ofile;
  52. int doftp(URL*, URL*, Range*, Out*, long);
  53. int dohttp(URL*, URL*, Range*, Out*, long);
  54. int crackurl(URL*, char*);
  55. Range* crackrange(char*);
  56. int getheader(int, char*, int);
  57. int httpheaders(int, int, URL*, Range*);
  58. int httprcode(int);
  59. int cistrncmp(char*, char*, int);
  60. int cistrcmp(char*, char*);
  61. void initibuf(void);
  62. int readline(int, char*, int);
  63. int readibuf(int, char*, int);
  64. int dfprint(int, char*, ...);
  65. void unreadline(char*);
  66. int output(Out*, char*, int);
  67. void setoffset(Out*, int);
  68. int verbose;
  69. char *net;
  70. char tcpdir[NETPATHLEN];
  71. int headerprint;
  72. struct {
  73. char *name;
  74. int (*f)(URL*, URL*, Range*, Out*, long);
  75. } method[] = {
  76. [Http] { "http", dohttp },
  77. [Https] { "https", dohttp },
  78. [Ftp] { "ftp", doftp },
  79. [Other] { "_______", nil },
  80. };
  81. void
  82. usage(void)
  83. {
  84. fprint(2, "usage: %s [-dhv] [-o outfile] [-p body] [-x netmtpt] url\n", argv0);
  85. exits("usage");
  86. }
  87. void
  88. main(int argc, char **argv)
  89. {
  90. URL u;
  91. Range r;
  92. int errs, n;
  93. ulong mtime;
  94. Dir *d;
  95. char postbody[4096], *p, *e, *t, *hpx;
  96. URL px; // Proxy
  97. Out out;
  98. ofile = nil;
  99. p = postbody;
  100. e = p + sizeof(postbody);
  101. r.start = 0;
  102. r.end = -1;
  103. mtime = 0;
  104. memset(&u, 0, sizeof(u));
  105. memset(&px, 0, sizeof(px));
  106. hpx = getenv("httpproxy");
  107. ARGBEGIN {
  108. case 'o':
  109. ofile = EARGF(usage());
  110. break;
  111. case 'd':
  112. debug = 1;
  113. break;
  114. case 'h':
  115. headerprint = 1;
  116. break;
  117. case 'v':
  118. verbose = 1;
  119. break;
  120. case 'x':
  121. net = EARGF(usage());
  122. break;
  123. case 'p':
  124. t = EARGF(usage());
  125. if(p != postbody)
  126. p = seprint(p, e, "&%s", t);
  127. else
  128. p = seprint(p, e, "%s", t);
  129. u.postbody = postbody;
  130. break;
  131. default:
  132. usage();
  133. } ARGEND;
  134. if(net != nil){
  135. if(strlen(net) > sizeof(tcpdir)-5)
  136. sysfatal("network mount point too long");
  137. snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
  138. } else
  139. snprint(tcpdir, sizeof(tcpdir), "tcp");
  140. if(argc != 1)
  141. usage();
  142. out.fd = 1;
  143. out.written = 0;
  144. out.offset = 0;
  145. out.curr = nil;
  146. out.hiwat = nil;
  147. if(ofile != nil){
  148. d = dirstat(ofile);
  149. if(d == nil){
  150. out.fd = create(ofile, OWRITE, 0664);
  151. if(out.fd < 0)
  152. sysfatal("creating %s: %r", ofile);
  153. } else {
  154. out.fd = open(ofile, OWRITE);
  155. if(out.fd < 0)
  156. sysfatal("can't open %s: %r", ofile);
  157. r.start = d->length;
  158. mtime = d->mtime;
  159. free(d);
  160. }
  161. }
  162. errs = 0;
  163. if(crackurl(&u, argv[0]) < 0)
  164. sysfatal("%r");
  165. if(hpx && crackurl(&px, hpx) < 0)
  166. sysfatal("%r");
  167. for(;;){
  168. setoffset(&out, 0);
  169. /* transfer data */
  170. werrstr("");
  171. n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
  172. switch(n){
  173. case Eof:
  174. exits(0);
  175. break;
  176. case Error:
  177. if(errs++ < 10)
  178. continue;
  179. sysfatal("too many errors with no progress %r");
  180. break;
  181. case Server:
  182. sysfatal("server returned: %r");
  183. break;
  184. }
  185. /* forward progress */
  186. errs = 0;
  187. r.start += n;
  188. if(r.start >= r.end)
  189. break;
  190. }
  191. exits(0);
  192. }
  193. int
  194. crackurl(URL *u, char *s)
  195. {
  196. char *p;
  197. int i;
  198. if(u->page != nil){
  199. free(u->page);
  200. u->page = nil;
  201. }
  202. /* get type */
  203. for(p = s; *p; p++){
  204. if(*p == '/'){
  205. p = s;
  206. if(u->method == Other){
  207. werrstr("missing method");
  208. return -1;
  209. }
  210. if(u->host == nil){
  211. werrstr("missing host");
  212. return -1;
  213. }
  214. u->page = strdup(p);
  215. return 0;
  216. }
  217. if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
  218. *p = 0;
  219. p += 3;
  220. for(i = 0; i < nelem(method); i++){
  221. if(cistrcmp(s, method[i].name) == 0){
  222. u->method = i;
  223. break;
  224. }
  225. }
  226. break;
  227. }
  228. }
  229. if(u->method == Other){
  230. werrstr("unsupported URL type %s", s);
  231. return -1;
  232. }
  233. /* get system */
  234. free(u->host);
  235. s = p;
  236. p = strchr(s, '/');
  237. if(p == nil){
  238. u->host = strdup(s);
  239. u->page = strdup("/");
  240. } else {
  241. u->page = strdup(p);
  242. *p = 0;
  243. u->host = strdup(s);
  244. *p = '/';
  245. }
  246. if(p = strchr(u->host, ':')) {
  247. *p++ = 0;
  248. u->port = p;
  249. } else
  250. u->port = method[u->method].name;
  251. if(*(u->host) == 0){
  252. werrstr("bad url, null host");
  253. return -1;
  254. }
  255. return 0;
  256. }
  /* weekday names, indexed by Tm.wday when formatting an If-range date */
  char *day[] = {
      "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };

  /* month names, indexed by Tm.mon when formatting an If-range date */
  char *month[] = {
      "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  /*
   * What the note handler needs in order to stamp the output file:
   * filled in by dohttp before it installs catch.
   */
  struct
  {
      int fd;        /* output file descriptor */
      long mtime;    /* modification time to set on it */
  } note;
  268. void
  269. catch(void*, char*)
  270. {
  271. Dir d;
  272. nulldir(&d);
  273. d.mtime = note.mtime;
  274. if(dirfwstat(note.fd, &d) < 0)
  275. sysfatal("catch: can't dirfwstat: %r");
  276. noted(NDFLT);
  277. }
  278. int
  279. dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
  280. {
  281. int fd, cfd;
  282. int redirect, auth, loop;
  283. int n, rv, code;
  284. long tot, vtime;
  285. Tm *tm;
  286. char buf[1024];
  287. char err[ERRMAX];
  288. /* always move back to a previous 512 byte bound because some
  289. * servers can't seem to deal with requests that start at the
  290. * end of the file
  291. */
  292. if(r->start)
  293. r->start = ((r->start-1)/512)*512;
  294. /* loop for redirects, requires reading both response code and headers */
  295. fd = -1;
  296. for(loop = 0; loop < 32; loop++){
  297. if(px->host == nil){
  298. fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
  299. } else {
  300. fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
  301. }
  302. if(fd < 0)
  303. return Error;
  304. if(u->method == Https){
  305. int tfd;
  306. TLSconn conn;
  307. memset(&conn, 0, sizeof conn);
  308. tfd = tlsClient(fd, &conn);
  309. if(tfd < 0){
  310. fprint(2, "tlsClient: %r\n");
  311. close(fd);
  312. return Error;
  313. }
  314. /* BUG: check cert here? */
  315. if(conn.cert)
  316. free(conn.cert);
  317. close(fd);
  318. fd = tfd;
  319. }
  320. /* write request, use range if not start of file */
  321. if(u->postbody == nil){
  322. if(px->host == nil){
  323. dfprint(fd, "GET %s HTTP/1.0\r\n"
  324. "Host: %s\r\n"
  325. "User-agent: Plan9/hget\r\n"
  326. "Cache-Control: no-cache\r\n"
  327. "Pragma: no-cache\r\n",
  328. u->page, u->host);
  329. } else {
  330. dfprint(fd, "GET http://%s%s HTTP/1.0\r\n"
  331. "Host: %s\r\n"
  332. "User-agent: Plan9/hget\r\n"
  333. "Cache-Control: no-cache\r\n"
  334. "Pragma: no-cache\r\n",
  335. u->host, u->page, u->host);
  336. }
  337. if(u->cred)
  338. dfprint(fd, "Authorization: Basic %s\r\n",
  339. u->cred);
  340. } else {
  341. dfprint(fd, "POST %s HTTP/1.0\r\n"
  342. "Host: %s\r\n"
  343. "Content-type: application/x-www-form-urlencoded\r\n"
  344. "Content-length: %d\r\n"
  345. "User-agent: Plan9/hget\r\n",
  346. u->page, u->host, strlen(u->postbody));
  347. if(u->cred)
  348. dfprint(fd, "Authorization: Basic %s\r\n", u->cred);
  349. }
  350. if(r->start != 0){
  351. dfprint(fd, "Range: bytes=%d-\n", r->start);
  352. if(u->etag != nil){
  353. dfprint(fd, "If-range: %s\n", u->etag);
  354. } else {
  355. tm = gmtime(mtime);
  356. dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
  357. day[tm->wday], tm->mday, month[tm->mon],
  358. tm->year+1900, tm->hour, tm->min, tm->sec);
  359. }
  360. }
  361. if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
  362. if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
  363. while((n = read(cfd, buf, sizeof buf)) > 0){
  364. if(debug)
  365. write(2, buf, n);
  366. write(fd, buf, n);
  367. }
  368. }else{
  369. close(cfd);
  370. cfd = -1;
  371. }
  372. }
  373. dfprint(fd, "\r\n", u->host);
  374. if(u->postbody)
  375. dfprint(fd, "%s", u->postbody);
  376. auth = 0;
  377. redirect = 0;
  378. initibuf();
  379. code = httprcode(fd);
  380. switch(code){
  381. case Error: /* connection timed out */
  382. case Eof:
  383. close(fd);
  384. close(cfd);
  385. return code;
  386. case 200: /* OK */
  387. case 201: /* Created */
  388. case 202: /* Accepted */
  389. if(ofile == nil && r->start != 0)
  390. sysfatal("page changed underfoot");
  391. break;
  392. case 204: /* No Content */
  393. sysfatal("No Content");
  394. case 206: /* Partial Content */
  395. setoffset(out, r->start);
  396. break;
  397. case 301: /* Moved Permanently */
  398. case 302: /* Moved Temporarily */
  399. redirect = 1;
  400. u->postbody = nil;
  401. break;
  402. case 304: /* Not Modified */
  403. break;
  404. case 400: /* Bad Request */
  405. sysfatal("Bad Request");
  406. case 401: /* Unauthorized */
  407. if (auth)
  408. sysfatal("Authentication failed");
  409. auth = 1;
  410. break;
  411. case 402: /* ??? */
  412. sysfatal("Unauthorized");
  413. case 403: /* Forbidden */
  414. sysfatal("Forbidden by server");
  415. case 404: /* Not Found */
  416. sysfatal("Not found on server");
  417. case 407: /* Proxy Authentication */
  418. sysfatal("Proxy authentication required");
  419. case 500: /* Internal server error */
  420. sysfatal("Server choked");
  421. case 501: /* Not implemented */
  422. sysfatal("Server can't do it!");
  423. case 502: /* Bad gateway */
  424. sysfatal("Bad gateway");
  425. case 503: /* Service unavailable */
  426. sysfatal("Service unavailable");
  427. default:
  428. sysfatal("Unknown response code %d", code);
  429. }
  430. if(u->redirect != nil){
  431. free(u->redirect);
  432. u->redirect = nil;
  433. }
  434. rv = httpheaders(fd, cfd, u, r);
  435. close(cfd);
  436. if(rv != 0){
  437. close(fd);
  438. return rv;
  439. }
  440. if(!redirect && !auth)
  441. break;
  442. if (redirect){
  443. if(u->redirect == nil)
  444. sysfatal("redirect: no URL");
  445. if(crackurl(u, u->redirect) < 0)
  446. sysfatal("redirect: %r");
  447. }
  448. }
  449. /* transfer whatever you get */
  450. if(ofile != nil && u->mtime != 0){
  451. note.fd = out->fd;
  452. note.mtime = u->mtime;
  453. notify(catch);
  454. }
  455. tot = 0;
  456. vtime = 0;
  457. for(;;){
  458. n = readibuf(fd, buf, sizeof(buf));
  459. if(n <= 0)
  460. break;
  461. if(output(out, buf, n) != n)
  462. break;
  463. tot += n;
  464. if(verbose && (vtime != time(0) || r->start == r->end)) {
  465. vtime = time(0);
  466. fprint(2, "%ld %ld\n", r->start+tot, r->end);
  467. }
  468. }
  469. notify(nil);
  470. close(fd);
  471. if(ofile != nil && u->mtime != 0){
  472. Dir d;
  473. rerrstr(err, sizeof err);
  474. nulldir(&d);
  475. d.mtime = u->mtime;
  476. if(dirfwstat(out->fd, &d) < 0)
  477. fprint(2, "couldn't set mtime: %r\n");
  478. errstr(err, sizeof err);
  479. }
  480. return tot;
  481. }
  482. /* get the http response code */
  483. int
  484. httprcode(int fd)
  485. {
  486. int n;
  487. char *p;
  488. char buf[256];
  489. n = readline(fd, buf, sizeof(buf)-1);
  490. if(n <= 0)
  491. return n;
  492. if(debug)
  493. fprint(2, "%d <- %s\n", fd, buf);
  494. p = strchr(buf, ' ');
  495. if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
  496. werrstr("bad response from server");
  497. return -1;
  498. }
  499. buf[n] = 0;
  500. return atoi(p+1);
  501. }
  502. /* read in and crack the http headers, update u and r */
  503. void hhetag(char*, URL*, Range*);
  504. void hhmtime(char*, URL*, Range*);
  505. void hhclen(char*, URL*, Range*);
  506. void hhcrange(char*, URL*, Range*);
  507. void hhuri(char*, URL*, Range*);
  508. void hhlocation(char*, URL*, Range*);
  509. void hhauth(char*, URL*, Range*);
  510. struct {
  511. char *name;
  512. void (*f)(char*, URL*, Range*);
  513. } headers[] = {
  514. { "etag:", hhetag },
  515. { "last-modified:", hhmtime },
  516. { "content-length:", hhclen },
  517. { "content-range:", hhcrange },
  518. { "uri:", hhuri },
  519. { "location:", hhlocation },
  520. { "WWW-Authenticate:", hhauth },
  521. };
  522. int
  523. httpheaders(int fd, int cfd, URL *u, Range *r)
  524. {
  525. char buf[2048];
  526. char *p;
  527. int i, n;
  528. for(;;){
  529. n = getheader(fd, buf, sizeof(buf));
  530. if(n <= 0)
  531. break;
  532. if(cfd >= 0)
  533. fprint(cfd, "%s\n", buf);
  534. for(i = 0; i < nelem(headers); i++){
  535. n = strlen(headers[i].name);
  536. if(cistrncmp(buf, headers[i].name, n) == 0){
  537. /* skip field name and leading white */
  538. p = buf + n;
  539. while(*p == ' ' || *p == '\t')
  540. p++;
  541. (*headers[i].f)(p, u, r);
  542. break;
  543. }
  544. }
  545. }
  546. return n;
  547. }
  548. /*
  549. * read a single mime header, collect continuations.
  550. *
  551. * this routine assumes that there is a blank line twixt
  552. * the header and the message body, otherwise bytes will
  553. * be lost.
  554. */
  555. int
  556. getheader(int fd, char *buf, int n)
  557. {
  558. char *p, *e;
  559. int i;
  560. n--;
  561. p = buf;
  562. for(e = p + n; ; p += i){
  563. i = readline(fd, p, e-p);
  564. if(i < 0)
  565. return i;
  566. if(p == buf){
  567. /* first line */
  568. if(strchr(buf, ':') == nil)
  569. break; /* end of headers */
  570. } else {
  571. /* continuation line */
  572. if(*p != ' ' && *p != '\t'){
  573. unreadline(p);
  574. *p = 0;
  575. break; /* end of this header */
  576. }
  577. }
  578. }
  579. if(headerprint)
  580. print("%s\n", buf);
  581. if(debug)
  582. fprint(2, "%d <- %s\n", fd, buf);
  583. return p-buf;
  584. }
  585. void
  586. hhetag(char *p, URL *u, Range*)
  587. {
  588. if(u->etag != nil){
  589. if(strcmp(u->etag, p) != 0)
  590. sysfatal("file changed underfoot");
  591. } else
  592. u->etag = strdup(p);
  593. }
  594. char* monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
  595. void
  596. hhmtime(char *p, URL *u, Range*)
  597. {
  598. char *month, *day, *yr, *hms;
  599. char *fields[6];
  600. Tm tm, now;
  601. int i;
  602. i = getfields(p, fields, 6, 1, " \t");
  603. if(i < 5)
  604. return;
  605. day = fields[1];
  606. month = fields[2];
  607. yr = fields[3];
  608. hms = fields[4];
  609. /* default time */
  610. now = *gmtime(time(0));
  611. tm = now;
  612. tm.yday = 0;
  613. /* convert ascii month to a number twixt 1 and 12 */
  614. if(*month >= '0' && *month <= '9'){
  615. tm.mon = atoi(month) - 1;
  616. if(tm.mon < 0 || tm.mon > 11)
  617. tm.mon = 5;
  618. } else {
  619. for(p = month; *p; p++)
  620. *p = tolower(*p);
  621. for(i = 0; i < 12; i++)
  622. if(strncmp(&monthchars[i*3], month, 3) == 0){
  623. tm.mon = i;
  624. break;
  625. }
  626. }
  627. tm.mday = atoi(day);
  628. if(hms) {
  629. tm.hour = strtoul(hms, &p, 10);
  630. if(*p == ':') {
  631. p++;
  632. tm.min = strtoul(p, &p, 10);
  633. if(*p == ':') {
  634. p++;
  635. tm.sec = strtoul(p, &p, 10);
  636. }
  637. }
  638. if(tolower(*p) == 'p')
  639. tm.hour += 12;
  640. }
  641. if(yr) {
  642. tm.year = atoi(yr);
  643. if(tm.year >= 1900)
  644. tm.year -= 1900;
  645. } else {
  646. if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
  647. tm.year--;
  648. }
  649. strcpy(tm.zone, "GMT");
  650. /* convert to epoch seconds */
  651. u->mtime = tm2sec(&tm);
  652. }
  653. void
  654. hhclen(char *p, URL*, Range *r)
  655. {
  656. r->end = atoi(p);
  657. }
  658. void
  659. hhcrange(char *p, URL*, Range *r)
  660. {
  661. char *x;
  662. vlong l;
  663. l = 0;
  664. x = strchr(p, '/');
  665. if(x)
  666. l = atoll(x+1);
  667. if(l == 0) {
  668. x = strchr(p, '-');
  669. if(x)
  670. l = atoll(x+1);
  671. }
  672. if(l)
  673. r->end = l;
  674. }
  675. void
  676. hhuri(char *p, URL *u, Range*)
  677. {
  678. if(*p != '<')
  679. return;
  680. u->redirect = strdup(p+1);
  681. p = strchr(u->redirect, '>');
  682. if(p != nil)
  683. *p = 0;
  684. }
  685. void
  686. hhlocation(char *p, URL *u, Range*)
  687. {
  688. u->redirect = strdup(p);
  689. }
  690. void
  691. hhauth(char *p, URL *u, Range*)
  692. {
  693. char *f[4];
  694. UserPasswd *up;
  695. char *s, cred[64];
  696. if (cistrncmp(p, "basic ", 6) != 0)
  697. sysfatal("only Basic authentication supported");
  698. if (gettokens(p, f, nelem(f), "\"") < 2)
  699. sysfatal("garbled auth data");
  700. if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http server=%q realm=%q",
  701. u->host, f[1])) == nil)
  702. sysfatal("cannot authenticate");
  703. s = smprint("%s:%s", up->user, up->passwd);
  704. if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
  705. sysfatal("enc64");
  706. free(s);
  707. assert(u->cred = strdup(cred));
  708. }
  enum
  {
      /* ftp return codes: the hundreds digit of a reply, as returned by ftprcode */
      Extra= 1,
      Success= 2,
      Incomplete= 3,
      TempFail= 4,
      PermFail= 5,

      Nnetdir= 64,     /* max length of network directory paths */
      Ndialstr= 64,    /* max length of dial strings */
  };
  720. int ftpcmd(int, char*, ...);
  721. int ftprcode(int, char*, int);
  722. int hello(int);
  723. int logon(int);
  724. int xfertype(int, char*);
  725. int passive(int, URL*);
  726. int active(int, URL*);
  727. int ftpxfer(int, Out*, Range*);
  728. int terminateftp(int, int);
  729. int getaddrport(char*, uchar*, uchar*);
  730. int ftprestart(int, Out*, URL*, Range*, long);
  731. int
  732. doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
  733. {
  734. int pid, ctl, data, rv;
  735. Waitmsg *w;
  736. char msg[64];
  737. char conndir[NETPATHLEN];
  738. char *p;
  739. /* untested, proxy doesn't work with ftp (I think) */
  740. if(px->host == nil){
  741. ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, conndir, 0);
  742. } else {
  743. ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, conndir, 0);
  744. }
  745. if(ctl < 0)
  746. return Error;
  747. if(net == nil){
  748. p = strrchr(conndir, '/');
  749. *p = 0;
  750. snprint(tcpdir, sizeof(tcpdir), conndir);
  751. }
  752. initibuf();
  753. rv = hello(ctl);
  754. if(rv < 0)
  755. return terminateftp(ctl, rv);
  756. rv = logon(ctl);
  757. if(rv < 0)
  758. return terminateftp(ctl, rv);
  759. rv = xfertype(ctl, "I");
  760. if(rv < 0)
  761. return terminateftp(ctl, rv);
  762. /* if file is up to date and the right size, stop */
  763. if(ftprestart(ctl, out, u, r, mtime) > 0){
  764. close(ctl);
  765. return Eof;
  766. }
  767. /* first try passive mode, then active */
  768. data = passive(ctl, u);
  769. if(data < 0){
  770. data = active(ctl, u);
  771. if(data < 0)
  772. return Error;
  773. }
  774. /* fork */
  775. switch(pid = rfork(RFPROC|RFFDG|RFMEM)){
  776. case -1:
  777. close(data);
  778. return terminateftp(ctl, Error);
  779. case 0:
  780. ftpxfer(data, out, r);
  781. close(data);
  782. _exits(0);
  783. default:
  784. close(data);
  785. break;
  786. }
  787. /* wait for reply message */
  788. rv = ftprcode(ctl, msg, sizeof(msg));
  789. close(ctl);
  790. /* wait for process to terminate */
  791. w = nil;
  792. for(;;){
  793. free(w);
  794. w = wait();
  795. if(w == nil)
  796. return Error;
  797. if(w->pid == pid){
  798. if(w->msg[0] == 0){
  799. free(w);
  800. break;
  801. }
  802. werrstr("xfer: %s", w->msg);
  803. free(w);
  804. return Error;
  805. }
  806. }
  807. switch(rv){
  808. case Success:
  809. return Eof;
  810. case TempFail:
  811. return Server;
  812. default:
  813. return Error;
  814. }
  815. }
  816. int
  817. ftpcmd(int ctl, char *fmt, ...)
  818. {
  819. va_list arg;
  820. char buf[2*1024], *s;
  821. va_start(arg, fmt);
  822. s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
  823. va_end(arg);
  824. if(debug)
  825. fprint(2, "%d -> %s\n", ctl, buf);
  826. *s++ = '\r';
  827. *s++ = '\n';
  828. if(write(ctl, buf, s - buf) != s - buf)
  829. return -1;
  830. return 0;
  831. }
  832. int
  833. ftprcode(int ctl, char *msg, int len)
  834. {
  835. int rv;
  836. int i;
  837. char *p;
  838. len--; /* room for terminating null */
  839. for(;;){
  840. *msg = 0;
  841. i = readline(ctl, msg, len);
  842. if(i < 0)
  843. break;
  844. if(debug)
  845. fprint(2, "%d <- %s\n", ctl, msg);
  846. /* stop if not a continuation */
  847. rv = strtol(msg, &p, 10);
  848. if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
  849. return rv/100;
  850. }
  851. *msg = 0;
  852. return -1;
  853. }
  854. int
  855. hello(int ctl)
  856. {
  857. char msg[1024];
  858. /* wait for hello from other side */
  859. if(ftprcode(ctl, msg, sizeof(msg)) != Success){
  860. werrstr("HELLO: %s", msg);
  861. return Server;
  862. }
  863. return 0;
  864. }
  /*
   * Convert the first n characters at p to a number, treating each
   * as a decimal digit.  The characters are not checked; the caller
   * is expected to pass digits.
   */
  int
  getdec(char *p, int n)
  {
      int x;

      x = 0;
      while(n-- > 0)
          x = x*10 + (*p++ - '0');
      return x;
  }
  874. int
  875. ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
  876. {
  877. Tm tm;
  878. char msg[1024];
  879. long x, rmtime;
  880. ftpcmd(ctl, "MDTM %s", u->page);
  881. if(ftprcode(ctl, msg, sizeof(msg)) != Success){
  882. r->start = 0;
  883. return 0; /* need to do something */
  884. }
  885. /* decode modification time */
  886. if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
  887. r->start = 0;
  888. return 0; /* need to do something */
  889. }
  890. memset(&tm, 0, sizeof(tm));
  891. tm.year = getdec(msg+4, 4) - 1900;
  892. tm.mon = getdec(msg+4+4, 2) - 1;
  893. tm.mday = getdec(msg+4+4+2, 2);
  894. tm.hour = getdec(msg+4+4+2+2, 2);
  895. tm.min = getdec(msg+4+4+2+2+2, 2);
  896. tm.sec = getdec(msg+4+4+2+2+2+2, 2);
  897. strcpy(tm.zone, "GMT");
  898. rmtime = tm2sec(&tm);
  899. if(rmtime > mtime)
  900. r->start = 0;
  901. /* get size */
  902. ftpcmd(ctl, "SIZE %s", u->page);
  903. if(ftprcode(ctl, msg, sizeof(msg)) == Success){
  904. x = atol(msg+4);
  905. if(r->start == x)
  906. return 1; /* we're up to date */
  907. r->end = x;
  908. }
  909. /* seek to restart point */
  910. if(r->start > 0){
  911. ftpcmd(ctl, "REST %lud", r->start);
  912. if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
  913. setoffset(out, r->start);
  914. }else
  915. r->start = 0;
  916. }
  917. return 0; /* need to do something */
  918. }
  919. int
  920. logon(int ctl)
  921. {
  922. char msg[1024];
  923. /* login anonymous */
  924. ftpcmd(ctl, "USER anonymous");
  925. switch(ftprcode(ctl, msg, sizeof(msg))){
  926. case Success:
  927. return 0;
  928. case Incomplete:
  929. break; /* need password */
  930. default:
  931. werrstr("USER: %s", msg);
  932. return Server;
  933. }
  934. /* send user id as password */
  935. sprint(msg, "%s@closedmind.org", getuser());
  936. ftpcmd(ctl, "PASS %s", msg);
  937. if(ftprcode(ctl, msg, sizeof(msg)) != Success){
  938. werrstr("PASS: %s", msg);
  939. return Server;
  940. }
  941. return 0;
  942. }
  943. int
  944. xfertype(int ctl, char *t)
  945. {
  946. char msg[1024];
  947. ftpcmd(ctl, "TYPE %s", t);
  948. if(ftprcode(ctl, msg, sizeof(msg)) != Success){
  949. werrstr("TYPE %s: %s", t, msg);
  950. return Server;
  951. }
  952. return 0;
  953. }
  954. int
  955. passive(int ctl, URL *u)
  956. {
  957. char msg[1024];
  958. char ipaddr[32];
  959. char *f[6];
  960. char *p;
  961. int fd;
  962. int port;
  963. char aport[12];
  964. ftpcmd(ctl, "PASV");
  965. if(ftprcode(ctl, msg, sizeof(msg)) != Success)
  966. return Error;
  967. /* get address and port number from reply, this is AI */
  968. p = strchr(msg, '(');
  969. if(p == nil){
  970. for(p = msg+3; *p; p++)
  971. if(isdigit(*p))
  972. break;
  973. } else
  974. p++;
  975. if(getfields(p, f, 6, 0, ",)") < 6){
  976. werrstr("ftp protocol botch");
  977. return Server;
  978. }
  979. snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
  980. f[0], f[1], f[2], f[3]);
  981. port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
  982. sprint(aport, "%d", port);
  983. /* open data connection */
  984. fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
  985. if(fd < 0){
  986. werrstr("passive mode failed: %r");
  987. return Error;
  988. }
  989. /* tell remote to send a file */
  990. ftpcmd(ctl, "RETR %s", u->page);
  991. if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
  992. werrstr("RETR %s: %s", u->page, msg);
  993. return Error;
  994. }
  995. return fd;
  996. }
  997. int
  998. active(int ctl, URL *u)
  999. {
  1000. char msg[1024];
  1001. char dir[40], ldir[40];
  1002. uchar ipaddr[4];
  1003. uchar port[2];
  1004. int lcfd, dfd, afd;
  1005. /* announce a port for the call back */
  1006. snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
  1007. afd = announce(msg, dir);
  1008. if(afd < 0)
  1009. return Error;
  1010. /* get a local address/port of the annoucement */
  1011. if(getaddrport(dir, ipaddr, port) < 0){
  1012. close(afd);
  1013. return Error;
  1014. }
  1015. /* tell remote side address and port*/
  1016. ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
  1017. ipaddr[3], port[0], port[1]);
  1018. if(ftprcode(ctl, msg, sizeof(msg)) != Success){
  1019. close(afd);
  1020. werrstr("active: %s", msg);
  1021. return Error;
  1022. }
  1023. /* tell remote to send a file */
  1024. ftpcmd(ctl, "RETR %s", u->page);
  1025. if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
  1026. close(afd);
  1027. werrstr("RETR: %s", msg);
  1028. return Server;
  1029. }
  1030. /* wait for a connection */
  1031. lcfd = listen(dir, ldir);
  1032. if(lcfd < 0){
  1033. close(afd);
  1034. return Error;
  1035. }
  1036. dfd = accept(lcfd, ldir);
  1037. if(dfd < 0){
  1038. close(afd);
  1039. close(lcfd);
  1040. return Error;
  1041. }
  1042. close(afd);
  1043. close(lcfd);
  1044. return dfd;
  1045. }
  1046. int
  1047. ftpxfer(int in, Out *out, Range *r)
  1048. {
  1049. char buf[1024];
  1050. long vtime;
  1051. int i, n;
  1052. vtime = 0;
  1053. for(n = 0;;n += i){
  1054. i = read(in, buf, sizeof(buf));
  1055. if(i == 0)
  1056. break;
  1057. if(i < 0)
  1058. return Error;
  1059. if(output(out, buf, i) != i)
  1060. return Error;
  1061. r->start += i;
  1062. if(verbose && (vtime != time(0) || r->start == r->end)) {
  1063. vtime = time(0);
  1064. fprint(2, "%ld %ld\n", r->start, r->end);
  1065. }
  1066. }
  1067. return n;
  1068. }
  /*
   * Close the FTP control connection ctl and pass rv straight back,
   * so a caller can write `return terminateftp(ctl, code);'.
   * The result of close is not examined.
   */
  int
  terminateftp(int ctl, int rv)
  {
      int status;

      status = rv;
      close(ctl);
      return status;
  }
  1075. /*
  1076. * case insensitive strcmp (why aren't these in libc?)
  1077. */
  /*
   * Compare at most n characters of a and b, ignoring case.
   * Returns 0 if they match and -1 otherwise (this is an equality
   * test, not an ordering).
   *
   * Comparison stops at the first NUL, so two equal strings shorter
   * than n are never read past their terminators.  Characters are
   * converted through unsigned char before being handed to tolower,
   * which is not defined for negative values other than EOF.
   */
  int
  cistrncmp(char *a, char *b, int n)
  {
      unsigned char ca, cb;

      while(n-- > 0){
          ca = *a++;
          cb = *b++;
          if(tolower(ca) != tolower(cb))
              return -1;
          if(ca == 0)
              break;    /* both strings ended together */
      }
      return 0;
  }
  /*
   * Compare two NUL-terminated strings, ignoring case.
   * Returns 0 if they are equal and -1 otherwise (this is an equality
   * test, not an ordering).
   *
   * Characters are converted through unsigned char before being handed
   * to tolower, which is not defined for negative values other than EOF.
   */
  int
  cistrcmp(char *a, char *b)
  {
      unsigned char ca, cb;

      for(;;){
          ca = *a++;
          cb = *b++;
          if(tolower(ca) != tolower(cb))
              return -1;
          if(ca == 0)
              return 0;    /* matched up to and including the NUL */
      }
  }
  1095. /*
  1096. * buffered io
  1097. */
  struct
  {
      char *rp, *wp;        /* rp: next unread byte; wp: one past the last buffered byte */
      char buf[4*1024];     /* storage shared by readline, unreadline and readibuf */
  } b;
  1104. void
  1105. initibuf(void)
  1106. {
  1107. b.rp = b.wp = b.buf;
  1108. }
  /*
   * read a possibly buffered line, strip off trailing white space
   */
  1112. int
  1113. readline(int fd, char *buf, int len)
  1114. {
  1115. int n;
  1116. char *p;
  1117. int eof = 0;
  1118. len--;
  1119. for(p = buf;;){
  1120. if(b.rp >= b.wp){
  1121. n = read(fd, b.wp, sizeof(b.buf)/2);
  1122. if(n < 0)
  1123. return -1;
  1124. if(n == 0){
  1125. eof = 1;
  1126. break;
  1127. }
  1128. b.wp += n;
  1129. }
  1130. n = *b.rp++;
  1131. if(len > 0){
  1132. *p++ = n;
  1133. len--;
  1134. }
  1135. if(n == '\n')
  1136. break;
  1137. }
  1138. /* drop trailing white */
  1139. for(;;){
  1140. if(p <= buf)
  1141. break;
  1142. n = *(p-1);
  1143. if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
  1144. break;
  1145. p--;
  1146. }
  1147. *p = 0;
  1148. if(eof && p == buf)
  1149. return -1;
  1150. return p-buf;
  1151. }
  1152. void
  1153. unreadline(char *line)
  1154. {
  1155. int i, n;
  1156. i = strlen(line);
  1157. n = b.wp-b.rp;
  1158. memmove(&b.buf[i+1], b.rp, n);
  1159. memmove(b.buf, line, i);
  1160. b.buf[i] = '\n';
  1161. b.rp = b.buf;
  1162. b.wp = b.rp + i + 1 + n;
  1163. }
  1164. int
  1165. readibuf(int fd, char *buf, int len)
  1166. {
  1167. int n;
  1168. n = b.wp-b.rp;
  1169. if(n > 0){
  1170. if(n > len)
  1171. n = len;
  1172. memmove(buf, b.rp, n);
  1173. b.rp += n;
  1174. return n;
  1175. }
  1176. return read(fd, buf, len);
  1177. }
  1178. int
  1179. dfprint(int fd, char *fmt, ...)
  1180. {
  1181. char buf[4*1024];
  1182. va_list arg;
  1183. va_start(arg, fmt);
  1184. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1185. va_end(arg);
  1186. if(debug)
  1187. fprint(2, "%d -> %s", fd, buf);
  1188. return fprint(fd, "%s", buf);
  1189. }
  1190. int
  1191. getaddrport(char *dir, uchar *ipaddr, uchar *port)
  1192. {
  1193. char buf[256];
  1194. int fd, i;
  1195. char *p;
  1196. snprint(buf, sizeof(buf), "%s/local", dir);
  1197. fd = open(buf, OREAD);
  1198. if(fd < 0)
  1199. return -1;
  1200. i = read(fd, buf, sizeof(buf)-1);
  1201. close(fd);
  1202. if(i <= 0)
  1203. return -1;
  1204. buf[i] = 0;
  1205. p = strchr(buf, '!');
  1206. if(p != nil)
  1207. *p++ = 0;
  1208. v4parseip(ipaddr, buf);
  1209. i = atoi(p);
  1210. port[0] = i>>8;
  1211. port[1] = i;
  1212. return 0;
  1213. }
  1214. void
  1215. md5free(DigestState *state)
  1216. {
  1217. uchar x[MD5dlen];
  1218. md5(nil, 0, x, state);
  1219. }
  1220. DigestState*
  1221. md5dup(DigestState *state)
  1222. {
  1223. char *p;
  1224. p = md5pickle(state);
  1225. if(p == nil)
  1226. sysfatal("md5pickle: %r");
  1227. state = md5unpickle(p);
  1228. if(state == nil)
  1229. sysfatal("md5unpickle: %r");
  1230. free(p);
  1231. return state;
  1232. }
  1233. void
  1234. setoffset(Out *out, int offset)
  1235. {
  1236. md5free(out->curr);
  1237. if(offset == 0)
  1238. out->curr = md5(nil, 0, nil, nil);
  1239. else
  1240. out->curr = nil;
  1241. out->offset = offset;
  1242. out->written = offset;
  1243. if(ofile != nil)
  1244. if(seek(out->fd, offset, 0) != offset)
  1245. sysfatal("seek: %r");
  1246. }
  1247. /*
  1248. * write some output, discarding it (but keeping track)
  1249. * if we've already written it. if we've gone backwards,
  1250. * verify that everything previously written matches
  1251. * that which would have been written from the current
  1252. * output.
  1253. */
  1254. int
  1255. output(Out *out, char *buf, int nb)
  1256. {
  1257. int n, d;
  1258. uchar m0[MD5dlen], m1[MD5dlen];
  1259. n = nb;
  1260. d = out->written - out->offset;
  1261. assert(d >= 0);
  1262. if(d > 0){
  1263. if(n < d){
  1264. if(out->curr != nil)
  1265. md5((uchar*)buf, n, nil, out->curr);
  1266. out->offset += n;
  1267. return n;
  1268. }
  1269. if(out->curr != nil){
  1270. md5((uchar*)buf, d, m0, out->curr);
  1271. out->curr = nil;
  1272. md5(nil, 0, m1, md5dup(out->hiwat));
  1273. if(memcmp(m0, m1, MD5dlen) != 0){
  1274. fprint(2, "integrity check failure at offset %d\n", out->written);
  1275. return -1;
  1276. }
  1277. }
  1278. buf += d;
  1279. n -= d;
  1280. out->offset += d;
  1281. }
  1282. if(n > 0){
  1283. out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
  1284. n = write(out->fd, buf, n);
  1285. if(n > 0){
  1286. out->offset += n;
  1287. out->written += n;
  1288. }
  1289. }
  1290. return n + d;
  1291. }