/* unzip.c: list or extract the contents of a zip archive */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <flate.h>
  5. #include "zip.h"
  6. enum
  7. {
  8. BufSize = 4096
  9. };
  10. static int cheader(Biobuf *bin, ZipHead *zh);
  11. static int copyout(int ofd, Biobuf *bin, long len);
  12. static int crcwrite(void *ofd, void *buf, int n);
  13. static int findCDir(Biobuf *bin, char *file);
  14. static int get1(Biobuf *b);
  15. static int get2(Biobuf *b);
  16. static ulong get4(Biobuf *b);
  17. static char *getname(Biobuf *b, int len);
  18. static int header(Biobuf *bin, ZipHead *zh);
  19. static long msdos2time(int time, int date);
  20. static int sunzip(Biobuf *bin);
  21. static int sunztable(Biobuf *bin);
  22. static void trailer(Biobuf *bin, ZipHead *zh);
  23. static int unzip(Biobuf *bin, char *file);
  24. static int unzipEntry(Biobuf *bin, ZipHead *czh);
  25. static int unztable(Biobuf *bin, char *file);
  26. static int wantFile(char *file);
  27. static void *emalloc(ulong);
  28. static void error(char*, ...);
  29. #pragma varargck argpos error 1
  30. static Biobuf bin;
  31. static ulong crc;
  32. static ulong *crctab;
  33. static int debug;
  34. static char *delfile;
  35. static int lower;
  36. static int nwant;
  37. static ulong rlen;
  38. static int settimes;
  39. static int stdout;
  40. static int verbose;
  41. static char **want;
  42. static int wbad;
  43. static ulong wlen;
  44. static jmp_buf zjmp;
  45. static jmp_buf seekjmp;
  46. static void
  47. usage(void)
  48. {
  49. fprint(2, "usage: unzip [-tsv] [-f zipfile] [file ...]\n");
  50. exits("usage");
  51. }
  52. void
  53. main(int argc, char *argv[])
  54. {
  55. char *zfile;
  56. int fd, ok, table, stream;
  57. table = 0;
  58. stream = 0;
  59. zfile = nil;
  60. ARGBEGIN{
  61. case 'D':
  62. debug++;
  63. break;
  64. case 'c':
  65. stdout++;
  66. break;
  67. case 'i':
  68. lower++;
  69. break;
  70. case 'f':
  71. zfile = ARGF();
  72. if(zfile == nil)
  73. usage();
  74. break;
  75. case 's':
  76. stream++;
  77. break;
  78. case 't':
  79. table++;
  80. break;
  81. case 'T':
  82. settimes++;
  83. break;
  84. case 'v':
  85. verbose++;
  86. break;
  87. default:
  88. usage();
  89. break;
  90. }ARGEND
  91. nwant = argc;
  92. want = argv;
  93. crctab = mkcrctab(ZCrcPoly);
  94. ok = inflateinit();
  95. if(ok != FlateOk)
  96. sysfatal("inflateinit failed: %s\n", flateerr(ok));
  97. if(zfile == nil){
  98. Binit(&bin, 0, OREAD);
  99. zfile = "<stdin>";
  100. }else{
  101. fd = open(zfile, OREAD);
  102. if(fd < 0)
  103. sysfatal("can't open %s: %r", zfile);
  104. Binit(&bin, fd, OREAD);
  105. }
  106. if(setjmp(seekjmp)){
  107. fprint(2, "trying to re-run assuming -s\n");
  108. stream = 1;
  109. Bseek(&bin, 0, 0);
  110. }
  111. if(table){
  112. if(stream)
  113. ok = sunztable(&bin);
  114. else
  115. ok = unztable(&bin, zfile);
  116. }else{
  117. if(stream)
  118. ok = sunzip(&bin);
  119. else
  120. ok = unzip(&bin, zfile);
  121. }
  122. exits(ok ? nil: "errors");
  123. }
  124. /*
  125. * print the table of contents from the "central directory structure"
  126. */
  127. static int
  128. unztable(Biobuf *bin, char *file)
  129. {
  130. ZipHead zh;
  131. int entries;
  132. entries = findCDir(bin, file);
  133. if(entries < 0)
  134. return 0;
  135. if(verbose > 1)
  136. print("%d items in the archive\n", entries);
  137. while(entries-- > 0){
  138. if(setjmp(zjmp)){
  139. free(zh.file);
  140. return 0;
  141. }
  142. memset(&zh, 0, sizeof(zh));
  143. if(!cheader(bin, &zh))
  144. return 1;
  145. if(wantFile(zh.file)){
  146. if(verbose)
  147. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  148. else
  149. print("%s\n", zh.file);
  150. if(verbose > 1){
  151. print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
  152. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
  153. print("\tflags %x\n", zh.flags);
  154. print("\tmethod %d\n", zh.meth);
  155. print("\tmod time %d\n", zh.modtime);
  156. print("\tmod date %d\n", zh.moddate);
  157. print("\tcrc %lux\n", zh.crc);
  158. print("\tcompressed size %lud\n", zh.csize);
  159. print("\tuncompressed size %lud\n", zh.uncsize);
  160. print("\tinternal attributes %ux\n", zh.iattr);
  161. print("\texternal attributes %lux\n", zh.eattr);
  162. print("\tstarts at %ld\n", zh.off);
  163. }
  164. }
  165. free(zh.file);
  166. zh.file = nil;
  167. }
  168. return 1;
  169. }
  170. /*
  171. * print the "local file header" table of contents
  172. */
  173. static int
  174. sunztable(Biobuf *bin)
  175. {
  176. ZipHead zh;
  177. vlong off;
  178. ulong hcrc, hcsize, huncsize;
  179. int ok, err;
  180. ok = 1;
  181. for(;;){
  182. if(setjmp(zjmp)){
  183. free(zh.file);
  184. return 0;
  185. }
  186. memset(&zh, 0, sizeof(zh));
  187. if(!header(bin, &zh))
  188. return ok;
  189. hcrc = zh.crc;
  190. hcsize = zh.csize;
  191. huncsize = zh.uncsize;
  192. wlen = 0;
  193. rlen = 0;
  194. crc = 0;
  195. wbad = 0;
  196. if(zh.meth == 0){
  197. if(!copyout(-1, bin, zh.csize))
  198. error("reading data for %s failed: %r", zh.file);
  199. }else if(zh.meth == 8){
  200. off = Boffset(bin);
  201. err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
  202. if(err != FlateOk)
  203. error("inflate %s failed: %s", zh.file, flateerr(err));
  204. rlen = Boffset(bin) - off;
  205. }else
  206. error("can't handle compression method %d for %s", zh.meth, zh.file);
  207. trailer(bin, &zh);
  208. if(wantFile(zh.file)){
  209. if(verbose)
  210. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  211. else
  212. print("%s\n", zh.file);
  213. if(verbose > 1){
  214. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
  215. print("\tflags %x\n", zh.flags);
  216. print("\tmethod %d\n", zh.meth);
  217. print("\tmod time %d\n", zh.modtime);
  218. print("\tmod date %d\n", zh.moddate);
  219. print("\tcrc %lux\n", zh.crc);
  220. print("\tcompressed size %lud\n", zh.csize);
  221. print("\tuncompressed size %lud\n", zh.uncsize);
  222. if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
  223. print("\theader crc %lux\n", zh.crc);
  224. print("\theader compressed size %lud\n", zh.csize);
  225. print("\theader uncompressed size %lud\n", zh.uncsize);
  226. }
  227. }
  228. }
  229. if(zh.crc != crc)
  230. error("crc mismatch for %s", zh.file);
  231. if(zh.uncsize != wlen)
  232. error("output size mismatch for %s", zh.file);
  233. if(zh.csize != rlen)
  234. error("input size mismatch for %s", zh.file);
  235. free(zh.file);
  236. zh.file = nil;
  237. }
  238. return ok;
  239. }
  240. /*
  241. * extract files using the info in the central directory structure
  242. */
  243. static int
  244. unzip(Biobuf *bin, char *file)
  245. {
  246. ZipHead zh;
  247. vlong off;
  248. int ok, eok, entries;
  249. entries = findCDir(bin, file);
  250. if(entries < 0)
  251. return 0;
  252. ok = 1;
  253. while(entries-- > 0){
  254. if(setjmp(zjmp)){
  255. free(zh.file);
  256. return 0;
  257. }
  258. memset(&zh, 0, sizeof(zh));
  259. if(!cheader(bin, &zh))
  260. return ok;
  261. off = Boffset(bin);
  262. if(wantFile(zh.file)){
  263. if(Bseek(bin, zh.off, 0) < 0){
  264. fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
  265. ok = 0;
  266. }else{
  267. eok = unzipEntry(bin, &zh);
  268. if(eok <= 0){
  269. fprint(2, "unzip: skipping %s\n", zh.file);
  270. ok = 0;
  271. }
  272. }
  273. }
  274. free(zh.file);
  275. zh.file = nil;
  276. if(Bseek(bin, off, 0) < 0){
  277. fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
  278. return 0;
  279. }
  280. }
  281. return ok;
  282. }
  283. /*
  284. * extract files using the info the "local file headers"
  285. */
  286. static int
  287. sunzip(Biobuf *bin)
  288. {
  289. int eok;
  290. for(;;){
  291. eok = unzipEntry(bin, nil);
  292. if(eok == 0)
  293. return 1;
  294. if(eok < 0)
  295. return 0;
  296. }
  297. return 1;
  298. }
  299. /*
  300. * extracts a single entry from a zip file
  301. * czh is the optional corresponding central directory entry
  302. */
  303. static int
  304. unzipEntry(Biobuf *bin, ZipHead *czh)
  305. {
  306. Dir *d;
  307. ZipHead zh;
  308. char *p;
  309. vlong off;
  310. int fd, isdir, ok, err;
  311. zh.file = nil;
  312. if(setjmp(zjmp)){
  313. delfile = nil;
  314. free(zh.file);
  315. return -1;
  316. }
  317. memset(&zh, 0, sizeof(zh));
  318. if(!header(bin, &zh))
  319. return 0;
  320. ok = 1;
  321. isdir = 0;
  322. fd = -1;
  323. if(wantFile(zh.file)){
  324. if(verbose)
  325. fprint(2, "extracting %s\n", zh.file);
  326. if(czh != nil && czh->extos == ZDos){
  327. isdir = czh->eattr & ZDDir;
  328. if(isdir && zh.uncsize != 0)
  329. fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
  330. }
  331. if(zh.meth == 0 && zh.uncsize == 0){
  332. p = strchr(zh.file, '\0');
  333. if(p > zh.file && p[-1] == '/')
  334. isdir = 1;
  335. }
  336. if(stdout){
  337. if(ok && !isdir)
  338. fd = 1;
  339. }else if(isdir){
  340. fd = create(zh.file, OREAD, DMDIR | 0775);
  341. if(fd < 0){
  342. d = dirstat(zh.file);
  343. if(d == nil || (d->mode & DMDIR) != DMDIR){
  344. fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
  345. ok = 0;
  346. }
  347. free(d);
  348. }
  349. }else if(ok){
  350. fd = create(zh.file, OWRITE, 0664);
  351. if(fd < 0){
  352. fprint(2, "unzip: can't create %s: %r\n", zh.file);
  353. ok = 0;
  354. }else
  355. delfile = zh.file;
  356. }
  357. }
  358. wlen = 0;
  359. rlen = 0;
  360. crc = 0;
  361. wbad = 0;
  362. if(zh.meth == 0){
  363. if(!copyout(fd, bin, zh.csize))
  364. error("copying data for %s failed: %r", zh.file);
  365. }else if(zh.meth == 8){
  366. off = Boffset(bin);
  367. err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
  368. if(err != FlateOk)
  369. error("inflate failed: %s", flateerr(err));
  370. rlen = Boffset(bin) - off;
  371. }else
  372. error("can't handle compression method %d for %s", zh.meth, zh.file);
  373. trailer(bin, &zh);
  374. if(zh.crc != crc)
  375. error("crc mismatch for %s", zh.file);
  376. if(zh.uncsize != wlen)
  377. error("output size mismatch for %s", zh.file);
  378. if(zh.csize != rlen)
  379. error("input size mismatch for %s", zh.file);
  380. delfile = nil;
  381. free(zh.file);
  382. if(fd >= 0 && !stdout){
  383. if(settimes){
  384. d = dirfstat(fd);
  385. if(d != nil){
  386. d->mtime = msdos2time(zh.modtime, zh.moddate);
  387. if(d->mtime)
  388. dirfwstat(fd, d);
  389. }
  390. }
  391. close(fd);
  392. }
  393. return ok;
  394. }
  395. static int
  396. wantFile(char *file)
  397. {
  398. int i, n;
  399. if(nwant == 0)
  400. return 1;
  401. for(i = 0; i < nwant; i++){
  402. if(strcmp(want[i], file) == 0)
  403. return 1;
  404. n = strlen(want[i]);
  405. if(strncmp(want[i], file, n) == 0 && file[n] == '/')
  406. return 1;
  407. }
  408. return 0;
  409. }
  410. /*
  411. * find the start of the central directory
  412. * returns the number of entries in the directory,
  413. * or -1 if there was an error
  414. */
  415. static int
  416. findCDir(Biobuf *bin, char *file)
  417. {
  418. vlong ecoff;
  419. long off, size, m;
  420. int entries, zclen, dn, ds, de;
  421. ecoff = Bseek(bin, -ZECHeadSize, 2);
  422. if(ecoff < 0){
  423. fprint(2, "unzip: can't seek to contents of %s\n", file);
  424. longjmp(seekjmp, 1);
  425. return -1;
  426. }
  427. if(setjmp(zjmp))
  428. return -1;
  429. if((m=get4(bin)) != ZECHeader){
  430. fprint(2, "unzip: bad magic number for table of contents of %s: %#.8lx\n", file, m);
  431. longjmp(seekjmp, 1);
  432. return -1;
  433. }
  434. dn = get2(bin);
  435. ds = get2(bin);
  436. de = get2(bin);
  437. entries = get2(bin);
  438. size = get4(bin);
  439. off = get4(bin);
  440. zclen = get2(bin);
  441. while(zclen-- > 0)
  442. get1(bin);
  443. if(verbose > 1){
  444. print("table starts at %ld for %ld bytes\n", off, size);
  445. if(ecoff - size != off)
  446. print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
  447. if(dn || ds || de != entries)
  448. print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
  449. }
  450. if(Bseek(bin, off, 0) != off){
  451. fprint(2, "unzip: can't seek to start of contents of %s\n", file);
  452. longjmp(seekjmp, 1);
  453. return -1;
  454. }
  455. return entries;
  456. }
  457. static int
  458. cheader(Biobuf *bin, ZipHead *zh)
  459. {
  460. ulong v;
  461. int flen, xlen, fclen;
  462. v = get4(bin);
  463. if(v != ZCHeader){
  464. if(v == ZECHeader)
  465. return 0;
  466. error("bad magic number %lux", v);
  467. }
  468. zh->madevers = get1(bin);
  469. zh->madeos = get1(bin);
  470. zh->extvers = get1(bin);
  471. zh->extos = get1(bin);
  472. zh->flags = get2(bin);
  473. zh->meth = get2(bin);
  474. zh->modtime = get2(bin);
  475. zh->moddate = get2(bin);
  476. zh->crc = get4(bin);
  477. zh->csize = get4(bin);
  478. zh->uncsize = get4(bin);
  479. flen = get2(bin);
  480. xlen = get2(bin);
  481. fclen = get2(bin);
  482. get2(bin); /* disk number start */
  483. zh->iattr = get2(bin);
  484. zh->eattr = get4(bin);
  485. zh->off = get4(bin);
  486. zh->file = getname(bin, flen);
  487. while(xlen-- > 0)
  488. get1(bin);
  489. while(fclen-- > 0)
  490. get1(bin);
  491. return 1;
  492. }
  493. static int
  494. header(Biobuf *bin, ZipHead *zh)
  495. {
  496. ulong v;
  497. int flen, xlen;
  498. v = get4(bin);
  499. if(v != ZHeader){
  500. if(v == ZCHeader)
  501. return 0;
  502. error("bad magic number %lux at %lld", v, Boffset(bin)-4);
  503. }
  504. zh->extvers = get1(bin);
  505. zh->extos = get1(bin);
  506. zh->flags = get2(bin);
  507. zh->meth = get2(bin);
  508. zh->modtime = get2(bin);
  509. zh->moddate = get2(bin);
  510. zh->crc = get4(bin);
  511. zh->csize = get4(bin);
  512. zh->uncsize = get4(bin);
  513. flen = get2(bin);
  514. xlen = get2(bin);
  515. zh->file = getname(bin, flen);
  516. while(xlen-- > 0)
  517. get1(bin);
  518. return 1;
  519. }
  520. static void
  521. trailer(Biobuf *bin, ZipHead *zh)
  522. {
  523. if(zh->flags & ZTrailInfo){
  524. zh->crc = get4(bin);
  525. zh->csize = get4(bin);
  526. zh->uncsize = get4(bin);
  527. }
  528. }
  529. static char*
  530. getname(Biobuf *bin, int len)
  531. {
  532. char *s;
  533. int i, c;
  534. s = emalloc(len + 1);
  535. for(i = 0; i < len; i++){
  536. c = get1(bin);
  537. if(lower)
  538. c = tolower(c);
  539. s[i] = c;
  540. }
  541. s[i] = '\0';
  542. return s;
  543. }
  544. static int
  545. crcwrite(void *out, void *buf, int n)
  546. {
  547. int fd, nw;
  548. wlen += n;
  549. crc = blockcrc(crctab, crc, buf, n);
  550. fd = (int)(uintptr)out;
  551. if(fd < 0)
  552. return n;
  553. nw = write(fd, buf, n);
  554. if(nw != n)
  555. wbad = 1;
  556. return nw;
  557. }
  558. static int
  559. copyout(int ofd, Biobuf *bin, long len)
  560. {
  561. char buf[BufSize];
  562. int n;
  563. for(; len > 0; len -= n){
  564. n = len;
  565. if(n > BufSize)
  566. n = BufSize;
  567. n = Bread(bin, buf, n);
  568. if(n <= 0)
  569. return 0;
  570. rlen += n;
  571. if(crcwrite((void*)ofd, buf, n) != n)
  572. return 0;
  573. }
  574. return 1;
  575. }
  576. static ulong
  577. get4(Biobuf *b)
  578. {
  579. ulong v;
  580. int i, c;
  581. v = 0;
  582. for(i = 0; i < 4; i++){
  583. c = Bgetc(b);
  584. if(c < 0)
  585. error("unexpected eof reading file information");
  586. v |= c << (i * 8);
  587. }
  588. return v;
  589. }
  590. static int
  591. get2(Biobuf *b)
  592. {
  593. int i, c, v;
  594. v = 0;
  595. for(i = 0; i < 2; i++){
  596. c = Bgetc(b);
  597. if(c < 0)
  598. error("unexpected eof reading file information");
  599. v |= c << (i * 8);
  600. }
  601. return v;
  602. }
  603. static int
  604. get1(Biobuf *b)
  605. {
  606. int c;
  607. c = Bgetc(b);
  608. if(c < 0)
  609. error("unexpected eof reading file information");
  610. return c;
  611. }
  612. static long
  613. msdos2time(int time, int date)
  614. {
  615. Tm tm;
  616. tm.hour = time >> 11;
  617. tm.min = (time >> 5) & 63;
  618. tm.sec = (time & 31) << 1;
  619. tm.year = 80 + (date >> 9);
  620. tm.mon = ((date >> 5) & 15) - 1;
  621. tm.mday = date & 31;
  622. tm.zone[0] = '\0';
  623. tm.yday = 0;
  624. return tm2sec(&tm);
  625. }
  626. static void*
  627. emalloc(ulong n)
  628. {
  629. void *p;
  630. p = malloc(n);
  631. if(p == nil)
  632. sysfatal("out of memory");
  633. return p;
  634. }
  635. static void
  636. error(char *fmt, ...)
  637. {
  638. va_list arg;
  639. fprint(2, "unzip: ");
  640. va_start(arg, fmt);
  641. vfprint(2, fmt, arg);
  642. va_end(arg);
  643. fprint(2, "\n");
  644. if(delfile != nil){
  645. fprint(2, "unzip: removing output file %s\n", delfile);
  646. remove(delfile);
  647. delfile = nil;
  648. }
  649. longjmp(zjmp, 1);
  650. }