unzip.c 13 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <flate.h>
  5. #include "zip.h"
  /* Size, in bytes, of the stack buffer copyout() uses to move stored data. */
  enum { BufSize = 4096 };
  10. static int cheader(Biobuf *bin, ZipHead *zh);
  11. static int copyout(int ofd, Biobuf *bin, long len);
  12. static int crcwrite(void *ofd, void *buf, int n);
  13. static int findCDir(Biobuf *bin, char *file);
  14. static int get1(Biobuf *b);
  15. static int get2(Biobuf *b);
  16. static ulong get4(Biobuf *b);
  17. static char *getname(Biobuf *b, int len);
  18. static int header(Biobuf *bin, ZipHead *zh);
  19. static long msdos2time(int time, int date);
  20. static int sunzip(Biobuf *bin);
  21. static int sunztable(Biobuf *bin);
  22. static void trailer(Biobuf *bin, ZipHead *zh);
  23. static int unzip(Biobuf *bin, char *file);
  24. static int unzipEntry(Biobuf *bin, ZipHead *czh);
  25. static int unztable(Biobuf *bin, char *file);
  26. static int wantFile(char *file);
  27. static void *emalloc(ulong);
  28. static void error(char*, ...);
  29. #pragma varargck argpos error 1
  30. static Biobuf bin;
  31. static ulong crc;
  32. static ulong *crctab;
  33. static int debug;
  34. static char *delfile;
  35. static int lower;
  36. static int nwant;
  37. static ulong rlen;
  38. static int settimes;
  39. static int stdout;
  40. static int verbose;
  41. static char **want;
  42. static int wbad;
  43. static ulong wlen;
  44. static jmp_buf zjmp;
  /*
   * Print the command synopsis on standard error and exit with
   * status "usage".  The option list mirrors the cases handled by
   * main's ARGBEGIN switch (c, i, s, t, T, v, D and -f zipfile).
   */
  static void
  usage(void)
  {
  	fprint(2, "usage: unzip [-cistTvD] [-f zipfile] [file ...]\n");
  	exits("usage");
  }
  51. void
  52. main(int argc, char *argv[])
  53. {
  54. char *zfile;
  55. int fd, ok, table, stream;
  56. table = 0;
  57. stream = 0;
  58. zfile = nil;
  59. ARGBEGIN{
  60. case 'D':
  61. debug++;
  62. break;
  63. case 'c':
  64. stdout++;
  65. break;
  66. case 'i':
  67. lower++;
  68. break;
  69. case 'f':
  70. zfile = ARGF();
  71. if(zfile == nil)
  72. usage();
  73. break;
  74. case 's':
  75. stream++;
  76. break;
  77. case 't':
  78. table++;
  79. break;
  80. case 'T':
  81. settimes++;
  82. break;
  83. case 'v':
  84. verbose++;
  85. break;
  86. default:
  87. usage();
  88. break;
  89. }ARGEND
  90. nwant = argc;
  91. want = argv;
  92. crctab = mkcrctab(ZCrcPoly);
  93. ok = inflateinit();
  94. if(ok != FlateOk)
  95. sysfatal("inflateinit failed: %s\n", flateerr(ok));
  96. if(zfile == nil){
  97. Binit(&bin, 0, OREAD);
  98. zfile = "<stdin>";
  99. }else{
  100. fd = open(zfile, OREAD);
  101. if(fd < 0)
  102. sysfatal("can't open %s: %r", zfile);
  103. Binit(&bin, fd, OREAD);
  104. }
  105. if(table){
  106. if(stream)
  107. ok = sunztable(&bin);
  108. else
  109. ok = unztable(&bin, zfile);
  110. }else{
  111. if(stream)
  112. ok = sunzip(&bin);
  113. else
  114. ok = unzip(&bin, zfile);
  115. }
  116. exits(ok ? nil: "errors");
  117. }
  118. /*
  119. * print the table of contents from the "central directory structure"
  120. */
  121. static int
  122. unztable(Biobuf *bin, char *file)
  123. {
  124. ZipHead zh;
  125. int entries;
  126. entries = findCDir(bin, file);
  127. if(entries < 0)
  128. return 0;
  129. if(verbose > 1)
  130. print("%d items in the archive\n", entries);
  131. while(entries-- > 0){
  132. if(setjmp(zjmp)){
  133. free(zh.file);
  134. return 0;
  135. }
  136. memset(&zh, 0, sizeof(zh));
  137. if(!cheader(bin, &zh))
  138. return 1;
  139. if(wantFile(zh.file)){
  140. if(verbose)
  141. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  142. else
  143. print("%s\n", zh.file);
  144. if(verbose > 1){
  145. print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
  146. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
  147. print("\tflags %x\n", zh.flags);
  148. print("\tmethod %d\n", zh.meth);
  149. print("\tmod time %d\n", zh.modtime);
  150. print("\tmod date %d\n", zh.moddate);
  151. print("\tcrc %lux\n", zh.crc);
  152. print("\tcompressed size %lud\n", zh.csize);
  153. print("\tuncompressed size %lud\n", zh.uncsize);
  154. print("\tinternal attributes %ux\n", zh.iattr);
  155. print("\texternal attributes %lux\n", zh.eattr);
  156. print("\tstarts at %ld\n", zh.off);
  157. }
  158. }
  159. free(zh.file);
  160. zh.file = nil;
  161. }
  162. return 1;
  163. }
  164. /*
  165. * print the "local file header" table of contents
  166. */
  167. static int
  168. sunztable(Biobuf *bin)
  169. {
  170. ZipHead zh;
  171. vlong off;
  172. ulong hcrc, hcsize, huncsize;
  173. int ok, err;
  174. ok = 1;
  175. for(;;){
  176. if(setjmp(zjmp)){
  177. free(zh.file);
  178. return 0;
  179. }
  180. memset(&zh, 0, sizeof(zh));
  181. if(!header(bin, &zh))
  182. return ok;
  183. hcrc = zh.crc;
  184. hcsize = zh.csize;
  185. huncsize = zh.uncsize;
  186. wlen = 0;
  187. rlen = 0;
  188. crc = 0;
  189. wbad = 0;
  190. if(zh.meth == 0){
  191. if(!copyout(-1, bin, zh.csize))
  192. error("reading data for %s failed: %r", zh.file);
  193. }else if(zh.meth == 8){
  194. off = Boffset(bin);
  195. err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
  196. if(err != FlateOk)
  197. error("inflate %s failed: %s", zh.file, flateerr(err));
  198. rlen = Boffset(bin) - off;
  199. }else
  200. error("can't handle compression method %d for %s", zh.meth, zh.file);
  201. trailer(bin, &zh);
  202. if(wantFile(zh.file)){
  203. if(verbose)
  204. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  205. else
  206. print("%s\n", zh.file);
  207. if(verbose > 1){
  208. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
  209. print("\tflags %x\n", zh.flags);
  210. print("\tmethod %d\n", zh.meth);
  211. print("\tmod time %d\n", zh.modtime);
  212. print("\tmod date %d\n", zh.moddate);
  213. print("\tcrc %lux\n", zh.crc);
  214. print("\tcompressed size %lud\n", zh.csize);
  215. print("\tuncompressed size %lud\n", zh.uncsize);
  216. if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
  217. print("\theader crc %lux\n", zh.crc);
  218. print("\theader compressed size %lud\n", zh.csize);
  219. print("\theader uncompressed size %lud\n", zh.uncsize);
  220. }
  221. }
  222. }
  223. if(zh.crc != crc)
  224. error("crc mismatch for %s", zh.file);
  225. if(zh.uncsize != wlen)
  226. error("output size mismatch for %s", zh.file);
  227. if(zh.csize != rlen)
  228. error("input size mismatch for %s", zh.file);
  229. free(zh.file);
  230. zh.file = nil;
  231. }
  232. return ok;
  233. }
  234. /*
  235. * extract files using the info in the central directory structure
  236. */
  237. static int
  238. unzip(Biobuf *bin, char *file)
  239. {
  240. ZipHead zh;
  241. vlong off;
  242. int ok, eok, entries;
  243. entries = findCDir(bin, file);
  244. if(entries < 0)
  245. return 0;
  246. ok = 1;
  247. while(entries-- > 0){
  248. if(setjmp(zjmp)){
  249. free(zh.file);
  250. return 0;
  251. }
  252. memset(&zh, 0, sizeof(zh));
  253. if(!cheader(bin, &zh))
  254. return ok;
  255. off = Boffset(bin);
  256. if(wantFile(zh.file)){
  257. if(Bseek(bin, zh.off, 0) < 0){
  258. fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
  259. ok = 0;
  260. }else{
  261. eok = unzipEntry(bin, &zh);
  262. if(eok <= 0){
  263. fprint(2, "unzip: skipping %s\n", zh.file);
  264. ok = 0;
  265. }
  266. }
  267. }
  268. free(zh.file);
  269. zh.file = nil;
  270. if(Bseek(bin, off, 0) < 0){
  271. fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
  272. return 0;
  273. }
  274. }
  275. return ok;
  276. }
  277. /*
  278. * extract files using the info the "local file headers"
  279. */
  280. static int
  281. sunzip(Biobuf *bin)
  282. {
  283. int eok;
  284. for(;;){
  285. eok = unzipEntry(bin, nil);
  286. if(eok == 0)
  287. return 1;
  288. if(eok < 0)
  289. return 0;
  290. }
  291. return 1;
  292. }
  293. /*
  294. * extracts a single entry from a zip file
  295. * czh is the optional corresponding central directory entry
  296. */
  297. static int
  298. unzipEntry(Biobuf *bin, ZipHead *czh)
  299. {
  300. Dir *d;
  301. ZipHead zh;
  302. char *p;
  303. vlong off;
  304. int fd, isdir, ok, err;
  305. zh.file = nil;
  306. if(setjmp(zjmp)){
  307. delfile = nil;
  308. free(zh.file);
  309. return -1;
  310. }
  311. memset(&zh, 0, sizeof(zh));
  312. if(!header(bin, &zh))
  313. return 0;
  314. ok = 1;
  315. isdir = 0;
  316. fd = -1;
  317. if(wantFile(zh.file)){
  318. if(verbose)
  319. fprint(2, "extracting %s\n", zh.file);
  320. if(czh != nil && czh->extos == ZDos){
  321. isdir = czh->eattr & ZDDir;
  322. if(isdir && zh.uncsize != 0)
  323. fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
  324. }
  325. if(zh.meth == 0 && zh.uncsize == 0){
  326. p = strchr(zh.file, '\0');
  327. if(p > zh.file && p[-1] == '/')
  328. isdir = 1;
  329. }
  330. if(stdout){
  331. if(ok && !isdir)
  332. fd = 1;
  333. }else if(isdir){
  334. fd = create(zh.file, OREAD, DMDIR | 0775);
  335. if(fd < 0){
  336. d = dirstat(zh.file);
  337. if(d == nil || (d->mode & DMDIR) != DMDIR){
  338. fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
  339. ok = 0;
  340. }
  341. free(d);
  342. }
  343. }else if(ok){
  344. fd = create(zh.file, OWRITE, 0664);
  345. if(fd < 0){
  346. fprint(2, "unzip: can't create %s: %r\n", zh.file);
  347. ok = 0;
  348. }else
  349. delfile = zh.file;
  350. }
  351. }
  352. wlen = 0;
  353. rlen = 0;
  354. crc = 0;
  355. wbad = 0;
  356. if(zh.meth == 0){
  357. if(!copyout(fd, bin, zh.csize))
  358. error("copying data for %s failed: %r", zh.file);
  359. }else if(zh.meth == 8){
  360. off = Boffset(bin);
  361. err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
  362. if(err != FlateOk)
  363. error("inflate failed: %s", flateerr(err));
  364. rlen = Boffset(bin) - off;
  365. }else
  366. error("can't handle compression method %d for %s", zh.meth, zh.file);
  367. trailer(bin, &zh);
  368. if(zh.crc != crc)
  369. error("crc mismatch for %s", zh.file);
  370. if(zh.uncsize != wlen)
  371. error("output size mismatch for %s", zh.file);
  372. if(zh.csize != rlen)
  373. error("input size mismatch for %s", zh.file);
  374. delfile = nil;
  375. free(zh.file);
  376. if(fd >= 0 && !stdout){
  377. if(settimes){
  378. d = dirfstat(fd);
  379. if(d != nil){
  380. d->mtime = msdos2time(zh.modtime, zh.moddate);
  381. if(d->mtime)
  382. dirfwstat(fd, d);
  383. }
  384. }
  385. close(fd);
  386. }
  387. return ok;
  388. }
  389. static int
  390. wantFile(char *file)
  391. {
  392. int i, n;
  393. if(nwant == 0)
  394. return 1;
  395. for(i = 0; i < nwant; i++){
  396. if(strcmp(want[i], file) == 0)
  397. return 1;
  398. n = strlen(want[i]);
  399. if(strncmp(want[i], file, n) == 0 && file[n] == '/')
  400. return 1;
  401. }
  402. return 0;
  403. }
  404. /*
  405. * find the start of the central directory
  406. * returns the number of entries in the directory,
  407. * or -1 if there was an error
  408. */
  409. static int
  410. findCDir(Biobuf *bin, char *file)
  411. {
  412. vlong ecoff;
  413. long off, size;
  414. int entries, zclen, dn, ds, de;
  415. ecoff = Bseek(bin, -ZECHeadSize, 2);
  416. if(ecoff < 0){
  417. fprint(2, "unzip: can't seek to contents of %s; try adding -s\n", file);
  418. return -1;
  419. }
  420. if(setjmp(zjmp))
  421. return -1;
  422. if(get4(bin) != ZECHeader){
  423. fprint(2, "unzip: bad magic number for contents of %s\n", file);
  424. return -1;
  425. }
  426. dn = get2(bin);
  427. ds = get2(bin);
  428. de = get2(bin);
  429. entries = get2(bin);
  430. size = get4(bin);
  431. off = get4(bin);
  432. zclen = get2(bin);
  433. while(zclen-- > 0)
  434. get1(bin);
  435. if(verbose > 1){
  436. print("table starts at %ld for %ld bytes\n", off, size);
  437. if(ecoff - size != off)
  438. print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
  439. if(dn || ds || de != entries)
  440. print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
  441. }
  442. if(Bseek(bin, off, 0) != off){
  443. fprint(2, "unzip: can't seek to start of contents of %s\n", file);
  444. return -1;
  445. }
  446. return entries;
  447. }
  448. static int
  449. cheader(Biobuf *bin, ZipHead *zh)
  450. {
  451. ulong v;
  452. int flen, xlen, fclen;
  453. v = get4(bin);
  454. if(v != ZCHeader){
  455. if(v == ZECHeader)
  456. return 0;
  457. error("bad magic number %lux", v);
  458. }
  459. zh->madevers = get1(bin);
  460. zh->madeos = get1(bin);
  461. zh->extvers = get1(bin);
  462. zh->extos = get1(bin);
  463. zh->flags = get2(bin);
  464. zh->meth = get2(bin);
  465. zh->modtime = get2(bin);
  466. zh->moddate = get2(bin);
  467. zh->crc = get4(bin);
  468. zh->csize = get4(bin);
  469. zh->uncsize = get4(bin);
  470. flen = get2(bin);
  471. xlen = get2(bin);
  472. fclen = get2(bin);
  473. get2(bin); /* disk number start */
  474. zh->iattr = get2(bin);
  475. zh->eattr = get4(bin);
  476. zh->off = get4(bin);
  477. zh->file = getname(bin, flen);
  478. while(xlen-- > 0)
  479. get1(bin);
  480. while(fclen-- > 0)
  481. get1(bin);
  482. return 1;
  483. }
  484. static int
  485. header(Biobuf *bin, ZipHead *zh)
  486. {
  487. ulong v;
  488. int flen, xlen;
  489. v = get4(bin);
  490. if(v != ZHeader){
  491. if(v == ZCHeader)
  492. return 0;
  493. error("bad magic number %lux at %lld", v, Boffset(bin)-4);
  494. }
  495. zh->extvers = get1(bin);
  496. zh->extos = get1(bin);
  497. zh->flags = get2(bin);
  498. zh->meth = get2(bin);
  499. zh->modtime = get2(bin);
  500. zh->moddate = get2(bin);
  501. zh->crc = get4(bin);
  502. zh->csize = get4(bin);
  503. zh->uncsize = get4(bin);
  504. flen = get2(bin);
  505. xlen = get2(bin);
  506. zh->file = getname(bin, flen);
  507. while(xlen-- > 0)
  508. get1(bin);
  509. return 1;
  510. }
  511. static void
  512. trailer(Biobuf *bin, ZipHead *zh)
  513. {
  514. if(zh->flags & ZTrailInfo){
  515. zh->crc = get4(bin);
  516. zh->csize = get4(bin);
  517. zh->uncsize = get4(bin);
  518. }
  519. }
  520. static char*
  521. getname(Biobuf *bin, int len)
  522. {
  523. char *s;
  524. int i, c;
  525. s = emalloc(len + 1);
  526. for(i = 0; i < len; i++){
  527. c = get1(bin);
  528. if(lower)
  529. c = tolower(c);
  530. s[i] = c;
  531. }
  532. s[i] = '\0';
  533. return s;
  534. }
  535. static int
  536. crcwrite(void *out, void *buf, int n)
  537. {
  538. int fd, nw;
  539. wlen += n;
  540. crc = blockcrc(crctab, crc, buf, n);
  541. fd = (int)out;
  542. if(fd < 0)
  543. return n;
  544. nw = write(fd, buf, n);
  545. if(nw != n)
  546. wbad = 1;
  547. return nw;
  548. }
  549. static int
  550. copyout(int ofd, Biobuf *bin, long len)
  551. {
  552. char buf[BufSize];
  553. int n;
  554. for(; len > 0; len -= n){
  555. n = len;
  556. if(n > BufSize)
  557. n = BufSize;
  558. n = Bread(bin, buf, n);
  559. if(n <= 0)
  560. return 0;
  561. rlen += n;
  562. if(crcwrite((void*)ofd, buf, n) != n)
  563. return 0;
  564. }
  565. return 1;
  566. }
  567. static ulong
  568. get4(Biobuf *b)
  569. {
  570. ulong v;
  571. int i, c;
  572. v = 0;
  573. for(i = 0; i < 4; i++){
  574. c = Bgetc(b);
  575. if(c < 0)
  576. error("unexpected eof reading file information");
  577. v |= c << (i * 8);
  578. }
  579. return v;
  580. }
  581. static int
  582. get2(Biobuf *b)
  583. {
  584. int i, c, v;
  585. v = 0;
  586. for(i = 0; i < 2; i++){
  587. c = Bgetc(b);
  588. if(c < 0)
  589. error("unexpected eof reading file information");
  590. v |= c << (i * 8);
  591. }
  592. return v;
  593. }
  594. static int
  595. get1(Biobuf *b)
  596. {
  597. int c;
  598. c = Bgetc(b);
  599. if(c < 0)
  600. error("unexpected eof reading file information");
  601. return c;
  602. }
  603. static long
  604. msdos2time(int time, int date)
  605. {
  606. Tm tm;
  607. tm.hour = time >> 11;
  608. tm.min = (time >> 5) & 63;
  609. tm.sec = (time & 31) << 1;
  610. tm.year = 80 + (date >> 9);
  611. tm.mon = ((date >> 5) & 15) - 1;
  612. tm.mday = date & 31;
  613. tm.zone[0] = '\0';
  614. tm.yday = 0;
  615. return tm2sec(&tm);
  616. }
  617. static void*
  618. emalloc(ulong n)
  619. {
  620. void *p;
  621. p = malloc(n);
  622. if(p == nil)
  623. sysfatal("out of memory");
  624. return p;
  625. }
  626. static void
  627. error(char *fmt, ...)
  628. {
  629. va_list arg;
  630. fprint(2, "unzip: ");
  631. va_start(arg, fmt);
  632. vfprint(2, fmt, arg);
  633. va_end(arg);
  634. fprint(2, "\n");
  635. if(delfile != nil){
  636. fprint(2, "unzip: removing output file %s\n", delfile);
  637. remove(delfile);
  638. delfile = nil;
  639. }
  640. longjmp(zjmp, 1);
  641. }