unzip.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <flate.h>
  5. #include "zip.h"
  6. enum
  7. {
  8. BufSize = 4096
  9. };
  10. static int cheader(Biobuf *bin, ZipHead *zh);
  11. static int copyout(int ofd, Biobuf *bin, long len);
  12. static int crcwrite(void *ofd, void *buf, int n);
  13. static int findCDir(Biobuf *bin, char *file);
  14. static int get1(Biobuf *b);
  15. static int get2(Biobuf *b);
  16. static ulong get4(Biobuf *b);
  17. static char *getname(Biobuf *b, int len);
  18. static int header(Biobuf *bin, ZipHead *zh);
  19. static long msdos2time(int time, int date);
  20. static int sunzip(Biobuf *bin);
  21. static int sunztable(Biobuf *bin);
  22. static void trailer(Biobuf *bin, ZipHead *zh);
  23. static int unzip(Biobuf *bin, char *file);
  24. static int unzipEntry(Biobuf *bin, ZipHead *czh);
  25. static int unztable(Biobuf *bin, char *file);
  26. static int wantFile(char *file);
  27. static void *emalloc(ulong);
  28. static void error(char*, ...);
  29. #pragma varargck argpos error 1
  30. static Biobuf bin;
  31. static ulong crc;
  32. static ulong *crctab;
  33. static int debug;
  34. static char *delfile;
  35. static int lower;
  36. static int nwant;
  37. static ulong rlen;
  38. static int settimes;
  39. static int stdout;
  40. static int verbose;
  41. static char **want;
  42. static int wbad;
  43. static ulong wlen;
  44. static jmp_buf zjmp;
  45. static jmp_buf seekjmp;
  46. static int autodir;
  47. static void
  48. usage(void)
  49. {
  50. fprint(2, "usage: unzip [-tsv] [-f zipfile] [file ...]\n");
  51. exits("usage");
  52. }
  53. void
  54. main(int argc, char *argv[])
  55. {
  56. char *zfile;
  57. int fd, ok, table, stream;
  58. table = 0;
  59. stream = 0;
  60. zfile = nil;
  61. ARGBEGIN{
  62. case 'a':
  63. autodir++;
  64. break;
  65. case 'D':
  66. debug++;
  67. break;
  68. case 'c':
  69. stdout++;
  70. break;
  71. case 'i':
  72. lower++;
  73. break;
  74. case 'f':
  75. zfile = ARGF();
  76. if(zfile == nil)
  77. usage();
  78. break;
  79. case 's':
  80. stream++;
  81. break;
  82. case 't':
  83. table++;
  84. break;
  85. case 'T':
  86. settimes++;
  87. break;
  88. case 'v':
  89. verbose++;
  90. break;
  91. default:
  92. usage();
  93. break;
  94. }ARGEND
  95. nwant = argc;
  96. want = argv;
  97. crctab = mkcrctab(ZCrcPoly);
  98. ok = inflateinit();
  99. if(ok != FlateOk)
  100. sysfatal("inflateinit failed: %s\n", flateerr(ok));
  101. if(zfile == nil){
  102. Binit(&bin, 0, OREAD);
  103. zfile = "<stdin>";
  104. }else{
  105. fd = open(zfile, OREAD);
  106. if(fd < 0)
  107. sysfatal("can't open %s: %r", zfile);
  108. Binit(&bin, fd, OREAD);
  109. }
  110. if(setjmp(seekjmp)){
  111. fprint(2, "trying to re-run assuming -s\n");
  112. stream = 1;
  113. Bseek(&bin, 0, 0);
  114. }
  115. if(table){
  116. if(stream)
  117. ok = sunztable(&bin);
  118. else
  119. ok = unztable(&bin, zfile);
  120. }else{
  121. if(stream)
  122. ok = sunzip(&bin);
  123. else
  124. ok = unzip(&bin, zfile);
  125. }
  126. exits(ok ? nil: "errors");
  127. }
  128. /*
  129. * print the table of contents from the "central directory structure"
  130. */
  131. static int
  132. unztable(Biobuf *bin, char *file)
  133. {
  134. ZipHead zh;
  135. int entries;
  136. entries = findCDir(bin, file);
  137. if(entries < 0)
  138. return 0;
  139. if(verbose > 1)
  140. print("%d items in the archive\n", entries);
  141. while(entries-- > 0){
  142. if(setjmp(zjmp)){
  143. free(zh.file);
  144. return 0;
  145. }
  146. memset(&zh, 0, sizeof(zh));
  147. if(!cheader(bin, &zh))
  148. return 1;
  149. if(wantFile(zh.file)){
  150. if(verbose)
  151. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  152. else
  153. print("%s\n", zh.file);
  154. if(verbose > 1){
  155. print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
  156. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
  157. print("\tflags %x\n", zh.flags);
  158. print("\tmethod %d\n", zh.meth);
  159. print("\tmod time %d\n", zh.modtime);
  160. print("\tmod date %d\n", zh.moddate);
  161. print("\tcrc %lux\n", zh.crc);
  162. print("\tcompressed size %lud\n", zh.csize);
  163. print("\tuncompressed size %lud\n", zh.uncsize);
  164. print("\tinternal attributes %ux\n", zh.iattr);
  165. print("\texternal attributes %lux\n", zh.eattr);
  166. print("\tstarts at %ld\n", zh.off);
  167. }
  168. }
  169. free(zh.file);
  170. zh.file = nil;
  171. }
  172. return 1;
  173. }
  174. /*
  175. * print the "local file header" table of contents
  176. */
  177. static int
  178. sunztable(Biobuf *bin)
  179. {
  180. ZipHead zh;
  181. vlong off;
  182. ulong hcrc, hcsize, huncsize;
  183. int ok, err;
  184. ok = 1;
  185. for(;;){
  186. if(setjmp(zjmp)){
  187. free(zh.file);
  188. return 0;
  189. }
  190. memset(&zh, 0, sizeof(zh));
  191. if(!header(bin, &zh))
  192. return ok;
  193. hcrc = zh.crc;
  194. hcsize = zh.csize;
  195. huncsize = zh.uncsize;
  196. wlen = 0;
  197. rlen = 0;
  198. crc = 0;
  199. wbad = 0;
  200. if(zh.meth == 0){
  201. if(!copyout(-1, bin, zh.csize))
  202. error("reading data for %s failed: %r", zh.file);
  203. }else if(zh.meth == 8){
  204. off = Boffset(bin);
  205. err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
  206. if(err != FlateOk)
  207. error("inflate %s failed: %s", zh.file, flateerr(err));
  208. rlen = Boffset(bin) - off;
  209. }else
  210. error("can't handle compression method %d for %s", zh.meth, zh.file);
  211. trailer(bin, &zh);
  212. if(wantFile(zh.file)){
  213. if(verbose)
  214. print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
  215. else
  216. print("%s\n", zh.file);
  217. if(verbose > 1){
  218. print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
  219. print("\tflags %x\n", zh.flags);
  220. print("\tmethod %d\n", zh.meth);
  221. print("\tmod time %d\n", zh.modtime);
  222. print("\tmod date %d\n", zh.moddate);
  223. print("\tcrc %lux\n", zh.crc);
  224. print("\tcompressed size %lud\n", zh.csize);
  225. print("\tuncompressed size %lud\n", zh.uncsize);
  226. if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
  227. print("\theader crc %lux\n", zh.crc);
  228. print("\theader compressed size %lud\n", zh.csize);
  229. print("\theader uncompressed size %lud\n", zh.uncsize);
  230. }
  231. }
  232. }
  233. if(zh.crc != crc)
  234. error("crc mismatch for %s", zh.file);
  235. if(zh.uncsize != wlen)
  236. error("output size mismatch for %s", zh.file);
  237. if(zh.csize != rlen)
  238. error("input size mismatch for %s", zh.file);
  239. free(zh.file);
  240. zh.file = nil;
  241. }
  242. }
  243. /*
  244. * extract files using the info in the central directory structure
  245. */
  246. static int
  247. unzip(Biobuf *bin, char *file)
  248. {
  249. ZipHead zh;
  250. vlong off;
  251. int ok, eok, entries;
  252. entries = findCDir(bin, file);
  253. if(entries < 0)
  254. return 0;
  255. ok = 1;
  256. while(entries-- > 0){
  257. if(setjmp(zjmp)){
  258. free(zh.file);
  259. return 0;
  260. }
  261. memset(&zh, 0, sizeof(zh));
  262. if(!cheader(bin, &zh))
  263. return ok;
  264. off = Boffset(bin);
  265. if(wantFile(zh.file)){
  266. if(Bseek(bin, zh.off, 0) < 0){
  267. fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
  268. ok = 0;
  269. }else{
  270. eok = unzipEntry(bin, &zh);
  271. if(eok <= 0){
  272. fprint(2, "unzip: skipping %s\n", zh.file);
  273. ok = 0;
  274. }
  275. }
  276. }
  277. free(zh.file);
  278. zh.file = nil;
  279. if(Bseek(bin, off, 0) < 0){
  280. fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
  281. return 0;
  282. }
  283. }
  284. return ok;
  285. }
  286. /*
  287. * extract files using the info the "local file headers"
  288. */
  289. static int
  290. sunzip(Biobuf *bin)
  291. {
  292. int eok;
  293. for(;;){
  294. eok = unzipEntry(bin, nil);
  295. if(eok == 0)
  296. return 1;
  297. if(eok < 0)
  298. return 0;
  299. }
  300. }
  301. static int mkdirs(char *);
  302. /*
  303. * if any directories leading up to path don't exist, create them.
  304. * modifies but restores path.
  305. */
  306. static int
  307. mkpdirs(char *path)
  308. {
  309. int rv = 0;
  310. char *sl = strrchr(path, '/');
  311. print("%s\n", path);
  312. if (sl != nil) {
  313. *sl = '\0';
  314. rv = mkdirs(path);
  315. *sl = '/';
  316. }
  317. return rv;
  318. }
  319. /*
  320. * if path or any directories leading up to it don't exist, create them.
  321. * modifies but restores path.
  322. */
  323. static int
  324. mkdirs(char *path)
  325. {
  326. int fd;
  327. if (access(path, AEXIST) >= 0)
  328. return 0;
  329. /* make presumed-missing intermediate directories */
  330. if (mkpdirs(path) < 0)
  331. return -1;
  332. /* make final directory */
  333. fd = create(path, OREAD, 0755|DMDIR);
  334. if (fd < 0)
  335. /*
  336. * we may have lost a race; if the directory now exists,
  337. * it's okay.
  338. */
  339. return access(path, AEXIST) < 0? -1: 0;
  340. close(fd);
  341. return 0;
  342. }
  343. /*
  344. * extracts a single entry from a zip file
  345. * czh is the optional corresponding central directory entry
  346. */
  347. static int
  348. unzipEntry(Biobuf *bin, ZipHead *czh)
  349. {
  350. Dir *d;
  351. ZipHead zh;
  352. char *p;
  353. vlong off;
  354. int fd, isdir, ok, err;
  355. zh.file = nil;
  356. if(setjmp(zjmp)){
  357. delfile = nil;
  358. free(zh.file);
  359. return -1;
  360. }
  361. memset(&zh, 0, sizeof(zh));
  362. if(!header(bin, &zh))
  363. return 0;
  364. ok = 1;
  365. isdir = 0;
  366. fd = -1;
  367. if(wantFile(zh.file)){
  368. if(verbose)
  369. fprint(2, "extracting %s\n", zh.file);
  370. if(czh != nil && czh->extos == ZDos){
  371. isdir = czh->eattr & ZDDir;
  372. if(isdir && zh.uncsize != 0)
  373. fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
  374. }
  375. if(zh.meth == 0 && zh.uncsize == 0){
  376. p = strchr(zh.file, '\0');
  377. if(p > zh.file && p[-1] == '/')
  378. isdir = 1;
  379. }
  380. if(stdout){
  381. if(ok && !isdir)
  382. fd = 1;
  383. }else if(isdir){
  384. fd = create(zh.file, OREAD, DMDIR | 0775);
  385. if(fd < 0){
  386. d = dirstat(zh.file);
  387. if(d == nil || (d->mode & DMDIR) != DMDIR){
  388. fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
  389. ok = 0;
  390. }
  391. free(d);
  392. }
  393. }else if(ok){
  394. if(autodir)
  395. mkpdirs(zh.file);
  396. fd = create(zh.file, OWRITE, 0664);
  397. if(fd < 0){
  398. fprint(2, "unzip: can't create %s: %r\n", zh.file);
  399. ok = 0;
  400. }else
  401. delfile = zh.file;
  402. }
  403. }
  404. wlen = 0;
  405. rlen = 0;
  406. crc = 0;
  407. wbad = 0;
  408. if(zh.meth == 0){
  409. if(!copyout(fd, bin, zh.csize))
  410. error("copying data for %s failed: %r", zh.file);
  411. }else if(zh.meth == 8){
  412. off = Boffset(bin);
  413. err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
  414. if(err != FlateOk)
  415. error("inflate failed: %s", flateerr(err));
  416. rlen = Boffset(bin) - off;
  417. }else
  418. error("can't handle compression method %d for %s", zh.meth, zh.file);
  419. trailer(bin, &zh);
  420. if(zh.crc != crc)
  421. error("crc mismatch for %s", zh.file);
  422. if(zh.uncsize != wlen)
  423. error("output size mismatch for %s", zh.file);
  424. if(zh.csize != rlen)
  425. error("input size mismatch for %s", zh.file);
  426. delfile = nil;
  427. free(zh.file);
  428. if(fd >= 0 && !stdout){
  429. if(settimes){
  430. d = dirfstat(fd);
  431. if(d != nil){
  432. d->mtime = msdos2time(zh.modtime, zh.moddate);
  433. if(d->mtime)
  434. dirfwstat(fd, d);
  435. }
  436. }
  437. close(fd);
  438. }
  439. return ok;
  440. }
  441. static int
  442. wantFile(char *file)
  443. {
  444. int i, n;
  445. if(nwant == 0)
  446. return 1;
  447. for(i = 0; i < nwant; i++){
  448. if(strcmp(want[i], file) == 0)
  449. return 1;
  450. n = strlen(want[i]);
  451. if(strncmp(want[i], file, n) == 0 && file[n] == '/')
  452. return 1;
  453. }
  454. return 0;
  455. }
  456. /*
  457. * find the start of the central directory
  458. * returns the number of entries in the directory,
  459. * or -1 if there was an error
  460. */
  461. static int
  462. findCDir(Biobuf *bin, char *file)
  463. {
  464. vlong ecoff;
  465. long off, size, m;
  466. int entries, zclen, dn, ds, de;
  467. ecoff = Bseek(bin, -ZECHeadSize, 2);
  468. if(ecoff < 0){
  469. fprint(2, "unzip: can't seek to contents of %s\n", file);
  470. longjmp(seekjmp, 1);
  471. return -1;
  472. }
  473. if(setjmp(zjmp))
  474. return -1;
  475. if((m=get4(bin)) != ZECHeader){
  476. fprint(2, "unzip: bad magic number for table of contents of %s: %#.8lx\n", file, m);
  477. longjmp(seekjmp, 1);
  478. return -1;
  479. }
  480. dn = get2(bin);
  481. ds = get2(bin);
  482. de = get2(bin);
  483. entries = get2(bin);
  484. size = get4(bin);
  485. off = get4(bin);
  486. zclen = get2(bin);
  487. while(zclen-- > 0)
  488. get1(bin);
  489. if(verbose > 1){
  490. print("table starts at %ld for %ld bytes\n", off, size);
  491. if(ecoff - size != off)
  492. print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
  493. if(dn || ds || de != entries)
  494. print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
  495. }
  496. if(Bseek(bin, off, 0) != off){
  497. fprint(2, "unzip: can't seek to start of contents of %s\n", file);
  498. longjmp(seekjmp, 1);
  499. return -1;
  500. }
  501. return entries;
  502. }
  503. static int
  504. cheader(Biobuf *bin, ZipHead *zh)
  505. {
  506. ulong v;
  507. int flen, xlen, fclen;
  508. v = get4(bin);
  509. if(v != ZCHeader){
  510. if(v == ZECHeader)
  511. return 0;
  512. error("bad magic number %lux", v);
  513. }
  514. zh->madevers = get1(bin);
  515. zh->madeos = get1(bin);
  516. zh->extvers = get1(bin);
  517. zh->extos = get1(bin);
  518. zh->flags = get2(bin);
  519. zh->meth = get2(bin);
  520. zh->modtime = get2(bin);
  521. zh->moddate = get2(bin);
  522. zh->crc = get4(bin);
  523. zh->csize = get4(bin);
  524. zh->uncsize = get4(bin);
  525. flen = get2(bin);
  526. xlen = get2(bin);
  527. fclen = get2(bin);
  528. get2(bin); /* disk number start */
  529. zh->iattr = get2(bin);
  530. zh->eattr = get4(bin);
  531. zh->off = get4(bin);
  532. zh->file = getname(bin, flen);
  533. while(xlen-- > 0)
  534. get1(bin);
  535. while(fclen-- > 0)
  536. get1(bin);
  537. return 1;
  538. }
  539. static int
  540. header(Biobuf *bin, ZipHead *zh)
  541. {
  542. ulong v;
  543. int flen, xlen;
  544. v = get4(bin);
  545. if(v != ZHeader){
  546. if(v == ZCHeader)
  547. return 0;
  548. error("bad magic number %lux at %lld", v, Boffset(bin)-4);
  549. }
  550. zh->extvers = get1(bin);
  551. zh->extos = get1(bin);
  552. zh->flags = get2(bin);
  553. zh->meth = get2(bin);
  554. zh->modtime = get2(bin);
  555. zh->moddate = get2(bin);
  556. zh->crc = get4(bin);
  557. zh->csize = get4(bin);
  558. zh->uncsize = get4(bin);
  559. flen = get2(bin);
  560. xlen = get2(bin);
  561. zh->file = getname(bin, flen);
  562. while(xlen-- > 0)
  563. get1(bin);
  564. return 1;
  565. }
  566. static void
  567. trailer(Biobuf *bin, ZipHead *zh)
  568. {
  569. if(zh->flags & ZTrailInfo){
  570. zh->crc = get4(bin);
  571. zh->csize = get4(bin);
  572. zh->uncsize = get4(bin);
  573. }
  574. }
  575. static char*
  576. getname(Biobuf *bin, int len)
  577. {
  578. char *s;
  579. int i, c;
  580. s = emalloc(len + 1);
  581. for(i = 0; i < len; i++){
  582. c = get1(bin);
  583. if(lower)
  584. c = tolower(c);
  585. s[i] = c;
  586. }
  587. s[i] = '\0';
  588. return s;
  589. }
  590. static int
  591. crcwrite(void *out, void *buf, int n)
  592. {
  593. int fd, nw;
  594. wlen += n;
  595. crc = blockcrc(crctab, crc, buf, n);
  596. fd = (int)(uintptr)out;
  597. if(fd < 0)
  598. return n;
  599. nw = write(fd, buf, n);
  600. if(nw != n)
  601. wbad = 1;
  602. return nw;
  603. }
  604. static int
  605. copyout(int ofd, Biobuf *bin, long len)
  606. {
  607. char buf[BufSize];
  608. int n;
  609. for(; len > 0; len -= n){
  610. n = len;
  611. if(n > BufSize)
  612. n = BufSize;
  613. n = Bread(bin, buf, n);
  614. if(n <= 0)
  615. return 0;
  616. rlen += n;
  617. if(crcwrite((void*)ofd, buf, n) != n)
  618. return 0;
  619. }
  620. return 1;
  621. }
  622. static ulong
  623. get4(Biobuf *b)
  624. {
  625. ulong v;
  626. int i, c;
  627. v = 0;
  628. for(i = 0; i < 4; i++){
  629. c = Bgetc(b);
  630. if(c < 0)
  631. error("unexpected eof reading file information");
  632. v |= c << (i * 8);
  633. }
  634. return v;
  635. }
  636. static int
  637. get2(Biobuf *b)
  638. {
  639. int i, c, v;
  640. v = 0;
  641. for(i = 0; i < 2; i++){
  642. c = Bgetc(b);
  643. if(c < 0)
  644. error("unexpected eof reading file information");
  645. v |= c << (i * 8);
  646. }
  647. return v;
  648. }
  649. static int
  650. get1(Biobuf *b)
  651. {
  652. int c;
  653. c = Bgetc(b);
  654. if(c < 0)
  655. error("unexpected eof reading file information");
  656. return c;
  657. }
  658. static long
  659. msdos2time(int time, int date)
  660. {
  661. Tm tm;
  662. tm.hour = time >> 11;
  663. tm.min = (time >> 5) & 63;
  664. tm.sec = (time & 31) << 1;
  665. tm.year = 80 + (date >> 9);
  666. tm.mon = ((date >> 5) & 15) - 1;
  667. tm.mday = date & 31;
  668. tm.zone[0] = '\0';
  669. tm.yday = 0;
  670. return tm2sec(&tm);
  671. }
  672. static void*
  673. emalloc(ulong n)
  674. {
  675. void *p;
  676. p = malloc(n);
  677. if(p == nil)
  678. sysfatal("out of memory");
  679. return p;
  680. }
  681. static void
  682. error(char *fmt, ...)
  683. {
  684. va_list arg;
  685. fprint(2, "unzip: ");
  686. va_start(arg, fmt);
  687. vfprint(2, fmt, arg);
  688. va_end(arg);
  689. fprint(2, "\n");
  690. if(delfile != nil){
  691. fprint(2, "unzip: removing output file %s\n", delfile);
  692. remove(delfile);
  693. delfile = nil;
  694. }
  695. longjmp(zjmp, 1);
  696. }