extract.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /* $Source: /u/mark/src/pax/RCS/extract.c,v $
  2. *
  3. * $Revision: 1.3 $
  4. *
  5. * extract.c - Extract files from a tar or cpio archive.
  6. *
  7. * DESCRIPTION
  8. *
  9. * AUTHOR
  10. *
  11. * Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
  12. *
  13. * Sponsored by The USENIX Association for public distribution.
  14. *
  15. * Copyright (c) 1989 Mark H. Colburn.
  16. * All rights reserved.
  17. *
  18. * Redistribution and use in source and binary forms are permitted
  19. * provided that the above copyright notice is duplicated in all such
  20. * forms and that any documentation, advertising materials, and other
  21. * materials related to such distribution and use acknowledge that the
  22. * software was developed by Mark H. Colburn and sponsored by The
  23. * USENIX Association.
  24. *
  25. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  26. * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  27. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  28. *
  29. * $Log: extract.c,v $
  30. * Revision 1.3 89/02/12 10:29:43 mark
  31. * Fixed misspelling of Replstr
  32. *
  33. * Revision 1.2 89/02/12 10:04:24 mark
  34. * 1.2 release fixes
  35. *
  36. * Revision 1.1 88/12/23 18:02:07 mark
  37. * Initial revision
  38. *
  39. */
  40. #ifndef lint
  41. static char *ident = "$Id: extract.c,v 1.3 89/02/12 10:29:43 mark Exp Locker: mark $";
  42. static char *copyright = "Copyright (c) 1989 Mark H. Colburn.\nAll rights reserved.\n";
  43. #endif /* ! lint */
  44. /* Headers */
  45. #include "pax.h"
  46. /* Defines */
  47. /*
  48. * Swap bytes.
  49. */
  50. #define SWAB(n) ((((ushort)(n) >> 8) & 0xff) | (((ushort)(n) << 8) & 0xff00))
  51. /* Function Prototypes */
  52. #ifdef __STDC__
  53. static int inbinary(char *, char *, Stat *);
  54. static int inascii(char *, char *, Stat *);
  55. static int inswab(char *, char *, Stat *);
  56. static int readtar(char *, Stat *);
  57. static int readcpio(char *, Stat *);
  58. #else /* !__STDC__ */
  59. static int inbinary();
  60. static int inascii();
  61. static int inswab();
  62. static int readtar();
  63. static int readcpio();
  64. #endif /* __STDC__ */
  65. /* read_archive - read in an archive
  66. *
  67. * DESCRIPTION
  68. *
  69. * Read_archive is the central entry point for reading archives.
  70. * Read_archive determines the proper archive functions to call
  71. * based upon the archive type being processed.
  72. *
  73. * RETURNS
  74. *
  75. */
  76. #ifdef __STDC__
  77. int read_archive(void)
  78. #else
  79. int read_archive()
  80. #endif
  81. {
  82. Stat sb;
  83. char name[PATH_MAX + 1];
  84. int match;
  85. int pad;
  86. name_gather(); /* get names from command line */
  87. name[0] = '\0';
  88. while (get_header(name, &sb) == 0) {
  89. match = name_match(name) ^ f_reverse_match;
  90. if (f_list) { /* only wanted a table of contents */
  91. if (match) {
  92. print_entry(name, &sb);
  93. }
  94. if (((ar_format == TAR)
  95. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  96. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  97. warn(name, "File data is corrupt");
  98. }
  99. } else if (match) {
  100. if (rplhead != (Replstr *)NULL) {
  101. rpl_name(name);
  102. if (strlen(name) == 0) {
  103. continue;
  104. }
  105. }
  106. if (get_disposition("extract", name) ||
  107. get_newname(name, sizeof(name))) {
  108. /* skip file... */
  109. if (((ar_format == TAR)
  110. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  111. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  112. warn(name, "File data is corrupt");
  113. }
  114. continue;
  115. }
  116. if (inentry(name, &sb) < 0) {
  117. warn(name, "File data is corrupt");
  118. }
  119. if (f_verbose) {
  120. print_entry(name, &sb);
  121. }
  122. if (ar_format == TAR && sb.sb_nlink > 1) {
  123. /*
  124. * This kludge makes sure that the link table is cleared
  125. * before attempting to process any other links.
  126. */
  127. if (sb.sb_nlink > 1) {
  128. linkfrom(name, &sb);
  129. }
  130. }
  131. if (ar_format == TAR && (pad = sb.sb_size % BLOCKSIZE) != 0) {
  132. pad = BLOCKSIZE - pad;
  133. buf_skip((OFFSET) pad);
  134. }
  135. } else {
  136. if (((ar_format == TAR)
  137. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  138. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  139. warn(name, "File data is corrupt");
  140. }
  141. }
  142. }
  143. close_archive();
  144. }
  145. /* get_header - figures which type of header needs to be read.
  146. *
  147. * DESCRIPTION
  148. *
  149. * This is merely a single entry point for the two types of archive
  150. * headers which are supported. The correct header is selected
  151. * depending on the archive type.
  152. *
  153. * PARAMETERS
  154. *
  155. * char *name - name of the file (passed to header routine)
  156. * Stat *asb - Stat block for the file (passed to header routine)
  157. *
  158. * RETURNS
  159. *
  160. * Returns the value which was returned by the proper header
  161. * function.
  162. */
  163. #ifdef __STDC__
  164. int get_header(char *name, Stat *asb)
  165. #else
  166. int get_header(name, asb)
  167. char *name;
  168. Stat *asb;
  169. #endif
  170. {
  171. if (ar_format == TAR) {
  172. return(readtar(name, asb));
  173. } else {
  174. return(readcpio(name, asb));
  175. }
  176. }
  177. /* readtar - read a tar header
  178. *
  179. * DESCRIPTION
  180. *
  181. * Tar_head read a tar format header from the archive. The name
  182. * and asb parameters are modified as appropriate for the file listed
  183. * in the header. Name is assumed to be a pointer to an array of
  184. * at least PATH_MAX bytes.
  185. *
  186. * PARAMETERS
  187. *
  188. * char *name - name of the file for which the header is
  189. * for. This is modified and passed back to
  190. * the caller.
  191. * Stat *asb - Stat block for the file for which the header
  192. * is for. The fields of the stat structure are
  193. * extracted from the archive header. This is
  194. * also passed back to the caller.
  195. *
  196. * RETURNS
  197. *
  198. * Returns 0 if a valid header was found, or -1 if EOF is
  199. * encountered.
  200. */
  201. #ifdef __STDC__
  202. static int readtar(char *name, Stat *asb)
  203. #else
  204. static int readtar(name, asb)
  205. char *name;
  206. Stat *asb;
  207. #endif
  208. {
  209. int status = 3; /* Initial status at start of archive */
  210. static int prev_status;
  211. for (;;) {
  212. prev_status = status;
  213. status = read_header(name, asb);
  214. switch (status) {
  215. case 1: /* Valid header */
  216. return(0);
  217. case 0: /* Invalid header */
  218. switch (prev_status) {
  219. case 3: /* Error on first record */
  220. warn(ar_file, "This doesn't look like a tar archive");
  221. /* FALLTHRU */
  222. case 2: /* Error after record of zeroes */
  223. case 1: /* Error after header rec */
  224. warn(ar_file, "Skipping to next file...");
  225. /* FALLTHRU */
  226. default:
  227. case 0: /* Error after error */
  228. break;
  229. }
  230. break;
  231. case 2: /* Record of zeroes */
  232. case EOF: /* End of archive */
  233. default:
  234. return(-1);
  235. }
  236. }
  237. }
  238. /* readcpio - read a CPIO header
  239. *
  240. * DESCRIPTION
  241. *
  242. * Read in a cpio header. Understands how to determine and read ASCII,
  243. * binary and byte-swapped binary headers. Quietly translates
  244. * old-fashioned binary cpio headers (and arranges to skip the possible
  245. * alignment byte). Returns zero if successful, -1 upon archive trailer.
  246. *
  247. * PARAMETERS
  248. *
  249. * char *name - name of the file for which the header is
  250. * for. This is modified and passed back to
  251. * the caller.
  252. * Stat *asb - Stat block for the file for which the header
  253. * is for. The fields of the stat structure are
  254. * extracted from the archive header. This is
  255. * also passed back to the caller.
  256. *
  257. * RETURNS
  258. *
  259. * Returns 0 if a valid header was found, or -1 if EOF is
  260. * encountered.
  261. */
  262. #ifdef __STDC__
  263. static int readcpio(char *name, Stat *asb)
  264. #else
  265. static int readcpio(name, asb)
  266. char *name;
  267. Stat *asb;
  268. #endif
  269. {
  270. OFFSET skipped;
  271. char magic[M_STRLEN];
  272. static int align;
  273. if (align > 0) {
  274. buf_skip((OFFSET) align);
  275. }
  276. align = 0;
  277. for (;;) {
  278. buf_read(magic, M_STRLEN);
  279. skipped = 0;
  280. while ((align = inascii(magic, name, asb)) < 0
  281. && (align = inbinary(magic, name, asb)) < 0
  282. && (align = inswab(magic, name, asb)) < 0) {
  283. if (++skipped == 1) {
  284. if (total - sizeof(magic) == 0) {
  285. fatal("Unrecognizable archive");
  286. }
  287. warnarch("Bad magic number", (OFFSET) sizeof(magic));
  288. if (name[0]) {
  289. warn(name, "May be corrupt");
  290. }
  291. }
  292. memcpy(magic, magic + 1, sizeof(magic) - 1);
  293. buf_read(magic + sizeof(magic) - 1, 1);
  294. }
  295. if (skipped) {
  296. warnarch("Apparently resynchronized", (OFFSET) sizeof(magic));
  297. warn(name, "Continuing");
  298. }
  299. if (strcmp(name, TRAILER) == 0) {
  300. return (-1);
  301. }
  302. if (nameopt(name) >= 0) {
  303. break;
  304. }
  305. buf_skip((OFFSET) asb->sb_size + align);
  306. }
  307. #ifdef S_IFLNK
  308. if ((asb->sb_mode & S_IFMT) == S_IFLNK) {
  309. if (buf_read(asb->sb_link, (uint) asb->sb_size) < 0) {
  310. warn(name, "Corrupt symbolic link");
  311. return (readcpio(name, asb));
  312. }
  313. asb->sb_link[asb->sb_size] = '\0';
  314. asb->sb_size = 0;
  315. }
  316. #endif /* S_IFLNK */
  317. /* destroy absolute pathnames for security reasons */
  318. if (name[0] == '/') {
  319. if (name[1]) {
  320. while (name[0] = name[1]) {
  321. ++name;
  322. }
  323. } else {
  324. name[0] = '.';
  325. }
  326. }
  327. asb->sb_atime = asb->sb_ctime = asb->sb_mtime;
  328. if (asb->sb_nlink > 1) {
  329. linkto(name, asb);
  330. }
  331. return (0);
  332. }
  333. /* inswab - read a reversed by order binary header
  334. *
  335. * DESCRIPTIONS
  336. *
  337. * Reads a byte-swapped CPIO binary archive header
  338. *
  339. * PARMAMETERS
  340. *
  341. * char *magic - magic number to match
  342. * char *name - name of the file which is stored in the header.
  343. * (modified and passed back to caller).
  344. * Stat *asb - stat block for the file (modified and passed back
  345. * to the caller).
  346. *
  347. *
  348. * RETURNS
  349. *
  350. * Returns the number of trailing alignment bytes to skip; -1 if
  351. * unsuccessful.
  352. *
  353. */
  354. #ifdef __STDC__
  355. static int inswab(char *magic, char *name, Stat *asb)
  356. #else
  357. static int inswab(magic, name, asb)
  358. char *magic;
  359. char *name;
  360. Stat *asb;
  361. #endif
  362. {
  363. ushort namesize;
  364. uint namefull;
  365. Binary binary;
  366. if (*((ushort *) magic) != SWAB(M_BINARY)) {
  367. return (-1);
  368. }
  369. memcpy((char *) &binary,
  370. magic + sizeof(ushort),
  371. M_STRLEN - sizeof(ushort));
  372. if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
  373. sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
  374. warnarch("Corrupt swapped header",
  375. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  376. return (-1);
  377. }
  378. asb->sb_dev = (dev_t) SWAB(binary.b_dev);
  379. asb->sb_ino = (ino_t) SWAB(binary.b_ino);
  380. asb->sb_mode = SWAB(binary.b_mode);
  381. asb->sb_uid = SWAB(binary.b_uid);
  382. asb->sb_gid = SWAB(binary.b_gid);
  383. asb->sb_nlink = SWAB(binary.b_nlink);
  384. #ifndef _POSIX_SOURCE
  385. asb->sb_rdev = (dev_t) SWAB(binary.b_rdev);
  386. #endif
  387. asb->sb_mtime = SWAB(binary.b_mtime[0]) << 16 | SWAB(binary.b_mtime[1]);
  388. asb->sb_size = SWAB(binary.b_size[0]) << 16 | SWAB(binary.b_size[1]);
  389. if ((namesize = SWAB(binary.b_name)) == 0 || namesize >= PATH_MAX) {
  390. warnarch("Bad swapped pathname length",
  391. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  392. return (-1);
  393. }
  394. if (buf_read(name, namefull = namesize + namesize % 2) < 0) {
  395. warnarch("Corrupt swapped pathname", (OFFSET) namefull);
  396. return (-1);
  397. }
  398. if (name[namesize - 1] != '\0') {
  399. warnarch("Bad swapped pathname", (OFFSET) namefull);
  400. return (-1);
  401. }
  402. return (asb->sb_size % 2);
  403. }
  404. /* inascii - read in an ASCII cpio header
  405. *
  406. * DESCRIPTION
  407. *
  408. * Reads an ASCII format cpio header
  409. *
  410. * PARAMETERS
  411. *
  412. * char *magic - magic number to match
  413. * char *name - name of the file which is stored in the header.
  414. * (modified and passed back to caller).
  415. * Stat *asb - stat block for the file (modified and passed back
  416. * to the caller).
  417. *
  418. * RETURNS
  419. *
  420. * Returns zero if successful; -1 otherwise. Assumes that the entire
  421. * magic number has been read.
  422. */
  423. #ifdef __STDC__
  424. static int inascii(char *magic, char *name, Stat *asb)
  425. #else
  426. static int inascii(magic, name, asb)
  427. char *magic;
  428. char *name;
  429. Stat *asb;
  430. #endif
  431. {
  432. uint namelen;
  433. char header[H_STRLEN + 1];
  434. #ifdef _POSIX_SOURCE
  435. dev_t dummyrdev;
  436. #endif
  437. if (strncmp(magic, M_ASCII, M_STRLEN) != 0) {
  438. return (-1);
  439. }
  440. if (buf_read(header, H_STRLEN) < 0) {
  441. warnarch("Corrupt ASCII header", (OFFSET) H_STRLEN);
  442. return (-1);
  443. }
  444. header[H_STRLEN] = '\0';
  445. if (sscanf(header, H_SCAN, &asb->sb_dev,
  446. &asb->sb_ino, &asb->sb_mode, &asb->sb_uid,
  447. #ifdef _POSIX_SOURCE
  448. &asb->sb_gid, &asb->sb_nlink, &dummyrdev,
  449. #else
  450. &asb->sb_gid, &asb->sb_nlink, &asb->sb_rdev,
  451. #endif
  452. &asb->sb_mtime, &namelen, &asb->sb_size) != H_COUNT) {
  453. warnarch("Bad ASCII header", (OFFSET) H_STRLEN);
  454. return (-1);
  455. }
  456. if (namelen == 0 || namelen >= PATH_MAX) {
  457. warnarch("Bad ASCII pathname length", (OFFSET) H_STRLEN);
  458. return (-1);
  459. }
  460. if (buf_read(name, namelen) < 0) {
  461. warnarch("Corrupt ASCII pathname", (OFFSET) namelen);
  462. return (-1);
  463. }
  464. if (name[namelen - 1] != '\0') {
  465. warnarch("Bad ASCII pathname", (OFFSET) namelen);
  466. return (-1);
  467. }
  468. return (0);
  469. }
  470. /* inbinary - read a binary header
  471. *
  472. * DESCRIPTION
  473. *
  474. * Reads a CPIO format binary header.
  475. *
  476. * PARAMETERS
  477. *
  478. * char *magic - magic number to match
  479. * char *name - name of the file which is stored in the header.
  480. * (modified and passed back to caller).
  481. * Stat *asb - stat block for the file (modified and passed back
  482. * to the caller).
  483. *
  484. * RETURNS
  485. *
  486. * Returns the number of trailing alignment bytes to skip; -1 if
  487. * unsuccessful.
  488. */
  489. #ifdef __STDC__
  490. static int inbinary(char *magic, char *name, Stat *asb)
  491. #else
  492. static int inbinary(magic, name, asb)
  493. char *magic;
  494. char *name;
  495. Stat *asb;
  496. #endif
  497. {
  498. uint namefull;
  499. Binary binary;
  500. if (*((ushort *) magic) != M_BINARY) {
  501. return (-1);
  502. }
  503. memcpy((char *) &binary,
  504. magic + sizeof(ushort),
  505. M_STRLEN - sizeof(ushort));
  506. if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
  507. sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
  508. warnarch("Corrupt binary header",
  509. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  510. return (-1);
  511. }
  512. asb->sb_dev = binary.b_dev;
  513. asb->sb_ino = binary.b_ino;
  514. asb->sb_mode = binary.b_mode;
  515. asb->sb_uid = binary.b_uid;
  516. asb->sb_gid = binary.b_gid;
  517. asb->sb_nlink = binary.b_nlink;
  518. #ifndef _POSIX_SOURCE
  519. asb->sb_rdev = binary.b_rdev;
  520. #endif
  521. asb->sb_mtime = binary.b_mtime[0] << 16 | binary.b_mtime[1];
  522. asb->sb_size = binary.b_size[0] << 16 | binary.b_size[1];
  523. if (binary.b_name == 0 || binary.b_name >= PATH_MAX) {
  524. warnarch("Bad binary pathname length",
  525. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  526. return (-1);
  527. }
  528. if (buf_read(name, namefull = binary.b_name + binary.b_name % 2) < 0) {
  529. warnarch("Corrupt binary pathname", (OFFSET) namefull);
  530. return (-1);
  531. }
  532. if (name[binary.b_name - 1] != '\0') {
  533. warnarch("Bad binary pathname", (OFFSET) namefull);
  534. return (-1);
  535. }
  536. return (asb->sb_size % 2);
  537. }