diff.c 28 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Mini diff implementation for busybox, adapted from OpenBSD diff.
  4. *
  5. * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
  6. * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
  7. *
  8. * Sponsored in part by the Defense Advanced Research Projects
  9. * Agency (DARPA) and Air Force Research Laboratory, Air Force
  10. * Materiel Command, USAF, under agreement number F39502-99-1-0512.
  11. *
  12. * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  13. */
  14. #include "busybox.h"
  15. #define FSIZE_MAX 32768
  16. /*
  17. * Output flags
  18. */
  19. #define D_HEADER 1 /* Print a header/footer between files */
  20. #define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
  21. #define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
  22. /*
  23. * Status values for print_status() and diffreg() return values
  24. * Guide:
  25. * D_SAME - files are the same
  26. * D_DIFFER - files differ
  27. * D_BINARY - binary files differ
  28. * D_COMMON - subdirectory common to both dirs
  29. * D_ONLY - file only exists in one dir
  30. * D_MISMATCH1 - path1 a dir, path2 a file
  31. * D_MISMATCH2 - path1 a file, path2 a dir
  32. * D_ERROR - error occurred
  33. * D_SKIPPED1 - skipped path1 as it is a special file
  34. * D_SKIPPED2 - skipped path2 as it is a special file
  35. */
  36. #define D_SAME 0
  37. #define D_DIFFER (1<<0)
  38. #define D_BINARY (1<<1)
  39. #define D_COMMON (1<<2)
  40. #define D_ONLY (1<<3)
  41. #define D_MISMATCH1 (1<<4)
  42. #define D_MISMATCH2 (1<<5)
  43. #define D_ERROR (1<<6)
  44. #define D_SKIPPED1 (1<<7)
  45. #define D_SKIPPED2 (1<<8)
  46. /* Command line options */
  47. static unsigned long cmd_flags;
  48. #define FLAG_a (1<<0)
  49. #define FLAG_b (1<<1)
  50. #define FLAG_d (1<<2)
  51. #define FLAG_i (1<<3)
  52. #define FLAG_L (1<<4)
  53. #define FLAG_N (1<<5)
  54. #define FLAG_q (1<<6)
  55. #define FLAG_r (1<<7)
  56. #define FLAG_s (1<<8)
  57. #define FLAG_S (1<<9)
  58. #define FLAG_t (1<<10)
  59. #define FLAG_T (1<<11)
  60. #define FLAG_U (1<<12)
  61. #define FLAG_w (1<<13)
  62. /* XXX: FIXME: the following variables should be static, but gcc currently
  63. * creates a much bigger object if we do this. */
  64. int context, status;
  65. char *start, *label[2];
  66. struct stat stb1, stb2;
  67. char **dl;
  68. static int dl_count = 0;
  69. struct cand {
  70. int x;
  71. int y;
  72. int pred;
  73. };
  74. struct line {
  75. int serial;
  76. int value;
  77. } *file[2];
  78. /*
  79. * The following struct is used to record change information
  80. * doing a "context" or "unified" diff. (see routine "change" to
  81. * understand the highly mnemonic field names)
  82. */
  83. struct context_vec {
  84. int a; /* start line in old file */
  85. int b; /* end line in old file */
  86. int c; /* start line in new file */
  87. int d; /* end line in new file */
  88. };
  89. static int *J; /* will be overlaid on class */
  90. static int *class; /* will be overlaid on file[0] */
  91. static int *klist; /* will be overlaid on file[0] after class */
  92. static int *member; /* will be overlaid on file[1] */
  93. static int clen;
  94. static int len[2];
  95. static int pref, suff; /* length of prefix and suffix */
  96. static int slen[2];
  97. static int anychange;
  98. static long *ixnew; /* will be overlaid on file[1] */
  99. static long *ixold; /* will be overlaid on klist */
  100. static struct cand *clist; /* merely a free storage pot for candidates */
  101. static int clistlen; /* the length of clist */
  102. static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
  103. static struct context_vec *context_vec_start;
  104. static struct context_vec *context_vec_end;
  105. static struct context_vec *context_vec_ptr;
  106. static void print_only(const char *path, size_t dirlen, const char *entry)
  107. {
  108. if (dirlen > 1)
  109. dirlen--;
  110. printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
  111. }
  112. static void print_status(int val, char *path1, char *path2, char *entry)
  113. {
  114. const char *const _entry = entry ? entry : "";
  115. char *_path1 = entry ? concat_path_file(path1, _entry) : path1;
  116. char *_path2 = entry ? concat_path_file(path2, _entry) : path2;
  117. switch (val) {
  118. case D_ONLY:
  119. print_only(path1, strlen(path1), entry);
  120. break;
  121. case D_COMMON:
  122. printf("Common subdirectories: %s and %s\n", _path1, _path2);
  123. break;
  124. case D_BINARY:
  125. printf("Binary files %s and %s differ\n", _path1, _path2);
  126. break;
  127. case D_DIFFER:
  128. if (cmd_flags & FLAG_q)
  129. printf("Files %s and %s differ\n", _path1, _path2);
  130. break;
  131. case D_SAME:
  132. if (cmd_flags & FLAG_s)
  133. printf("Files %s and %s are identical\n", _path1, _path2);
  134. break;
  135. case D_MISMATCH1:
  136. printf("File %s is a directory while file %s is a regular file\n",
  137. _path1, _path2);
  138. break;
  139. case D_MISMATCH2:
  140. printf("File %s is a regular file while file %s is a directory\n",
  141. _path1, _path2);
  142. break;
  143. case D_SKIPPED1:
  144. printf("File %s is not a regular file or directory and was skipped\n",
  145. _path1);
  146. break;
  147. case D_SKIPPED2:
  148. printf("File %s is not a regular file or directory and was skipped\n",
  149. _path2);
  150. break;
  151. }
  152. if (entry) {
  153. free(_path1);
  154. free(_path2);
  155. }
  156. }
  157. /*
  158. * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
  159. */
  160. static int readhash(FILE * f)
  161. {
  162. int i, t, space;
  163. int sum;
  164. sum = 1;
  165. space = 0;
  166. if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
  167. if (FLAG_i)
  168. for (i = 0; (t = getc(f)) != '\n'; i++) {
  169. if (t == EOF) {
  170. if (i == 0)
  171. return 0;
  172. break;
  173. }
  174. sum = sum * 127 + t;
  175. } else
  176. for (i = 0; (t = getc(f)) != '\n'; i++) {
  177. if (t == EOF) {
  178. if (i == 0)
  179. return 0;
  180. break;
  181. }
  182. sum = sum * 127 + t;
  183. }
  184. } else {
  185. for (i = 0;;) {
  186. switch (t = getc(f)) {
  187. case '\t':
  188. case '\r':
  189. case '\v':
  190. case '\f':
  191. case ' ':
  192. space++;
  193. continue;
  194. default:
  195. if (space && !(cmd_flags & FLAG_w)) {
  196. i++;
  197. space = 0;
  198. }
  199. sum = sum * 127 + t;
  200. i++;
  201. continue;
  202. case EOF:
  203. if (i == 0)
  204. return 0;
  205. /* FALLTHROUGH */
  206. case '\n':
  207. break;
  208. }
  209. break;
  210. }
  211. }
  212. /*
  213. * There is a remote possibility that we end up with a zero sum.
  214. * Zero is used as an EOF marker, so return 1 instead.
  215. */
  216. return (sum == 0 ? 1 : sum);
  217. }
  218. /*
  219. * Check to see if the given files differ.
  220. * Returns 0 if they are the same, 1 if different, and -1 on error.
  221. */
  222. static int files_differ(FILE * f1, FILE * f2, int flags)
  223. {
  224. char buf1[BUFSIZ], buf2[BUFSIZ];
  225. size_t i, j;
  226. if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size ||
  227. (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
  228. return 1;
  229. while (1) {
  230. i = fread(buf1, 1, sizeof(buf1), f1);
  231. j = fread(buf2, 1, sizeof(buf2), f2);
  232. if (i != j)
  233. return 1;
  234. if (i == 0 && j == 0) {
  235. if (ferror(f1) || ferror(f2))
  236. return 1;
  237. return 0;
  238. }
  239. if (memcmp(buf1, buf2, i) != 0)
  240. return 1;
  241. }
  242. }
  243. static void prepare(int i, FILE * fd, off_t filesize)
  244. {
  245. struct line *p;
  246. int h;
  247. size_t j, sz;
  248. rewind(fd);
  249. sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
  250. if (sz < 100)
  251. sz = 100;
  252. p = xmalloc((sz + 3) * sizeof(struct line));
  253. for (j = 0; (h = readhash(fd));) {
  254. if (j == sz) {
  255. sz = sz * 3 / 2;
  256. p = xrealloc(p, (sz + 3) * sizeof(struct line));
  257. }
  258. p[++j].value = h;
  259. }
  260. len[i] = j;
  261. file[i] = p;
  262. }
  263. static void prune(void)
  264. {
  265. int i, j;
  266. for (pref = 0; pref < len[0] && pref < len[1] &&
  267. file[0][pref + 1].value == file[1][pref + 1].value; pref++);
  268. for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
  269. file[0][len[0] - suff].value == file[1][len[1] - suff].value;
  270. suff++);
  271. for (j = 0; j < 2; j++) {
  272. sfile[j] = file[j] + pref;
  273. slen[j] = len[j] - pref - suff;
  274. for (i = 0; i <= slen[j]; i++)
  275. sfile[j][i].serial = i;
  276. }
  277. }
  278. static void equiv(struct line *a, int n, struct line *b, int m, int *c)
  279. {
  280. int i, j;
  281. i = j = 1;
  282. while (i <= n && j <= m) {
  283. if (a[i].value < b[j].value)
  284. a[i++].value = 0;
  285. else if (a[i].value == b[j].value)
  286. a[i++].value = j;
  287. else
  288. j++;
  289. }
  290. while (i <= n)
  291. a[i++].value = 0;
  292. b[m + 1].value = 0;
  293. j = 0;
  294. while (++j <= m) {
  295. c[j] = -b[j].serial;
  296. while (b[j + 1].value == b[j].value) {
  297. j++;
  298. c[j] = b[j].serial;
  299. }
  300. }
  301. c[j] = -1;
  302. }
  303. static int isqrt(int n)
  304. {
  305. int y, x = 1;
  306. if (n == 0)
  307. return 0;
  308. do {
  309. y = x;
  310. x = n / x;
  311. x += y;
  312. x /= 2;
  313. } while ((x - y) > 1 || (x - y) < -1);
  314. return x;
  315. }
  316. static int newcand(int x, int y, int pred)
  317. {
  318. struct cand *q;
  319. if (clen == clistlen) {
  320. clistlen = clistlen * 11 / 10;
  321. clist = xrealloc(clist, clistlen * sizeof(struct cand));
  322. }
  323. q = clist + clen;
  324. q->x = x;
  325. q->y = y;
  326. q->pred = pred;
  327. return clen++;
  328. }
  329. static int search(int *c, int k, int y)
  330. {
  331. int i, j, l, t;
  332. if (clist[c[k]].y < y) /* quick look for typical case */
  333. return k + 1;
  334. i = 0;
  335. j = k + 1;
  336. while (1) {
  337. l = i + j;
  338. if ((l >>= 1) <= i)
  339. break;
  340. t = clist[c[l]].y;
  341. if (t > y)
  342. j = l;
  343. else if (t < y)
  344. i = l;
  345. else
  346. return l;
  347. }
  348. return l + 1;
  349. }
  350. static int stone(int *a, int n, int *b, int *c)
  351. {
  352. int i, k, y, j, l;
  353. int oldc, tc, oldl;
  354. unsigned int numtries;
  355. #if ENABLE_FEATURE_DIFF_MINIMAL
  356. const unsigned int bound =
  357. (cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
  358. #else
  359. const unsigned int bound = MAX(256, isqrt(n));
  360. #endif
  361. k = 0;
  362. c[0] = newcand(0, 0, 0);
  363. for (i = 1; i <= n; i++) {
  364. j = a[i];
  365. if (j == 0)
  366. continue;
  367. y = -b[j];
  368. oldl = 0;
  369. oldc = c[0];
  370. numtries = 0;
  371. do {
  372. if (y <= clist[oldc].y)
  373. continue;
  374. l = search(c, k, y);
  375. if (l != oldl + 1)
  376. oldc = c[l - 1];
  377. if (l <= k) {
  378. if (clist[c[l]].y <= y)
  379. continue;
  380. tc = c[l];
  381. c[l] = newcand(i, y, oldc);
  382. oldc = tc;
  383. oldl = l;
  384. numtries++;
  385. } else {
  386. c[l] = newcand(i, y, oldc);
  387. k++;
  388. break;
  389. }
  390. } while ((y = b[++j]) > 0 && numtries < bound);
  391. }
  392. return k;
  393. }
  394. static void unravel(int p)
  395. {
  396. struct cand *q;
  397. int i;
  398. for (i = 0; i <= len[0]; i++)
  399. J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
  400. for (q = clist + p; q->y != 0; q = clist + q->pred)
  401. J[q->x + pref] = q->y + pref;
  402. }
  403. static void unsort(struct line *f, int l, int *b)
  404. {
  405. int *a, i;
  406. a = xmalloc((l + 1) * sizeof(int));
  407. for (i = 1; i <= l; i++)
  408. a[f[i].serial] = f[i].value;
  409. for (i = 1; i <= l; i++)
  410. b[i] = a[i];
  411. free(a);
  412. }
  413. static int skipline(FILE * f)
  414. {
  415. int i, c;
  416. for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
  417. continue;
  418. return i;
  419. }
  420. /*
  421. * Check does double duty:
  422. * 1. ferret out any fortuitous correspondences due
  423. * to confounding by hashing (which result in "jackpot")
  424. * 2. collect random access indexes to the two files
  425. */
  426. static void check(FILE * f1, FILE * f2)
  427. {
  428. int i, j, jackpot, c, d;
  429. long ctold, ctnew;
  430. rewind(f1);
  431. rewind(f2);
  432. j = 1;
  433. ixold[0] = ixnew[0] = 0;
  434. jackpot = 0;
  435. ctold = ctnew = 0;
  436. for (i = 1; i <= len[0]; i++) {
  437. if (J[i] == 0) {
  438. ixold[i] = ctold += skipline(f1);
  439. continue;
  440. }
  441. while (j < J[i]) {
  442. ixnew[j] = ctnew += skipline(f2);
  443. j++;
  444. }
  445. if ((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)
  446. || (cmd_flags & FLAG_i)) {
  447. while (1) {
  448. c = getc(f1);
  449. d = getc(f2);
  450. /*
  451. * GNU diff ignores a missing newline
  452. * in one file if bflag || wflag.
  453. */
  454. if (((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)) &&
  455. ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) {
  456. break;
  457. }
  458. ctold++;
  459. ctnew++;
  460. if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
  461. do {
  462. if (c == '\n')
  463. break;
  464. ctold++;
  465. } while (isspace(c = getc(f1)));
  466. do {
  467. if (d == '\n')
  468. break;
  469. ctnew++;
  470. } while (isspace(d = getc(f2)));
  471. } else if (cmd_flags & FLAG_w) {
  472. while (isspace(c) && c != '\n') {
  473. c = getc(f1);
  474. ctold++;
  475. }
  476. while (isspace(d) && d != '\n') {
  477. d = getc(f2);
  478. ctnew++;
  479. }
  480. }
  481. if (c != d) {
  482. jackpot++;
  483. J[i] = 0;
  484. if (c != '\n' && c != EOF)
  485. ctold += skipline(f1);
  486. if (d != '\n' && c != EOF)
  487. ctnew += skipline(f2);
  488. break;
  489. }
  490. if (c == '\n' || c == EOF)
  491. break;
  492. }
  493. } else {
  494. while (1) {
  495. ctold++;
  496. ctnew++;
  497. if ((c = getc(f1)) != (d = getc(f2))) {
  498. J[i] = 0;
  499. if (c != '\n' && c != EOF)
  500. ctold += skipline(f1);
  501. if (d != '\n' && c != EOF)
  502. ctnew += skipline(f2);
  503. break;
  504. }
  505. if (c == '\n' || c == EOF)
  506. break;
  507. }
  508. }
  509. ixold[i] = ctold;
  510. ixnew[j] = ctnew;
  511. j++;
  512. }
  513. for (; j <= len[1]; j++)
  514. ixnew[j] = ctnew += skipline(f2);
  515. }
  516. /* shellsort CACM #201 */
  517. static void sort(struct line *a, int n)
  518. {
  519. struct line *ai, *aim, w;
  520. int j, m = 0, k;
  521. if (n == 0)
  522. return;
  523. for (j = 1; j <= n; j *= 2)
  524. m = 2 * j - 1;
  525. for (m /= 2; m != 0; m /= 2) {
  526. k = n - m;
  527. for (j = 1; j <= k; j++) {
  528. for (ai = &a[j]; ai > a; ai -= m) {
  529. aim = &ai[m];
  530. if (aim < ai)
  531. break; /* wraparound */
  532. if (aim->value > ai[0].value ||
  533. (aim->value == ai[0].value && aim->serial > ai[0].serial))
  534. break;
  535. w.value = ai[0].value;
  536. ai[0].value = aim->value;
  537. aim->value = w.value;
  538. w.serial = ai[0].serial;
  539. ai[0].serial = aim->serial;
  540. aim->serial = w.serial;
  541. }
  542. }
  543. }
  544. }
  545. static void uni_range(int a, int b)
  546. {
  547. if (a < b)
  548. printf("%d,%d", a, b - a + 1);
  549. else if (a == b)
  550. printf("%d", b);
  551. else
  552. printf("%d,0", b);
  553. }
  554. static int fetch(long *f, int a, int b, FILE * lb, int ch)
  555. {
  556. int i, j, c, lastc, col, nc;
  557. if (a > b)
  558. return 0;
  559. for (i = a; i <= b; i++) {
  560. fseek(lb, f[i - 1], SEEK_SET);
  561. nc = f[i] - f[i - 1];
  562. if (ch != '\0') {
  563. putchar(ch);
  564. if (cmd_flags & FLAG_T)
  565. putchar('\t');
  566. }
  567. col = 0;
  568. for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
  569. if ((c = getc(lb)) == EOF) {
  570. puts("\n\\ No newline at end of file");
  571. return 0;
  572. }
  573. if (c == '\t' && (cmd_flags & FLAG_t)) {
  574. do {
  575. putchar(' ');
  576. } while (++col & 7);
  577. } else {
  578. putchar(c);
  579. col++;
  580. }
  581. }
  582. }
  583. return 0;
  584. }
  585. static int asciifile(FILE * f)
  586. {
  587. #if ENABLE_FEATURE_DIFF_BINARY
  588. unsigned char buf[BUFSIZ];
  589. int i, cnt;
  590. #endif
  591. if ((cmd_flags & FLAG_a) || f == NULL)
  592. return 1;
  593. #if ENABLE_FEATURE_DIFF_BINARY
  594. rewind(f);
  595. cnt = fread(buf, 1, sizeof(buf), f);
  596. for (i = 0; i < cnt; i++) {
  597. if (!isprint(buf[i]) && !isspace(buf[i])) {
  598. return 0;
  599. }
  600. }
  601. #endif
  602. return 1;
  603. }
  604. /* dump accumulated "unified" diff changes */
  605. static void dump_unified_vec(FILE * f1, FILE * f2)
  606. {
  607. struct context_vec *cvp = context_vec_start;
  608. int lowa, upb, lowc, upd;
  609. int a, b, c, d;
  610. char ch;
  611. if (context_vec_start > context_vec_ptr)
  612. return;
  613. b = d = 0; /* gcc */
  614. lowa = MAX(1, cvp->a - context);
  615. upb = MIN(len[0], context_vec_ptr->b + context);
  616. lowc = MAX(1, cvp->c - context);
  617. upd = MIN(len[1], context_vec_ptr->d + context);
  618. fputs("@@ -", stdout);
  619. uni_range(lowa, upb);
  620. fputs(" +", stdout);
  621. uni_range(lowc, upd);
  622. fputs(" @@", stdout);
  623. putchar('\n');
  624. /*
  625. * Output changes in "unified" diff format--the old and new lines
  626. * are printed together.
  627. */
  628. for (; cvp <= context_vec_ptr; cvp++) {
  629. a = cvp->a;
  630. b = cvp->b;
  631. c = cvp->c;
  632. d = cvp->d;
  633. /*
  634. * c: both new and old changes
  635. * d: only changes in the old file
  636. * a: only changes in the new file
  637. */
  638. if (a <= b && c <= d)
  639. ch = 'c';
  640. else
  641. ch = (a <= b) ? 'd' : 'a';
  642. if (ch == 'c' || ch == 'd') {
  643. fetch(ixold, lowa, a - 1, f1, ' ');
  644. fetch(ixold, a, b, f1, '-');
  645. }
  646. if (ch == 'a')
  647. fetch(ixnew, lowc, c - 1, f2, ' ');
  648. if (ch == 'c' || ch == 'a')
  649. fetch(ixnew, c, d, f2, '+');
  650. lowa = b + 1;
  651. lowc = d + 1;
  652. }
  653. fetch(ixnew, d + 1, upd, f2, ' ');
  654. context_vec_ptr = context_vec_start - 1;
  655. }
  656. static void print_header(const char *file1, const char *file2)
  657. {
  658. if (label[0] != NULL)
  659. printf("%s %s\n", "---", label[0]);
  660. else
  661. printf("%s %s\t%s", "---", file1, ctime(&stb1.st_mtime));
  662. if (label[1] != NULL)
  663. printf("%s %s\n", "+++", label[1]);
  664. else
  665. printf("%s %s\t%s", "+++", file2, ctime(&stb2.st_mtime));
  666. }
  667. /*
  668. * Indicate that there is a difference between lines a and b of the from file
  669. * to get to lines c to d of the to file. If a is greater then b then there
  670. * are no lines in the from file involved and this means that there were
  671. * lines appended (beginning at b). If c is greater than d then there are
  672. * lines missing from the to file.
  673. */
  674. static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a,
  675. int b, int c, int d)
  676. {
  677. static size_t max_context = 64;
  678. if (a > b && c > d)
  679. return;
  680. if (cmd_flags & FLAG_q)
  681. return;
  682. /*
  683. * Allocate change records as needed.
  684. */
  685. if (context_vec_ptr == context_vec_end - 1) {
  686. ptrdiff_t offset = context_vec_ptr - context_vec_start;
  687. max_context <<= 1;
  688. context_vec_start = xrealloc(context_vec_start,
  689. max_context *
  690. sizeof(struct context_vec));
  691. context_vec_end = context_vec_start + max_context;
  692. context_vec_ptr = context_vec_start + offset;
  693. }
  694. if (anychange == 0) {
  695. /*
  696. * Print the context/unidiff header first time through.
  697. */
  698. print_header(file1, file2);
  699. anychange = 1;
  700. } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
  701. c > context_vec_ptr->d + (2 * context) + 1) {
  702. /*
  703. * If this change is more than 'context' lines from the
  704. * previous change, dump the record and reset it.
  705. */
  706. dump_unified_vec(f1, f2);
  707. }
  708. context_vec_ptr++;
  709. context_vec_ptr->a = a;
  710. context_vec_ptr->b = b;
  711. context_vec_ptr->c = c;
  712. context_vec_ptr->d = d;
  713. return;
  714. }
  715. static void output(char *file1, FILE * f1, char *file2, FILE * f2)
  716. {
  717. /* Note that j0 and j1 can't be used as they are defined in math.h.
  718. * This also allows the rather amusing variable 'j00'... */
  719. int m, i0, i1, j00, j01;
  720. rewind(f1);
  721. rewind(f2);
  722. m = len[0];
  723. J[0] = 0;
  724. J[m + 1] = len[1] + 1;
  725. for (i0 = 1; i0 <= m; i0 = i1 + 1) {
  726. while (i0 <= m && J[i0] == J[i0 - 1] + 1)
  727. i0++;
  728. j00 = J[i0 - 1] + 1;
  729. i1 = i0 - 1;
  730. while (i1 < m && J[i1 + 1] == 0)
  731. i1++;
  732. j01 = J[i1 + 1] - 1;
  733. J[i1] = j01;
  734. change(file1, f1, file2, f2, i0, i1, j00, j01);
  735. }
  736. if (m == 0) {
  737. change(file1, f1, file2, f2, 1, 0, 1, len[1]);
  738. }
  739. if (anychange != 0) {
  740. dump_unified_vec(f1, f2);
  741. }
  742. }
  743. /*
  744. * The following code uses an algorithm due to Harold Stone,
  745. * which finds a pair of longest identical subsequences in
  746. * the two files.
  747. *
  748. * The major goal is to generate the match vector J.
  749. * J[i] is the index of the line in file1 corresponding
  750. * to line i file0. J[i] = 0 if there is no
  751. * such line in file1.
  752. *
  753. * Lines are hashed so as to work in core. All potential
  754. * matches are located by sorting the lines of each file
  755. * on the hash (called ``value''). In particular, this
  756. * collects the equivalence classes in file1 together.
  757. * Subroutine equiv replaces the value of each line in
  758. * file0 by the index of the first element of its
  759. * matching equivalence in (the reordered) file1.
  760. * To save space equiv squeezes file1 into a single
  761. * array member in which the equivalence classes
  762. * are simply concatenated, except that their first
  763. * members are flagged by changing sign.
  764. *
  765. * Next the indices that point into member are unsorted into
  766. * array class according to the original order of file0.
  767. *
  768. * The cleverness lies in routine stone. This marches
  769. * through the lines of file0, developing a vector klist
  770. * of "k-candidates". At step i a k-candidate is a matched
  771. * pair of lines x,y (x in file0 y in file1) such that
  772. * there is a common subsequence of length k
  773. * between the first i lines of file0 and the first y
  774. * lines of file1, but there is no such subsequence for
  775. * any smaller y. x is the earliest possible mate to y
  776. * that occurs in such a subsequence.
  777. *
  778. * Whenever any of the members of the equivalence class of
  779. * lines in file1 matable to a line in file0 has serial number
  780. * less than the y of some k-candidate, that k-candidate
  781. * with the smallest such y is replaced. The new
  782. * k-candidate is chained (via pred) to the current
  783. * k-1 candidate so that the actual subsequence can
  784. * be recovered. When a member has serial number greater
  785. * that the y of all k-candidates, the klist is extended.
  786. * At the end, the longest subsequence is pulled out
  787. * and placed in the array J by unravel
  788. *
  789. * With J in hand, the matches there recorded are
  790. * checked against reality to assure that no spurious
  791. * matches have crept in due to hashing. If they have,
  792. * they are broken, and "jackpot" is recorded--a harmless
  793. * matter except that a true match for a spuriously
  794. * mated line may now be unnecessarily reported as a change.
  795. *
  796. * Much of the complexity of the program comes simply
  797. * from trying to minimize core utilization and
  798. * maximize the range of doable problems by dynamically
  799. * allocating what is needed and reusing what is not.
  800. * The core requirements for problems larger than somewhat
  801. * are (in words) 2*length(file0) + length(file1) +
  802. * 3*(number of k-candidates installed), typically about
  803. * 6n words for files of length n.
  804. */
  805. static int diffreg(char *ofile1, char *ofile2, int flags)
  806. {
  807. char *file1 = ofile1;
  808. char *file2 = ofile2;
  809. FILE *f1 = NULL;
  810. FILE *f2 = NULL;
  811. int rval = D_SAME;
  812. int i;
  813. anychange = 0;
  814. context_vec_ptr = context_vec_start - 1;
  815. if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
  816. return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
  817. if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
  818. goto closem;
  819. if (flags & D_EMPTY1)
  820. f1 = xfopen(bb_dev_null, "r");
  821. else {
  822. if (strcmp(file1, "-") == 0)
  823. f1 = stdin;
  824. else
  825. f1 = xfopen(file1, "r");
  826. }
  827. if (flags & D_EMPTY2)
  828. f2 = xfopen(bb_dev_null, "r");
  829. else {
  830. if (strcmp(file2, "-") == 0)
  831. f2 = stdin;
  832. else
  833. f2 = xfopen(file2, "r");
  834. }
  835. if ((i = files_differ(f1, f2, flags)) == 0)
  836. goto closem;
  837. else if (i != 1) { /* 1 == ok */
  838. /* error */
  839. status |= 2;
  840. goto closem;
  841. }
  842. if (!asciifile(f1) || !asciifile(f2)) {
  843. rval = D_BINARY;
  844. status |= 1;
  845. goto closem;
  846. }
  847. prepare(0, f1, stb1.st_size);
  848. prepare(1, f2, stb2.st_size);
  849. prune();
  850. sort(sfile[0], slen[0]);
  851. sort(sfile[1], slen[1]);
  852. member = (int *) file[1];
  853. equiv(sfile[0], slen[0], sfile[1], slen[1], member);
  854. member = xrealloc(member, (slen[1] + 2) * sizeof(int));
  855. class = (int *) file[0];
  856. unsort(sfile[0], slen[0], class);
  857. class = xrealloc(class, (slen[0] + 2) * sizeof(int));
  858. klist = xmalloc((slen[0] + 2) * sizeof(int));
  859. clen = 0;
  860. clistlen = 100;
  861. clist = xmalloc(clistlen * sizeof(struct cand));
  862. i = stone(class, slen[0], member, klist);
  863. free(member);
  864. free(class);
  865. J = xrealloc(J, (len[0] + 2) * sizeof(int));
  866. unravel(klist[i]);
  867. free(clist);
  868. free(klist);
  869. ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
  870. ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
  871. check(f1, f2);
  872. output(file1, f1, file2, f2);
  873. closem:
  874. if (anychange) {
  875. status |= 1;
  876. if (rval == D_SAME)
  877. rval = D_DIFFER;
  878. }
  879. if (f1 != NULL)
  880. fclose(f1);
  881. if (f2 != NULL)
  882. fclose(f2);
  883. if (file1 != ofile1)
  884. free(file1);
  885. if (file2 != ofile2)
  886. free(file2);
  887. return rval;
  888. }
  889. #if ENABLE_FEATURE_DIFF_DIR
  890. static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
  891. {
  892. int flags = D_HEADER;
  893. int val;
  894. char *fullpath1 = xasprintf("%s/%s", dir1, path1);
  895. char *fullpath2 = xasprintf("%s/%s", dir2, path2);
  896. if (stat(fullpath1, &stb1) != 0) {
  897. flags |= D_EMPTY1;
  898. memset(&stb1, 0, sizeof(stb1));
  899. fullpath1 = xasprintf("%s/%s", dir1, path2);
  900. }
  901. if (stat(fullpath2, &stb2) != 0) {
  902. flags |= D_EMPTY2;
  903. memset(&stb2, 0, sizeof(stb2));
  904. stb2.st_mode = stb1.st_mode;
  905. fullpath2 = xasprintf("%s/%s", dir2, path1);
  906. }
  907. if (stb1.st_mode == 0)
  908. stb1.st_mode = stb2.st_mode;
  909. if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
  910. printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
  911. return;
  912. }
  913. if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
  914. val = D_SKIPPED1;
  915. else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
  916. val = D_SKIPPED2;
  917. else
  918. val = diffreg(fullpath1, fullpath2, flags);
  919. print_status(val, fullpath1, fullpath2, NULL);
  920. }
  921. #endif
  922. #if ENABLE_FEATURE_DIFF_DIR
  923. static int dir_strcmp(const void *p1, const void *p2)
  924. {
  925. return strcmp(*(char *const *) p1, *(char *const *) p2);
  926. }
  927. /* This function adds a filename to dl, the directory listing. */
  928. static int add_to_dirlist(const char *filename,
  929. struct stat ATTRIBUTE_UNUSED * sb, void *userdata,
  930. int depth ATTRIBUTE_UNUSED)
  931. {
  932. dl_count++;
  933. dl = xrealloc(dl, dl_count * sizeof(char *));
  934. dl[dl_count - 1] = xstrdup(filename);
  935. if (cmd_flags & FLAG_r) {
  936. int *pp = (int *) userdata;
  937. int path_len = *pp + 1;
  938. dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
  939. }
  940. return TRUE;
  941. }
  942. /* This returns a sorted directory listing. */
  943. static char **get_dir(char *path)
  944. {
  945. int i;
  946. char **retval;
  947. /* If -r has been set, then the recursive_action function will be
  948. * used. Unfortunately, this outputs the root directory along with
  949. * the recursed paths, so use void *userdata to specify the string
  950. * length of the root directory. It can then be removed in
  951. * add_to_dirlist. */
  952. int path_len = strlen(path);
  953. void *userdata = &path_len;
  954. /* Reset dl_count - there's no need to free dl as xrealloc does
  955. * the job nicely. */
  956. dl_count = 0;
  957. /* Now fill dl with a listing. */
  958. if (cmd_flags & FLAG_r)
  959. recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL,
  960. userdata, 0);
  961. else {
  962. DIR *dp;
  963. struct dirent *ep;
  964. dp = warn_opendir(path);
  965. while ((ep = readdir(dp))) {
  966. if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
  967. continue;
  968. add_to_dirlist(ep->d_name, NULL, NULL, 0);
  969. }
  970. closedir(dp);
  971. }
  972. /* Sort dl alphabetically. */
  973. qsort(dl, dl_count, sizeof(char *), dir_strcmp);
  974. /* Copy dl so that we can return it. */
  975. retval = xmalloc(dl_count * sizeof(char *));
  976. for (i = 0; i < dl_count; i++)
  977. retval[i] = xstrdup(dl[i]);
  978. return retval;
  979. }
  980. static void diffdir(char *p1, char *p2)
  981. {
  982. char **dirlist1, **dirlist2;
  983. char *dp1, *dp2;
  984. int dirlist1_count, dirlist2_count;
  985. int pos;
  986. /* Check for trailing slashes. */
  987. dp1 = last_char_is(p1, '/');
  988. if (dp1 != NULL)
  989. *dp1 = '\0';
  990. dp2 = last_char_is(p2, '/');
  991. if (dp2 != NULL)
  992. *dp2 = '\0';
  993. /* Get directory listings for p1 and p2. */
  994. dirlist1 = get_dir(p1);
  995. dirlist1_count = dl_count;
  996. dirlist1[dirlist1_count] = NULL;
  997. dirlist2 = get_dir(p2);
  998. dirlist2_count = dl_count;
  999. dirlist2[dirlist2_count] = NULL;
  1000. /* If -S was set, find the starting point. */
  1001. if (start) {
  1002. while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
  1003. dirlist1++;
  1004. while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
  1005. dirlist2++;
  1006. if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
  1007. bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
  1008. }
  1009. /* Now that both dirlist1 and dirlist2 contain sorted directory
  1010. * listings, we can start to go through dirlist1. If both listings
  1011. * contain the same file, then do a normal diff. Otherwise, behaviour
  1012. * is determined by whether the -N flag is set. */
  1013. while (*dirlist1 != NULL || *dirlist2 != NULL) {
  1014. dp1 = *dirlist1;
  1015. dp2 = *dirlist2;
  1016. pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
  1017. if (pos == 0) {
  1018. do_diff(p1, dp1, p2, dp2);
  1019. dirlist1++;
  1020. dirlist2++;
  1021. } else if (pos < 0) {
  1022. if (cmd_flags & FLAG_N)
  1023. do_diff(p1, dp1, p2, NULL);
  1024. else
  1025. print_only(p1, strlen(p1) + 1, dp1);
  1026. dirlist1++;
  1027. } else {
  1028. if (cmd_flags & FLAG_N)
  1029. do_diff(p1, NULL, p2, dp2);
  1030. else
  1031. print_only(p2, strlen(p2) + 1, dp2);
  1032. dirlist2++;
  1033. }
  1034. }
  1035. }
  1036. #endif
  1037. int diff_main(int argc, char **argv)
  1038. {
  1039. int gotstdin = 0;
  1040. char *U_opt;
  1041. llist_t *L_arg = NULL;
  1042. opt_complementary = "L::";
  1043. cmd_flags = getopt32(argc, argv, "abdiL:NqrsS:tTU:wu",
  1044. &L_arg, &start, &U_opt);
  1045. if (cmd_flags & FLAG_L) {
  1046. while (L_arg) {
  1047. if (label[0] == NULL)
  1048. label[0] = L_arg->data;
  1049. else if (label[1] == NULL)
  1050. label[1] = L_arg->data;
  1051. else
  1052. bb_show_usage();
  1053. L_arg = L_arg->link;
  1054. }
  1055. /* If both label[0] and label[1] were set, they need to be swapped. */
  1056. if (label[0] && label[1]) {
  1057. char *tmp;
  1058. tmp = label[1];
  1059. label[1] = label[0];
  1060. label[0] = tmp;
  1061. }
  1062. }
  1063. context = 3; /* This is the default number of lines of context. */
  1064. if (cmd_flags & FLAG_U) {
  1065. context = xatoul_range(U_opt, 1, INT_MAX);
  1066. }
  1067. argc -= optind;
  1068. argv += optind;
  1069. /*
  1070. * Do sanity checks, fill in stb1 and stb2 and call the appropriate
  1071. * driver routine. Both drivers use the contents of stb1 and stb2.
  1072. */
  1073. if (argc < 2) {
  1074. bb_error_msg("missing filename");
  1075. bb_show_usage();
  1076. }
  1077. if (strcmp(argv[0], "-") == 0) {
  1078. fstat(STDIN_FILENO, &stb1);
  1079. gotstdin = 1;
  1080. } else
  1081. xstat(argv[0], &stb1);
  1082. if (strcmp(argv[1], "-") == 0) {
  1083. fstat(STDIN_FILENO, &stb2);
  1084. gotstdin = 1;
  1085. } else
  1086. xstat(argv[1], &stb2);
  1087. if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
  1088. bb_error_msg_and_die("can't compare - to a directory");
  1089. if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
  1090. #if ENABLE_FEATURE_DIFF_DIR
  1091. diffdir(argv[0], argv[1]);
  1092. #else
  1093. bb_error_msg_and_die("directory comparison not supported");
  1094. #endif
  1095. } else {
  1096. if (S_ISDIR(stb1.st_mode)) {
  1097. argv[0] = concat_path_file(argv[0], argv[1]);
  1098. xstat(argv[0], &stb1);
  1099. }
  1100. if (S_ISDIR(stb2.st_mode)) {
  1101. argv[1] = concat_path_file(argv[1], argv[0]);
  1102. xstat(argv[1], &stb2);
  1103. }
  1104. print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
  1105. }
  1106. exit(status);
  1107. }