diff.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Mini diff implementation for busybox, adapted from OpenBSD diff.
  4. *
  5. * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
  6. * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
  7. *
  8. * Sponsored in part by the Defense Advanced Research Projects
  9. * Agency (DARPA) and Air Force Research Laboratory, Air Force
  10. * Materiel Command, USAF, under agreement number F39502-99-1-0512.
  11. *
  12. * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  13. */
  14. #include "libbb.h"
  15. #define FSIZE_MAX 32768
  16. /*
  17. * Output flags
  18. */
  19. #define D_HEADER 1 /* Print a header/footer between files */
  20. #define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
  21. #define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
  22. /*
  23. * Status values for print_status() and diffreg() return values
  24. * Guide:
  25. * D_SAME - files are the same
  26. * D_DIFFER - files differ
  27. * D_BINARY - binary files differ
  28. * D_COMMON - subdirectory common to both dirs
  29. * D_ONLY - file only exists in one dir
  30. * D_MISMATCH1 - path1 a dir, path2 a file
  31. * D_MISMATCH2 - path1 a file, path2 a dir
  32. * D_ERROR - error occurred
  33. * D_SKIPPED1 - skipped path1 as it is a special file
  34. * D_SKIPPED2 - skipped path2 as it is a special file
  35. */
  36. #define D_SAME 0
  37. #define D_DIFFER (1<<0)
  38. #define D_BINARY (1<<1)
  39. #define D_COMMON (1<<2)
  40. #define D_ONLY (1<<3)
  41. #define D_MISMATCH1 (1<<4)
  42. #define D_MISMATCH2 (1<<5)
  43. #define D_ERROR (1<<6)
  44. #define D_SKIPPED1 (1<<7)
  45. #define D_SKIPPED2 (1<<8)
  46. /* Command line options */
  47. #define FLAG_a (1<<0)
  48. #define FLAG_b (1<<1)
  49. #define FLAG_d (1<<2)
  50. #define FLAG_i (1<<3)
  51. #define FLAG_L (1<<4)
  52. #define FLAG_N (1<<5)
  53. #define FLAG_q (1<<6)
  54. #define FLAG_r (1<<7)
  55. #define FLAG_s (1<<8)
  56. #define FLAG_S (1<<9)
  57. #define FLAG_t (1<<10)
  58. #define FLAG_T (1<<11)
  59. #define FLAG_U (1<<12)
  60. #define FLAG_w (1<<13)
  61. #define g_read_buf bb_common_bufsiz1
  62. struct cand {
  63. int x;
  64. int y;
  65. int pred;
  66. };
  67. struct line {
  68. int serial;
  69. int value;
  70. };
  71. /*
  72. * The following struct is used to record change information
  73. * doing a "context" or "unified" diff. (see routine "change" to
  74. * understand the highly mnemonic field names)
  75. */
  76. struct context_vec {
  77. int a; /* start line in old file */
  78. int b; /* end line in old file */
  79. int c; /* start line in new file */
  80. int d; /* end line in new file */
  81. };
  82. struct globals {
  83. USE_FEATURE_DIFF_DIR(char **dl;)
  84. USE_FEATURE_DIFF_DIR(int dl_count;)
  85. /* This is the default number of lines of context. */
  86. int context;
  87. size_t max_context;
  88. int status;
  89. char *start;
  90. const char *label1;
  91. const char *label2;
  92. struct line *file[2];
  93. int *J; /* will be overlaid on class */
  94. int *class; /* will be overlaid on file[0] */
  95. int *klist; /* will be overlaid on file[0] after class */
  96. int *member; /* will be overlaid on file[1] */
  97. int clen;
  98. int len[2];
  99. int pref, suff; /* length of prefix and suffix */
  100. int slen[2];
  101. bool anychange;
  102. long *ixnew; /* will be overlaid on file[1] */
  103. long *ixold; /* will be overlaid on klist */
  104. struct cand *clist; /* merely a free storage pot for candidates */
  105. int clistlen; /* the length of clist */
  106. struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
  107. struct context_vec *context_vec_start;
  108. struct context_vec *context_vec_end;
  109. struct context_vec *context_vec_ptr;
  110. struct stat stb1, stb2;
  111. };
  112. #define G (*ptr_to_globals)
  113. #define dl (G.dl )
  114. #define dl_count (G.dl_count )
  115. #define context (G.context )
  116. #define max_context (G.max_context )
  117. #define status (G.status )
  118. #define start (G.start )
  119. #define label1 (G.label1 )
  120. #define label2 (G.label2 )
  121. #define file (G.file )
  122. #define J (G.J )
  123. #define class (G.class )
  124. #define klist (G.klist )
  125. #define member (G.member )
  126. #define clen (G.clen )
  127. #define len (G.len )
  128. #define pref (G.pref )
  129. #define suff (G.suff )
  130. #define slen (G.slen )
  131. #define anychange (G.anychange )
  132. #define ixnew (G.ixnew )
  133. #define ixold (G.ixold )
  134. #define clist (G.clist )
  135. #define clistlen (G.clistlen )
  136. #define sfile (G.sfile )
  137. #define context_vec_start (G.context_vec_start )
  138. #define context_vec_end (G.context_vec_end )
  139. #define context_vec_ptr (G.context_vec_ptr )
  140. #define stb1 (G.stb1 )
  141. #define stb2 (G.stb2 )
  142. #define INIT_G() do { \
  143. PTR_TO_GLOBALS = xzalloc(sizeof(G)); \
  144. context = 3; \
  145. max_context = 64; \
  146. } while (0)
  147. static void print_only(const char *path, size_t dirlen, const char *entry)
  148. {
  149. if (dirlen > 1)
  150. dirlen--;
  151. printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
  152. }
  153. static void print_status(int val, char *path1, char *path2, char *entry)
  154. {
  155. const char *const _entry = entry ? entry : "";
  156. char * const _path1 = entry ? concat_path_file(path1, _entry) : path1;
  157. char * const _path2 = entry ? concat_path_file(path2, _entry) : path2;
  158. switch (val) {
  159. case D_ONLY:
  160. print_only(path1, strlen(path1), entry);
  161. break;
  162. case D_COMMON:
  163. printf("Common subdirectories: %s and %s\n", _path1, _path2);
  164. break;
  165. case D_BINARY:
  166. printf("Binary files %s and %s differ\n", _path1, _path2);
  167. break;
  168. case D_DIFFER:
  169. if (option_mask32 & FLAG_q)
  170. printf("Files %s and %s differ\n", _path1, _path2);
  171. break;
  172. case D_SAME:
  173. if (option_mask32 & FLAG_s)
  174. printf("Files %s and %s are identical\n", _path1, _path2);
  175. break;
  176. case D_MISMATCH1:
  177. printf("File %s is a %s while file %s is a %s\n",
  178. _path1, "directory", _path2, "regular file");
  179. break;
  180. case D_MISMATCH2:
  181. printf("File %s is a %s while file %s is a %s\n",
  182. _path1, "regular file", _path2, "directory");
  183. break;
  184. case D_SKIPPED1:
  185. printf("File %s is not a regular file or directory and was skipped\n",
  186. _path1);
  187. break;
  188. case D_SKIPPED2:
  189. printf("File %s is not a regular file or directory and was skipped\n",
  190. _path2);
  191. break;
  192. }
  193. if (entry) {
  194. free(_path1);
  195. free(_path2);
  196. }
  197. }
  198. static ALWAYS_INLINE int fiddle_sum(int sum, int t)
  199. {
  200. return sum * 127 + t;
  201. }
  202. /*
  203. * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
  204. */
  205. static int readhash(FILE *fp)
  206. {
  207. int i, t, space;
  208. int sum;
  209. sum = 1;
  210. space = 0;
  211. if (!(option_mask32 & (FLAG_b | FLAG_w))) {
  212. for (i = 0; (t = getc(fp)) != '\n'; i++) {
  213. if (t == EOF) {
  214. if (i == 0)
  215. return 0;
  216. break;
  217. }
  218. sum = fiddle_sum(sum, t);
  219. }
  220. } else {
  221. for (i = 0;;) {
  222. switch (t = getc(fp)) {
  223. case '\t':
  224. case '\r':
  225. case '\v':
  226. case '\f':
  227. case ' ':
  228. space++;
  229. continue;
  230. default:
  231. if (space && !(option_mask32 & FLAG_w)) {
  232. i++;
  233. space = 0;
  234. }
  235. sum = fiddle_sum(sum, t);
  236. i++;
  237. continue;
  238. case EOF:
  239. if (i == 0)
  240. return 0;
  241. /* FALLTHROUGH */
  242. case '\n':
  243. break;
  244. }
  245. break;
  246. }
  247. }
  248. /*
  249. * There is a remote possibility that we end up with a zero sum.
  250. * Zero is used as an EOF marker, so return 1 instead.
  251. */
  252. return (sum == 0 ? 1 : sum);
  253. }
  254. /*
  255. * Check to see if the given files differ.
  256. * Returns 0 if they are the same, 1 if different, and -1 on error.
  257. */
  258. static int files_differ(FILE *f1, FILE *f2, int flags)
  259. {
  260. size_t i, j;
  261. if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size
  262. || (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)
  263. ) {
  264. return 1;
  265. }
  266. while (1) {
  267. i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1);
  268. j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2);
  269. if (i != j)
  270. return 1;
  271. if (i == 0)
  272. return (ferror(f1) || ferror(f2));
  273. if (memcmp(g_read_buf,
  274. g_read_buf + COMMON_BUFSIZE/2, i) != 0)
  275. return 1;
  276. }
  277. }
  278. static void prepare(int i, FILE *fp /*, off_t filesize*/)
  279. {
  280. struct line *p;
  281. int h;
  282. size_t j, sz;
  283. rewind(fp);
  284. /*sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;*/
  285. /*if (sz < 100)*/
  286. sz = 100;
  287. p = xmalloc((sz + 3) * sizeof(p[0]));
  288. j = 0;
  289. while ((h = readhash(fp))) {
  290. if (j == sz) {
  291. sz = sz * 3 / 2;
  292. p = xrealloc(p, (sz + 3) * sizeof(p[0]));
  293. }
  294. p[++j].value = h;
  295. }
  296. len[i] = j;
  297. file[i] = p;
  298. }
  299. static void prune(void)
  300. {
  301. int i, j;
  302. for (pref = 0; pref < len[0] && pref < len[1] &&
  303. file[0][pref + 1].value == file[1][pref + 1].value; pref++)
  304. continue;
  305. for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
  306. file[0][len[0] - suff].value == file[1][len[1] - suff].value;
  307. suff++)
  308. continue;
  309. for (j = 0; j < 2; j++) {
  310. sfile[j] = file[j] + pref;
  311. slen[j] = len[j] - pref - suff;
  312. for (i = 0; i <= slen[j]; i++)
  313. sfile[j][i].serial = i;
  314. }
  315. }
  316. static void equiv(struct line *a, int n, struct line *b, int m, int *c)
  317. {
  318. int i, j;
  319. i = j = 1;
  320. while (i <= n && j <= m) {
  321. if (a[i].value < b[j].value)
  322. a[i++].value = 0;
  323. else if (a[i].value == b[j].value)
  324. a[i++].value = j;
  325. else
  326. j++;
  327. }
  328. while (i <= n)
  329. a[i++].value = 0;
  330. b[m + 1].value = 0;
  331. j = 0;
  332. while (++j <= m) {
  333. c[j] = -b[j].serial;
  334. while (b[j + 1].value == b[j].value) {
  335. j++;
  336. c[j] = b[j].serial;
  337. }
  338. }
  339. c[j] = -1;
  340. }
  341. static int isqrt(int n)
  342. {
  343. int y, x;
  344. if (n == 0)
  345. return 0;
  346. x = 1;
  347. do {
  348. y = x;
  349. x = n / x;
  350. x += y;
  351. x /= 2;
  352. } while ((x - y) > 1 || (x - y) < -1);
  353. return x;
  354. }
  355. static int newcand(int x, int y, int pred)
  356. {
  357. struct cand *q;
  358. if (clen == clistlen) {
  359. clistlen = clistlen * 11 / 10;
  360. clist = xrealloc(clist, clistlen * sizeof(struct cand));
  361. }
  362. q = clist + clen;
  363. q->x = x;
  364. q->y = y;
  365. q->pred = pred;
  366. return clen++;
  367. }
  368. static int search(int *c, int k, int y)
  369. {
  370. int i, j, l, t;
  371. if (clist[c[k]].y < y) /* quick look for typical case */
  372. return k + 1;
  373. i = 0;
  374. j = k + 1;
  375. while (1) {
  376. l = i + j;
  377. if ((l >>= 1) <= i)
  378. break;
  379. t = clist[c[l]].y;
  380. if (t > y)
  381. j = l;
  382. else if (t < y)
  383. i = l;
  384. else
  385. return l;
  386. }
  387. return l + 1;
  388. }
  389. static int stone(int *a, int n, int *b, int *c)
  390. {
  391. int i, k, y, j, l;
  392. int oldc, tc, oldl;
  393. unsigned int numtries;
  394. #if ENABLE_FEATURE_DIFF_MINIMAL
  395. const unsigned int bound =
  396. (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
  397. #else
  398. const unsigned int bound = MAX(256, isqrt(n));
  399. #endif
  400. k = 0;
  401. c[0] = newcand(0, 0, 0);
  402. for (i = 1; i <= n; i++) {
  403. j = a[i];
  404. if (j == 0)
  405. continue;
  406. y = -b[j];
  407. oldl = 0;
  408. oldc = c[0];
  409. numtries = 0;
  410. do {
  411. if (y <= clist[oldc].y)
  412. continue;
  413. l = search(c, k, y);
  414. if (l != oldl + 1)
  415. oldc = c[l - 1];
  416. if (l <= k) {
  417. if (clist[c[l]].y <= y)
  418. continue;
  419. tc = c[l];
  420. c[l] = newcand(i, y, oldc);
  421. oldc = tc;
  422. oldl = l;
  423. numtries++;
  424. } else {
  425. c[l] = newcand(i, y, oldc);
  426. k++;
  427. break;
  428. }
  429. } while ((y = b[++j]) > 0 && numtries < bound);
  430. }
  431. return k;
  432. }
  433. static void unravel(int p)
  434. {
  435. struct cand *q;
  436. int i;
  437. for (i = 0; i <= len[0]; i++)
  438. J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
  439. for (q = clist + p; q->y != 0; q = clist + q->pred)
  440. J[q->x + pref] = q->y + pref;
  441. }
  442. static void unsort(struct line *f, int l, int *b)
  443. {
  444. int *a, i;
  445. a = xmalloc((l + 1) * sizeof(int));
  446. for (i = 1; i <= l; i++)
  447. a[f[i].serial] = f[i].value;
  448. for (i = 1; i <= l; i++)
  449. b[i] = a[i];
  450. free(a);
  451. }
  452. static int skipline(FILE * f)
  453. {
  454. int i, c;
  455. for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
  456. continue;
  457. return i;
  458. }
  459. /*
  460. * Check does double duty:
  461. * 1. ferret out any fortuitous correspondences due
  462. * to confounding by hashing (which result in "jackpot")
  463. * 2. collect random access indexes to the two files
  464. */
  465. static void check(FILE * f1, FILE * f2)
  466. {
  467. int i, j, jackpot, c, d;
  468. long ctold, ctnew;
  469. rewind(f1);
  470. rewind(f2);
  471. j = 1;
  472. ixold[0] = ixnew[0] = 0;
  473. jackpot = 0;
  474. ctold = ctnew = 0;
  475. for (i = 1; i <= len[0]; i++) {
  476. if (J[i] == 0) {
  477. ixold[i] = ctold += skipline(f1);
  478. continue;
  479. }
  480. while (j < J[i]) {
  481. ixnew[j] = ctnew += skipline(f2);
  482. j++;
  483. }
  484. if ((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)
  485. || (option_mask32 & FLAG_i)) {
  486. while (1) {
  487. c = getc(f1);
  488. d = getc(f2);
  489. /*
  490. * GNU diff ignores a missing newline
  491. * in one file if bflag || wflag.
  492. */
  493. if (((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)) &&
  494. ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) {
  495. break;
  496. }
  497. ctold++;
  498. ctnew++;
  499. if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) {
  500. do {
  501. if (c == '\n')
  502. break;
  503. ctold++;
  504. } while (isspace(c = getc(f1)));
  505. do {
  506. if (d == '\n')
  507. break;
  508. ctnew++;
  509. } while (isspace(d = getc(f2)));
  510. } else if (option_mask32 & FLAG_w) {
  511. while (isspace(c) && c != '\n') {
  512. c = getc(f1);
  513. ctold++;
  514. }
  515. while (isspace(d) && d != '\n') {
  516. d = getc(f2);
  517. ctnew++;
  518. }
  519. }
  520. if (c != d) {
  521. jackpot++;
  522. J[i] = 0;
  523. if (c != '\n' && c != EOF)
  524. ctold += skipline(f1);
  525. if (d != '\n' && c != EOF)
  526. ctnew += skipline(f2);
  527. break;
  528. }
  529. if (c == '\n' || c == EOF)
  530. break;
  531. }
  532. } else {
  533. while (1) {
  534. ctold++;
  535. ctnew++;
  536. if ((c = getc(f1)) != (d = getc(f2))) {
  537. J[i] = 0;
  538. if (c != '\n' && c != EOF)
  539. ctold += skipline(f1);
  540. if (d != '\n' && c != EOF)
  541. ctnew += skipline(f2);
  542. break;
  543. }
  544. if (c == '\n' || c == EOF)
  545. break;
  546. }
  547. }
  548. ixold[i] = ctold;
  549. ixnew[j] = ctnew;
  550. j++;
  551. }
  552. for (; j <= len[1]; j++)
  553. ixnew[j] = ctnew += skipline(f2);
  554. }
  555. /* shellsort CACM #201 */
  556. static void sort(struct line *a, int n)
  557. {
  558. struct line *ai, *aim, w;
  559. int j, m = 0, k;
  560. if (n == 0)
  561. return;
  562. for (j = 1; j <= n; j *= 2)
  563. m = 2 * j - 1;
  564. for (m /= 2; m != 0; m /= 2) {
  565. k = n - m;
  566. for (j = 1; j <= k; j++) {
  567. for (ai = &a[j]; ai > a; ai -= m) {
  568. aim = &ai[m];
  569. if (aim < ai)
  570. break; /* wraparound */
  571. if (aim->value > ai[0].value ||
  572. (aim->value == ai[0].value && aim->serial > ai[0].serial))
  573. break;
  574. w.value = ai[0].value;
  575. ai[0].value = aim->value;
  576. aim->value = w.value;
  577. w.serial = ai[0].serial;
  578. ai[0].serial = aim->serial;
  579. aim->serial = w.serial;
  580. }
  581. }
  582. }
  583. }
  584. static void uni_range(int a, int b)
  585. {
  586. if (a < b)
  587. printf("%d,%d", a, b - a + 1);
  588. else if (a == b)
  589. printf("%d", b);
  590. else
  591. printf("%d,0", b);
  592. }
  593. static void fetch(long *f, int a, int b, FILE * lb, int ch)
  594. {
  595. int i, j, c, lastc, col, nc;
  596. if (a > b)
  597. return;
  598. for (i = a; i <= b; i++) {
  599. fseek(lb, f[i - 1], SEEK_SET);
  600. nc = f[i] - f[i - 1];
  601. if (ch != '\0') {
  602. putchar(ch);
  603. if (option_mask32 & FLAG_T)
  604. putchar('\t');
  605. }
  606. col = 0;
  607. for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
  608. if ((c = getc(lb)) == EOF) {
  609. printf("\n\\ No newline at end of file\n");
  610. return;
  611. }
  612. if (c == '\t' && (option_mask32 & FLAG_t)) {
  613. do {
  614. putchar(' ');
  615. } while (++col & 7);
  616. } else {
  617. putchar(c);
  618. col++;
  619. }
  620. }
  621. }
  622. }
  623. static int asciifile(FILE * f)
  624. {
  625. #if ENABLE_FEATURE_DIFF_BINARY
  626. int i, cnt;
  627. #endif
  628. if ((option_mask32 & FLAG_a) || f == NULL)
  629. return 1;
  630. #if ENABLE_FEATURE_DIFF_BINARY
  631. rewind(f);
  632. cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f);
  633. for (i = 0; i < cnt; i++) {
  634. if (!isprint(g_read_buf[i])
  635. && !isspace(g_read_buf[i])) {
  636. return 0;
  637. }
  638. }
  639. #endif
  640. return 1;
  641. }
  642. /* dump accumulated "unified" diff changes */
  643. static void dump_unified_vec(FILE * f1, FILE * f2)
  644. {
  645. struct context_vec *cvp = context_vec_start;
  646. int lowa, upb, lowc, upd;
  647. int a, b, c, d;
  648. char ch;
  649. if (context_vec_start > context_vec_ptr)
  650. return;
  651. b = d = 0; /* gcc */
  652. lowa = MAX(1, cvp->a - context);
  653. upb = MIN(len[0], context_vec_ptr->b + context);
  654. lowc = MAX(1, cvp->c - context);
  655. upd = MIN(len[1], context_vec_ptr->d + context);
  656. printf("@@ -");
  657. uni_range(lowa, upb);
  658. printf(" +");
  659. uni_range(lowc, upd);
  660. printf(" @@\n");
  661. /*
  662. * Output changes in "unified" diff format--the old and new lines
  663. * are printed together.
  664. */
  665. for (; cvp <= context_vec_ptr; cvp++) {
  666. a = cvp->a;
  667. b = cvp->b;
  668. c = cvp->c;
  669. d = cvp->d;
  670. /*
  671. * c: both new and old changes
  672. * d: only changes in the old file
  673. * a: only changes in the new file
  674. */
  675. if (a <= b && c <= d)
  676. ch = 'c';
  677. else
  678. ch = (a <= b) ? 'd' : 'a';
  679. #if 0
  680. switch (ch) {
  681. case 'c':
  682. fetch(ixold, lowa, a - 1, f1, ' ');
  683. fetch(ixold, a, b, f1, '-');
  684. fetch(ixnew, c, d, f2, '+');
  685. break;
  686. case 'd':
  687. fetch(ixold, lowa, a - 1, f1, ' ');
  688. fetch(ixold, a, b, f1, '-');
  689. break;
  690. case 'a':
  691. fetch(ixnew, lowc, c - 1, f2, ' ');
  692. fetch(ixnew, c, d, f2, '+');
  693. break;
  694. }
  695. #else
  696. if (ch == 'c' || ch == 'd') {
  697. fetch(ixold, lowa, a - 1, f1, ' ');
  698. fetch(ixold, a, b, f1, '-');
  699. }
  700. if (ch == 'a')
  701. fetch(ixnew, lowc, c - 1, f2, ' ');
  702. if (ch == 'c' || ch == 'a')
  703. fetch(ixnew, c, d, f2, '+');
  704. #endif
  705. lowa = b + 1;
  706. lowc = d + 1;
  707. }
  708. fetch(ixnew, d + 1, upd, f2, ' ');
  709. context_vec_ptr = context_vec_start - 1;
  710. }
  711. static void print_header(const char *file1, const char *file2)
  712. {
  713. if (label1)
  714. printf("--- %s\n", label1);
  715. else
  716. printf("--- %s\t%s", file1, ctime(&stb1.st_mtime));
  717. if (label2)
  718. printf("+++ %s\n", label2);
  719. else
  720. printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime));
  721. }
  722. /*
  723. * Indicate that there is a difference between lines a and b of the from file
  724. * to get to lines c to d of the to file. If a is greater than b then there
  725. * are no lines in the from file involved and this means that there were
  726. * lines appended (beginning at b). If c is greater than d then there are
  727. * lines missing from the to file.
  728. */
  729. static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a,
  730. int b, int c, int d)
  731. {
  732. if ((a > b && c > d) || (option_mask32 & FLAG_q)) {
  733. anychange = 1;
  734. return;
  735. }
  736. /*
  737. * Allocate change records as needed.
  738. */
  739. if (context_vec_ptr == context_vec_end - 1) {
  740. ptrdiff_t offset = context_vec_ptr - context_vec_start;
  741. max_context <<= 1;
  742. context_vec_start = xrealloc(context_vec_start,
  743. max_context * sizeof(struct context_vec));
  744. context_vec_end = context_vec_start + max_context;
  745. context_vec_ptr = context_vec_start + offset;
  746. }
  747. if (anychange == 0) {
  748. /*
  749. * Print the context/unidiff header first time through.
  750. */
  751. print_header(file1, file2);
  752. } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
  753. c > context_vec_ptr->d + (2 * context) + 1) {
  754. /*
  755. * If this change is more than 'context' lines from the
  756. * previous change, dump the record and reset it.
  757. */
  758. dump_unified_vec(f1, f2);
  759. }
  760. context_vec_ptr++;
  761. context_vec_ptr->a = a;
  762. context_vec_ptr->b = b;
  763. context_vec_ptr->c = c;
  764. context_vec_ptr->d = d;
  765. anychange = 1;
  766. }
  767. static void output(char *file1, FILE * f1, char *file2, FILE * f2)
  768. {
  769. /* Note that j0 and j1 can't be used as they are defined in math.h.
  770. * This also allows the rather amusing variable 'j00'... */
  771. int m, i0, i1, j00, j01;
  772. rewind(f1);
  773. rewind(f2);
  774. m = len[0];
  775. J[0] = 0;
  776. J[m + 1] = len[1] + 1;
  777. for (i0 = 1; i0 <= m; i0 = i1 + 1) {
  778. while (i0 <= m && J[i0] == J[i0 - 1] + 1)
  779. i0++;
  780. j00 = J[i0 - 1] + 1;
  781. i1 = i0 - 1;
  782. while (i1 < m && J[i1 + 1] == 0)
  783. i1++;
  784. j01 = J[i1 + 1] - 1;
  785. J[i1] = j01;
  786. change(file1, f1, file2, f2, i0, i1, j00, j01);
  787. }
  788. if (m == 0) {
  789. change(file1, f1, file2, f2, 1, 0, 1, len[1]);
  790. }
  791. if (anychange != 0 && !(option_mask32 & FLAG_q)) {
  792. dump_unified_vec(f1, f2);
  793. }
  794. }
  795. /*
  796. * The following code uses an algorithm due to Harold Stone,
  797. * which finds a pair of longest identical subsequences in
  798. * the two files.
  799. *
  800. * The major goal is to generate the match vector J.
  801. * J[i] is the index of the line in file1 corresponding
  802. * to line i file0. J[i] = 0 if there is no
  803. * such line in file1.
  804. *
  805. * Lines are hashed so as to work in core. All potential
  806. * matches are located by sorting the lines of each file
  807. * on the hash (called ``value''). In particular, this
  808. * collects the equivalence classes in file1 together.
  809. * Subroutine equiv replaces the value of each line in
  810. * file0 by the index of the first element of its
  811. * matching equivalence in (the reordered) file1.
  812. * To save space equiv squeezes file1 into a single
  813. * array member in which the equivalence classes
  814. * are simply concatenated, except that their first
  815. * members are flagged by changing sign.
  816. *
  817. * Next the indices that point into member are unsorted into
  818. * array class according to the original order of file0.
  819. *
  820. * The cleverness lies in routine stone. This marches
  821. * through the lines of file0, developing a vector klist
  822. * of "k-candidates". At step i a k-candidate is a matched
  823. * pair of lines x,y (x in file0 y in file1) such that
  824. * there is a common subsequence of length k
  825. * between the first i lines of file0 and the first y
  826. * lines of file1, but there is no such subsequence for
  827. * any smaller y. x is the earliest possible mate to y
  828. * that occurs in such a subsequence.
  829. *
  830. * Whenever any of the members of the equivalence class of
  831. * lines in file1 matable to a line in file0 has serial number
  832. * less than the y of some k-candidate, that k-candidate
  833. * with the smallest such y is replaced. The new
  834. * k-candidate is chained (via pred) to the current
  835. * k-1 candidate so that the actual subsequence can
  836. * be recovered. When a member has serial number greater
  837. * that the y of all k-candidates, the klist is extended.
  838. * At the end, the longest subsequence is pulled out
  839. * and placed in the array J by unravel
  840. *
  841. * With J in hand, the matches there recorded are
  842. * checked against reality to assure that no spurious
  843. * matches have crept in due to hashing. If they have,
  844. * they are broken, and "jackpot" is recorded--a harmless
  845. * matter except that a true match for a spuriously
  846. * mated line may now be unnecessarily reported as a change.
  847. *
  848. * Much of the complexity of the program comes simply
  849. * from trying to minimize core utilization and
  850. * maximize the range of doable problems by dynamically
  851. * allocating what is needed and reusing what is not.
  852. * The core requirements for problems larger than somewhat
  853. * are (in words) 2*length(file0) + length(file1) +
  854. * 3*(number of k-candidates installed), typically about
  855. * 6n words for files of length n.
  856. */
  857. static unsigned diffreg(char *ofile1, char *ofile2, int flags)
  858. {
  859. char *file1 = ofile1;
  860. char *file2 = ofile2;
  861. FILE *f1 = stdin, *f2 = stdin;
  862. unsigned rval;
  863. int i;
  864. anychange = 0;
  865. context_vec_ptr = context_vec_start - 1;
  866. if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
  867. return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
  868. rval = D_SAME;
  869. if (LONE_DASH(file1) && LONE_DASH(file2))
  870. goto closem;
  871. if (flags & D_EMPTY1)
  872. f1 = xfopen(bb_dev_null, "r");
  873. else if (NOT_LONE_DASH(file1))
  874. f1 = xfopen(file1, "r");
  875. if (flags & D_EMPTY2)
  876. f2 = xfopen(bb_dev_null, "r");
  877. else if (NOT_LONE_DASH(file2))
  878. f2 = xfopen(file2, "r");
  879. /* We can't diff non-seekable stream - we use rewind(), fseek().
  880. * This can be fixed (volunteers?).
  881. * Meanwhile we should check it here by stat'ing input fds,
  882. * but I am lazy and check that in main() instead.
  883. * Check in main won't catch "diffing fifos buried in subdirectories"
  884. * failure scenario - not very likely in real life... */
  885. i = files_differ(f1, f2, flags);
  886. if (i == 0)
  887. goto closem;
  888. else if (i != 1) { /* 1 == ok */
  889. /* error */
  890. status |= 2;
  891. goto closem;
  892. }
  893. if (!asciifile(f1) || !asciifile(f2)) {
  894. rval = D_BINARY;
  895. status |= 1;
  896. goto closem;
  897. }
  898. prepare(0, f1 /*, stb1.st_size*/);
  899. prepare(1, f2 /*, stb2.st_size*/);
  900. prune();
  901. sort(sfile[0], slen[0]);
  902. sort(sfile[1], slen[1]);
  903. member = (int *) file[1];
  904. equiv(sfile[0], slen[0], sfile[1], slen[1], member);
  905. member = xrealloc(member, (slen[1] + 2) * sizeof(int));
  906. class = (int *) file[0];
  907. unsort(sfile[0], slen[0], class);
  908. class = xrealloc(class, (slen[0] + 2) * sizeof(int));
  909. klist = xmalloc((slen[0] + 2) * sizeof(int));
  910. clen = 0;
  911. clistlen = 100;
  912. clist = xmalloc(clistlen * sizeof(struct cand));
  913. i = stone(class, slen[0], member, klist);
  914. free(member);
  915. free(class);
  916. J = xrealloc(J, (len[0] + 2) * sizeof(int));
  917. unravel(klist[i]);
  918. free(clist);
  919. free(klist);
  920. ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
  921. ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
  922. check(f1, f2);
  923. output(file1, f1, file2, f2);
  924. closem:
  925. if (anychange) {
  926. status |= 1;
  927. if (rval == D_SAME)
  928. rval = D_DIFFER;
  929. }
  930. fclose_if_not_stdin(f1);
  931. fclose_if_not_stdin(f2);
  932. if (file1 != ofile1)
  933. free(file1);
  934. if (file2 != ofile2)
  935. free(file2);
  936. return rval;
  937. }
  938. #if ENABLE_FEATURE_DIFF_DIR
  939. static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
  940. {
  941. int flags = D_HEADER;
  942. int val;
  943. char *fullpath1 = NULL; /* if -N */
  944. char *fullpath2 = NULL;
  945. if (path1)
  946. fullpath1 = concat_path_file(dir1, path1);
  947. if (path2)
  948. fullpath2 = concat_path_file(dir2, path2);
  949. if (!fullpath1 || stat(fullpath1, &stb1) != 0) {
  950. flags |= D_EMPTY1;
  951. memset(&stb1, 0, sizeof(stb1));
  952. if (path2) {
  953. free(fullpath1);
  954. fullpath1 = concat_path_file(dir1, path2);
  955. }
  956. }
  957. if (!fullpath2 || stat(fullpath2, &stb2) != 0) {
  958. flags |= D_EMPTY2;
  959. memset(&stb2, 0, sizeof(stb2));
  960. stb2.st_mode = stb1.st_mode;
  961. if (path1) {
  962. free(fullpath2);
  963. fullpath2 = concat_path_file(dir2, path1);
  964. }
  965. }
  966. if (stb1.st_mode == 0)
  967. stb1.st_mode = stb2.st_mode;
  968. if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
  969. printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
  970. goto ret;
  971. }
  972. if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
  973. val = D_SKIPPED1;
  974. else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
  975. val = D_SKIPPED2;
  976. else
  977. val = diffreg(fullpath1, fullpath2, flags);
  978. print_status(val, fullpath1, fullpath2, NULL);
  979. ret:
  980. free(fullpath1);
  981. free(fullpath2);
  982. }
  983. #endif
  984. #if ENABLE_FEATURE_DIFF_DIR
  985. static int dir_strcmp(const void *p1, const void *p2)
  986. {
  987. return strcmp(*(char *const *) p1, *(char *const *) p2);
  988. }
  989. /* This function adds a filename to dl, the directory listing. */
  990. static int add_to_dirlist(const char *filename,
  991. struct stat ATTRIBUTE_UNUSED * sb, void *userdata,
  992. int depth ATTRIBUTE_UNUSED)
  993. {
  994. /* +2: with space for eventual trailing NULL */
  995. dl = xrealloc(dl, (dl_count+2) * sizeof(dl[0]));
  996. dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata);
  997. dl_count++;
  998. return TRUE;
  999. }
  1000. /* This returns a sorted directory listing. */
  1001. static char **get_dir(char *path)
  1002. {
  1003. dl_count = 0;
  1004. dl = xzalloc(sizeof(dl[0]));
  1005. /* If -r has been set, then the recursive_action function will be
  1006. * used. Unfortunately, this outputs the root directory along with
  1007. * the recursed paths, so use void *userdata to specify the string
  1008. * length of the root directory - '(void*)(strlen(path)+)'.
  1009. * add_to_dirlist then removes root dir prefix. */
  1010. if (option_mask32 & FLAG_r) {
  1011. recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS,
  1012. add_to_dirlist, NULL,
  1013. (void*)(strlen(path)+1), 0);
  1014. } else {
  1015. DIR *dp;
  1016. struct dirent *ep;
  1017. dp = warn_opendir(path);
  1018. while ((ep = readdir(dp))) {
  1019. if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.'))
  1020. continue;
  1021. add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0);
  1022. }
  1023. closedir(dp);
  1024. }
  1025. /* Sort dl alphabetically. */
  1026. qsort(dl, dl_count, sizeof(char *), dir_strcmp);
  1027. dl[dl_count] = NULL;
  1028. return dl;
  1029. }
  1030. static void diffdir(char *p1, char *p2)
  1031. {
  1032. char **dirlist1, **dirlist2;
  1033. char *dp1, *dp2;
  1034. int pos;
  1035. /* Check for trailing slashes. */
  1036. dp1 = last_char_is(p1, '/');
  1037. if (dp1 != NULL)
  1038. *dp1 = '\0';
  1039. dp2 = last_char_is(p2, '/');
  1040. if (dp2 != NULL)
  1041. *dp2 = '\0';
  1042. /* Get directory listings for p1 and p2. */
  1043. dirlist1 = get_dir(p1);
  1044. dirlist2 = get_dir(p2);
  1045. /* If -S was set, find the starting point. */
  1046. if (start) {
  1047. while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
  1048. dirlist1++;
  1049. while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
  1050. dirlist2++;
  1051. if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
  1052. bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
  1053. }
  1054. /* Now that both dirlist1 and dirlist2 contain sorted directory
  1055. * listings, we can start to go through dirlist1. If both listings
  1056. * contain the same file, then do a normal diff. Otherwise, behaviour
  1057. * is determined by whether the -N flag is set. */
  1058. while (*dirlist1 != NULL || *dirlist2 != NULL) {
  1059. dp1 = *dirlist1;
  1060. dp2 = *dirlist2;
  1061. pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
  1062. if (pos == 0) {
  1063. do_diff(p1, dp1, p2, dp2);
  1064. dirlist1++;
  1065. dirlist2++;
  1066. } else if (pos < 0) {
  1067. if (option_mask32 & FLAG_N)
  1068. do_diff(p1, dp1, p2, NULL);
  1069. else
  1070. print_only(p1, strlen(p1) + 1, dp1);
  1071. dirlist1++;
  1072. } else {
  1073. if (option_mask32 & FLAG_N)
  1074. do_diff(p1, NULL, p2, dp2);
  1075. else
  1076. print_only(p2, strlen(p2) + 1, dp2);
  1077. dirlist2++;
  1078. }
  1079. }
  1080. }
  1081. #endif
  1082. int diff_main(int argc, char **argv);
  1083. int diff_main(int argc, char **argv)
  1084. {
  1085. bool gotstdin = 0;
  1086. char *U_opt;
  1087. char *f1, *f2;
  1088. llist_t *L_arg = NULL;
  1089. INIT_G();
  1090. /* exactly 2 params; collect multiple -L <label> */
  1091. opt_complementary = "=2:L::";
  1092. getopt32(argv, "abdiL:NqrsS:tTU:wu"
  1093. "p" /* ignored (for compatibility) */,
  1094. &L_arg, &start, &U_opt);
  1095. /*argc -= optind;*/
  1096. argv += optind;
  1097. while (L_arg) {
  1098. if (label1 && label2)
  1099. bb_show_usage();
  1100. if (!label1)
  1101. label1 = L_arg->data;
  1102. else { /* then label2 is NULL */
  1103. label2 = label1;
  1104. label1 = L_arg->data;
  1105. }
  1106. /* we leak L_arg here... */
  1107. L_arg = L_arg->link;
  1108. }
  1109. if (option_mask32 & FLAG_U)
  1110. context = xatoi_u(U_opt);
  1111. /*
  1112. * Do sanity checks, fill in stb1 and stb2 and call the appropriate
  1113. * driver routine. Both drivers use the contents of stb1 and stb2.
  1114. */
  1115. f1 = argv[0];
  1116. f2 = argv[1];
  1117. if (LONE_DASH(f1)) {
  1118. fstat(STDIN_FILENO, &stb1);
  1119. gotstdin = 1;
  1120. } else
  1121. xstat(f1, &stb1);
  1122. if (LONE_DASH(f2)) {
  1123. fstat(STDIN_FILENO, &stb2);
  1124. gotstdin = 1;
  1125. } else
  1126. xstat(f2, &stb2);
  1127. if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
  1128. bb_error_msg_and_die("can't compare - to a directory");
  1129. if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
  1130. #if ENABLE_FEATURE_DIFF_DIR
  1131. diffdir(f1, f2);
  1132. #else
  1133. bb_error_msg_and_die("directory comparison not supported");
  1134. #endif
  1135. } else {
  1136. if (S_ISDIR(stb1.st_mode)) {
  1137. f1 = concat_path_file(f1, f2);
  1138. xstat(f1, &stb1);
  1139. }
  1140. if (S_ISDIR(stb2.st_mode)) {
  1141. f2 = concat_path_file(f2, f1);
  1142. xstat(f2, &stb2);
  1143. }
  1144. /* XXX: FIXME: */
  1145. /* We can't diff e.g. stdin supplied by a pipe - we use rewind(), fseek().
  1146. * This can be fixed (volunteers?) */
  1147. if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode))
  1148. bb_error_msg_and_die("can't diff non-seekable stream");
  1149. print_status(diffreg(f1, f2, 0), f1, f2, NULL);
  1150. }
  1151. return status;
  1152. }