/* vac.c */

  1. #include "stdinc.h"
  2. #include "vac.h"
  3. #include "dat.h"
  4. #include "fns.h"
  /*
   * Print the command-line synopsis on standard error and
   * terminate all threads with exit status "usage".
   */
  void
  usage(void)
  {
      /*
       * -i consumes a name argument (see the 'i' case in threadmain),
       * so it is listed with the other argument-taking options rather
       * than among the plain flags.
       */
      fprint(2, "usage: vac [-mqsv] [-b bsize] [-d old.vac] [-e exclude]... "
          "[-f new.vac] [-h host] [-i name] file...\n");
      threadexitsall("usage");
  }
  /* Compile-time defaults and limits. */
  enum
  {
      MaxExclude = 1000,      /* capacity of the exclude[] table (-e options) */
      BlockSize = 8*1024      /* block size used when -b is not given */
  };
  17. struct
  18. {
  19. int nfile;
  20. int ndir;
  21. vlong data;
  22. vlong skipdata;
  23. int skipfiles;
  24. } stats;
  25. int qdiff;
  26. int merge;
  27. int verbose;
  28. char *host;
  29. VtConn *z;
  30. VacFs *fs;
  31. char *exclude[MaxExclude];
  32. int nexclude;
  33. char *vacfile;
  34. int vacmerge(VacFile*, char*);
  35. void vac(VacFile*, VacFile*, char*, Dir*);
  36. void vacstdin(VacFile*, char*);
  37. static u64int unittoull(char*);
  38. static void warn(char *fmt, ...);
  39. static int strpcmp(const void*, const void*);
  40. static void removevacfile(void);
  41. void
  42. threadmain(int argc, char **argv)
  43. {
  44. int i, j, fd, n, printstats;
  45. Dir *d;
  46. char *s;
  47. uvlong u;
  48. VacFile *f, *fdiff;
  49. VacFs *fsdiff;
  50. int blocksize;
  51. int outfd;
  52. char *stdinname;
  53. char *diffvac;
  54. uvlong qid;
  55. fmtinstall('H', encodefmt);
  56. fmtinstall('V', vtscorefmt);
  57. blocksize = BlockSize;
  58. stdinname = nil;
  59. printstats = 0;
  60. fsdiff = nil;
  61. diffvac = nil;
  62. ARGBEGIN{
  63. case 'b':
  64. u = unittoull(EARGF(usage()));
  65. if(u < 512)
  66. u = 512;
  67. if(u > VtMaxLumpSize)
  68. u = VtMaxLumpSize;
  69. blocksize = u;
  70. break;
  71. case 'd':
  72. diffvac = EARGF(usage());
  73. break;
  74. case 'e':
  75. if(nexclude >= MaxExclude)
  76. sysfatal("too many exclusions\n");
  77. exclude[nexclude] = ARGF();
  78. if(exclude[nexclude] == nil)
  79. usage();
  80. nexclude++;
  81. break;
  82. case 'f':
  83. vacfile = EARGF(usage());
  84. break;
  85. case 'h':
  86. host = EARGF(usage());
  87. break;
  88. case 'i':
  89. stdinname = EARGF(usage());
  90. break;
  91. case 'm':
  92. merge++;
  93. break;
  94. case 'q':
  95. qdiff++;
  96. break;
  97. case 's':
  98. printstats++;
  99. break;
  100. case 'v':
  101. verbose++;
  102. break;
  103. default:
  104. usage();
  105. }ARGEND
  106. if(argc == 0 && !stdinname)
  107. usage();
  108. if(vacfile == nil)
  109. outfd = 1;
  110. else if((outfd = create(vacfile, OWRITE, 0666)) < 0)
  111. sysfatal("create %s: %r", vacfile);
  112. atexit(removevacfile);
  113. qsort(exclude, nexclude, sizeof(char*), strpcmp);
  114. z = vtdial(host);
  115. if(z == nil)
  116. sysfatal("could not connect to server: %r");
  117. if(vtconnect(z) < 0)
  118. sysfatal("vtconnect: %r");
  119. if(diffvac){
  120. if((fsdiff = vacfsopen(z, diffvac, VtOREAD, 128)) == nil)
  121. warn("vacfsopen %s: %r", diffvac);
  122. }
  123. if((fs = vacfscreate(z, blocksize, 512)) == nil)
  124. sysfatal("vacfscreate: %r");
  125. f = vacfsgetroot(fs);
  126. if(fsdiff)
  127. fdiff = vacfsgetroot(fsdiff);
  128. else
  129. fdiff = nil;
  130. if(stdinname)
  131. vacstdin(f, stdinname);
  132. for(i=0; i<argc; i++){
  133. /*
  134. * We can't use / and . and .. and ../.. as valid archive
  135. * names, so expand to the list of files in the directory.
  136. */
  137. if(argv[i][0] == 0){
  138. warn("empty string given as command-line argument");
  139. continue;
  140. }
  141. cleanname(argv[i]);
  142. if(strcmp(argv[i], "/") == 0
  143. || strcmp(argv[i], ".") == 0
  144. || strcmp(argv[i], "..") == 0
  145. || (strlen(argv[i]) > 3 && strcmp(argv[i]+strlen(argv[i])-3, "/..") == 0)){
  146. if((fd = open(argv[i], OREAD)) < 0){
  147. warn("open %s: %r", argv[i]);
  148. continue;
  149. }
  150. while((n = dirread(fd, &d)) > 0){
  151. for(j=0; j<n; j++){
  152. s = vtmalloc(strlen(argv[i])+1+strlen(d[j].name)+1);
  153. strcpy(s, argv[i]);
  154. strcat(s, "/");
  155. strcat(s, d[j].name);
  156. cleanname(s);
  157. vac(f, fdiff, s, &d[j]);
  158. }
  159. free(d);
  160. }
  161. close(fd);
  162. continue;
  163. }
  164. if((d = dirstat(argv[i])) == nil){
  165. warn("stat %s: %r", argv[i]);
  166. continue;
  167. }
  168. vac(f, fdiff, argv[i], d);
  169. free(d);
  170. }
  171. if(fdiff)
  172. vacfiledecref(fdiff);
  173. /*
  174. * Record the maximum qid so that vacs can be merged
  175. * without introducing overlapping qids. Older versions
  176. * of vac arranged that the root would have the largest
  177. * qid in the file system, but we can't do that anymore
  178. * (the root gets created first!).
  179. */
  180. if(_vacfsnextqid(fs, &qid) >= 0)
  181. vacfilesetqidspace(f, 0, qid);
  182. vacfiledecref(f);
  183. /*
  184. * Copy fsdiff's root block score into fs's slot for that,
  185. * so that vacfssync will copy it into root.prev for us.
  186. * Just nice documentation, no effect.
  187. */
  188. if(fsdiff)
  189. memmove(fs->score, fsdiff->score, VtScoreSize);
  190. if(vacfssync(fs) < 0)
  191. fprint(2, "vacfssync: %r\n");
  192. fprint(outfd, "vac:%V\n", fs->score);
  193. vacfsclose(fs);
  194. atexitdont(removevacfile);
  195. vthangup(z);
  196. if(printstats){
  197. fprint(2,
  198. "%d files, %d files skipped, %d directories\n"
  199. "%lld data bytes written, %lld data bytes skipped\n",
  200. stats.nfile, stats.skipfiles, stats.ndir, stats.data, stats.skipdata);
  201. dup(2, 1);
  202. packetstats();
  203. }
  204. threadexitsall(0);
  205. }
  206. static void
  207. removevacfile(void)
  208. {
  209. if(vacfile)
  210. remove(vacfile);
  211. }
  /*
   * qsort comparison function for an array of char*:
   * each argument points at one string pointer.
   */
  static int
  strpcmp(const void *p0, const void *p1)
  {
      char *left, *right;

      left = *(char**)p0;
      right = *(char**)p1;
      return strcmp(left, right);
  }
  217. static int
  218. isexcluded(char *name)
  219. {
  220. int bot, top, i, x;
  221. bot = 0;
  222. top = nexclude;
  223. while(bot < top) {
  224. i = (bot+top)>>1;
  225. x = strcmp(exclude[i], name);
  226. if(x == 0)
  227. return 1;
  228. if(x < 0)
  229. bot = i + 1;
  230. else /* x > 0 */
  231. top = i;
  232. }
  233. return 0;
  234. }
  235. void
  236. plan9tovacdir(VacDir *vd, Dir *dir)
  237. {
  238. memset(vd, 0, sizeof *vd);
  239. vd->elem = dir->name;
  240. vd->uid = dir->uid;
  241. vd->gid = dir->gid;
  242. vd->mid = dir->muid;
  243. if(vd->mid == nil)
  244. vd->mid = "";
  245. vd->mtime = dir->mtime;
  246. vd->mcount = 0;
  247. vd->ctime = dir->mtime; /* ctime: not available on plan 9 */
  248. vd->atime = dir->atime;
  249. vd->size = dir->length;
  250. vd->mode = dir->mode & 0777;
  251. if(dir->mode & DMDIR)
  252. vd->mode |= ModeDir;
  253. if(dir->mode & DMAPPEND)
  254. vd->mode |= ModeAppend;
  255. if(dir->mode & DMEXCL)
  256. vd->mode |= ModeExclusive;
  257. vd->plan9 = 1;
  258. vd->p9path = dir->qid.path;
  259. vd->p9version = dir->qid.vers;
  260. }
  261. /*
  262. * Does block b of f have the same SHA1 hash as the n bytes at buf?
  263. */
  264. static int
  265. sha1matches(VacFile *f, ulong b, uchar *buf, int n)
  266. {
  267. uchar fscore[VtScoreSize];
  268. uchar bufscore[VtScoreSize];
  269. if(vacfileblockscore(f, b, fscore) < 0)
  270. return 0;
  271. n = vtzerotruncate(VtDataType, buf, n);
  272. sha1(buf, n, bufscore, nil);
  273. if(memcmp(bufscore, fscore, VtScoreSize) == 0)
  274. return 1;
  275. return 0;
  276. }
  277. /*
  278. * Archive the file named name, which has stat info d,
  279. * into the vac directory fp (p = parent).
  280. *
  281. * If we're doing a vac -d against another archive, the
  282. * equivalent directory to fp in that archive is diffp.
  283. */
  284. void
  285. vac(VacFile *fp, VacFile *diffp, char *name, Dir *d)
  286. {
  287. char *elem, *s;
  288. static char buf[65536];
  289. int fd, i, n, bsize;
  290. vlong off;
  291. Dir *dk; /* kids */
  292. VacDir vd, vddiff;
  293. VacFile *f, *fdiff;
  294. VtEntry e;
  295. if(isexcluded(name)){
  296. warn("excluding %s%s", name, (d->mode&DMDIR) ? "/" : "");
  297. return;
  298. }
  299. if(d->mode&DMDIR)
  300. stats.ndir++;
  301. else
  302. stats.nfile++;
  303. if(merge && vacmerge(fp, name) >= 0)
  304. return;
  305. if(verbose)
  306. fprint(2, "%s%s\n", name, (d->mode&DMDIR) ? "/" : "");
  307. if((fd = open(name, OREAD)) < 0){
  308. warn("open %s: %r", name);
  309. return;
  310. }
  311. elem = strrchr(name, '/');
  312. if(elem)
  313. elem++;
  314. else
  315. elem = name;
  316. plan9tovacdir(&vd, d);
  317. if((f = vacfilecreate(fp, elem, vd.mode)) == nil){
  318. warn("vacfilecreate %s: %r", name);
  319. return;
  320. }
  321. if(diffp)
  322. fdiff = vacfilewalk(diffp, elem);
  323. else
  324. fdiff = nil;
  325. if(vacfilesetdir(f, &vd) < 0)
  326. warn("vacfilesetdir %s: %r", name);
  327. if(d->mode&DMDIR){
  328. while((n = dirread(fd, &dk)) > 0){
  329. for(i=0; i<n; i++){
  330. s = vtmalloc(strlen(name)+1+strlen(dk[i].name)+1);
  331. strcpy(s, name);
  332. strcat(s, "/");
  333. strcat(s, dk[i].name);
  334. vac(f, fdiff, s, &dk[i]);
  335. free(s);
  336. }
  337. free(dk);
  338. }
  339. }else{
  340. off = 0;
  341. bsize = fs->bsize;
  342. if(fdiff){
  343. /*
  344. * Copy fdiff's contents into f by moving the score.
  345. * We'll diff and update below.
  346. */
  347. if(vacfilegetentries(fdiff, &e, nil) >= 0)
  348. if(vacfilesetentries(f, &e, nil) >= 0){
  349. bsize = e.dsize;
  350. /*
  351. * Or if -q is set, and the metadata looks the same,
  352. * don't even bother reading the file.
  353. */
  354. if(qdiff && vacfilegetdir(fdiff, &vddiff) >= 0){
  355. if(vddiff.mtime == vd.mtime)
  356. if(vddiff.size == vd.size)
  357. if(!vddiff.plan9 || (/* vddiff.p9path == vd.p9path && */ vddiff.p9version == vd.p9version)){
  358. stats.skipfiles++;
  359. stats.nfile--;
  360. vdcleanup(&vddiff);
  361. goto Out;
  362. }
  363. /*
  364. * Skip over presumably-unchanged prefix
  365. * of an append-only file.
  366. */
  367. if(vd.mode&ModeAppend)
  368. if(vddiff.size < vd.size)
  369. if(vddiff.plan9 && vd.plan9)
  370. if(vddiff.p9path == vd.p9path){
  371. off = vd.size/bsize*bsize;
  372. if(seek(fd, off, 0) >= 0)
  373. stats.skipdata += off;
  374. else{
  375. seek(fd, 0, 0); /* paranoia */
  376. off = 0;
  377. }
  378. }
  379. vdcleanup(&vddiff);
  380. // XXX different verbose chatty prints for kaminsky?
  381. }
  382. }
  383. }
  384. if(qdiff && verbose)
  385. fprint(2, "+%s\n", name);
  386. while((n = readn(fd, buf, bsize)) > 0){
  387. if(fdiff && sha1matches(f, off/bsize, (uchar*)buf, n)){
  388. off += n;
  389. stats.skipdata += n;
  390. continue;
  391. }
  392. if(vacfilewrite(f, buf, n, off) < 0){
  393. warn("venti write %s: %r", name);
  394. goto Out;
  395. }
  396. stats.data += n;
  397. off += n;
  398. }
  399. /*
  400. * Since we started with fdiff's contents,
  401. * set the size in case fdiff was bigger.
  402. */
  403. if(fdiff && vacfilesetsize(f, off) < 0)
  404. warn("vtfilesetsize %s: %r", name);
  405. }
  406. Out:
  407. vacfileflush(f, 1);
  408. vacfiledecref(f);
  409. if(fdiff)
  410. vacfiledecref(fdiff);
  411. close(fd);
  412. }
  413. void
  414. vacstdin(VacFile *fp, char *name)
  415. {
  416. vlong off;
  417. VacFile *f;
  418. static char buf[8192];
  419. int n;
  420. if((f = vacfilecreate(fp, name, 0666)) == nil){
  421. warn("vacfilecreate %s: %r", name);
  422. return;
  423. }
  424. off = 0;
  425. while((n = read(0, buf, sizeof buf)) > 0){
  426. if(vacfilewrite(f, buf, n, off) < 0){
  427. warn("venti write %s: %r", name);
  428. vacfiledecref(f);
  429. return;
  430. }
  431. off += n;
  432. }
  433. vacfileflush(f, 1);
  434. vacfiledecref(f);
  435. }
  436. /*
  437. * fp is the directory we're writing.
  438. * mp is the directory whose contents we're merging in.
  439. * d is the directory entry of the file from mp that we want to add to fp.
  440. * vacfile is the name of the .vac file, for error messages.
  441. * offset is the qid that qid==0 in mp should correspond to.
  442. * max is the maximum qid we expect to see (not really needed).
  443. */
  444. int
  445. vacmergefile(VacFile *fp, VacFile *mp, VacDir *d, char *vacfile,
  446. vlong offset, vlong max)
  447. {
  448. VtEntry ed, em;
  449. VacFile *mf;
  450. VacFile *f;
  451. mf = vacfilewalk(mp, d->elem);
  452. if(mf == nil){
  453. warn("could not walk %s in %s", d->elem, vacfile);
  454. return -1;
  455. }
  456. if(vacfilegetentries(mf, &ed, &em) < 0){
  457. warn("could not get entries for %s in %s", d->elem, vacfile);
  458. vacfiledecref(mf);
  459. return -1;
  460. }
  461. if((f = vacfilecreate(fp, d->elem, d->mode)) == nil){
  462. warn("vacfilecreate %s: %r", d->elem);
  463. vacfiledecref(mf);
  464. return -1;
  465. }
  466. if(d->qidspace){
  467. d->qidoffset += offset;
  468. d->qidmax += offset;
  469. }else{
  470. d->qidspace = 1;
  471. d->qidoffset = offset;
  472. d->qidmax = max;
  473. }
  474. if(vacfilesetdir(f, d) < 0
  475. || vacfilesetentries(f, &ed, &em) < 0
  476. || vacfilesetqidspace(f, d->qidoffset, d->qidmax) < 0){
  477. warn("vacmergefile %s: %r", d->elem);
  478. vacfiledecref(mf);
  479. vacfiledecref(f);
  480. return -1;
  481. }
  482. vacfiledecref(mf);
  483. vacfiledecref(f);
  484. return 0;
  485. }
  486. int
  487. vacmerge(VacFile *fp, char *name)
  488. {
  489. VacFs *mfs;
  490. VacDir vd;
  491. VacDirEnum *de;
  492. VacFile *mp;
  493. uvlong maxqid, offset;
  494. if(strlen(name) < 4 || strcmp(name+strlen(name)-4, ".vac") != 0)
  495. return -1;
  496. if((mfs = vacfsopen(z, name, VtOREAD, 100)) == nil)
  497. return -1;
  498. if(verbose)
  499. fprint(2, "merging %s\n", name);
  500. de = vdeopen(fs->root);
  501. if(de){
  502. mp = vacfsgetroot(mfs);
  503. offset = 0;
  504. if(vacfsgetmaxqid(mfs, &maxqid) >= 0){
  505. _vacfsnextqid(fs, &offset);
  506. vacfsjumpqid(fs, maxqid+1);
  507. }
  508. while(vderead(de, &vd) > 0){
  509. if(vd.qid > maxqid){
  510. warn("vacmerge %s: maxqid=%lld but %s has %lld",
  511. name, maxqid, vd.elem, vd.qid);
  512. vacfsjumpqid(fs, vd.qid - maxqid);
  513. maxqid = vd.qid;
  514. }
  515. vacmergefile(fp, mp, &vd, name,
  516. offset, maxqid);
  517. vdcleanup(&vd);
  518. }
  519. vdeclose(de);
  520. vacfiledecref(mp);
  521. }
  522. vacfsclose(mfs);
  523. return 0;
  524. }
  525. #define TWID64 ((u64int)~(u64int)0)
  526. static u64int
  527. unittoull(char *s)
  528. {
  529. char *es;
  530. u64int n;
  531. if(s == nil)
  532. return TWID64;
  533. n = strtoul(s, &es, 0);
  534. if(*es == 'k' || *es == 'K'){
  535. n *= 1024;
  536. es++;
  537. }else if(*es == 'm' || *es == 'M'){
  538. n *= 1024*1024;
  539. es++;
  540. }else if(*es == 'g' || *es == 'G'){
  541. n *= 1024*1024*1024;
  542. es++;
  543. }
  544. if(*es != '\0')
  545. return TWID64;
  546. return n;
  547. }
  /*
   * Print a diagnostic on standard error: the prefix "vac: ", the
   * print-style formatted message, and a newline.
   */
  static void
  warn(char *fmt, ...)
  {
      va_list ap;

      fprint(2, "vac: ");

      va_start(ap, fmt);
      vfprint(2, fmt, ap);
      va_end(ap);

      fprint(2, "\n");
  }