mirrorarenas.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /*
  2. * Mirror one arena partition onto another.
  3. * Be careful to copy only new data.
  4. */
  5. #include "stdinc.h"
  6. #include "dat.h"
  7. #include "fns.h"
  8. Channel *writechan;
  9. typedef struct Write Write;
  10. struct Write
  11. {
  12. uchar *p;
  13. int n;
  14. uvlong o;
  15. int error;
  16. };
  17. Part *src;
  18. Part *dst;
  19. int force;
  20. int verbose;
  21. int dosha1 = 1;
  22. char *status;
  23. uvlong astart, aend;
  /*
   * Print the command synopsis on file descriptor 2 and exit
   * all procs with status "usage".
   *
   * The option list includes F because threadmain accepts -F
   * (force a full copy) alongside -s and -v.
   */
  void
  usage(void)
  {
      fprint(2, "usage: mirrorarenas [-Fsv] src dst [ranges]\n");
      threadexitsall("usage");
  }
  30. char *tagged;
  31. void
  32. tag(char *fmt, ...)
  33. {
  34. va_list arg;
  35. if(tagged){
  36. free(tagged);
  37. tagged = nil;
  38. }
  39. va_start(arg, fmt);
  40. tagged = vsmprint(fmt, arg);
  41. va_end(arg);
  42. }
  43. void
  44. chat(char *fmt, ...)
  45. {
  46. va_list arg;
  47. if(tagged){
  48. write(1, tagged, strlen(tagged));
  49. free(tagged);
  50. tagged = nil;
  51. }
  52. va_start(arg, fmt);
  53. vfprint(1, fmt, arg);
  54. va_end(arg);
  55. }
  56. #pragma varargck argpos tag 1
  57. #pragma varargck argpos chat 1
  58. int
  59. ereadpart(Part *p, u64int offset, u8int *buf, u32int count)
  60. {
  61. if(readpart(p, offset, buf, count) != count){
  62. chat("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, count);
  63. return -1;
  64. }
  65. return 0;
  66. }
  67. int
  68. ewritepart(Part *p, u64int offset, u8int *buf, u32int count)
  69. {
  70. if(writepart(p, offset, buf, count) != count || flushpart(p) < 0){
  71. chat("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, count);
  72. return -1;
  73. }
  74. return 0;
  75. }
  76. /*
  77. * Extra proc to do writes to dst, so that we can overlap reading
  78. * src with writing dst during copy. This is an easy factor of two
  79. * (almost) in performance.
  80. */
  81. static Write wsync;
  82. static void
  83. writeproc(void *v)
  84. {
  85. Write *w;
  86. USED(v);
  87. while((w = recvp(writechan)) != nil){
  88. if(w == &wsync)
  89. continue;
  90. if(ewritepart(dst, w->o, w->p, w->n) < 0)
  91. w->error = 1;
  92. }
  93. }
  94. int
  95. copy(uvlong start, uvlong end, char *what, DigestState *ds)
  96. {
  97. int i, n;
  98. uvlong o;
  99. static uchar tmp[2][1024*1024];
  100. Write w[2];
  101. assert(start <= end);
  102. assert(astart <= start && start < aend);
  103. assert(astart <= end && end <= aend);
  104. if(verbose && start != end)
  105. chat("%T copy %,llud-%,llud %s\n", start, end, what);
  106. i = 0;
  107. memset(w, 0, sizeof w);
  108. for(o=start; o<end; o+=n){
  109. if(w[i].error)
  110. goto error;
  111. n = sizeof tmp[i];
  112. if(o+n > end)
  113. n = end - o;
  114. if(ereadpart(src, o, tmp[i], n) < 0)
  115. goto error;
  116. w[i].p = tmp[i];
  117. w[i].o = o;
  118. w[i].n = n;
  119. w[i].error = 0;
  120. sendp(writechan, &w[i]);
  121. if(ds)
  122. sha1(tmp[i], n, nil, ds);
  123. i = 1-i;
  124. }
  125. if(w[i].error)
  126. goto error;
  127. /*
  128. * wait for queued write to finish
  129. */
  130. sendp(writechan, &wsync);
  131. i = 1-i;
  132. if(w[i].error)
  133. return -1;
  134. return 0;
  135. error:
  136. /*
  137. * sync with write proc
  138. */
  139. w[i].p = nil;
  140. w[i].o = 0;
  141. w[i].n = 0;
  142. w[i].error = 0;
  143. sendp(writechan, &w[i]);
  144. return -1;
  145. }
  146. /* single-threaded, for reference */
  147. int
  148. copy1(uvlong start, uvlong end, char *what, DigestState *ds)
  149. {
  150. int n;
  151. uvlong o;
  152. static uchar tmp[1024*1024];
  153. assert(start <= end);
  154. assert(astart <= start && start < aend);
  155. assert(astart <= end && end <= aend);
  156. if(verbose && start != end)
  157. chat("%T copy %,llud-%,llud %s\n", start, end, what);
  158. for(o=start; o<end; o+=n){
  159. n = sizeof tmp;
  160. if(o+n > end)
  161. n = end - o;
  162. if(ereadpart(src, o, tmp, n) < 0)
  163. return -1;
  164. if(ds)
  165. sha1(tmp, n, nil, ds);
  166. if(ewritepart(dst, o, tmp, n) < 0)
  167. return -1;
  168. }
  169. return 0;
  170. }
  171. int
  172. asha1(Part *p, uvlong start, uvlong end, DigestState *ds)
  173. {
  174. int n;
  175. uvlong o;
  176. static uchar tmp[1024*1024];
  177. if(start == end)
  178. return 0;
  179. assert(start < end);
  180. if(verbose)
  181. chat("%T sha1 %,llud-%,llud\n", start, end);
  182. for(o=start; o<end; o+=n){
  183. n = sizeof tmp;
  184. if(o+n > end)
  185. n = end - o;
  186. if(ereadpart(p, o, tmp, n) < 0)
  187. return -1;
  188. sha1(tmp, n, nil, ds);
  189. }
  190. return 0;
  191. }
  /*
   * Round a down to a multiple of b: the largest multiple of b
   * that does not exceed a.  b must not be zero.
   */
  uvlong
  rdown(uvlong a, int b)
  {
      uvlong whole;

      whole = a/b;    /* number of complete b-sized units in a */
      return whole*b;
  }
  /*
   * Round a up to a multiple of b: a itself when it is already
   * a multiple, otherwise the next multiple above it.
   * b must not be zero.
   */
  uvlong
  rup(uvlong a, int b)
  {
      uvlong rem;

      rem = a%b;
      return rem == 0 ? a : a+b-rem;
  }
  204. void
  205. mirror(Arena *sa, Arena *da)
  206. {
  207. vlong v, si, di, end;
  208. int clumpmax, blocksize, sealed;
  209. static uchar buf[MaxIoSize];
  210. ArenaHead h;
  211. DigestState xds, *ds;
  212. vlong shaoff, base;
  213. base = sa->base;
  214. blocksize = sa->blocksize;
  215. end = sa->base + sa->size;
  216. astart = base - blocksize;
  217. aend = end + blocksize;
  218. tag("%T %s (%,llud-%,llud)\n", sa->name, astart, aend);
  219. if(force){
  220. copy(astart, aend, "all", nil);
  221. return;
  222. }
  223. if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
  224. if(scorecmp(sa->score, da->score) == 0){
  225. if(verbose)
  226. chat("%T %s: %V sealed mirrored\n", sa->name, sa->score);
  227. return;
  228. }
  229. chat("%T %s: warning: sealed score mismatch %V vs %V\n", sa->name, sa->score, da->score);
  230. /* Keep executing; will correct seal if possible. */
  231. }
  232. if(!sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
  233. chat("%T %s: dst is sealed, src is not\n", sa->name);
  234. status = "errors";
  235. return;
  236. }
  237. if(sa->diskstats.used < da->diskstats.used){
  238. chat("%T %s: src used %,lld < dst used %,lld\n", sa->name, sa->diskstats.used, da->diskstats.used);
  239. status = "errors";
  240. return;
  241. }
  242. if(da->clumpmagic != sa->clumpmagic){
  243. /*
  244. * Write this now to reduce the window in which
  245. * the head and tail disagree about clumpmagic.
  246. */
  247. da->clumpmagic = sa->clumpmagic;
  248. memset(buf, 0, sizeof buf);
  249. packarena(da, buf);
  250. if(ewritepart(dst, end, buf, blocksize) < 0)
  251. return;
  252. }
  253. memset(&h, 0, sizeof h);
  254. h.version = da->version;
  255. strcpy(h.name, da->name);
  256. h.blocksize = da->blocksize;
  257. h.size = da->size + 2*da->blocksize;
  258. h.clumpmagic = da->clumpmagic;
  259. memset(buf, 0, sizeof buf);
  260. packarenahead(&h, buf);
  261. if(ewritepart(dst, base - blocksize, buf, blocksize) < 0)
  262. return;
  263. shaoff = 0;
  264. ds = nil;
  265. sealed = sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0;
  266. if(sealed && dosha1){
  267. /* start sha1 state with header */
  268. memset(&xds, 0, sizeof xds);
  269. ds = &xds;
  270. sha1(buf, blocksize, nil, ds);
  271. shaoff = base;
  272. }
  273. if(sa->diskstats.used != da->diskstats.used){
  274. di = base+rdown(da->diskstats.used, blocksize);
  275. si = base+rup(sa->diskstats.used, blocksize);
  276. if(ds && asha1(dst, shaoff, di, ds) < 0)
  277. return;
  278. if(copy(di, si, "data", ds) < 0)
  279. return;
  280. shaoff = si;
  281. }
  282. clumpmax = sa->clumpmax;
  283. di = end - da->diskstats.clumps/clumpmax * blocksize;
  284. si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize;
  285. if(sa->diskstats.sealed){
  286. /*
  287. * might be a small hole between the end of the
  288. * data and the beginning of the directory.
  289. */
  290. v = base+rup(sa->diskstats.used, blocksize);
  291. if(ds && asha1(dst, shaoff, v, ds) < 0)
  292. return;
  293. if(copy(v, si, "hole", ds) < 0)
  294. return;
  295. shaoff = si;
  296. }
  297. if(da->diskstats.clumps != sa->diskstats.clumps){
  298. if(ds && asha1(dst, shaoff, si, ds) < 0)
  299. return;
  300. if(copy(si, di, "directory", ds) < 0) /* si < di because clumpinfo blocks grow down */
  301. return;
  302. shaoff = di;
  303. }
  304. da->ctime = sa->ctime;
  305. da->wtime = sa->wtime;
  306. da->diskstats = sa->diskstats;
  307. da->diskstats.sealed = 0;
  308. /*
  309. * Repack the arena tail information
  310. * and save it for next time...
  311. */
  312. memset(buf, 0, sizeof buf);
  313. packarena(da, buf);
  314. if(ewritepart(dst, end, buf, blocksize) < 0)
  315. return;
  316. if(sealed){
  317. /*
  318. * ... but on the final pass, copy the encoding
  319. * of the tail information from the source
  320. * arena itself. There are multiple possible
  321. * ways to write the tail info out (the exact
  322. * details have changed as venti went through
  323. * revisions), and to keep the SHA1 hash the
  324. * same, we have to use what the disk uses.
  325. */
  326. if(asha1(dst, shaoff, end, ds) < 0
  327. || copy(end, end+blocksize-VtScoreSize, "tail", ds) < 0)
  328. return;
  329. if(dosha1){
  330. memset(buf, 0, VtScoreSize);
  331. sha1(buf, VtScoreSize, da->score, ds);
  332. if(scorecmp(sa->score, da->score) == 0){
  333. if(verbose)
  334. chat("%T %s: %V sealed mirrored\n", sa->name, sa->score);
  335. if(ewritepart(dst, end+blocksize-VtScoreSize, da->score, VtScoreSize) < 0)
  336. return;
  337. }else{
  338. chat("%T %s: sealing dst: score mismatch: %V vs %V\n", sa->name, sa->score, da->score);
  339. memset(&xds, 0, sizeof xds);
  340. asha1(dst, base-blocksize, end+blocksize-VtScoreSize, &xds);
  341. sha1(buf, VtScoreSize, 0, &xds);
  342. chat("%T reseal: %V\n", da->score);
  343. status = "errors";
  344. }
  345. }else{
  346. if(verbose)
  347. chat("%T %s: %V mirrored\n", sa->name, sa->score);
  348. if(ewritepart(dst, end+blocksize-VtScoreSize, sa->score, VtScoreSize) < 0)
  349. return;
  350. }
  351. }else{
  352. chat("%T %s: %,lld used mirrored\n",
  353. sa->name, sa->diskstats.used);
  354. }
  355. }
  356. void
  357. mirrormany(ArenaPart *sp, ArenaPart *dp, char *range)
  358. {
  359. int i, lo, hi;
  360. char *s, *t;
  361. Arena *sa, *da;
  362. if(range == nil){
  363. for(i=0; i<sp->narenas; i++){
  364. sa = sp->arenas[i];
  365. da = dp->arenas[i];
  366. mirror(sa, da);
  367. }
  368. return;
  369. }
  370. if(strcmp(range, "none") == 0)
  371. return;
  372. for(s=range; *s; s=t){
  373. t = strchr(s, ',');
  374. if(t)
  375. *t++ = 0;
  376. else
  377. t = s+strlen(s);
  378. if(*s == '-')
  379. lo = 0;
  380. else
  381. lo = strtol(s, &s, 0);
  382. hi = lo;
  383. if(*s == '-'){
  384. s++;
  385. if(*s == 0)
  386. hi = sp->narenas-1;
  387. else
  388. hi = strtol(s, &s, 0);
  389. }
  390. if(*s != 0){
  391. chat("%T bad arena range: %s\n", s);
  392. continue;
  393. }
  394. for(i=lo; i<=hi; i++){
  395. sa = sp->arenas[i];
  396. da = dp->arenas[i];
  397. mirror(sa, da);
  398. }
  399. }
  400. }
  401. void
  402. threadmain(int argc, char **argv)
  403. {
  404. int i;
  405. Arena *sa, *da;
  406. ArenaPart *s, *d;
  407. char *ranges;
  408. ventifmtinstall();
  409. ARGBEGIN{
  410. case 'F':
  411. force = 1;
  412. break;
  413. case 'v':
  414. verbose++;
  415. break;
  416. case 's':
  417. dosha1 = 0;
  418. break;
  419. default:
  420. usage();
  421. }ARGEND
  422. if(argc != 2 && argc != 3)
  423. usage();
  424. ranges = nil;
  425. if(argc == 3)
  426. ranges = argv[2];
  427. if((src = initpart(argv[0], OREAD)) == nil)
  428. sysfatal("initpart %s: %r", argv[0]);
  429. if((dst = initpart(argv[1], ORDWR)) == nil)
  430. sysfatal("initpart %s: %r", argv[1]);
  431. if((s = initarenapart(src)) == nil)
  432. sysfatal("initarenapart %s: %r", argv[0]);
  433. for(i=0; i<s->narenas; i++)
  434. delarena(s->arenas[i]);
  435. if((d = initarenapart(dst)) == nil)
  436. sysfatal("loadarenapart %s: %r", argv[1]);
  437. for(i=0; i<d->narenas; i++)
  438. delarena(d->arenas[i]);
  439. /*
  440. * The arena geometries must match or all bets are off.
  441. */
  442. if(s->narenas != d->narenas)
  443. sysfatal("arena count mismatch: %d vs %d", s->narenas, d->narenas);
  444. for(i=0; i<s->narenas; i++){
  445. sa = s->arenas[i];
  446. da = d->arenas[i];
  447. if(sa->version != da->version)
  448. sysfatal("arena %d: version mismatch: %d vs %d", i, sa->version, da->version);
  449. if(sa->blocksize != da->blocksize)
  450. sysfatal("arena %d: blocksize mismatch: %d vs %d", i, sa->blocksize, da->blocksize);
  451. if(sa->size != da->size)
  452. sysfatal("arena %d: size mismatch: %,lld vs %,lld", i, sa->size, da->size);
  453. if(strcmp(sa->name, da->name) != 0)
  454. sysfatal("arena %d: name mismatch: %s vs %s", i, sa->name, da->name);
  455. }
  456. /*
  457. * Mirror one arena at a time.
  458. */
  459. writechan = chancreate(sizeof(void*), 0);
  460. vtproc(writeproc, nil);
  461. mirrormany(s, d, ranges);
  462. sendp(writechan, nil);
  463. threadexitsall(status);
  464. }