fixarenas.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
/*
 * Check and fix an arena partition.
 *
 * This is a lot grittier than the rest of Venti because
 * it can't just give up if a byte here or there is wrong.
 *
 * The rule here (hopefully followed!) is that block corruption
 * only ever has a local effect -- there are no blocks that you
 * can wipe out that will cause large portions of
 * uncorrupted data blocks to be useless.
 */
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "whack.h"

#define ROUNDUP(x,n) (((x)+(n)-1)&~((n)-1))

#pragma varargck type "z" uvlong
#pragma varargck type "z" vlong
#pragma varargck type "t" uint

enum
{
	K = 1024,
	M = 1024*1024,
	G = 1024*1024*1024,

	Block = 4096,
};

int debugsha1;

int verbose;
Part *part;
char *file;
char *basename;
char *dumpbase;
int fix;
int badreads;
int unseal;
uint8_t zero[MaxDiskBlock];

Arena lastarena;
ArenaPart ap;
uint64_t arenasize;
int nbadread;
int nbad;
uint64_t partend;
void checkarena(int64_t, int);

void
usage(void)
{
	fprint(2, "usage: fixarenas [-fUv] [-a arenasize] [-b blocksize] [-n basename] [-x dumpbase] file [ranges]\n");
	threadexitsall(0);
}

/*
 * Format number in simplest way that is okay with unittoull.
 */
static int
zfmt(Fmt *fmt)
{
	int64_t x;

	x = va_arg(fmt->args, int64_t);
	if(x == 0)
		return fmtstrcpy(fmt, "0");
	if(x%G == 0)
		return fmtprint(fmt, "%lldG", x/G);
	if(x%M == 0)
		return fmtprint(fmt, "%lldM", x/M);
	if(x%K == 0)
		return fmtprint(fmt, "%lldK", x/K);
	return fmtprint(fmt, "%lld", x);
}

/*
 * Format time like ctime without newline.
 */
static int
tfmt(Fmt *fmt)
{
	uint t;
	char buf[30];

	t = va_arg(fmt->args, uint);
	strcpy(buf, ctime(t));
	buf[28] = 0;
	return fmtstrcpy(fmt, buf);
}
/*
 * Coalesce messages about unreadable sectors into larger ranges.
 * bad(nil, 0, 0) flushes the buffer.
 */
static void
bad(char *msg, int64_t o, int len)
{
	static int64_t lb0, lb1;
	static char *lmsg;

	if(msg == nil)
		msg = lmsg;
	if(o == -1){
		lmsg = nil;
		lb0 = 0;
		lb1 = 0;
		return;
	}
	if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
		if(lb0 != lb1)
			print("%s %#llux+%#llux (%,lld+%,lld)\n",
				lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
		lb0 = o;
	}
	lmsg = msg;
	lb1 = o+len;
}

/*
 * Read in the len bytes of data at the offset. If can't for whatever reason,
 * fill it with garbage but print an error.
 */
static uint8_t*
readdisk(uint8_t *buf, int64_t offset, int len)
{
	int i, j, k, n;

	if(offset >= partend){
		memset(buf, 0xFB, len);
		return buf;
	}
	if(offset+len > partend){
		memset(buf, 0xFB, len);
		len = partend - offset;
	}
	if(readpart(part, offset, buf, len) >= 0)
		return buf;

	/*
	 * The read failed. Clear the buffer to nonsense, and
	 * then try reading in smaller pieces. If that fails,
	 * read in even smaller pieces. And so on down to sectors.
	 */
	memset(buf, 0xFD, len);
	for(i=0; i<len; i+=64*K){
		n = 64*K;
		if(i+n > len)
			n = len-i;
		if(readpart(part, offset+i, buf+i, n) >= 0)
			continue;
		for(j=i; j<len && j<i+64*K; j+=4*K){
			n = 4*K;
			if(j+n > len)
				n = len-j;
			if(readpart(part, offset+j, buf+j, n) >= 0)
				continue;
			for(k=j; k<len && k<j+4*K; k+=512){
				if(readpart(part, offset+k, buf+k, 512) >= 0)
					continue;
				bad("disk read failed at", offset+k, 512);
				badreads++;
			}
		}
	}
	bad(nil, 0, 0);
	return buf;
}

/*
 * Buffer to support running SHA1 hash of the disk.
 */
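/*
 * fd, if set with sbdebug, receives a copy of every byte hashed.
 * offset is the next disk offset sbupdate expects to hash.
 * When rollback is set, r0 records where hashing started and hist
 * keeps a snapshot of the digest state every 4M, so sbrollback can
 * rewind without rehashing from the beginning.
 */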
typedef struct Shabuf Shabuf;
struct Shabuf
{
	int fd;
	int64_t offset;
	DigestState state;

	int rollback;
	int64_t r0;
	DigestState *hist;
	int nhist;
};

void
sbdebug(Shabuf *sb, char *file)
{
	int fd;

	if(sb->fd > 0){
		close(sb->fd);
		sb->fd = 0;
	}
	if((fd = create(file, OWRITE, 0666)) < 0)
		return;
	if(fd == 0){
		fd = dup(fd, -1);
		close(0);
	}
	sb->fd = fd;
}

void
sbupdate(Shabuf *sb, uint8_t *p, int64_t offset, int len)
{
	int n, x;
	int64_t o;

	if(sb->rollback && !sb->hist){
		sb->r0 = offset;
		sb->nhist = 1;
		sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
		memset(sb->hist, 0, sizeof sb->hist[0]);
	}
	if(sb->r0 == 0)
		sb->r0 = offset;

	if(sb->offset < offset || sb->offset >= offset+len){
		if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
			p, offset, len, sb->offset);
		return;
	}
	x = sb->offset - offset;
	if(0) print("sbupdate %p %#llux+%d skip %d\n",
		sb, offset, len, x);
	if(x){
		p += x;
		offset += x;
		len -= x;
	}
	assert(sb->offset == offset);

	if(sb->fd > 0)
		pwrite(sb->fd, p, len, offset - sb->r0);

	if(!sb->rollback){
		sha1(p, len, nil, &sb->state);
		sb->offset += len;
		return;
	}

	/* save state every 4M so we can roll back quickly */
	o = offset - sb->r0;
	while(len > 0){
		n = 4*M - o%(4*M);
		if(n > len)
			n = len;
		sha1(p, n, nil, &sb->state);
		sb->offset += n;
		o += n;
		p += n;
		len -= n;
		if(o%(4*M) == 0){
			x = o/(4*M);
			if(x >= sb->nhist){
				if(x != sb->nhist)
					print("oops! x=%d nhist=%d\n", x, sb->nhist);
				sb->nhist += 32;
				sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
			}
			sb->hist[x] = sb->state;
		}
	}
}

void
sbdiskhash(Shabuf *sb, int64_t eoffset)
{
	static uint8_t dbuf[4*M];
	int n;

	while(sb->offset < eoffset){
		n = sizeof dbuf;
		if(sb->offset+n > eoffset)
			n = eoffset - sb->offset;
		readdisk(dbuf, sb->offset, n);
		sbupdate(sb, dbuf, sb->offset, n);
	}
}

void
sbrollback(Shabuf *sb, int64_t offset)
{
	int x;
	int64_t o;
	Dir d;

	if(!sb->rollback || !sb->r0){
		print("cannot rollback sha\n");
		return;
	}
	if(offset >= sb->offset)
		return;
	o = offset - sb->r0;
	x = o/(4*M);
	if(x >= sb->nhist){
		print("cannot rollback sha\n");
		return;
	}
	sb->state = sb->hist[x];
	sb->offset = sb->r0 + x*4*M;
	assert(sb->offset <= offset);

	if(sb->fd > 0){
		nulldir(&d);
		d.length = sb->offset - sb->r0;
		dirfwstat(sb->fd, &d);
	}
}

void
sbscore(Shabuf *sb, uint8_t *score)
{
	if(sb->hist){
		free(sb->hist);
		sb->hist = nil;
	}
	sha1(nil, 0, score, &sb->state);
}

/*
 * If we're fixing arenas, then editing this memory edits the disk!
 * It will be written back out as new data is paged in.
 */
uint8_t buf[4*M];
uint8_t sbuf[4*M];
int64_t bufoffset;
int buflen;

static void pageout(void);
static uint8_t*
pagein(int64_t offset, int len)
{
	pageout();
	if(offset >= partend){
		memset(buf, 0xFB, sizeof buf);
		return buf;
	}
	if(offset+len > partend){
		memset(buf, 0xFB, sizeof buf);
		len = partend - offset;
	}
	bufoffset = offset;
	buflen = len;
	readdisk(buf, offset, len);
	memmove(sbuf, buf, len);
	return buf;
}

static void
pageout(void)
{
	if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
		buflen = 0;
		return;
	}
	if(writepart(part, bufoffset, buf, buflen) < 0)
		print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
			bufoffset, buflen, bufoffset, buflen);
	buflen = 0;
}
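
/*
 * Zero a byte range on disk by paging in whole blocks around it,
 * clearing the affected bytes, and letting pageout write them back.
 * The previously paged-in window (ooff/olen) is re-paged at the end
 * so callers iterating over buf keep a consistent view.
 */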
static void
zerorange(int64_t offset, int len)
{
	int i;
	int64_t ooff;
	int olen;
	enum { MinBlock = 4*K, MaxBlock = 8*K };

	if(0)
	if(bufoffset <= offset && offset+len <= bufoffset+buflen){
		memset(buf+(offset-bufoffset), 0, len);
		return;
	}

	ooff = bufoffset;
	olen = buflen;

	i = offset%MinBlock;
	if(i+len < MaxBlock){
		pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
		memset(buf+i, 0, len);
	}else{
		pagein(offset-i, MaxBlock);
		memset(buf+i, 0, MaxBlock-i);
		offset += MaxBlock-i;
		len -= MaxBlock-i;

		while(len >= MaxBlock){
			pagein(offset, MaxBlock);
			memset(buf, 0, MaxBlock);
			offset += MaxBlock;
			len -= MaxBlock;
		}

		pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
		memset(buf, 0, len);
	}
	pagein(ooff, olen);
}

/*
 * read/write integers
 *
static void
p16(uchar *p, u16int u)
{
	p[0] = (u>>8) & 0xFF;
	p[1] = u & 0xFF;
}
*/

static uint16_t
u16(uint8_t *p)
{
	return (p[0]<<8)|p[1];
}

static void
p32(uint8_t *p, uint32_t u)
{
	p[0] = (u>>24) & 0xFF;
	p[1] = (u>>16) & 0xFF;
	p[2] = (u>>8) & 0xFF;
	p[3] = u & 0xFF;
}

static uint32_t
u32(uint8_t *p)
{
	return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
}

/*
static void
p64(uchar *p, u64int u)
{
	p32(p, u>>32);
	p32(p, u);
}
*/

static uint64_t
u64(uint8_t *p)
{
	return ((uint64_t)u32(p)<<32) | u32(p+4);
}

static int
vlongcmp(const void *va, const void *vb)
{
	int64_t a, b;

	a = *(int64_t*)va;
	b = *(int64_t*)vb;
	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}

/* D and S are in draw.h */
#define D VD
#define S VS

enum
{
	D = 0x10000,
	Z = 0x20000,
	S = 0x30000,
	T = 0x40000,
	N = 0xFFFF
};

typedef struct Info Info;
struct Info
{
	int len;
	char *name;
};
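
/*
 * Info.len packs a byte count in its low 16 bits (masked by N) with an
 * optional display hint in the high bits: D prints the field as a
 * comma-separated decimal, Z as a size via the %z verb, T as a time via
 * the %t verb, and S as a name string; fields with no hint print as hex
 * or raw bytes.  showdiffs uses these hints when reporting mismatches
 * between the computed and on-disk structures described by the tables
 * below.
 */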
Info partinfo[] = {
	4, "magic",
	D|4, "version",
	Z|4, "blocksize",
	4, "arenabase",
	0
};

Info headinfo4[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	Z|4, "blocksize",
	Z|8, "size",
	0
};

Info headinfo5[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	Z|4, "blocksize",
	Z|8, "size",
	4, "clumpmagic",
	0
};

Info tailinfo4[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",
	0
};

Info tailinfo4a[] = {
	/* tailinfo 4 */
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",

	/* mem stats */
	1, "extension",
	D|4, "mem.clumps",
	D|4, "mem.cclumps",
	D|8, "mem.used",
	D|8, "mem.uncsize",
	1, "mem.sealed",
	0
};

Info tailinfo5[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	4, "clumpmagic",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",
	0
};

Info tailinfo5a[] = {
	/* tailinfo 5 */
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	4, "clumpmagic",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",

	/* mem stats */
	1, "extension",
	D|4, "mem.clumps",
	D|4, "mem.cclumps",
	D|8, "mem.used",
	D|8, "mem.uncsize",
	1, "mem.sealed",
	0
};

void
showdiffs(uint8_t *want, uint8_t *have, int len, Info *info)
{
	int n;

	while(len > 0 && (n=info->len&N) > 0){
		if(memcmp(have, want, n) != 0){
			switch(info->len){
			case 1:
				print("\t%s: correct=%d disk=%d\n",
					info->name, *want, *have);
				break;
			case 4:
				print("\t%s: correct=%#ux disk=%#ux\n",
					info->name, u32(want), u32(have));
				break;
			case D|4:
				print("\t%s: correct=%,ud disk=%,ud\n",
					info->name, u32(want), u32(have));
				break;
			case T|4:
				print("\t%s: correct=%t\n\t\tdisk=%t\n",
					info->name, u32(want), u32(have));
				break;
			case Z|4:
				print("\t%s: correct=%z disk=%z\n",
					info->name, (uint64_t)u32(want),
					(uint64_t)u32(have));
				break;
			case D|8:
				print("\t%s: correct=%,lld disk=%,lld\n",
					info->name, u64(want), u64(have));
				break;
			case Z|8:
				print("\t%s: correct=%z disk=%z\n",
					info->name, u64(want), u64(have));
				break;
			case S|ANameSize:
				print("\t%s: correct=%s disk=%.*s\n",
					info->name, (char*)want,
					utfnlen((char*)have, ANameSize-1),
					(char*)have);
				break;
			default:
				print("\t%s: correct=%.*H disk=%.*H\n",
					info->name, n, want, n, have);
				break;
			}
		}
		have += n;
		want += n;
		len -= n;
		info++;
	}
	if(len > 0 && memcmp(have, want, len) != 0){
		if(memcmp(want, zero, len) != 0)
			print("!!\textra want data in showdiffs (bug in fixarenas)\n");
		else
			print("\tnon-zero data on disk after structure\n");
		if(verbose > 1){
			print("want: %.*H\n", len, want);
			print("have: %.*H\n", len, have);
		}
	}
}

/*
 * Does part begin with an arena?
 */
int
isonearena(void)
{
	return u32(pagein(0, Block)) == ArenaHeadMagic;
}

static int tabsizes[] = { 16*1024, 64*1024, 512*1024, 768*1024, };

/*
 * Poke around on the disk to guess what the ArenaPart numbers are.
 */
void
guessgeometry(void)
{
	int i, j, n, bestn, ndiff, nhead, ntail;
	uint8_t *p, *ep, *sp;
	uint64_t diff[100], head[20], tail[20];
	uint64_t offset, bestdiff;

	ap.version = ArenaPartVersion;

	if(arenasize == 0 || ap.blocksize == 0){
		/*
		 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
		 * Instead, look for the individual arena headers and tails, which there
		 * are many of, and once we've seen enough, infer the spacing.
		 *
		 * Of course, nothing in the file format requires that arenas be evenly
		 * spaced, but fmtarenas always does that for us.
		 */
		nhead = 0;
		ntail = 0;
		for(offset=PartBlank; offset<partend; offset+=4*M){
			p = pagein(offset, 4*M);
			for(sp=p, ep=p+4*M; p<ep; p+=K){
				if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
					if(verbose)
						print("arena head at %#llx\n", offset+(p-sp));
					head[nhead++] = offset+(p-sp);
				}
				if(u32(p) == ArenaMagic && ntail < nelem(tail)){
					tail[ntail++] = offset+(p-sp);
					if(verbose)
						print("arena tail at %#llx\n", offset+(p-sp));
				}
			}
			if(nhead == nelem(head) && ntail == nelem(tail))
				break;
		}
		if(nhead < 3 && ntail < 3)
			sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);

		/*
		 * Arena size is likely the most common
		 * inter-head or inter-tail spacing.
		 */
		ndiff = 0;
		for(i=1; i<nhead; i++)
			diff[ndiff++] = head[i] - head[i-1];
		for(i=1; i<ntail; i++)
			diff[ndiff++] = tail[i] - tail[i-1];
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
		if(arenasize != 0 && arenasize != bestdiff)
			print("using user-specified size %z instead\n", arenasize);
		else
			arenasize = bestdiff;

		/*
		 * The arena tail for an arena is arenasize-blocksize from the head.
		 */
		ndiff = 0;
		for(i=j=0; i<nhead && j<ntail; ){
			if(tail[j] < head[i]){
				j++;
				continue;
			}
			if(tail[j] < head[i]+arenasize){
				diff[ndiff++] = head[i]+arenasize - tail[j];
				j++;
				continue;
			}
			i++;
		}
		if(ndiff < 3)
			sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
		if(ap.blocksize != 0 && ap.blocksize != bestdiff)
			print("using user-specified size %z instead\n",
				(int64_t)ap.blocksize);
		else
			ap.blocksize = bestdiff;
		if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
			sysfatal("block size not a power of two");
		if(ap.blocksize > MaxDiskBlock)
			sysfatal("block size too big (max=%d)", MaxDiskBlock);

		/*
		 * Use head/tail information to deduce arena base.
		 */
		ndiff = 0;
		for(i=0; i<nhead; i++)
			diff[ndiff++] = head[i]%arenasize;
		for(i=0; i<ntail; i++)
			diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		ap.arenabase = bestdiff;
	}

	ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize);
	/*
	 * XXX pick up table, check arenabase.
	 * XXX pick up table, record base name.
	 */

	/*
	 * Somewhat standard computation.
	 * Fmtarenas used to use 64k tab, now uses 512k tab.
	 */
	if(ap.arenabase == 0){
		print("trying standard arena bases...\n");
		for(i=0; i<nelem(tabsizes); i++){
			ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize);
			p = pagein(ap.arenabase, Block);
			if(u32(p) == ArenaHeadMagic)
				break;
		}
	}
	p = pagein(ap.arenabase, Block);
	print("arena base likely %z%s\n", (int64_t)ap.arenabase,
		u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");

	ap.tabsize = ap.arenabase - ap.tabbase;
}

/*
 * Check the arena partition blocks and then the arenas listed in range.
 */
void
checkarenas(char *range)
{
	char *s, *t;
	int i, lo, hi, narena;
	uint8_t dbuf[HeadSize];
	uint8_t *p;

	guessgeometry();

	partend -= partend%ap.blocksize;

	memset(dbuf, 0, sizeof dbuf);
	packarenapart(&ap, dbuf);
	p = pagein(PartBlank, Block);
	if(memcmp(p, dbuf, HeadSize) != 0){
		print("on-disk arena part superblock incorrect\n");
		showdiffs(dbuf, p, HeadSize, partinfo);
	}
	memmove(p, dbuf, HeadSize);

	narena = (partend-ap.arenabase + arenasize-1)/arenasize;
	if(range == nil){
		for(i=0; i<narena; i++)
			checkarena(ap.arenabase+(int64_t)i*arenasize, i);
	}else if(strcmp(range, "none") == 0){
		/* nothing */
	}else{
		/* parse, e.g., -4,8-9,10- */
		for(s=range; *s; s=t){
			t = strchr(s, ',');
			if(t)
				*t++ = 0;
			else
				t = s+strlen(s);
			if(*s == '-')
				lo = 0;
			else
				lo = strtol(s, &s, 0);
			hi = lo;
			if(*s == '-'){
				s++;
				if(*s == 0)
					hi = narena-1;
				else
					hi = strtol(s, &s, 0);
			}
			if(*s != 0){
				print("bad arena range: %s\n", s);
				continue;
			}
			for(i=lo; i<=hi; i++)
				checkarena(ap.arenabase+(int64_t)i*arenasize, i);
		}
	}
}

/*
 * Is there a clump here at p?
 */
static int
isclump(uint8_t *p, Clump *cl, uint32_t *pmagic)
{
	int n;
	uint32_t magic;
	uint8_t score[VtScoreSize], *bp;
	Unwhack uw;
	uint8_t ubuf[70*1024];

	bp = p;
	magic = u32(p);
	if(magic == 0)
		return 0;
	p += U32Size;

	cl->info.type = vtfromdisktype(*p);
	if(cl->info.type == 0xFF)
		return 0;
	p++;
	cl->info.size = u16(p);
	p += U16Size;
	cl->info.uncsize = u16(p);
	if(cl->info.size > cl->info.uncsize)
		return 0;
	p += U16Size;
	scorecp(cl->info.score, p);
	p += VtScoreSize;
	cl->encoding = *p;
	p++;
	cl->creator = u32(p);
	p += U32Size;
	cl->time = u32(p);
	p += U32Size;

	switch(cl->encoding){
	case ClumpENone:
		if(cl->info.size != cl->info.uncsize)
			return 0;
		scoremem(score, p, cl->info.size);
		if(scorecmp(score, cl->info.score) != 0)
			return 0;
		break;
	case ClumpECompress:
		if(cl->info.size >= cl->info.uncsize)
			return 0;
		unwhackinit(&uw);
		n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
		if(n != cl->info.uncsize)
			return 0;
		scoremem(score, ubuf, cl->info.uncsize);
		if(scorecmp(score, cl->info.score) != 0)
			return 0;
		break;
	default:
		return 0;
	}
	p += cl->info.size;

	/* it all worked out in the end */
	*pmagic = magic;
	return p - bp;
}

/*
 * All ClumpInfos seen in this arena.
 * Kept in binary tree so we can look up by score.
 */
typedef struct Cit Cit;
struct Cit
{
	int left;
	int right;
	int64_t corrupt;
	ClumpInfo ci;
};
Cit *cibuf;
int ciroot;
int ncibuf, mcibuf;

void
resetcibuf(void)
{
	ncibuf = 0;
	ciroot = -1;
}
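
/*
 * Walk the tree of clump info entries (indices into cibuf, linked by the
 * left/right fields) and return the address of the slot where score is,
 * or where it would be inserted if it is not present.
 */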
int*
ltreewalk(int *p, uint8_t *score)
{
	int i;

	for(;;){
		if(*p == -1)
			return p;
		i = scorecmp(cibuf[*p].ci.score, score);
		if(i == 0)
			return p;
		if(i < 0)
			p = &cibuf[*p].right;
		else
			p = &cibuf[*p].left;
	}
}

void
addcibuf(ClumpInfo *ci, int64_t corrupt)
{
	Cit *cit;

	if(ncibuf == mcibuf){
		mcibuf += 131072;
		cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
	}
	cit = &cibuf[ncibuf];
	cit->ci = *ci;
	cit->left = -1;
	cit->right = -1;
	cit->corrupt = corrupt;
	if(!corrupt)
		*ltreewalk(&ciroot, ci->score) = ncibuf;
	ncibuf++;
}

void
addcicorrupt(int64_t len)
{
	static ClumpInfo zci;

	addcibuf(&zci, len);
}

int
haveclump(uint8_t *score)
{
	int i;
	int p;

	p = ciroot;
	for(;;){
		if(p == -1)
			return 0;
		i = scorecmp(cibuf[p].ci.score, score);
		if(i == 0)
			return 1;
		if(i < 0)
			p = cibuf[p].right;
		else
			p = cibuf[p].left;
	}
}

int
matchci(ClumpInfo *ci, uint8_t *p)
{
	if(ci->type != vtfromdisktype(p[0]))
		return 0;
	if(ci->size != u16(p+1))
		return 0;
	if(ci->uncsize != u16(p+3))
		return 0;
	if(scorecmp(ci->score, p+5) != 0)
		return 0;
	return 1;
}

int
sealedarena(uint8_t *p, int blocksize)
{
	int v, n;

	v = u32(p+4);
	switch(v){
	default:
		return 0;
	case ArenaVersion4:
		n = ArenaSize4;
		break;
	case ArenaVersion5:
		n = ArenaSize5;
		break;
	}
	if(p[n-1] != 1){
		print("arena tail says not sealed\n");
		return 0;
	}
	if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
		print("arena tail followed by non-zero data\n");
		return 0;
	}
	if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
		print("arena score zero\n");
		return 0;
	}
	return 1;
}
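
/*
 * Arena names written by fmtarenas end in the decimal arena number
 * (basename0, basename1, ...).  okayname accepts a name only if
 * nameok() passes and, for n > 0, the name carries that numeric suffix.
 */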
int
okayname(char *name, int n)
{
	char buf[20];

	if(nameok(name) < 0)
		return 0;
	sprint(buf, "%d", n);
	if(n == 0)
		buf[0] = 0;
	if(strlen(name) < strlen(buf)
	|| strcmp(name+strlen(name)-strlen(buf), buf) != 0)
		return 0;
	return 1;
}

int
clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
{
	if(a->type != b->type)
		return a->type - b->type;
	if(a->size != b->size)
		return a->size - b->size;
	if(a->uncsize != b->uncsize)
		return a->uncsize - b->uncsize;
	return scorecmp(a->score, b->score);
}

ClumpInfo*
loadci(int64_t offset, Arena *arena, int nci)
{
	int i, j, per;
	uint8_t *p, *sp;
	ClumpInfo *bci, *ci;

	per = arena->blocksize/ClumpInfoSize;
	bci = vtmalloc(nci*sizeof bci[0]);
	ci = bci;
	offset += arena->size - arena->blocksize;
	p = sp = nil;
	for(i=0; i<nci; i+=per){
		if(p == sp){
			sp = pagein(offset-4*M, 4*M);
			p = sp+4*M;
		}
		p -= arena->blocksize;
		offset -= arena->blocksize;
		for(j=0; j<per && i+j<nci; j++)
			unpackclumpinfo(ci++, p+j*ClumpInfoSize);
	}
	return bci;
}

int64_t
writeci(int64_t offset, Arena *arena, ClumpInfo *ci, int nci)
{
	int i, j, per;
	uint8_t *p, *sp;

	per = arena->blocksize/ClumpInfoSize;
	offset += arena->size - arena->blocksize;
	p = sp = nil;
	for(i=0; i<nci; i+=per){
		if(p == sp){
			sp = pagein(offset-4*M, 4*M);
			p = sp+4*M;
		}
		p -= arena->blocksize;
		offset -= arena->blocksize;
		memset(p, 0, arena->blocksize);
		for(j=0; j<per && i+j<nci; j++)
			packclumpinfo(ci++, p+j*ClumpInfoSize);
	}
	pageout();
	return offset;
}
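
/*
 * Fill in the obvious fields of an arena (size, block size) from the
 * partition geometry, then recover the name, version, and clump magic
 * from whichever of the on-disk head and tail blocks still parses.
 * The base name can also be forced with -n or inherited from the
 * previous arena's base name.
 */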
void
loadarenabasics(int64_t offset0, int anum, ArenaHead *head, Arena *arena)
{
	char dname[ANameSize];
	static char lastbase[ANameSize];
	uint8_t *p;
	Arena oarena;
	ArenaHead ohead;

	/*
	 * Fmtarenas makes all arenas the same size
	 * except the last, which may be smaller.
	 * It uses the same block size for arenas as for
	 * the arena partition blocks.
	 */
	arena->size = arenasize;
	if(offset0+arena->size > partend)
		arena->size = partend - offset0;
	head->size = arena->size;

	arena->blocksize = ap.blocksize;
	head->blocksize = arena->blocksize;

	/*
	 * Look for clump magic and name in head/tail blocks.
	 * All the other info we will reconstruct just in case.
	 */
	p = pagein(offset0, arena->blocksize);
	memset(&ohead, 0, sizeof ohead);
	if(unpackarenahead(&ohead, p) >= 0){
		head->version = ohead.version;
		head->clumpmagic = ohead.clumpmagic;
		if(okayname(ohead.name, anum))
			strcpy(head->name, ohead.name);
	}

	p = pagein(offset0+arena->size-arena->blocksize,
		arena->blocksize);
	memset(&oarena, 0, sizeof oarena);
	if(unpackarena(&oarena, p) >= 0){
		arena->version = oarena.version;
		arena->clumpmagic = oarena.clumpmagic;
		if(okayname(oarena.name, anum))
			strcpy(arena->name, oarena.name);
		arena->diskstats.clumps = oarena.diskstats.clumps;
		print("old arena: sealed=%d\n", oarena.diskstats.sealed);
		arena->diskstats.sealed = oarena.diskstats.sealed;
	}

	/* Head trumps arena. */
	if(head->version){
		arena->version = head->version;
		arena->clumpmagic = head->clumpmagic;
	}
	if(arena->version == 0)
		arena->version = ArenaVersion5;
	if(basename){
		if(anum == -1)
			snprint(arena->name, ANameSize, "%s", basename);
		else
			snprint(arena->name, ANameSize, "%s%d", basename, anum);
	}else if(lastbase[0])
		snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
	else if(head->name[0])
		strcpy(arena->name, head->name);
	else if(arena->name[0] == 0)
		sysfatal("cannot determine base name for arena; use -n");
	strcpy(lastbase, arena->name);
	sprint(dname, "%d", anum);
	lastbase[strlen(lastbase)-strlen(dname)] = 0;

	/* Was working in arena, now copy to head. */
	head->version = arena->version;
	memmove(head->name, arena->name, sizeof head->name);
	head->blocksize = arena->blocksize;
	head->size = arena->size;
}

void
shahead(Shabuf *sb, int64_t offset0, ArenaHead *head)
{
	uint8_t headbuf[MaxDiskBlock];

	sb->offset = offset0;
	memset(headbuf, 0, sizeof headbuf);
	packarenahead(head, headbuf);
	sbupdate(sb, headbuf, offset0, head->blocksize);
}
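
/*
 * Pick a clump magic for an arena that never recorded one: version 4
 * arenas always use _ClumpMagic; version 5 arenas use a random nonzero
 * value that is not _ClumpMagic.
 */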
uint32_t
newclumpmagic(int version)
{
	uint32_t m;

	if(version == ArenaVersion4)
		return _ClumpMagic;
	do{
		m = fastrand();
	}while(m==0 || m == _ClumpMagic);
	return m;
}

/*
 * Poke around in the arena to find the clump data
 * and compute the relevant statistics.
 */
void
guessarena(int64_t offset0, int anum, ArenaHead *head, Arena *arena,
	uint8_t *oldscore, uint8_t *score)
{
	uint8_t dbuf[MaxDiskBlock];
	int needtozero, clumps, nb1, nb2, minclumps;
	int inbad, n, ncib, printed, sealing, smart;
	uint32_t magic;
	uint8_t *sp, *ep, *p;
	int64_t boffset, eoffset, lastclumpend, leaked;
	int64_t offset, toffset, totalcorrupt, v;
	Clump cl;
	ClumpInfo *bci, *ci, *eci, *xci;
	Cit *bcit, *cit, *ecit;
	Shabuf oldsha, newsha;

	/*
	 * We expect to find an arena, with data, between offset
	 * and offset+arenasize. With any luck, the data starts at
	 * offset+ap.blocksize. The blocks have variable size and
	 * aren't padded at all, which doesn't give us any alignment
	 * constraints. The blocks are compressed or high entropy,
	 * but the headers are pretty low entropy (except the score):
	 *
	 *	type[1] (range 0 thru 9, 13)
	 *	size[2]
	 *	uncsize[2] (<= size)
	 *
	 * so we can look for these. We check the scores as we go,
	 * so we can't make any wrong turns. If we find ourselves
	 * in a dead end, scan forward looking for a new start.
	 */
	resetcibuf();
	memset(head, 0, sizeof *head);
	memset(arena, 0, sizeof *arena);
	memset(oldscore, 0, VtScoreSize);
	memset(score, 0, VtScoreSize);
	memset(&oldsha, 0, sizeof oldsha);
	memset(&newsha, 0, sizeof newsha);
	newsha.rollback = 1;

	if(0){
		sbdebug(&oldsha, "old.sha");
		sbdebug(&newsha, "new.sha");
	}

	loadarenabasics(offset0, anum, head, arena);

	/* start the clump hunt */

	clumps = 0;
	totalcorrupt = 0;
	sealing = 1;
	boffset = offset0 + arena->blocksize;
	offset = boffset;
	eoffset = offset0+arena->size - arena->blocksize;
	toffset = eoffset;
	sp = pagein(offset0, 4*M);

	if(arena->diskstats.sealed){
		oldsha.offset = offset0;
		sbupdate(&oldsha, sp, offset0, 4*M);
	}
	ep = sp+4*M;
	p = sp + (boffset - offset0);

	ncib = arena->blocksize / ClumpInfoSize;	/* ci per block in index */
	lastclumpend = offset;
	nbad = 0;
	inbad = 0;
	needtozero = 0;
	minclumps = 0;
	while(offset < eoffset){
		/*
		 * Shift buffer if we're running out of room.
		 */
		if(p+70*K >= ep){
			/*
			 * Start the post SHA1 buffer. By now we should know the
			 * clumpmagic and arena version, so we can create a
			 * correct head block to get things going.
			 */
			if(sealing && fix && newsha.offset == 0){
				newsha.offset = offset0;
				if(arena->clumpmagic == 0){
					if(arena->version == 0)
						arena->version = ArenaVersion5;
					arena->clumpmagic = newclumpmagic(arena->version);
				}
				head->clumpmagic = arena->clumpmagic;
				shahead(&newsha, offset0, head);
			}
			n = 4*M-256*K;
			if(sealing && fix){
				sbdiskhash(&newsha, bufoffset);
				sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
			}
			pagein(bufoffset+n, 4*M);
			p -= n;
			if(arena->diskstats.sealed)
				sbupdate(&oldsha, buf, bufoffset, 4*M);
		}

		/*
		 * Check for a clump at p, which is at offset in the disk.
		 * Duplicate clumps happen in corrupted disks
		 * (the same pattern gets written many times in a row)
		 * and should never happen during regular use.
		 */
		magic = 0;
		if((n = isclump(p, &cl, &magic)) > 0){
			/*
			 * If we were in the middle of some corrupted data,
			 * flush a warning about it and then add any clump
			 * info blocks as necessary.
			 */
			if(inbad){
				inbad = 0;
				v = offset-lastclumpend;
				if(needtozero){
					zerorange(lastclumpend, v);
					sbrollback(&newsha, lastclumpend);
					print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
						lastclumpend, v, v);
				}
				addcicorrupt(v);
				totalcorrupt += v;
				nb1 = (minclumps+ncib-1)/ncib;
				minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
				nb2 = (minclumps+ncib-1)/ncib;
				eoffset -= (nb2-nb1)*arena->blocksize;
			}

			if(haveclump(cl.info.score))
				print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);

			/*
			 * If clumps use different magic numbers, we don't care.
			 * We'll just use the first one we find and make the others
			 * follow suit.
			 */
			if(arena->clumpmagic == 0){
				print("clump type %d size %d score %V magic %x\n",
					cl.info.type, cl.info.size, cl.info.score, magic);
				arena->clumpmagic = magic;
				if(magic == _ClumpMagic)
					arena->version = ArenaVersion4;
				else
					arena->version = ArenaVersion5;
			}
			if(magic != arena->clumpmagic)
				p32(p, arena->clumpmagic);
			if(clumps == 0)
				arena->ctime = cl.time;

			/*
			 * Record the clump, update arena stats,
			 * grow clump info blocks if needed.
			 */
			if(verbose > 1)
				print("\tclump %d: %d %V at %#llux+%#ux (%d)\n",
					clumps, cl.info.type, cl.info.score, offset, n, n);
			addcibuf(&cl.info, 0);
			if(minclumps%ncib == 0)
				eoffset -= arena->blocksize;
			minclumps++;
			clumps++;
			if(cl.encoding != ClumpENone)
				arena->diskstats.cclumps++;
			arena->diskstats.uncsize += cl.info.uncsize;
			arena->wtime = cl.time;

			/*
			 * Move to next clump.
			 */
			offset += n;
			p += n;
			lastclumpend = offset;
		}else{
			/*
			 * Overwrite malformed clump data with zeros later.
			 * For now, just record whether it needs to be overwritten.
			 * Bad regions must be of size at least ClumpSize.
			 * Postponing the overwriting keeps us from writing past
			 * the end of the arena data (which might be directory data)
			 * with zeros.
			 */
			if(!inbad){
				inbad = 1;
				needtozero = 0;
				if(memcmp(p, zero, ClumpSize) != 0)
					needtozero = 1;
				p += ClumpSize;
				offset += ClumpSize;
				nbad++;
			}else{
				if(*p != 0)
					needtozero = 1;
				p++;
				offset++;
			}
		}
	}
	pageout();

	if(verbose)
		print("readable clumps: %d; min. directory entries: %d\n",
			clumps, minclumps);
	arena->diskstats.used = lastclumpend - boffset;
	leaked = eoffset - lastclumpend;
	if(verbose)
		print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
			boffset, lastclumpend, arena->diskstats.used, leaked);

	/*
	 * Finish the SHA1 of the old data.
	 */
	if(arena->diskstats.sealed){
		sbdiskhash(&oldsha, toffset);
		readdisk(dbuf, toffset, arena->blocksize);
		scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
		sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
		sbscore(&oldsha, oldscore);
	}

	/*
	 * If we still don't know the clump magic, the arena
	 * must be empty. It still needs a value, so make
	 * something up.
	 */
	if(arena->version == 0)
		arena->version = ArenaVersion5;
	if(arena->clumpmagic == 0){
		if(arena->version == ArenaVersion4)
			arena->clumpmagic = _ClumpMagic;
		else{
			do
				arena->clumpmagic = fastrand();
			while(arena->clumpmagic==_ClumpMagic
				||arena->clumpmagic==0);
		}
		head->clumpmagic = arena->clumpmagic;
	}

	/*
	 * Guess at number of clumpinfo blocks to load.
	 * If we guess high, it's no big deal. If we guess low,
	 * we'll be forced into rewriting the whole directory.
	 * Still not such a big deal.
	 */
	if(clumps == 0 || arena->diskstats.used == totalcorrupt)
		goto Nocib;
	if(clumps < arena->diskstats.clumps)
		clumps = arena->diskstats.clumps;
	if(clumps < ncibuf)
		clumps = ncibuf;
	clumps += totalcorrupt/
		((arena->diskstats.used - totalcorrupt)/clumps);
	clumps += totalcorrupt/2000;
	if(clumps < minclumps)
		clumps = minclumps;
	clumps += ncib-1;
	clumps -= clumps%ncib;

	/*
	 * Can't write into the actual data.
	 */
	v = offset0 + arena->size - arena->blocksize;
	v -= (clumps+ncib-1)/ncib * arena->blocksize;
	if(v < lastclumpend){
		v = offset0 + arena->size - arena->blocksize;
		clumps = (v-lastclumpend)/arena->blocksize * ncib;
	}

	if(clumps < minclumps)
		print("cannot happen?\n");

	/*
	 * Check clumpinfo blocks against directory we created.
	 * The tricky part is handling the corrupt sections of arena.
	 * If possible, we remark just the affected directory entries
	 * rather than slide everything down.
	 *
	 * Allocate clumps+1 blocks and check that we don't need
	 * the last one at the end.
	 */
	bci = loadci(offset0, arena, clumps+1);
	eci = bci+clumps+1;
	bcit = cibuf;
	ecit = cibuf+ncibuf;
	smart = 0;	/* Somehow the smart code doesn't do corrupt clumps right. */

Again:
	nbad = 0;
	ci = bci;
	for(cit=bcit; cit<ecit && ci<eci; cit++){
		if(cit->corrupt){
			int64_t n, m;
			if(smart){
				/*
				 * If we can, just mark existing entries as corrupt.
				 */
				n = cit->corrupt;
				for(xci=ci; n>0 && xci<eci; xci++)
					n -= ClumpSize+xci->size;
				if(n > 0 || xci >= eci)
					goto Dumb;
				printed = 0;
				for(; ci<xci; ci++){
					if(verbose && ci->type != VtCorruptType){
						if(!printed){
							print("marking directory %d-%d as corrupt\n",
								(int)(ci-bci), (int)(xci-bci));
							printed = 1;
						}
						print("\ttype=%d size=%d uncsize=%d score=%V\n",
							ci->type, ci->size, ci->uncsize, ci->score);
					}
					ci->type = VtCorruptType;
				}
			}else{
			Dumb:
				print("\trewriting clump directory\n");
				/*
				 * Otherwise, blaze a new trail.
				 */
				n = cit->corrupt;
				while(n > 0 && ci < eci){
					if(n < ClumpSize)
						sysfatal("bad math in clump corrupt");
					if(n <= VtMaxLumpSize+ClumpSize)
						m = n;
					else{
						m = VtMaxLumpSize+ClumpSize;
						if(n-m < ClumpSize)
							m -= ClumpSize;
					}
					ci->type = VtCorruptType;
					ci->size = m-ClumpSize;
					ci->uncsize = m-ClumpSize;
					memset(ci->score, 0, VtScoreSize);
					ci++;
					n -= m;
				}
			}
			continue;
		}
		if(clumpinfocmp(&cit->ci, ci) != 0){
			if(verbose && (smart || verbose>1)){
				print("clumpinfo %d\n", (int)(ci-bci));
				print("\twant: %d %d %d %V\n",
					cit->ci.type, cit->ci.size,
					cit->ci.uncsize, cit->ci.score);
				print("\thave: %d %d %d %V\n",
					ci->type, ci->size,
					ci->uncsize, ci->score);
			}
			*ci = cit->ci;
			nbad++;
		}
		ci++;
	}
	if(ci >= eci || cit < ecit){
		print("ran out of space editing existing directory; rewriting\n");
		print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
		assert(smart);	/* can't happen second time thru */
		smart = 0;
		goto Again;
	}

	assert(ci <= eci);
	arena->diskstats.clumps = ci-bci;
	eoffset = writeci(offset0, arena, bci, ci-bci);
	if(sealing && fix)
		sbrollback(&newsha, v);
	print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
	if(lastclumpend > eoffset)
		print("arena directory overwrote blocks! cannot happen!\n");
	free(bci);
	if(smart && nbad)
		print("arena directory has %d bad or missing entries\n", nbad);
Nocib:
	if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
		if(arena->diskstats.sealed)
			print("unsealing arena\n");
		sealing = 0;
		memset(oldscore, 0, VtScoreSize);
	}

	/*
	 * Finish the SHA1 of the new data - only meaningful
	 * if we've been writing to disk (`fix').
	 */
	arena->diskstats.sealed = sealing;
	arena->memstats = arena->diskstats;
	if(sealing && fix){
		uint8_t tbuf[MaxDiskBlock];

		sbdiskhash(&newsha, toffset);
		memset(tbuf, 0, sizeof tbuf);
		packarena(arena, tbuf);
		sbupdate(&newsha, tbuf, toffset, arena->blocksize);
		sbscore(&newsha, score);
	}
}

void
dumparena(int64_t offset, int anum, Arena *arena)
{
	char buf[1000];
	int64_t o, e;
	int fd, n;

	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
	if((fd = create(buf, OWRITE, 0666)) < 0){
		fprint(2, "create %s: %r\n", buf);
		return;
	}
	e = offset+arena->size;
	for(o=offset; o<e; o+=n){
		n = 4*M;
		if(o+n > e)
			n = e-o;
		if(pwrite(fd, pagein(o, n), n, o-offset) != n){
			fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
			return;
		}
	}
}
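
/*
 * Check a single arena: reconstruct the head and tail from the clump
 * data (guessarena), report any differences from what is on disk, and,
 * when -f is given, write the corrected blocks back out via pageout.
 */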
void
checkarena(int64_t offset, int anum)
{
	uint8_t dbuf[MaxDiskBlock];
	uint8_t *p, oldscore[VtScoreSize], score[VtScoreSize];
	Arena arena, oarena;
	ArenaHead head;
	Info *fmt, *fmta;
	int sz;

	print("# arena %d: offset %#llux\n", anum, offset);

	if(offset >= partend){
		print("arena offset out of bounds\n");
		return;
	}

	guessarena(offset, anum, &head, &arena, oldscore, score);

	if(verbose){
		print("#\tversion=%d name=%s blocksize=%d size=%z",
			head.version, head.name, head.blocksize, head.size);
		if(head.clumpmagic)
			print(" clumpmagic=%#.8ux", head.clumpmagic);
		print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
			arena.diskstats.clumps, arena.diskstats.cclumps,
			arena.diskstats.used, arena.diskstats.uncsize);
		print("#\tctime=%t\n", arena.ctime);
		print("#\twtime=%t\n", arena.wtime);
		if(arena.diskstats.sealed)
			print("#\tsealed score=%V\n", score);
	}

	if(dumpbase){
		dumparena(offset, anum, &arena);
		return;
	}

	memset(dbuf, 0, sizeof dbuf);
	packarenahead(&head, dbuf);
	p = pagein(offset, arena.blocksize);
	if(memcmp(dbuf, p, arena.blocksize) != 0){
		print("on-disk arena header incorrect\n");
		showdiffs(dbuf, p, arena.blocksize,
			arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
	}
	memmove(p, dbuf, arena.blocksize);

	memset(dbuf, 0, sizeof dbuf);
	packarena(&arena, dbuf);
	if(arena.diskstats.sealed)
		scorecp(dbuf+arena.blocksize-VtScoreSize, score);
	p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
	memset(&oarena, 0, sizeof oarena);
	unpackarena(&oarena, p);
	if(arena.version == ArenaVersion4){
		sz = ArenaSize4;
		fmt = tailinfo4;
		fmta = tailinfo4a;
	}else{
		sz = ArenaSize5;
		fmt = tailinfo5;
		fmta = tailinfo5a;
	}
	if(p[sz] == 1){
		fmt = fmta;
		if(oarena.diskstats.sealed){
			/*
			 * some arenas were sealed with the extension
			 * before we adopted the convention that if it didn't
			 * add new information it gets dropped.
			 */
			_packarena(&arena, dbuf, 1);
		}
	}

	if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
		print("on-disk arena tail incorrect\n");
		showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
	}
	if(arena.diskstats.sealed){
		if(oarena.diskstats.sealed)
		if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
			print("on-disk arena seal score incorrect\n");
			print("\tcorrect=%V\n", oldscore);
			print("\t disk=%V\n", p+arena.blocksize-VtScoreSize);
		}
		if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
			print("%ssealing arena%s: %V\n",
				oarena.diskstats.sealed ? "re" : "",
				scorecmp(oldscore, score) == 0 ?
					"" : " after changes", score);
		}
	}
	memmove(p, dbuf, arena.blocksize);
	pageout();
}
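
/*
 * Rebuild the arena map by walking the partition at arenasize intervals
 * and recording the name and extent of every arena head that parses.
 */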
AMapN*
buildamap(void)
{
	uint8_t *p;
	int64_t o;
	ArenaHead h;
	AMapN *an;
	AMap *m;

	an = vtmallocz(sizeof *an);
	for(o=ap.arenabase; o<partend; o+=arenasize){
		p = pagein(o, Block);
		if(unpackarenahead(&h, p) >= 0){
			an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
			m = &an->map[an->n++];
			m->start = o;
			m->stop = o+h.size;
			strcpy(m->name, h.name);
		}
	}
	return an;
}
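
/*
 * Format the rebuilt arena map as the partition's map table, compare it
 * with the table on disk, and rewrite the table if they differ.
 */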
void
checkmap(void)
{
	char *s;
	uint8_t *p;
	int i, len;
	AMapN *an;
	Fmt fmt;

	an = buildamap();
	fmtstrinit(&fmt);
	fmtprint(&fmt, "%ud\n", an->n);
	for(i=0; i<an->n; i++)
		fmtprint(&fmt, "%s\t%lld\t%lld\n",
			an->map[i].name, an->map[i].start, an->map[i].stop);
	s = fmtstrflush(&fmt);
	len = strlen(s);
	if(len > ap.tabsize){
		print("arena partition map too long: need %z bytes have %z\n",
			(int64_t)len, (int64_t)ap.tabsize);
		len = ap.tabsize;
	}

	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */
		print("arena partition map *way* too long\n");
		return;
	}

	p = pagein(ap.tabbase, ap.tabsize);
	if(memcmp(p, s, len) != 0){
		print("arena partition map incorrect; rewriting.\n");
		memmove(p, s, len);
	}
	pageout();
}

int mainstacksize = 512*1024;

void
threadmain(int argc, char **argv)
{
	int mode;

	mode = OREAD;
	readonly = 1;
	ARGBEGIN{
	case 'U':
		unseal = 1;
		break;
	case 'a':
		arenasize = unittoull(EARGF(usage()));
		break;
	case 'b':
		ap.blocksize = unittoull(EARGF(usage()));
		break;
	case 'f':
		fix = 1;
		mode = ORDWR;
		readonly = 0;
		break;
	case 'n':
		basename = EARGF(usage());
		break;
	case 'v':
		verbose++;
		break;
	case 'x':
		dumpbase = EARGF(usage());
		break;
	default:
		usage();
	}ARGEND

	if(argc != 1 && argc != 2)
		usage();

	file = argv[0];

	ventifmtinstall();
	fmtinstall('z', zfmt);
	fmtinstall('t', tfmt);
	quotefmtinstall();

	part = initpart(file, mode|ODIRECT);
	if(part == nil)
		sysfatal("can't open %s: %r", file);
	partend = part->size;

	if(isonearena()){
		checkarena(0, -1);
		threadexitsall(nil);
	}
	checkarenas(argc > 1 ? argv[1] : nil);
	checkmap();
	threadexitsall(nil);
}