#include "stdinc.h"
#include "dat.h"
#include "fns.h"

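/*
 * ASum links an arena onto the queue consumed by sumproc below;
 * sumq/sumqtail form that queue, protected by sumlock and
 * signalled through sumwait.
 */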
typedef struct ASum ASum;

struct ASum
{
	Arena	*arena;
	ASum	*next;
};

static void	sealarena(Arena *arena);
static int	okarena(Arena *arena);
static int	loadarena(Arena *arena);
static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void	putcib(Arena *arena, CIBlock *cib);
static void	sumproc(void *);
static void	loadcig(Arena *arena);

static QLock	sumlock;
static Rendez	sumwait;
static ASum	*sumq;
static ASum	*sumqtail;
static uchar	zero[8192];

int	arenasumsleeptime;

int
initarenasum(void)
{
	needzeroscore();	/* OS X */

	sumwait.l = &sumlock;
	if(vtproc(sumproc, nil) < 0){
		seterr(EOk, "can't start arena checksum slave: %r");
		return -1;
	}
	return 0;
}

/*
 * make an Arena, and initialize it based upon the disk header and trailer.
 */
Arena*
initarena(Part *part, u64int base, u64int size, u32int blocksize)
{
	Arena *arena;

	arena = MKZ(Arena);
	arena->part = part;
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	if(loadarena(arena) < 0){
		seterr(ECorrupt, "arena header or trailer corrupted");
		freearena(arena);
		return nil;
	}
	if(okarena(arena) < 0){
		freearena(arena);
		return nil;
	}

	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
		sealarena(arena);

	return arena;
}

void
freearena(Arena *arena)
{
	if(arena == nil)
		return;
	free(arena);
}

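/*
 * create a new arena on part and write its header, trailer, and a
 * zeroed first data block.  base and size describe the whole
 * allocation; one block at each end is reserved for the header and
 * trailer, so the usable clump space is size - 2*blocksize.
 */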
Arena*
newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
{
	int bsize;
	Arena *arena;

	if(nameok(name) < 0){
		seterr(EOk, "illegal arena name %s", name);
		return nil;
	}
	arena = MKZ(Arena);
	arena->part = part;
	arena->version = vers;
	if(vers == ArenaVersion4)
		arena->clumpmagic = _ClumpMagic;
	else{
		do
			arena->clumpmagic = fastrand();
		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
	}
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	namecp(arena->name, name);

	bsize = sizeof zero;
	if(bsize > arena->blocksize)
		bsize = arena->blocksize;

	if(wbarena(arena)<0 || wbarenahead(arena)<0
	|| writepart(arena->part, arena->base, zero, bsize)<0){
		freearena(arena);
		return nil;
	}

	return arena;
}

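/*
 * read the directory entry for one clump out of the arena's
 * table of contents at the end of the arena.
 */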
int
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 0, &r);
	if(cib == nil)
		return -1;

	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

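/*
 * read the directory entries for up to n clumps starting at clump;
 * returns the number of entries actually read.
 */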
int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}

/*
 * write directory information for one clump
 * must be called with the arena locked
 */
int
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 1, &r);
	if(cib == nil)
		return -1;

	dirtydblock(cib->data, DirtyArenaCib);
	packclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

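/*
 * size in bytes of the directory (clump info blocks) needed to
 * describe the given number of clumps; the directory occupies
 * whole blocks at the end of the arena.
 */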
u64int
arenadirsize(Arena *arena, u32int clumps)
{
	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
}

/*
 * read a clump of data
 * n is a hint of the size of the data, not including the header
 * make sure it won't run off the end, then return the number of bytes actually read
 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n",
			arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	if(aa + n > a)
		n = a - aa;

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	return n;
}

/*
 * write some data to the clump section at a given offset
 * used to fix up corrupted arenas.
 */
u32int
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;
	int ok;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= a || aa + n > a){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return -1;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}

/*
 * allocate space for the clump and write it,
 * updating the arena directory
 * ZZZ question: should this distinguish between an arena
 * filling up and real errors writing the clump?
 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
{
	DBlock *b;
	u64int a, aa;
	u32int clump, n, nn, m, off, blocksize;
	int ok;

	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			arena->ncig = -1;
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;

	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);

	return aa;
}

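/*
 * compare two arena tail-stats records field by field;
 * returns -1, 0, or 1.  used below to decide whether the
 * on-disk stats lag the in-memory stats.
 */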
int
atailcmp(ATailStats *a, ATailStats *b)
{
	/* good test */
	if(a->used < b->used)
		return -1;
	if(a->used > b->used)
		return 1;

	/* suspect tests - why order this way? (no one cares) */
	if(a->clumps < b->clumps)
		return -1;
	if(a->clumps > b->clumps)
		return 1;
	if(a->cclumps < b->cclumps)
		return -1;
	if(a->cclumps > b->cclumps)
		return 1;
	if(a->uncsize < b->uncsize)
		return -1;
	if(a->uncsize > b->uncsize)
		return 1;
	if(a->sealed < b->sealed)
		return -1;
	if(a->sealed > b->sealed)
		return 1;

	/* everything matches */
	return 0;
}

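/*
 * flush in-memory tail state to the arena trailers on disk, for
 * every arena up to and including the one named in as: earlier
 * arenas get their full memstats, the arena in as gets as->stats.
 */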
void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex();	/* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		if(j == i)
			a->diskstats = as->stats;
		else
			a->diskstats = a->memstats;
		wbarena(a);
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}

/*
 * once sealed, an arena never has any data added to it.
 * it should only be changed to fix errors.
 * this also syncs the clump directory.
 */
static void
sealarena(Arena *arena)
{
	arena->inqueue = 1;
	backsumarena(arena);
}

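/*
 * queue the arena to have its seal checksum computed in the
 * background by sumproc; a no-op if initarenasum never ran.
 */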
void
backsumarena(Arena *arena)
{
	ASum *as;

	if(sumwait.l == nil)
		return;

	as = MK(ASum);
	if(as == nil)
		return;
	qlock(&sumlock);
	as->arena = arena;
	as->next = nil;
	if(sumq)
		sumqtail->next = as;
	else
		sumq = as;
	sumqtail = as;
	rwakeup(&sumwait);
	qunlock(&sumlock);
}

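/*
 * background worker: pull arenas off the sum queue and
 * checksum them one at a time.
 */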
static void
sumproc(void *unused)
{
	ASum *as;
	Arena *arena;

	USED(unused);
	for(;;){
		qlock(&sumlock);
		while(sumq == nil)
			rsleep(&sumwait);
		as = sumq;
		sumq = as->next;
		qunlock(&sumlock);
		arena = as->arena;
		free(as);

		sumarena(arena);
	}
}

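/*
 * compute the SHA1 score of the whole arena (with the trailer's
 * own score field summed as zero) and record it in the trailer.
 * arenasumsleeptime throttles the reads so that summing does not
 * monopolize the disk.
 */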
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	u64int a, e;
	u32int bs;
	int t;
	u8int score[VtScoreSize];

	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);
		if(a + bs > e)
			bs = arena->blocksize;
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}

/*
 * write the arena trailer block to the partition
 */
int
wbarena(Arena *arena)
{
	DBlock *b;
	int bad;

	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(b, DirtyArenaTrailer);
	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
	scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
	putdblock(b);
	if(bad)
		return -1;
	return 0;
}

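/*
 * write the arena header block to the partition; only used while
 * an arena is being created, so a synchronous writepart is fine.
 */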
int
wbarenahead(Arena *arena)
{
	ZBlock *b;
	ArenaHead head;
	int bad;

	namecp(head.name, arena->name);
	head.version = arena->version;
	head.size = arena->size + 2 * arena->blocksize;
	head.blocksize = arena->blocksize;
	head.clumpmagic = arena->clumpmagic;
	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
	if(b == nil){
		logerr(EAdmin, "can't write arena header: %r");
		/* ZZZ add error message? */
		return -1;
	}
	/*
	 * this writepart is okay because it only happens
	 * during initialization.
	 */
	bad = packarenahead(&head, b->data)<0 ||
		writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
		flushpart(arena->part)<0;
	freezblock(b);
	if(bad)
		return -1;
	return 0;
}

/*
 * read the arena header and trailer blocks from disk
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	|| arena->clumpmagic != head.clumpmagic
	|| arena->version != head.version
	|| arena->blocksize != head.blocksize
	|| arena->size + 2 * arena->blocksize != head.size){
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lud head %lud",
				(ulong)arena->size+2*arena->blocksize, head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}

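/*
 * sanity-check the metadata read from the arena trailer;
 * returns -1 only for errors serious enough to refuse the arena.
 */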
static int
okarena(Arena *arena)
{
	u64int dsize;
	int ok;

	ok = 0;
	dsize = arenadirsize(arena, arena->diskstats.clumps);
	if(arena->diskstats.used + dsize > arena->size){
		seterr(ECorrupt, "arena %s used > size", arena->name);
		ok = -1;
	}

	if(arena->diskstats.cclumps > arena->diskstats.clumps)
		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);

	/*
	 * This need not be true if some of the disk is corrupted.
	 *
	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
	 */

	if(arena->ctime > arena->wtime)
		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
	return ok;
}

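/*
 * map a clump index to the clump info block holding its entry and
 * the offset of the entry within that block.  the directory is
 * laid out in blocks growing backwards from the end of the arena.
 */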
static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	CIBlock *cib;
	u32int block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}

	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;

	cib = rock;
	cib->block = block;
	cib->offset = off;

	if(writing){
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
		else
			mode = ORDWR;
	}else
		mode = OREAD;

	cib->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(cib->data == nil)
		return nil;
	return cib;
}

static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);

	putdblock(cib->data);
	cib->data = nil;
}

/*
 * For index entry readahead purposes, the arenas are
 * broken into smaller subpieces, called clump info groups
 * or cigs.  Each cig has ArenaCIGSize clumps (ArenaCIGSize
 * is chosen to make the index entries take up about half
 * a megabyte).  The index entries do not contain enough
 * information to determine what the clump index is for
 * a given address in an arena.  That info is needed both for
 * figuring out which clump group an address belongs to
 * and for prefetching a clump group's index entries from
 * the arena table of contents.  The first time clump groups
 * are accessed, we scan the entire arena table of contents
 * (which might be 10s of megabytes), recording the data
 * offset of each clump group.
 */

/*
 * load clump info group information by scanning entire toc.
 */
static void
loadcig(Arena *arena)
{
	u32int i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	u64int offset;
	int ms;

	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		if(nci < ArenaCIGSize){
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;
				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}

/*
 * convert arena address into arena group + data boundaries.
 */
int
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	l = 1;
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;

	*g = l;
	*gstart = arena->cig[l].offset;
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}

/*
 * load the clump info for group g into the index entries.
 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	u64int addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	addr = 0;
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;
}