arena.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

typedef struct ASum ASum;

struct ASum
{
	Arena	*arena;
	ASum	*next;
};

static void	sealarena(Arena *arena);
static int	okarena(Arena *arena);
static int	loadarena(Arena *arena);
static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void	putcib(Arena *arena, CIBlock *cib);
static void	sumproc(void *);
static void	loadcig(Arena *arena);

static QLock	sumlock;
static Rendez	sumwait;
static ASum	*sumq;
static ASum	*sumqtail;
static uint8_t	zero[8192];

int	arenasumsleeptime;

int
initarenasum(void)
{
	needzeroscore();	/* OS X */

	sumwait.l = &sumlock;

	if(vtproc(sumproc, nil) < 0){
		seterr(EOk, "can't start arena checksum slave: %r");
		return -1;
	}
	return 0;
}

/*
 * make an Arena, and initialize it based upon the disk header and trailer.
 */
Arena*
initarena(Part *part, uint64_t base, uint64_t size, uint32_t blocksize)
{
	Arena *arena;

	arena = MKZ(Arena);
	arena->part = part;
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	if(loadarena(arena) < 0){
		seterr(ECorrupt, "arena header or trailer corrupted");
		freearena(arena);
		return nil;
	}
	if(okarena(arena) < 0){
		freearena(arena);
		return nil;
	}

	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
		sealarena(arena);

	return arena;
}
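
/*
 * For illustration: initarena() and newarena() both reserve one block at
 * each end of the region handed to them, so arena->base points just past
 * the header block and arena->size excludes both bookkeeping blocks.
 * A sketch of the resulting layout, in terms of the caller's base/size:
 *
 *	base                      header block (written by wbarenahead)
 *	base + blocksize          arena->base: clump data grows upward
 *	...                       clump directory grows downward from the top
 *	base + size - blocksize   arena->base + arena->size: trailer block
 *	                          (written by wbarena; score in its last bytes)
 */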
void
freearena(Arena *arena)
{
	if(arena == nil)
		return;
	free(arena);
}

Arena*
newarena(Part *part, uint32_t vers, char *name, uint64_t base, uint64_t size,
	uint32_t blocksize)
{
	int bsize;
	Arena *arena;

	if(nameok(name) < 0){
		seterr(EOk, "illegal arena name %s", name);
		return nil;
	}
	arena = MKZ(Arena);
	arena->part = part;
	arena->version = vers;
	if(vers == ArenaVersion4)
		arena->clumpmagic = _ClumpMagic;
	else{
		do
			arena->clumpmagic = fastrand();
		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
	}
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	namecp(arena->name, name);

	bsize = sizeof zero;
	if(bsize > arena->blocksize)
		bsize = arena->blocksize;

	if(wbarena(arena)<0 || wbarenahead(arena)<0
	|| writepart(arena->part, arena->base, zero, bsize)<0){
		freearena(arena);
		return nil;
	}

	return arena;
}
int
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 0, &r);
	if(cib == nil)
		return -1;
	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}
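
/*
 * A worked example of why backwards is faster, using getcib()'s layout:
 * the directory block for clump index c lives at
 *
 *	arena->base + arena->size - (c/arena->clumpmax + 1) * arena->blocksize
 *
 * so increasing clump numbers map to decreasing disk addresses.  Reading
 * clumps n-1, n-2, ... therefore touches directory blocks in ascending
 * disk order, which sequential reads and the block cache prefer.
 */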
/*
 * write directory information for one clump
 * must be called with the arena locked
 */
int
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 1, &r);
	if(cib == nil)
		return -1;
	dirtydblock(cib->data, DirtyArenaCib);
	packclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

uint64_t
arenadirsize(Arena *arena, uint32_t clumps)
{
	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
}
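
/*
 * For illustration, suppose blocksize is 8192 and ClumpInfoSize works out
 * to 25 bytes (the exact value comes from dat.h); then clumpmax is
 * 8192/25 = 327 entries per directory block, and an arena holding 1000
 * clumps reserves (1000/327 + 1) = 4 directory blocks, i.e. 32768 bytes,
 * at the top of the arena.
 */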
/*
 * read a clump of data
 * n is a hint of the size of the data, not including the header
 * make sure it won't run off the end, then return the number of bytes actually read
 */
uint32_t
readarena(Arena *arena, uint64_t aa, uint8_t *buf, int32_t n)
{
	DBlock *b;
	uint64_t a;
	uint32_t blocksize, off, m;
	int32_t nn;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	if(aa + n > a)
		n = a - aa;

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	return n;
}
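
/*
 * Example of the block/offset split above, which assumes blocksize is a
 * power of two (the mask arithmetic requires it): with blocksize 8192 and
 * a = arena->base + aa = 0x6100, off = 0x6100 & 0x1FFF = 0x100, so the
 * first copy starts at block address 0x6000 and moves 8192-256 = 7936
 * bytes before the loop advances to the next block with off reset to 0.
 */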
/*
 * write some data to the clump section at a given offset
 * used to fix up corrupted arenas.
 */
uint32_t
writearena(Arena *arena, uint64_t aa, uint8_t *clbuf, uint32_t n)
{
	DBlock *b;
	uint64_t a;
	uint32_t blocksize, off, m;
	int32_t nn;
	int ok;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= a || aa + n > a){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return -1;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}
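
/*
 * A note on the getdblock() mode selection in the loop above: the intent
 * appears to be that a write covering a whole cache block may use OWRITE,
 * letting the block cache skip reading the old contents from disk, while
 * a partial-block write must use ORDWR so the bytes outside [off, off+m)
 * are preserved.
 */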
/*
 * allocate space for the clump and write it,
 * updating the arena directory
ZZZ question: should this distinguish between an arena
filling up and real errors writing the clump?
 */
uint64_t
writeaclump(Arena *arena, Clump *c, uint8_t *clbuf)
{
	DBlock *b;
	uint64_t a, aa;
	uint32_t clump, n, nn, m, off, blocksize;
	int ok;

	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			arena->ncig = -1;
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;
	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);

	return aa;
}
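
/*
 * A small worked example of the cig bookkeeping above: with ArenaCIGSize
 * hypothetically 8192, writing clump number 16384 (a multiple of
 * ArenaCIGSize) appends a third entry to arena->cig whose offset is the
 * arena address aa at which that clump begins, so later lookups can map
 * an address range straight to clump group 2 without rescanning the toc.
 */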
int
atailcmp(ATailStats *a, ATailStats *b)
{
	/* good test */
	if(a->used < b->used)
		return -1;
	if(a->used > b->used)
		return 1;

	/* suspect tests - why order this way? (no one cares) */
	if(a->clumps < b->clumps)
		return -1;
	if(a->clumps > b->clumps)
		return 1;
	if(a->cclumps < b->cclumps)
		return -1;
	if(a->cclumps > b->cclumps)
		return 1;
	if(a->uncsize < b->uncsize)
		return -1;
	if(a->uncsize > b->uncsize)
		return 1;
	if(a->sealed < b->sealed)
		return -1;
	if(a->sealed > b->sealed)
		return 1;

	/* everything matches */
	return 0;
}

void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex();	/* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		if(j == i)
			a->diskstats = as->stats;
		else
			a->diskstats = a->memstats;
		wbarena(a);
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}
/*
 * once sealed, an arena never has any data added to it.
 * it should only be changed to fix errors.
 * this also syncs the clump directory.
 */
static void
sealarena(Arena *arena)
{
	arena->inqueue = 1;
	backsumarena(arena);
}

void
backsumarena(Arena *arena)
{
	ASum *as;

	if(sumwait.l == nil)
		return;

	as = MK(ASum);
	if(as == nil)
		return;
	qlock(&sumlock);
	as->arena = arena;
	as->next = nil;
	if(sumq)
		sumqtail->next = as;
	else
		sumq = as;
	sumqtail = as;
	rwakeup(&sumwait);
	qunlock(&sumlock);
}

static void
sumproc(void *unused)
{
	ASum *as;
	Arena *arena;

	USED(unused);

	for(;;){
		qlock(&sumlock);
		while(sumq == nil)
			rsleep(&sumwait);
		as = sumq;
		sumq = as->next;
		qunlock(&sumlock);
		arena = as->arena;
		free(as);

		sumarena(arena);
	}
}
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	uint64_t a, e;
	uint32_t bs;
	int t;
	uint8_t score[VtScoreSize];

	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);
		if(a + bs > e)
			bs = arena->blocksize;
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}
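
/*
 * To make the seal score reproducible, the trailer block is hashed as if
 * its score field were still zero: the code above feeds everything up to
 * the last VtScoreSize bytes into the SHA-1 state and then feeds zeroscore
 * in place of whatever is stored there.  A verifier can follow the same
 * recipe (sketch, reusing this file's helpers):
 *
 *	sha1(b->data, bs-VtScoreSize, nil, &s);
 *	sha1(zeroscore, VtScoreSize, nil, &s);
 *	sha1(nil, 0, score, &s);
 *	if(scorecmp(score, arena->score) == 0)
 *		... arena contents match the recorded seal ...
 */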
/*
 * write the arena trailer block to the partition
 */
int
wbarena(Arena *arena)
{
	DBlock *b;
	int bad;

	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(b, DirtyArenaTrailer);
	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
	scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
	putdblock(b);
	if(bad)
		return -1;
	return 0;
}

int
wbarenahead(Arena *arena)
{
	ZBlock *b;
	ArenaHead head;
	int bad;

	namecp(head.name, arena->name);
	head.version = arena->version;
	head.size = arena->size + 2 * arena->blocksize;
	head.blocksize = arena->blocksize;
	head.clumpmagic = arena->clumpmagic;
	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
	if(b == nil){
		logerr(EAdmin, "can't write arena header: %r");
		/* ZZZ add error message? */
		return -1;
	}
	/*
	 * this writepart is okay because it only happens
	 * during initialization.
	 */
	bad = packarenahead(&head, b->data)<0 ||
		writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
		flushpart(arena->part)<0;
	freezblock(b);
	if(bad)
		return -1;
	return 0;
}
/*
 * read the arena header and trailer blocks from disk
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	     || arena->clumpmagic != head.clumpmagic
	     || arena->version != head.version
	     || arena->blocksize != head.blocksize
	     || arena->size + 2 * arena->blocksize != head.size){
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena %d tail clumpmagic 0x%lx head 0x%lx",
				debugarena, (uint32_t)arena->clumpmagic,
				(uint32_t)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lu head %lu",
				(uint32_t)arena->size+2*arena->blocksize,
				head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}
static int
okarena(Arena *arena)
{
	uint64_t dsize;
	int ok;

	ok = 0;
	dsize = arenadirsize(arena, arena->diskstats.clumps);
	if(arena->diskstats.used + dsize > arena->size){
		seterr(ECorrupt, "arena %s used > size", arena->name);
		ok = -1;
	}

	if(arena->diskstats.cclumps > arena->diskstats.clumps)
		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);

	/*
	 * This need not be true if some of the disk is corrupted.
	 *
	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
	 */

	/*
	 * this happens; it's harmless.
	 *
	if(arena->ctime > arena->wtime)
		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
	 */
	return ok;
}
static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	CIBlock *cib;
	uint32_t block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}

	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;

	cib = rock;
	cib->block = block;
	cib->offset = off;

	if(writing){
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
		else
			mode = ORDWR;
	}else
		mode = OREAD;

	cib->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(cib->data == nil)
		return nil;
	return cib;
}
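
/*
 * Worked example of the directory arithmetic above, assuming a clumpmax
 * of 327 entries per block: clump 1000 lands in directory block
 * 1000/327 = 3 at byte offset (1000 - 3*327) * ClumpInfoSize =
 * 19 * ClumpInfoSize within that block, and the block itself is read
 * from arena->base + arena->size - 4*blocksize.
 */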
static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);

	putdblock(cib->data);
	cib->data = nil;
}


/*
 * For index entry readahead purposes, the arenas are
 * broken into smaller subpieces, called clump info groups
 * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
 * is chosen to make the index entries take up about half
 * a megabyte). The index entries do not contain enough
 * information to determine what the clump index is for
 * a given address in an arena. That info is needed both for
 * figuring out which clump group an address belongs to
 * and for prefetching a clump group's index entries from
 * the arena table of contents. The first time clump groups
 * are accessed, we scan the entire arena table of contents
 * (which might be 10s of megabytes), recording the data
 * offset of each clump group.
 */

/*
 * load clump info group information by scanning entire toc.
 */
static void
loadcig(Arena *arena)
{
	uint32_t i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	uint64_t offset;
	int ms;

	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		if(nci < ArenaCIGSize){
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;
				fprint(2, "loadcig %s: got %u cigs, expected %u\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}
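
/*
 * Example of the group count above: ncig is the ceiling of
 * clumps/ArenaCIGSize, so an arena with 20001 clumps and a hypothetical
 * ArenaCIGSize of 8192 gets (20001+8191)/8192 = 3 groups; cig[0].offset
 * is 0 and each later cig[i].offset is the running total of
 * ClumpSize + size over the preceding groups' clumps.
 */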
/*
 * convert arena address into arena group + data boundaries.
 */
int
arenatog(Arena *arena, uint64_t addr, uint64_t *gstart, uint64_t *glimit,
	int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	l = 1;
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;

	*g = l;
	*gstart = arena->cig[l].offset;
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}
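
/*
 * The binary search above finds the last group whose starting offset is
 * <= addr.  For instance, with cig offsets {0, 4000, 9000} and addr 5000,
 * the loop leaves l == 2 and the final l-- selects group 1, so the caller
 * gets gstart 4000 and glimit 9000.
 */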
/*
 * load the clump info for group g into the index entries.
 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	uint64_t addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	addr = 0;
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;
}
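
/*
 * Example of the ia.blocks computation above: it rounds a clump's on-disk
 * footprint up to whole 1<<ABlockLog units.  Assuming for illustration
 * that ABlockLog is 13 (8192-byte units) and ci.size + ClumpSize comes to
 * 9000 bytes, then (9000 + 8191) >> 13 = 2 blocks.
 */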