arena.c

#include "stdinc.h"
#include "dat.h"
#include "fns.h"

typedef struct ASum ASum;

struct ASum
{
	Arena	*arena;
	ASum	*next;
};

static void	sealarena(Arena *arena);
static int	okarena(Arena *arena);
static int	loadarena(Arena *arena);
static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void	putcib(Arena *arena, CIBlock *cib);
static void	sumproc(void *);
static void	loadcig(Arena *arena);

static QLock	sumlock;
static Rendez	sumwait;
static ASum	*sumq;
static ASum	*sumqtail;
static uchar	zero[8192];

int	arenasumsleeptime;

int
initarenasum(void)
{
	needzeroscore();	/* OS X */

	sumwait.l = &sumlock;

	if(vtproc(sumproc, nil) < 0){
		seterr(EOk, "can't start arena checksum slave: %r");
		return -1;
	}
	return 0;
}

/*
 * make an Arena, and initialize it based upon the disk header and trailer.
 */
Arena*
initarena(Part *part, u64int base, u64int size, u32int blocksize)
{
	Arena *arena;

	arena = MKZ(Arena);
	arena->part = part;
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	if(loadarena(arena) < 0){
		seterr(ECorrupt, "arena header or trailer corrupted");
		freearena(arena);
		return nil;
	}
	if(okarena(arena) < 0){
		freearena(arena);
		return nil;
	}

	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
		sealarena(arena);

	return arena;
}

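/*
 * the arithmetic above implies the on-disk layout (illustrative numbers
 * only): with base=8192, size=65536 and blocksize=8192 the arena occupies
 *
 *	[ 8192, 16384)	arena header block	(arena->base - blocksize)
 *	[16384, 65536)	clump data + directory	(arena->base, arena->size = 49152)
 *	[65536, 73728)	arena trailer block	(arena->base + arena->size)
 *
 * one header block in front, one trailer block behind; arena->base and
 * arena->size describe only the middle region.
 */
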
void
freearena(Arena *arena)
{
	if(arena == nil)
		return;
	free(arena);
}

Arena*
newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
{
	int bsize;
	Arena *arena;

	if(nameok(name) < 0){
		seterr(EOk, "illegal arena name %s", name);
		return nil;
	}
	arena = MKZ(Arena);
	arena->part = part;
	arena->version = vers;
	if(vers == ArenaVersion4)
		arena->clumpmagic = _ClumpMagic;
	else{
		do
			arena->clumpmagic = fastrand();
		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
	}
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	namecp(arena->name, name);

	bsize = sizeof zero;
	if(bsize > arena->blocksize)
		bsize = arena->blocksize;

	if(wbarena(arena)<0 || wbarenahead(arena)<0
	|| writepart(arena->part, arena->base, zero, bsize)<0){
		freearena(arena);
		return nil;
	}

	return arena;
}

int
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 0, &r);
	if(cib == nil)
		return -1;
	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}

/*
 * write directory information for one clump
 * must be called with the arena locked
 */
int
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock *cib, r;

	cib = getcib(arena, clump, 1, &r);
	if(cib == nil)
		return -1;
	dirtydblock(cib->data, DirtyArenaCib);
	packclumpinfo(ci, &cib->data->data[cib->offset]);
	putcib(arena, cib);
	return 0;
}

u64int
arenadirsize(Arena *arena, u32int clumps)
{
	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
}

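/*
 * worked example (illustrative numbers): with blocksize=8192 and
 * ClumpInfoSize=25, clumpmax = 8192/25 = 327 directory entries per block.
 * For 1000 clumps, arenadirsize returns (1000/327 + 1) * 8192 = 4 blocks
 * = 32768 bytes; the directory space reserved at the tail of the arena is
 * always rounded up to whole blocks.
 */
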
/*
 * read a clump of data
 * n is a hint of the size of the data, not including the header
 * make sure it won't run off the end, then return the number of bytes actually read
 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	if(aa + n > a)
		n = a - aa;

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	return n;
}

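/*
 * the copy loop above splits a read across cache blocks.  Illustrative
 * numbers: blocksize=8192 and arena->base + aa = 12000 give
 * off = 12000 & 8191 = 3808, so the first getdblock is at a = 8192 and
 * supplies 8192 - 3808 = 4384 bytes; every later iteration starts at
 * off = 0 and copies whole blocks (or the remaining tail of n).
 */
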
/*
 * write some data to the clump section at a given offset
 * used to fix up corrupted arenas.
 */
u32int
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;
	int ok;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= a || aa + n > a){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return -1;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}

/*
 * allocate space for the clump and write it,
 * updating the arena directory
 * ZZZ question: should this distinguish between an arena
 * filling up and real errors writing the clump?
 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
{
	DBlock *b;
	u64int a, aa;
	u32int clump, n, nn, m, off, blocksize;
	int ok;

	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			arena->ncig = -1;
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;
	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);
	return aa;
}

int
atailcmp(ATailStats *a, ATailStats *b)
{
	/* good test */
	if(a->used < b->used)
		return -1;
	if(a->used > b->used)
		return 1;

	/* suspect tests - why order this way? (no one cares) */
	if(a->clumps < b->clumps)
		return -1;
	if(a->clumps > b->clumps)
		return 1;
	if(a->cclumps < b->cclumps)
		return -1;
	if(a->cclumps > b->cclumps)
		return 1;
	if(a->uncsize < b->uncsize)
		return -1;
	if(a->uncsize > b->uncsize)
		return 1;
	if(a->sealed < b->sealed)
		return -1;
	if(a->sealed > b->sealed)
		return 1;

	/* everything matches */
	return 0;
}

void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex();	/* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		if(j == i)
			a->diskstats = as->stats;
		else
			a->diskstats = a->memstats;
		wbarena(a);
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}

/*
 * once sealed, an arena never has any data added to it.
 * it should only be changed to fix errors.
 * this also syncs the clump directory.
 */
static void
sealarena(Arena *arena)
{
	arena->inqueue = 1;
	backsumarena(arena);
}

void
backsumarena(Arena *arena)
{
	ASum *as;

	if(sumwait.l == nil)
		return;

	as = MK(ASum);
	if(as == nil)
		return;
	qlock(&sumlock);
	as->arena = arena;
	as->next = nil;
	if(sumq)
		sumqtail->next = as;
	else
		sumq = as;
	sumqtail = as;
	rwakeup(&sumwait);
	qunlock(&sumlock);
}

static void
sumproc(void *unused)
{
	ASum *as;
	Arena *arena;

	USED(unused);
	for(;;){
		qlock(&sumlock);
		while(sumq == nil)
			rsleep(&sumwait);
		as = sumq;
		sumq = as->next;
		qunlock(&sumlock);
		arena = as->arena;
		free(as);

		sumarena(arena);
	}
}

void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	u64int a, e;
	u32int bs;
	int t;
	u8int score[VtScoreSize];

	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);
		if(a + bs > e)
			bs = arena->blocksize;
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}

	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}

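/*
 * note on the seal score: the digest covers the whole arena region,
 * header block through trailer block, but with zeroscore hashed in place
 * of the trailer's final VtScoreSize bytes.  That makes the score well
 * defined before it has been written, and lets a checker recompute the
 * same digest later and compare it against the score stored at the end
 * of the trailer (the scorecmp tests above).
 */
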
/*
 * write the arena trailer block to the partition
 */
int
wbarena(Arena *arena)
{
	DBlock *b;
	int bad;

	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(b, DirtyArenaTrailer);
	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
	scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
	putdblock(b);
	if(bad)
		return -1;
	return 0;
}

int
wbarenahead(Arena *arena)
{
	ZBlock *b;
	ArenaHead head;
	int bad;

	namecp(head.name, arena->name);
	head.version = arena->version;
	head.size = arena->size + 2 * arena->blocksize;
	head.blocksize = arena->blocksize;
	head.clumpmagic = arena->clumpmagic;
	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
	if(b == nil){
		logerr(EAdmin, "can't write arena header: %r");
		/* ZZZ add error message? */
		return -1;
	}
	/*
	 * this writepart is okay because it only happens
	 * during initialization.
	 */
	bad = packarenahead(&head, b->data)<0 ||
		writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
		flushpart(arena->part)<0;
	freezblock(b);
	if(bad)
		return -1;
	return 0;
}

/*
 * read the arena header and trailer blocks from disk
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	|| arena->clumpmagic != head.clumpmagic
	|| arena->version != head.version
	|| arena->blocksize != head.blocksize
	|| arena->size + 2 * arena->blocksize != head.size){
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lud head %lud",
				(ulong)arena->size+2*arena->blocksize, head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}

static int
okarena(Arena *arena)
{
	u64int dsize;
	int ok;

	ok = 0;
	dsize = arenadirsize(arena, arena->diskstats.clumps);
	if(arena->diskstats.used + dsize > arena->size){
		seterr(ECorrupt, "arena %s used > size", arena->name);
		ok = -1;
	}

	if(arena->diskstats.cclumps > arena->diskstats.clumps)
		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);

	/*
	 * This need not be true if some of the disk is corrupted.
	 *
	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
	 */

	if(arena->ctime > arena->wtime)
		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
	return ok;
}

static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	CIBlock *cib;
	u32int block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}

	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;

	cib = rock;
	cib->block = block;
	cib->offset = off;

	if(writing){
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
		else
			mode = ORDWR;
	}else
		mode = OREAD;

	cib->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(cib->data == nil)
		return nil;
	return cib;
}

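/*
 * worked example of the directory mapping (illustrative numbers,
 * clumpmax = 327 as in the arenadirsize example): clump 1000 gives
 * block = 1000/327 = 3 and off = (1000 - 3*327) * 25 = 475, and the
 * block is read from arena->base + arena->size - 4*blocksize.  The
 * directory thus grows backwards from just below the trailer, which is
 * why readclumpinfos walks clumps in reverse to read the disk forwards.
 */
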
static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);
	putdblock(cib->data);
	cib->data = nil;
}

/*
 * For index entry readahead purposes, the arenas are
 * broken into smaller subpieces, called clump info groups
 * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
 * is chosen to make the index entries take up about half
 * a megabyte). The index entries do not contain enough
 * information to determine what the clump index is for
 * a given address in an arena. That info is needed both for
 * figuring out which clump group an address belongs to
 * and for prefetching a clump group's index entries from
 * the arena table of contents. The first time clump groups
 * are accessed, we scan the entire arena table of contents
 * (which might be 10s of megabytes), recording the data
 * offset of each clump group.
 */

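/*
 * concretely (hypothetical sizes): if a group held 8192 clumps averaging
 * 4000 bytes on disk, cig[0].offset would be 0, cig[1].offset about
 * 8192*(ClumpSize+4000), and so on.  loadcig below fills this table by
 * summing ClumpSize + ci.size across the toc, and arenatog uses it to
 * map an arena data offset back to its group.
 */
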
/*
 * load clump info group information by scanning entire toc.
 */
static void
loadcig(Arena *arena)
{
	u32int i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	u64int offset;
	int ms;

	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		if(nci < ArenaCIGSize){
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;
				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}

/*
 * convert arena address into arena group + data boundaries.
 */
int
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	l = 1;
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;

	*g = l;
	*gstart = arena->cig[l].offset;
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}

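/*
 * the binary search leaves l as the last group whose starting offset is
 * <= addr (group 0 by default, since l starts at 1 and is decremented).
 * Example with cig offsets {0, 40960, 81920}: addr 50000 yields g=1,
 * *gstart=40960 and *glimit=81920; for the final group the limit is the
 * arena's current used size instead.
 */
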
/*
 * load the clump info for group g into the index entries.
 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	u64int addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	addr = 0;
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;
}
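
/*
 * rough usage sketch (hypothetical caller; error handling abbreviated):
 * map an arena data offset to its clump info group, then load that
 * group's index entries.
 *
 *	u64int gstart, glimit;
 *	int g, n;
 *	IEntry entries[ArenaCIGSize];
 *
 *	if(arenatog(arena, addr, &gstart, &glimit, &g) >= 0){
 *		n = asumload(arena, g, entries, ArenaCIGSize);
 *		// entries[0..n) now describe the group's clumps
 *	}
 */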