arena.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. typedef struct ASum ASum;
  5. struct ASum
  6. {
  7. Arena *arena;
  8. ASum *next;
  9. };
  10. static void sealArena(Arena *arena);
  11. static int okArena(Arena *arena);
  12. static int loadArena(Arena *arena);
  13. static CIBlock *getCIB(Arena *arena, int clump, int writing, CIBlock *rock);
  14. static void putCIB(Arena *arena, CIBlock *cib);
  15. static int flushCIEntry(Arena *arena, int i);
  16. static void doASum(void *);
  17. static VtLock *sumLock;
  18. static VtRendez *sumWait;
  19. static ASum *sumq;
  20. int
  21. initArenaSum(void)
  22. {
  23. sumLock = vtLockAlloc();
  24. sumWait = vtRendezAlloc(sumLock);
  25. if(vtThread(doASum, nil) < 0){
  26. setErr(EOk, "can't start arena checksum slave: %R");
  27. return 0;
  28. }
  29. return 1;
  30. }
  31. /*
  32. * make an Arena, and initialize it based upon the disk header and trailer.
  33. */
  34. Arena*
  35. initArena(Part *part, u64int base, u64int size, u32int blockSize)
  36. {
  37. Arena *arena;
  38. arena = MKZ(Arena);
  39. arena->part = part;
  40. arena->blockSize = blockSize;
  41. arena->clumpMax = arena->blockSize / ClumpInfoSize;
  42. arena->base = base + blockSize;
  43. arena->size = size - 2 * blockSize;
  44. arena->lock = vtLockAlloc();
  45. if(!loadArena(arena)){
  46. setErr(ECorrupt, "arena header or trailer corrupted");
  47. freeArena(arena);
  48. return nil;
  49. }
  50. if(!okArena(arena)){
  51. freeArena(arena);
  52. return nil;
  53. }
  54. if(arena->sealed && scoreEq(zeroScore, arena->score))
  55. backSumArena(arena);
  56. return arena;
  57. }
  58. void
  59. freeArena(Arena *arena)
  60. {
  61. if(arena == nil)
  62. return;
  63. if(arena->cib.data != nil){
  64. putDBlock(arena->cib.data);
  65. arena->cib.data = nil;
  66. }
  67. vtLockFree(arena->lock);
  68. free(arena);
  69. }
  70. Arena*
  71. newArena(Part *part, char *name, u64int base, u64int size, u32int blockSize)
  72. {
  73. Arena *arena;
  74. if(!nameOk(name)){
  75. setErr(EOk, "illegal arena name", name);
  76. return nil;
  77. }
  78. arena = MKZ(Arena);
  79. arena->part = part;
  80. arena->version = ArenaVersion;
  81. arena->blockSize = blockSize;
  82. arena->clumpMax = arena->blockSize / ClumpInfoSize;
  83. arena->base = base + blockSize;
  84. arena->size = size - 2 * blockSize;
  85. arena->lock = vtLockAlloc();
  86. nameCp(arena->name, name);
  87. if(!wbArena(arena) || !wbArenaHead(arena)){
  88. freeArena(arena);
  89. return nil;
  90. }
  91. return arena;
  92. }
  93. int
  94. readClumpInfo(Arena *arena, int clump, ClumpInfo *ci)
  95. {
  96. CIBlock *cib, r;
  97. cib = getCIB(arena, clump, 0, &r);
  98. if(cib == nil)
  99. return 0;
  100. unpackClumpInfo(ci, &cib->data->data[cib->offset]);
  101. putCIB(arena, cib);
  102. return 1;
  103. }
  104. int
  105. readClumpInfos(Arena *arena, int clump, ClumpInfo *cis, int n)
  106. {
  107. CIBlock *cib, r;
  108. int i;
  109. for(i = 0; i < n; i++){
  110. cib = getCIB(arena, clump + i, 0, &r);
  111. if(cib == nil)
  112. break;
  113. unpackClumpInfo(&cis[i], &cib->data->data[cib->offset]);
  114. putCIB(arena, cib);
  115. }
  116. return i;
  117. }
  118. /*
  119. * write directory information for one clump
  120. * must be called the arena locked
  121. */
  122. int
  123. writeClumpInfo(Arena *arena, int clump, ClumpInfo *ci)
  124. {
  125. CIBlock *cib, r;
  126. cib = getCIB(arena, clump, 1, &r);
  127. if(cib == nil)
  128. return 0;
  129. packClumpInfo(ci, &cib->data->data[cib->offset]);
  130. putCIB(arena, cib);
  131. return 1;
  132. }
  133. u64int
  134. arenaDirSize(Arena *arena, u32int clumps)
  135. {
  136. return ((clumps / arena->clumpMax) + 1) * arena->blockSize;
  137. }
  138. /*
  139. * read a clump of data
  140. * n is a hint of the size of the data, not including the header
  141. * make sure it won't run off the end, then return the number of bytes actually read
  142. */
  143. u32int
  144. readArena(Arena *arena, u64int aa, u8int *buf, long n)
  145. {
  146. DBlock *b;
  147. u64int a;
  148. u32int blockSize, off, m;
  149. long nn;
  150. if(n == 0)
  151. return 0;
  152. vtLock(arena->lock);
  153. a = arena->size - arenaDirSize(arena, arena->clumps);
  154. vtUnlock(arena->lock);
  155. if(aa >= a){
  156. setErr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->clumps, aa, a, arena->size - arenaDirSize(arena, arena->clumps - 1));
  157. return 0;
  158. }
  159. if(aa + n > a)
  160. n = a - aa;
  161. blockSize = arena->blockSize;
  162. a = arena->base + aa;
  163. off = a & (blockSize - 1);
  164. a -= off;
  165. nn = 0;
  166. for(;;){
  167. b = getDBlock(arena->part, a, 1);
  168. if(b == nil)
  169. return 0;
  170. m = blockSize - off;
  171. if(m > n - nn)
  172. m = n - nn;
  173. memmove(&buf[nn], &b->data[off], m);
  174. putDBlock(b);
  175. nn += m;
  176. if(nn == n)
  177. break;
  178. off = 0;
  179. a += blockSize;
  180. }
  181. return n;
  182. }
  183. /*
  184. * write some data to the clump section at a given offset
  185. * used to fix up corrupted arenas.
  186. */
  187. u32int
  188. writeArena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
  189. {
  190. DBlock *b;
  191. u64int a;
  192. u32int blockSize, off, m;
  193. long nn;
  194. int ok;
  195. if(n == 0)
  196. return 0;
  197. vtLock(arena->lock);
  198. a = arena->size - arenaDirSize(arena, arena->clumps);
  199. if(aa >= a || aa + n > a){
  200. vtUnlock(arena->lock);
  201. setErr(EOk, "writing beyond arena clump storage");
  202. return 0;
  203. }
  204. blockSize = arena->blockSize;
  205. a = arena->base + aa;
  206. off = a & (blockSize - 1);
  207. a -= off;
  208. nn = 0;
  209. for(;;){
  210. b = getDBlock(arena->part, a, off != 0 || off + n < blockSize);
  211. if(b == nil){
  212. vtUnlock(arena->lock);
  213. return 0;
  214. }
  215. m = blockSize - off;
  216. if(m > n - nn)
  217. m = n - nn;
  218. memmove(&b->data[off], &clbuf[nn], m);
  219. ok = writePart(arena->part, a, b->data, blockSize);
  220. putDBlock(b);
  221. if(!ok){
  222. vtUnlock(arena->lock);
  223. return 0;
  224. }
  225. nn += m;
  226. if(nn == n)
  227. break;
  228. off = 0;
  229. a += blockSize;
  230. }
  231. vtUnlock(arena->lock);
  232. return n;
  233. }
  234. /*
  235. * allocate space for the clump and write it,
  236. * updating the arena directory
  237. ZZZ question: should this distinguish between an arena
  238. filling up and real errors writing the clump?
  239. */
  240. u64int
  241. writeAClump(Arena *arena, Clump *c, u8int *clbuf)
  242. {
  243. DBlock *b;
  244. u64int a, aa;
  245. u32int clump, n, nn, m, off, blockSize;
  246. int ok;
  247. n = c->info.size + ClumpSize;
  248. vtLock(arena->lock);
  249. aa = arena->used;
  250. if(arena->sealed
  251. || aa + n + U32Size + arenaDirSize(arena, arena->clumps + 1) > arena->size){
  252. if(!arena->sealed)
  253. sealArena(arena);
  254. vtUnlock(arena->lock);
  255. return TWID64;
  256. }
  257. if(!packClump(c, &clbuf[0])){
  258. vtUnlock(arena->lock);
  259. return TWID64;
  260. }
  261. /*
  262. * write the data out one block at a time
  263. */
  264. blockSize = arena->blockSize;
  265. a = arena->base + aa;
  266. off = a & (blockSize - 1);
  267. a -= off;
  268. nn = 0;
  269. for(;;){
  270. b = getDBlock(arena->part, a, off != 0);
  271. if(b == nil){
  272. vtUnlock(arena->lock);
  273. return TWID64;
  274. }
  275. m = blockSize - off;
  276. if(m > n - nn)
  277. m = n - nn;
  278. memmove(&b->data[off], &clbuf[nn], m);
  279. ok = writePart(arena->part, a, b->data, blockSize);
  280. putDBlock(b);
  281. if(!ok){
  282. vtUnlock(arena->lock);
  283. return TWID64;
  284. }
  285. nn += m;
  286. if(nn == n)
  287. break;
  288. off = 0;
  289. a += blockSize;
  290. }
  291. arena->used += c->info.size + ClumpSize;
  292. arena->uncsize += c->info.uncsize;
  293. if(c->info.size < c->info.uncsize)
  294. arena->cclumps++;
  295. clump = arena->clumps++;
  296. if(arena->clumps == 0)
  297. fatal("clumps wrapped\n");
  298. arena->wtime = now();
  299. if(arena->ctime == 0)
  300. arena->ctime = arena->wtime;
  301. writeClumpInfo(arena, clump, &c->info);
  302. //ZZZ make this an enum param
  303. if((clump & 0x1ff) == 0x1ff){
  304. flushCIBlocks(arena);
  305. wbArena(arena);
  306. }
  307. vtUnlock(arena->lock);
  308. return aa;
  309. }
  310. /*
  311. * once sealed, an arena never has any data added to it.
  312. * it should only be changed to fix errors.
  313. * this also syncs the clump directory.
  314. */
  315. static void
  316. sealArena(Arena *arena)
  317. {
  318. flushCIBlocks(arena);
  319. arena->sealed = 1;
  320. wbArena(arena);
  321. backSumArena(arena);
  322. }
  323. void
  324. backSumArena(Arena *arena)
  325. {
  326. ASum *as;
  327. if(sumLock == nil)
  328. return;
  329. as = MK(ASum);
  330. if(as == nil)
  331. return;
  332. vtLock(sumLock);
  333. as->arena = arena;
  334. as->next = sumq;
  335. sumq = as;
  336. vtWakeup(sumWait);
  337. vtUnlock(sumLock);
  338. }
  339. static void
  340. doASum(void *unused)
  341. {
  342. ASum *as;
  343. Arena *arena;
  344. if(unused){;}
  345. for(;;){
  346. vtLock(sumLock);
  347. while(sumq == nil)
  348. vtSleep(sumWait);
  349. as = sumq;
  350. sumq = as->next;
  351. vtUnlock(sumLock);
  352. arena = as->arena;
  353. free(as);
  354. sumArena(arena);
  355. }
  356. }
  357. void
  358. sumArena(Arena *arena)
  359. {
  360. ZBlock *b;
  361. VtSha1 *s;
  362. u64int a, e;
  363. u32int bs;
  364. u8int score[VtScoreSize];
  365. bs = MaxIoSize;
  366. if(bs < arena->blockSize)
  367. bs = arena->blockSize;
  368. s = vtSha1Alloc();
  369. if(s == nil){
  370. logErr(EOk, "sumArena can't initialize sha1 state");
  371. return;
  372. }
  373. /*
  374. * read & sum all blocks except the last one
  375. */
  376. vtSha1Init(s);
  377. b = allocZBlock(bs, 0);
  378. e = arena->base + arena->size;
  379. for(a = arena->base - arena->blockSize; a + arena->blockSize <= e; a += bs){
  380. if(a + bs > e)
  381. bs = arena->blockSize;
  382. if(!readPart(arena->part, a, b->data, bs))
  383. goto ReadErr;
  384. vtSha1Update(s, b->data, bs);
  385. }
  386. /*
  387. * the last one is special, since it may already have the checksum included
  388. */
  389. bs = arena->blockSize;
  390. if(!readPart(arena->part, e, b->data, bs)){
  391. ReadErr:
  392. logErr(EOk, "sumArena can't sum %s, read at %lld failed: %r", arena->name, a);
  393. freeZBlock(b);
  394. vtSha1Free(s);
  395. return;
  396. }
  397. vtSha1Update(s, b->data, bs - VtScoreSize);
  398. vtSha1Update(s, zeroScore, VtScoreSize);
  399. vtSha1Final(s, score);
  400. vtSha1Free(s);
  401. /*
  402. * check for no checksum or the same
  403. */
  404. if(!scoreEq(score, &b->data[bs - VtScoreSize])){
  405. if(!scoreEq(zeroScore, &b->data[bs - VtScoreSize]))
  406. logErr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
  407. arena->name, &b->data[bs - VtScoreSize], score);
  408. scoreCp(&b->data[bs - VtScoreSize], score);
  409. if(!writePart(arena->part, e, b->data, bs))
  410. logErr(EOk, "sumArena can't write sum for %s: %r", arena->name);
  411. }
  412. freeZBlock(b);
  413. vtLock(arena->lock);
  414. scoreCp(arena->score, score);
  415. vtUnlock(arena->lock);
  416. }
  417. /*
  418. * write the arena trailer block to the partition
  419. */
  420. int
  421. wbArena(Arena *arena)
  422. {
  423. ZBlock *b;
  424. int ok;
  425. b = allocZBlock(arena->blockSize, 1);
  426. if(b == nil){
  427. logErr(EAdmin, "can't write arena trailer: %R");
  428. ///ZZZ add error message?
  429. return 0;
  430. }
  431. ok = okArena(arena) && packArena(arena, b->data)
  432. && writePart(arena->part, arena->base + arena->size, b->data, arena->blockSize);
  433. freeZBlock(b);
  434. return ok;
  435. }
  436. int
  437. wbArenaHead(Arena *arena)
  438. {
  439. ZBlock *b;
  440. ArenaHead head;
  441. int ok;
  442. nameCp(head.name, arena->name);
  443. head.version = arena->version;
  444. head.size = arena->size + 2 * arena->blockSize;
  445. head.blockSize = arena->blockSize;
  446. b = allocZBlock(arena->blockSize, 1);
  447. if(b == nil){
  448. logErr(EAdmin, "can't write arena header: %R");
  449. ///ZZZ add error message?
  450. return 0;
  451. }
  452. ok = packArenaHead(&head, b->data)
  453. && writePart(arena->part, arena->base - arena->blockSize, b->data, arena->blockSize);
  454. freeZBlock(b);
  455. return ok;
  456. }
  457. /*
  458. * read the arena header and trailer blocks from disk
  459. */
  460. static int
  461. loadArena(Arena *arena)
  462. {
  463. ArenaHead head;
  464. ZBlock *b;
  465. b = allocZBlock(arena->blockSize, 0);
  466. if(b == nil)
  467. return 0;
  468. if(!readPart(arena->part, arena->base + arena->size, b->data, arena->blockSize)){
  469. freeZBlock(b);
  470. return 0;
  471. }
  472. if(!unpackArena(arena, b->data)){
  473. freeZBlock(b);
  474. return 0;
  475. }
  476. if(arena->version != ArenaVersion){
  477. setErr(EAdmin, "unknown arena version %d", arena->version);
  478. freeZBlock(b);
  479. return 0;
  480. }
  481. scoreCp(arena->score, &b->data[arena->blockSize - VtScoreSize]);
  482. if(!readPart(arena->part, arena->base - arena->blockSize, b->data, arena->blockSize)){
  483. logErr(EAdmin, "can't read arena header: %R");
  484. freeZBlock(b);
  485. return 1;
  486. }
  487. if(!unpackArenaHead(&head, b->data))
  488. logErr(ECorrupt, "corrupted arena header: %R");
  489. else if(!nameEq(arena->name, head.name)
  490. || arena->version != head.version
  491. || arena->blockSize != head.blockSize
  492. || arena->size + 2 * arena->blockSize != head.size)
  493. logErr(ECorrupt, "arena header inconsistent with arena data");
  494. freeZBlock(b);
  495. return 1;
  496. }
  497. static int
  498. okArena(Arena *arena)
  499. {
  500. u64int dsize;
  501. int ok;
  502. ok = 1;
  503. dsize = arenaDirSize(arena, arena->clumps);
  504. if(arena->used + dsize > arena->size){
  505. setErr(ECorrupt, "arena used > size");
  506. ok = 0;
  507. }
  508. if(arena->cclumps > arena->clumps)
  509. logErr(ECorrupt, "arena has more compressed clumps than total clumps");
  510. if(arena->uncsize + arena->clumps * ClumpSize + arena->blockSize < arena->used)
  511. logErr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->uncsize, arena->clumps, arena->used);
  512. if(arena->ctime > arena->wtime)
  513. logErr(ECorrupt, "arena creation time after last write time");
  514. return ok;
  515. }
  516. static CIBlock*
  517. getCIB(Arena *arena, int clump, int writing, CIBlock *rock)
  518. {
  519. CIBlock *cib;
  520. u32int block, off;
  521. if(clump >= arena->clumps){
  522. setErr(EOk, "clump directory access out of range");
  523. return nil;
  524. }
  525. block = clump / arena->clumpMax;
  526. off = (clump - block * arena->clumpMax) * ClumpInfoSize;
  527. if(arena->cib.block == block
  528. && arena->cib.data != nil){
  529. arena->cib.offset = off;
  530. return &arena->cib;
  531. }
  532. if(writing){
  533. flushCIBlocks(arena);
  534. cib = &arena->cib;
  535. }else
  536. cib = rock;
  537. vtLock(stats.lock);
  538. stats.ciReads++;
  539. vtUnlock(stats.lock);
  540. cib->block = block;
  541. cib->offset = off;
  542. cib->data = getDBlock(arena->part, arena->base + arena->size - (block + 1) * arena->blockSize, arena->blockSize);
  543. if(cib->data == nil)
  544. return nil;
  545. return cib;
  546. }
  547. static void
  548. putCIB(Arena *arena, CIBlock *cib)
  549. {
  550. if(cib != &arena->cib){
  551. putDBlock(cib->data);
  552. cib->data = nil;
  553. }
  554. }
  555. /*
  556. * must be called with arena locked
  557. */
  558. int
  559. flushCIBlocks(Arena *arena)
  560. {
  561. int ok;
  562. if(arena->cib.data == nil)
  563. return 1;
  564. vtLock(stats.lock);
  565. stats.ciWrites++;
  566. vtUnlock(stats.lock);
  567. ok = writePart(arena->part, arena->base + arena->size - (arena->cib.block + 1) * arena->blockSize, arena->cib.data->data, arena->blockSize);
  568. if(!ok)
  569. setErr(EAdmin, "failed writing arena directory block");
  570. putDBlock(arena->cib.data);
  571. arena->cib.data = nil;
  572. return ok;
  573. }