source.c

#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "error.h"

static int sizeToDepth(uvlong s, int psize, int dsize);
static u32int tagGen(void);
static Block *sourceLoad(Source *r, Entry *e);
static Block *sourceLoadUnlocked(Source *r, Entry *e);
static int sourceShrinkDepth(Source*, Block*, Entry*, int);
static int sourceShrinkSize(Source*, Entry*, uvlong);
static int sourceGrowDepth(Source*, Block*, Entry*, int);

#define sourceIsLocked(r)	((r)->b != nil)

static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
    Source *r;
    int epb;
    u32int epoch;
    Entry e;

    assert(p==nil || sourceIsLocked(p));

    if(p == nil){
        assert(offset == 0);
        epb = 1;
    }else
        epb = p->dsize / VtEntrySize;

    if(b->l.type != BtDir)
        goto Bad;

    /*
     * a non-active entry is the only thing that
     * can legitimately happen here. all the others
     * get prints.
     */
    if(!entryUnpack(&e, b->data, offset % epb)){
        fprint(2, "entryUnpack failed\n");
        goto Bad;
    }
    if(!(e.flags & VtEntryActive)){
        if(0)fprint(2, "not active\n");
        goto Bad;
    }
    if(e.psize < 256 || e.dsize < 256){
        fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
        goto Bad;
    }
    if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
        fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
        goto Bad;
    }
    if((e.flags & VtEntryLocal) && e.tag == 0){
        fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
        goto Bad;
    }
    if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
        fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
        goto Bad;
    }

    epoch = b->l.epoch;
    if(mode == OReadWrite){
        if(e.snap != 0){
            vtSetError(ESnapRO);
            return nil;
        }
    }else if(e.snap != 0){
        if(e.snap < fs->elo){
            vtSetError(ESnapOld);
            return nil;
        }
        if(e.snap >= fs->ehi)
            goto Bad;
        epoch = e.snap;
    }

    r = vtMemAllocZ(sizeof(Source));
    r->fs = fs;
    r->mode = mode;
    r->dsize = e.dsize;
    r->gen = e.gen;
    r->dir = (e.flags & VtEntryDir) != 0;
    r->lk = vtLockAlloc();
    r->ref = 1;
    r->parent = p;
    if(p){
        vtLock(p->lk);
        assert(mode == OReadOnly || p->mode == OReadWrite);
        p->ref++;
        vtUnlock(p->lk);
    }
    r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
    memmove(r->score, b->score, VtScoreSize);
    r->scoreEpoch = b->l.epoch;
    r->offset = offset;
    r->epb = epb;
    r->tag = b->l.tag;
//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
    return r;
Bad:
    vtSetError(EBadEntry);
    return nil;
}

Source *
sourceRoot(Fs *fs, u32int addr, int mode)
{
    Source *r;
    Block *b;

    b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
    if(b == nil)
        return nil;

    if(mode == OReadWrite)
        if((b->l.state&BsClosed) || b->l.epoch != fs->ehi){
            fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
            blockPut(b);
            vtSetError(EBadRoot);
            return nil;
        }

    r = sourceAlloc(fs, b, nil, 0, mode);
    blockPut(b);
    return r;
}

Source *
sourceOpen(Source *r, ulong offset, int mode)
{
    ulong bn;
    Block *b;

    assert(sourceIsLocked(r));
    if(r->mode == OReadWrite)
        assert(r->epoch == r->b->l.epoch);
    if(!r->dir){
        vtSetError(ENotDir);
        return nil;
    }

    bn = offset/(r->dsize/VtEntrySize);

    b = sourceBlock(r, bn, mode);
    if(b == nil)
        return nil;
    r = sourceAlloc(r->fs, b, r, offset, mode);
    blockPut(b);
    return r;
}
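
/*
 * A rough worked example of the entry-to-block mapping in sourceOpen,
 * assuming dsize = 8192 (VtEntrySize is 40): each directory block holds
 * 8192/40 = 204 entries, so entry offset 500 lives in block
 * bn = 500/204 = 2, at slot 500%204 = 92 within that block
 * (sourceAlloc later recovers the slot as offset % epb).
 */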

Source *
sourceCreate(Source *r, int dsize, int dir, u32int offset)
{
    int i;
    Block *b;
    u32int bn, size;
    Entry e;
    int epb;
    int psize;
    Source *rr;

    assert(sourceIsLocked(r));
    if(!r->dir){
        vtSetError(ENotDir);
        return nil;
    }

    epb = r->dsize/VtEntrySize;
    psize = (dsize/VtScoreSize)*VtScoreSize;

    size = sourceGetDirSize(r);
    if(offset == 0){
        /*
         * look at a random block to see if we can find an empty entry
         */
        offset = lnrand(size+1);
        offset -= offset % epb;
    }

    /* try the given block and then try the last block */
    for(;;){
        bn = offset/epb;
        b = sourceBlock(r, bn, OReadWrite);
        if(b == nil)
            return nil;
        for(i=offset%r->epb; i<epb; i++){
            entryUnpack(&e, b->data, i);
            if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
                goto Found;
        }
        blockPut(b);
        if(offset == size){
            fprint(2, "sourceCreate: cannot happen\n");
            vtSetError("sourceCreate: cannot happen");
            return nil;
        }
        offset = size;
    }

Found:
    /* found an entry - gen already set */
    e.psize = psize;
    e.dsize = dsize;
    assert(psize && dsize);
    e.flags = VtEntryActive;
    if(dir)
        e.flags |= VtEntryDir;
    e.depth = 0;
    e.size = 0;
    memmove(e.score, vtZeroScore, VtScoreSize);
    e.tag = 0;
    e.snap = 0;
    e.archive = 0;
    entryPack(&e, b->data, i);
    blockDirty(b);

    offset = bn*epb + i;
    if(offset+1 > size){
        if(!sourceSetDirSize(r, offset+1)){
            blockPut(b);
            return nil;
        }
    }

    rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
    blockPut(b);
    return rr;
}

static int
sourceKill(Source *r, int doremove)
{
    Entry e;
    Block *b;
    u32int addr;
    u32int tag;
    int type;

    assert(sourceIsLocked(r));
    b = sourceLoad(r, &e);
    if(b == nil)
        return 0;

    assert(b->l.epoch == r->fs->ehi);

    if(doremove==0 && e.size == 0){
        /* already truncated */
        blockPut(b);
        return 1;
    }

    /* remember info on link we are removing */
    addr = globalToLocal(e.score);
    type = entryType(&e);
    tag = e.tag;

    if(doremove){
        if(e.gen != ~0)
            e.gen++;
        e.dsize = 0;
        e.psize = 0;
        e.flags = 0;
    }else{
        e.flags &= ~VtEntryLocal;
    }
    e.depth = 0;
    e.size = 0;
    e.tag = 0;
    memmove(e.score, vtZeroScore, VtScoreSize);
    entryPack(&e, b->data, r->offset % r->epb);
    blockDirty(b);
    if(addr != NilBlock)
        blockRemoveLink(b, addr, type, tag);
    blockPut(b);

    if(doremove){
        sourceUnlock(r);
        sourceClose(r);
    }

    return 1;
}

int
sourceRemove(Source *r)
{
    return sourceKill(r, 1);
}

int
sourceTruncate(Source *r)
{
    return sourceKill(r, 0);
}

uvlong
sourceGetSize(Source *r)
{
    Entry e;
    Block *b;

    assert(sourceIsLocked(r));
    b = sourceLoad(r, &e);
    if(b == nil)
        return 0;
    blockPut(b);

    return e.size;
}

static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
    int i, type, ppb;
    uvlong ptrsz;
    u32int addr;
    uchar score[VtScoreSize];
    Block *b;

    type = entryType(e);
    b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
    if(b == nil)
        return 0;

    ptrsz = e->dsize;
    ppb = e->psize/VtScoreSize;
    for(i=0; i+1<e->depth; i++)
        ptrsz *= ppb;

    while(type&BtLevelMask){
        if(b->addr == NilBlock
        || (b->l.state&BsClosed)
        || b->l.epoch != r->fs->ehi){
            /* not worth copying the block just so we can zero some of it */
            blockPut(b);
            return 0;
        }

        /*
         * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
         */

        /* zero the pointers to unnecessary blocks */
        i = (size+ptrsz-1)/ptrsz;
        for(; i<ppb; i++){
            addr = globalToLocal(b->data+i*VtScoreSize);
            memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
            blockDirty(b);
            if(addr != NilBlock)
                blockRemoveLink(b, addr, type-1, e->tag);
        }

        /* recurse (go around again) on the partially necessary block */
        i = size/ptrsz;
        size = size%ptrsz;
        if(size == 0){
            blockPut(b);
            return 1;
        }
        ptrsz /= ppb;
        type--;
        memmove(score, b->data+i*VtScoreSize, VtScoreSize);
        blockPut(b);
        b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
        if(b == nil)
            return 0;
    }

    if(b->addr == NilBlock
    || (b->l.state&BsClosed)
    || b->l.epoch != r->fs->ehi){
        blockPut(b);
        return 0;
    }

    /*
     * No one ever truncates BtDir blocks.
     */
    if(type == BtData && e->dsize > size){
        memset(b->data+size, 0, e->dsize-size);
        blockDirty(b);
    }
    blockPut(b);
    return 1;
}
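
/*
 * A rough illustration of the ptrsz invariant above, assuming
 * dsize = psize = 8192 (ppb = 8192/20 = 409, VtScoreSize being 20)
 * and an entry of depth 2: at the top level each pointer covers
 * ptrsz = 8192*409 = 3350528 bytes, one level down each pointer covers
 * 8192 bytes, and at level 0 the block holds data itself.  Shrinking to
 * size S zeroes the pointers from index (S+ptrsz-1)/ptrsz upward and
 * then descends into pointer S/ptrsz carrying the remainder S%ptrsz.
 */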

int
sourceSetSize(Source *r, uvlong size)
{
    int depth;
    Entry e;
    Block *b;

    assert(sourceIsLocked(r));
    if(size == 0)
        return sourceTruncate(r);

    if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
        vtSetError(ETooBig);
        return 0;
    }

    b = sourceLoad(r, &e);
    if(b == nil)
        return 0;

    /* quick out */
    if(e.size == size){
        blockPut(b);
        return 1;
    }

    depth = sizeToDepth(size, e.psize, e.dsize);

    if(depth < e.depth){
        if(!sourceShrinkDepth(r, b, &e, depth)){
            blockPut(b);
            return 0;
        }
    }else if(depth > e.depth){
        if(!sourceGrowDepth(r, b, &e, depth)){
            blockPut(b);
            return 0;
        }
    }

    if(size < e.size)
        sourceShrinkSize(r, &e, size);

    e.size = size;
    entryPack(&e, b->data, r->offset % r->epb);
    blockDirty(b);
    blockPut(b);

    return 1;
}

int
sourceSetDirSize(Source *r, ulong ds)
{
    uvlong size;
    int epb;

    assert(sourceIsLocked(r));
    epb = r->dsize/VtEntrySize;

    size = (uvlong)r->dsize*(ds/epb);
    size += VtEntrySize*(ds%epb);
    return sourceSetSize(r, size);
}

ulong
sourceGetDirSize(Source *r)
{
    ulong ds;
    uvlong size;
    int epb;

    assert(sourceIsLocked(r));
    epb = r->dsize/VtEntrySize;

    size = sourceGetSize(r);
    ds = epb*(size/r->dsize);
    ds += (size%r->dsize)/VtEntrySize;
    return ds;
}
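
/*
 * A rough worked example of the conversion in the two routines above,
 * assuming dsize = 8192 (epb = 8192/40 = 204, VtEntrySize being 40):
 * ds = 500 entries maps to size = 8192*(500/204) + 40*(500%204)
 * = 8192*2 + 40*92 = 20064 bytes, and sourceGetDirSize inverts it:
 * 204*(20064/8192) + (20064%8192)/40 = 408 + 92 = 500.
 */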

int
sourceGetEntry(Source *r, Entry *e)
{
    Block *b;

    assert(sourceIsLocked(r));
    b = sourceLoad(r, e);
    if(b == nil)
        return 0;
    blockPut(b);

    return 1;
}

/*
 * Must be careful with this.  Doesn't record
 * dependencies, so don't introduce any!
 */
int
sourceSetEntry(Source *r, Entry *e)
{
    Block *b;
    Entry oe;

    assert(sourceIsLocked(r));
    b = sourceLoad(r, &oe);
    if(b == nil)
        return 0;
    entryPack(e, b->data, r->offset%r->epb);
    blockDirty(b);
    blockPut(b);

    return 1;
}

static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
    Block *b;
    Cache *c;
    u32int addr;
    int type;
    uchar oscore[VtScoreSize], score[VtScoreSize];
    Entry oe;

    c = fs->cache;

    if((p->l.type & BtLevelMask) == 0){
        assert(p->l.type == BtDir);
        type = entryType(e);
        b = cacheGlobal(c, e->score, type, e->tag, mode);
    }else{
        type = p->l.type - 1;
        b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
    }
    if(b)
        b->pc = getcallerpc(&p);

    if(b == nil || mode == OReadOnly)
        return b;

    assert(!(p->l.state&BsClosed) && p->l.epoch == fs->ehi);
    if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi)
        return b;

    oe = *e;

    /*
     * Copy on write.
     */
    if(e->tag == 0){
        assert(p->l.type == BtDir);
        e->tag = tagGen();
        e->flags |= VtEntryLocal;
    }

    addr = b->addr;
    b = blockCopy(b, e->tag, fs->ehi, fs->elo);
    if(b == nil)
        return nil;

    b->pc = getcallerpc(&p);
    assert(b->l.epoch == fs->ehi);

    blockDirty(b);
    memmove(score, b->score, VtScoreSize);
    if(p->l.type == BtDir){
        memmove(e->score, b->score, VtScoreSize);
        entryPack(e, p->data, index);
        blockDependency(p, b, index, nil, &oe);
    }else{
        memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
        memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
        blockDependency(p, b, index, oscore, nil);
    }
    blockDirty(p);

    if(addr != NilBlock)
        blockRemoveLink(p, addr, type, e->tag);

    return b;
}
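
/*
 * The copy-on-write step in blockWalk, restated informally: when the
 * child block belongs to an older or closed epoch, blockCopy makes a
 * writable copy in the current epoch, the parent's pointer (or directory
 * entry) is rewritten to the copy's score, a dependency is recorded so
 * the parent cannot reach disk before the copy, and the link to the old
 * address is dropped with blockRemoveLink.
 */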

/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
    Block *b, *bb;
    u32int tag;
    int type;
    Entry oe;

    assert(sourceIsLocked(r));
    assert(depth <= VtPointerDepth);

    type = entryType(e);
    b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
    if(b == nil)
        return 0;

    tag = e->tag;
    if(tag == 0)
        tag = tagGen();

    oe = *e;

    /*
     * Keep adding layers until we get to the right depth
     * or an error occurs.
     */
    while(e->depth < depth){
        bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
        if(bb == nil)
            break;
//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
        memmove(bb->data, b->score, VtScoreSize);
        memmove(e->score, bb->score, VtScoreSize);
        e->depth++;
        type++;
        e->tag = tag;
        e->flags |= VtEntryLocal;

        blockDependency(bb, b, 0, vtZeroScore, nil);
        blockPut(b);
        b = bb;
        blockDirty(b);
    }

    entryPack(e, p->data, r->offset % r->epb);
    blockDependency(p, b, r->offset % r->epb, nil, &oe);
    blockPut(b);
    blockDirty(p);

    return e->depth == depth;
}
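
/*
 * A small illustration of the growth step above, assuming a file going
 * from depth 1 to depth 3: two pointer blocks are allocated, one per new
 * level; each one's first pointer is set to the score of the block below
 * it, with a dependency recorded so the lower block is written first;
 * finally the entry in p is repacked to point at the topmost new block,
 * again with a dependency, so the new tree reaches disk before the entry
 * that names it.
 */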

static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
    Block *b, *nb, *ob, *rb;
    u32int tag;
    int type, d;
    Entry oe;

    assert(sourceIsLocked(r));
    assert(depth <= VtPointerDepth);

    type = entryType(e);
    rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
    if(rb == nil)
        return 0;

    tag = e->tag;
    if(tag == 0)
        tag = tagGen();

    /*
     * Walk down to the new root block.
     * We may stop early, but something is better than nothing.
     */
    oe = *e;

    ob = nil;
    b = rb;
    /* BUG: explain type++.  i think it is a real bug */
    for(d=e->depth; d > depth; d--, type++){
        nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
        if(nb == nil)
            break;
        if(ob!=nil && ob!=rb)
            blockPut(ob);
        ob = b;
        b = nb;
    }

    if(b == rb){
        blockPut(rb);
        return 0;
    }

    /*
     * Right now, e points at the root block rb, b is the new root block,
     * and ob points at b.  To update:
     *
     *	(i) change e to point at b
     *	(ii) zero the pointer ob -> b
     *	(iii) free the root block
     *
     * p (the block containing e) must be written before
     * anything else.
     */

    /* (i) */
    e->depth = d;
    memmove(e->score, b->score, VtScoreSize);
    entryPack(e, p->data, r->offset % r->epb);
    blockDependency(p, b, r->offset % r->epb, nil, &oe);
    blockDirty(p);

    /* (ii) */
    memmove(ob->data, vtZeroScore, VtScoreSize);
    blockDependency(ob, p, 0, b->score, nil);
    blockDirty(ob);

    /* (iii) */
    if(rb->addr != NilBlock)
        blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag);
    blockPut(rb);

    if(ob!=nil && ob!=rb)
        blockPut(ob);
    blockPut(b);

    return d == depth;
}

/*
 * Normally we return the block at the given number.
 * If early is set, we stop earlier in the tree.  Setting early
 * to 1 gives us the block that contains the pointer to bn.
 */
Block *
_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
{
    Block *b, *bb;
    int index[VtPointerDepth+1];
    Entry e;
    int i, np;
    int m;

    assert(sourceIsLocked(r));
    assert(bn != NilBlock);

    /* mode for intermediate block */
    m = mode;
    if(m == OOverWrite)
        m = OReadWrite;

    b = sourceLoad(r, &e);
    if(b == nil)
        return nil;
    if(r->mode == OReadOnly && (e.flags & VtEntryNoArchive)){
        blockPut(b);
        vtSetError(ENotArchived);
        return nil;
    }

    if(tag){
        if(e.tag == 0)
            e.tag = tag;
        else if(e.tag != tag){
            fprint(2, "tag mismatch\n");
            vtSetError("tag mismatch");
            goto Err;
        }
    }

    np = e.psize/VtScoreSize;
    memset(index, 0, sizeof(index));
    for(i=0; bn > 0; i++){
        if(i >= VtPointerDepth){
            vtSetError(EBadAddr);
            goto Err;
        }
        index[i] = bn % np;
        bn /= np;
    }

    if(i > e.depth){
        if(mode == OReadOnly){
            vtSetError(EBadAddr);
            goto Err;
        }
        if(!sourceGrowDepth(r, b, &e, i))
            goto Err;
    }

    index[e.depth] = r->offset % r->epb;

    for(i=e.depth; i>=early; i--){
        bb = blockWalk(b, index[i], m, r->fs, &e);
        if(bb == nil)
            goto Err;
        blockPut(b);
        b = bb;
    }
    b->pc = getcallerpc(&r);
    return b;
Err:
    blockPut(b);
    return nil;
}
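
/*
 * A rough worked example of the index decomposition in _sourceBlock,
 * assuming psize = 8192 (np = 8192/20 = 409 pointers per block) and
 * bn = 170000: index[0] = 170000%409 = 265, index[1] = 415%409 = 6,
 * index[2] = 1, so the entry must have depth at least 3.  index[e.depth]
 * is then set to the entry's slot in its parent directory block, and the
 * walk calls blockWalk once per level, from that slot down to index[early].
 */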

Block*
sourceBlock(Source *r, ulong bn, int mode)
{
    Block *b;

    b = _sourceBlock(r, bn, mode, 0, 0);
    if(b)
        b->pc = getcallerpc(&r);
    return b;
}

void
sourceClose(Source *r)
{
    if(r == nil)
        return;
    vtLock(r->lk);
    r->ref--;
    if(r->ref){
        vtUnlock(r->lk);
        return;
    }
    assert(r->ref == 0);
    vtUnlock(r->lk);
    if(r->parent)
        sourceClose(r->parent);
    vtLockFree(r->lk);
    memset(r, ~0, sizeof(*r));
    vtMemFree(r);
}

/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
    u32int addr;
    Block *b;

    switch(r->mode){
    default:
        assert(0);
    case OReadWrite:
        assert(r->mode == OReadWrite);
        /*
         * This needn't be true -- we might bump the low epoch
         * to reclaim some old blocks, but since this score is
         * OReadWrite, the blocks must all still be open, so none
         * are reclaimed.  Thus it's okay that the epoch is so low.
         * Proceed.
        assert(r->epoch >= r->fs->elo);
         */
        if(r->epoch == r->fs->ehi){
            b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
            if(b == nil)
                return nil;
            assert(r->epoch == b->l.epoch);
            return b;
        }
        assert(r->parent != nil);
        if(!sourceLock(r->parent, OReadWrite))
            return nil;
        b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
        sourceUnlock(r->parent);
        if(b == nil)
            return nil;
        assert(b->l.epoch == r->fs->ehi);
        memmove(r->score, b->score, VtScoreSize);
        r->scoreEpoch = b->l.epoch;
        r->tag = b->l.tag;
        r->epoch = r->fs->ehi;
        return b;

    case OReadOnly:
        addr = globalToLocal(r->score);
        if(addr == NilBlock)
            return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

        b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
        if(b)
            return b;

        /*
         * If it failed because the epochs don't match, the block has been
         * archived and reclaimed.  Rewalk from the parent and get the
         * new pointer.  This can't happen in the OReadWrite case
         * above because blocks in the current epoch don't get
         * reclaimed.  The fact that we're OReadOnly means we're
         * a snapshot.  (Or else the file system is read-only, but then
         * the archiver isn't going around deleting blocks.)
         */
        if(strcmp(vtGetError(), ELabelMismatch) == 0){
            if(!sourceLock(r->parent, OReadOnly))
                return nil;
            b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
            sourceUnlock(r->parent);
            if(b){
                fprint(2, "sourceAlloc: lost %V found %V\n",
                    r->score, b->score);
                memmove(r->score, b->score, VtScoreSize);
                r->scoreEpoch = b->l.epoch;
                return b;
            }
        }
        return nil;
    }
}

int
sourceLock(Source *r, int mode)
{
    Block *b;

    if(mode == -1)
        mode = r->mode;

    b = sourceLoadBlock(r, mode);
    if(b == nil)
        return 0;
    /*
     * The fact that we are holding b serves as the
     * lock entitling us to write to r->b.
     */
    assert(r->b == nil);
    r->b = b;
    if(r->mode == OReadWrite)
        assert(r->epoch == r->b->l.epoch);
    return 1;
}
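
/*
 * A minimal usage sketch, assuming a caller that already holds a
 * reference to r: access to a source is bracketed by the lock, e.g.
 *
 *	if(sourceLock(r, -1)){
 *		b = sourceBlock(r, bn, OReadOnly);
 *		if(b != nil){
 *			... use b->data ...
 *			blockPut(b);
 *		}
 *		sourceUnlock(r);
 *	}
 *
 * Passing mode -1 locks with the source's own mode, as above.
 */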

/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
    Block *b, *bb;

    if(rr == nil)
        return sourceLock(r, mode);

    if(mode == -1)
        mode = r->mode;

    if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
        b = sourceLoadBlock(r, mode);
        if(b == nil)
            return 0;
        blockDupLock(b);
        bb = b;
    }else if(r->parent==rr->parent || r->offset > rr->offset){
        bb = sourceLoadBlock(rr, mode);
        b = sourceLoadBlock(r, mode);
    }else{
        b = sourceLoadBlock(r, mode);
        bb = sourceLoadBlock(rr, mode);
    }
    if(b == nil || bb == nil){
        if(b)
            blockPut(b);
        if(bb)
            blockPut(bb);
        return 0;
    }

    /*
     * The fact that we are holding b and bb serves
     * as the lock entitling us to write to r->b and rr->b.
     */
    r->b = b;
    rr->b = bb;
    return 1;
}

void
sourceUnlock(Source *r)
{
    Block *b;

    if(r->b == nil){
        fprint(2, "sourceUnlock: already unlocked\n");
        abort();
    }
    b = r->b;
    r->b = nil;
    blockPut(b);
}

static Block*
sourceLoad(Source *r, Entry *e)
{
    Block *b;

    assert(sourceIsLocked(r));
    b = r->b;
    if(!entryUnpack(e, b->data, r->offset % r->epb))
        return nil;
    if(e->gen != r->gen){
        vtSetError(ERemoved);
        return nil;
    }
    blockDupLock(b);
    return b;
}

static int
sizeToDepth(uvlong s, int psize, int dsize)
{
    int np;
    int d;

    /* determine pointer depth */
    np = psize/VtScoreSize;
    s = (s + dsize - 1)/dsize;
    for(d = 0; s > 1; d++)
        s = (s + np - 1)/np;
    return d;
}
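
/*
 * A rough worked example of sizeToDepth, assuming psize = dsize = 8192
 * (np = 8192/20 = 409): a 1 GB file needs (2^30+8191)/8192 = 131072
 * data blocks; 131072 -> ceil(131072/409) = 321 -> ceil(321/409) = 1,
 * so two rounds of division are needed and the returned depth is 2.
 */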

static u32int
tagGen(void)
{
    u32int tag;

    for(;;){
        tag = lrand();
        if(tag >= UserTag)
            break;
    }
    return tag;
}