source.c 20 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. #include "error.h"
  5. static int sizeToDepth(uvlong s, int psize, int dsize);
  6. static u32int tagGen(void);
  7. static Block *sourceLoad(Source *r, Entry *e);
  8. static Block *sourceLoadUnlocked(Source *r, Entry *e);
  9. static int sourceShrinkDepth(Source*, Block*, Entry*, int);
  10. static int sourceShrinkSize(Source*, Entry*, uvlong);
  11. static int sourceGrowDepth(Source*, Block*, Entry*, int);
  12. #define sourceIsLocked(r) ((r)->b != nil)
  13. static Source *
  14. sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
  15. {
  16. Source *r;
  17. int epb;
  18. u32int epoch;
  19. Entry e;
  20. assert(p==nil || sourceIsLocked(p));
  21. if(p == nil){
  22. assert(offset == 0);
  23. epb = 1;
  24. }else
  25. epb = p->dsize / VtEntrySize;
  26. if(b->l.type != BtDir)
  27. goto Bad;
  28. /*
  29. * a non-active entry is the only thing that
  30. * can legitimately happen here. all the others
  31. * get prints.
  32. */
  33. if(!entryUnpack(&e, b->data, offset % epb)){
  34. fprint(2, "%s: %V: sourceAlloc: entryUnpack failed\n", argv0,
  35. b->score);
  36. goto Bad;
  37. }
  38. if(!(e.flags & VtEntryActive)){
  39. if(0) fprint(2, "%s: %V: sourceAlloc: not active\n",
  40. argv0, e.score);
  41. goto Bad;
  42. }
  43. if(e.psize < 256 || e.dsize < 256){
  44. fprint(2, "%s: %V: sourceAlloc: psize %ud dsize %ud\n",
  45. argv0, e.score, e.psize, e.dsize);
  46. goto Bad;
  47. }
  48. if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
  49. fprint(2, "%s: %V: sourceAlloc: depth %ud size %llud psize %ud dsize %ud\n",
  50. argv0, e.score, e.depth, e.size, e.psize, e.dsize);
  51. goto Bad;
  52. }
  53. if((e.flags & VtEntryLocal) && e.tag == 0){
  54. fprint(2, "%s: %V: sourceAlloc: flags %#ux tag %#ux\n",
  55. argv0, e.score, e.flags, e.tag);
  56. goto Bad;
  57. }
  58. if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
  59. fprint(2, "%s: %V: sourceAlloc: psize %ud dsize %ud blocksize %ud\n",
  60. argv0, e.score, e.psize, e.dsize, fs->blockSize);
  61. goto Bad;
  62. }
  63. epoch = b->l.epoch;
  64. if(mode == OReadWrite){
  65. if(e.snap != 0){
  66. vtSetError(ESnapRO);
  67. return nil;
  68. }
  69. }else if(e.snap != 0){
  70. if(e.snap < fs->elo){
  71. vtSetError(ESnapOld);
  72. return nil;
  73. }
  74. if(e.snap >= fs->ehi)
  75. goto Bad;
  76. epoch = e.snap;
  77. }
  78. r = vtMemAllocZ(sizeof(Source));
  79. r->fs = fs;
  80. r->mode = mode;
  81. r->issnapshot = issnapshot;
  82. r->dsize = e.dsize;
  83. r->gen = e.gen;
  84. r->dir = (e.flags & VtEntryDir) != 0;
  85. r->lk = vtLockAlloc();
  86. r->ref = 1;
  87. r->parent = p;
  88. if(p){
  89. vtLock(p->lk);
  90. assert(mode == OReadOnly || p->mode == OReadWrite);
  91. p->ref++;
  92. vtUnlock(p->lk);
  93. }
  94. r->epoch = epoch;
  95. // fprint(2, "%s: sourceAlloc: have %V be.%d fse.%d %s\n", argv0, b->score,
  96. // b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
  97. memmove(r->score, b->score, VtScoreSize);
  98. r->scoreEpoch = b->l.epoch;
  99. r->offset = offset;
  100. r->epb = epb;
  101. r->tag = b->l.tag;
  102. // fprint(2, "%s: sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
  103. return r;
  104. Bad:
  105. vtSetError(EBadEntry);
  106. return nil;
  107. }
  108. Source *
  109. sourceRoot(Fs *fs, u32int addr, int mode)
  110. {
  111. Source *r;
  112. Block *b;
  113. b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
  114. if(b == nil)
  115. return nil;
  116. if(mode == OReadWrite && b->l.epoch != fs->ehi){
  117. fprint(2, "%s: sourceRoot: fs->ehi = %ud, b->l = %L\n",
  118. argv0, fs->ehi, &b->l);
  119. blockPut(b);
  120. vtSetError(EBadRoot);
  121. return nil;
  122. }
  123. r = sourceAlloc(fs, b, nil, 0, mode, 0);
  124. blockPut(b);
  125. return r;
  126. }
  127. Source *
  128. sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
  129. {
  130. ulong bn;
  131. Block *b;
  132. assert(sourceIsLocked(r));
  133. if(r->mode == OReadWrite)
  134. assert(r->epoch == r->b->l.epoch);
  135. if(!r->dir){
  136. vtSetError(ENotDir);
  137. return nil;
  138. }
  139. bn = offset/(r->dsize/VtEntrySize);
  140. b = sourceBlock(r, bn, mode);
  141. if(b == nil)
  142. return nil;
  143. r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);
  144. blockPut(b);
  145. return r;
  146. }
  147. Source *
  148. sourceCreate(Source *r, int dsize, int dir, u32int offset)
  149. {
  150. int i;
  151. Block *b;
  152. u32int bn, size;
  153. Entry e;
  154. int epb;
  155. int psize;
  156. Source *rr;
  157. assert(sourceIsLocked(r));
  158. if(!r->dir){
  159. vtSetError(ENotDir);
  160. return nil;
  161. }
  162. epb = r->dsize/VtEntrySize;
  163. psize = (dsize/VtScoreSize)*VtScoreSize;
  164. size = sourceGetDirSize(r);
  165. if(offset == 0){
  166. /*
  167. * look at a random block to see if we can find an empty entry
  168. */
  169. offset = lnrand(size+1);
  170. offset -= offset % epb;
  171. }
  172. /* try the given block and then try the last block */
  173. for(;;){
  174. bn = offset/epb;
  175. b = sourceBlock(r, bn, OReadWrite);
  176. if(b == nil)
  177. return nil;
  178. for(i=offset%r->epb; i<epb; i++){
  179. entryUnpack(&e, b->data, i);
  180. if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
  181. goto Found;
  182. }
  183. blockPut(b);
  184. if(offset == size){
  185. fprint(2, "sourceCreate: cannot happen\n");
  186. vtSetError("sourceCreate: cannot happen");
  187. return nil;
  188. }
  189. offset = size;
  190. }
  191. Found:
  192. /* found an entry - gen already set */
  193. e.psize = psize;
  194. e.dsize = dsize;
  195. assert(psize && dsize);
  196. e.flags = VtEntryActive;
  197. if(dir)
  198. e.flags |= VtEntryDir;
  199. e.depth = 0;
  200. e.size = 0;
  201. memmove(e.score, vtZeroScore, VtScoreSize);
  202. e.tag = 0;
  203. e.snap = 0;
  204. e.archive = 0;
  205. entryPack(&e, b->data, i);
  206. blockDirty(b);
  207. offset = bn*epb + i;
  208. if(offset+1 > size){
  209. if(!sourceSetDirSize(r, offset+1)){
  210. blockPut(b);
  211. return nil;
  212. }
  213. }
  214. rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
  215. blockPut(b);
  216. return rr;
  217. }
  218. static int
  219. sourceKill(Source *r, int doremove)
  220. {
  221. Entry e;
  222. Block *b;
  223. u32int addr;
  224. u32int tag;
  225. int type;
  226. assert(sourceIsLocked(r));
  227. b = sourceLoad(r, &e);
  228. if(b == nil)
  229. return 0;
  230. assert(b->l.epoch == r->fs->ehi);
  231. if(doremove==0 && e.size == 0){
  232. /* already truncated */
  233. blockPut(b);
  234. return 1;
  235. }
  236. /* remember info on link we are removing */
  237. addr = globalToLocal(e.score);
  238. type = entryType(&e);
  239. tag = e.tag;
  240. if(doremove){
  241. if(e.gen != ~0)
  242. e.gen++;
  243. e.dsize = 0;
  244. e.psize = 0;
  245. e.flags = 0;
  246. }else{
  247. e.flags &= ~VtEntryLocal;
  248. }
  249. e.depth = 0;
  250. e.size = 0;
  251. e.tag = 0;
  252. memmove(e.score, vtZeroScore, VtScoreSize);
  253. entryPack(&e, b->data, r->offset % r->epb);
  254. blockDirty(b);
  255. if(addr != NilBlock)
  256. blockRemoveLink(b, addr, type, tag, 1);
  257. blockPut(b);
  258. if(doremove){
  259. sourceUnlock(r);
  260. sourceClose(r);
  261. }
  262. return 1;
  263. }
  264. int
  265. sourceRemove(Source *r)
  266. {
  267. return sourceKill(r, 1);
  268. }
  269. int
  270. sourceTruncate(Source *r)
  271. {
  272. return sourceKill(r, 0);
  273. }
  274. uvlong
  275. sourceGetSize(Source *r)
  276. {
  277. Entry e;
  278. Block *b;
  279. assert(sourceIsLocked(r));
  280. b = sourceLoad(r, &e);
  281. if(b == nil)
  282. return 0;
  283. blockPut(b);
  284. return e.size;
  285. }
  286. static int
  287. sourceShrinkSize(Source *r, Entry *e, uvlong size)
  288. {
  289. int i, type, ppb;
  290. uvlong ptrsz;
  291. u32int addr;
  292. uchar score[VtScoreSize];
  293. Block *b;
  294. type = entryType(e);
  295. b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
  296. if(b == nil)
  297. return 0;
  298. ptrsz = e->dsize;
  299. ppb = e->psize/VtScoreSize;
  300. for(i=0; i+1<e->depth; i++)
  301. ptrsz *= ppb;
  302. while(type&BtLevelMask){
  303. if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
  304. /* not worth copying the block just so we can zero some of it */
  305. blockPut(b);
  306. return 0;
  307. }
  308. /*
  309. * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
  310. */
  311. /* zero the pointers to unnecessary blocks */
  312. i = (size+ptrsz-1)/ptrsz;
  313. for(; i<ppb; i++){
  314. addr = globalToLocal(b->data+i*VtScoreSize);
  315. memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
  316. blockDirty(b);
  317. if(addr != NilBlock)
  318. blockRemoveLink(b, addr, type-1, e->tag, 1);
  319. }
  320. /* recurse (go around again) on the partially necessary block */
  321. i = size/ptrsz;
  322. size = size%ptrsz;
  323. if(size == 0){
  324. blockPut(b);
  325. return 1;
  326. }
  327. ptrsz /= ppb;
  328. type--;
  329. memmove(score, b->data+i*VtScoreSize, VtScoreSize);
  330. blockPut(b);
  331. b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
  332. if(b == nil)
  333. return 0;
  334. }
  335. if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
  336. blockPut(b);
  337. return 0;
  338. }
  339. /*
  340. * No one ever truncates BtDir blocks.
  341. */
  342. if(type == BtData && e->dsize > size){
  343. memset(b->data+size, 0, e->dsize-size);
  344. blockDirty(b);
  345. }
  346. blockPut(b);
  347. return 1;
  348. }
  349. int
  350. sourceSetSize(Source *r, uvlong size)
  351. {
  352. int depth;
  353. Entry e;
  354. Block *b;
  355. assert(sourceIsLocked(r));
  356. if(size == 0)
  357. return sourceTruncate(r);
  358. if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
  359. vtSetError(ETooBig);
  360. return 0;
  361. }
  362. b = sourceLoad(r, &e);
  363. if(b == nil)
  364. return 0;
  365. /* quick out */
  366. if(e.size == size){
  367. blockPut(b);
  368. return 1;
  369. }
  370. depth = sizeToDepth(size, e.psize, e.dsize);
  371. if(depth < e.depth){
  372. if(!sourceShrinkDepth(r, b, &e, depth)){
  373. blockPut(b);
  374. return 0;
  375. }
  376. }else if(depth > e.depth){
  377. if(!sourceGrowDepth(r, b, &e, depth)){
  378. blockPut(b);
  379. return 0;
  380. }
  381. }
  382. if(size < e.size)
  383. sourceShrinkSize(r, &e, size);
  384. e.size = size;
  385. entryPack(&e, b->data, r->offset % r->epb);
  386. blockDirty(b);
  387. blockPut(b);
  388. return 1;
  389. }
  390. int
  391. sourceSetDirSize(Source *r, ulong ds)
  392. {
  393. uvlong size;
  394. int epb;
  395. assert(sourceIsLocked(r));
  396. epb = r->dsize/VtEntrySize;
  397. size = (uvlong)r->dsize*(ds/epb);
  398. size += VtEntrySize*(ds%epb);
  399. return sourceSetSize(r, size);
  400. }
  401. ulong
  402. sourceGetDirSize(Source *r)
  403. {
  404. ulong ds;
  405. uvlong size;
  406. int epb;
  407. assert(sourceIsLocked(r));
  408. epb = r->dsize/VtEntrySize;
  409. size = sourceGetSize(r);
  410. ds = epb*(size/r->dsize);
  411. ds += (size%r->dsize)/VtEntrySize;
  412. return ds;
  413. }
  414. int
  415. sourceGetEntry(Source *r, Entry *e)
  416. {
  417. Block *b;
  418. assert(sourceIsLocked(r));
  419. b = sourceLoad(r, e);
  420. if(b == nil)
  421. return 0;
  422. blockPut(b);
  423. return 1;
  424. }
  425. /*
  426. * Must be careful with this. Doesn't record
  427. * dependencies, so don't introduce any!
  428. */
  429. int
  430. sourceSetEntry(Source *r, Entry *e)
  431. {
  432. Block *b;
  433. Entry oe;
  434. assert(sourceIsLocked(r));
  435. b = sourceLoad(r, &oe);
  436. if(b == nil)
  437. return 0;
  438. entryPack(e, b->data, r->offset%r->epb);
  439. blockDirty(b);
  440. blockPut(b);
  441. return 1;
  442. }
  443. static Block *
  444. blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
  445. {
  446. Block *b;
  447. Cache *c;
  448. u32int addr;
  449. int type;
  450. uchar oscore[VtScoreSize], score[VtScoreSize];
  451. Entry oe;
  452. c = fs->cache;
  453. if((p->l.type & BtLevelMask) == 0){
  454. assert(p->l.type == BtDir);
  455. type = entryType(e);
  456. b = cacheGlobal(c, e->score, type, e->tag, mode);
  457. }else{
  458. type = p->l.type - 1;
  459. b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
  460. }
  461. if(b)
  462. b->pc = getcallerpc(&p);
  463. if(b == nil || mode == OReadOnly)
  464. return b;
  465. if(p->l.epoch != fs->ehi){
  466. fprint(2, "blockWalk: parent not writable\n");
  467. abort();
  468. }
  469. if(b->l.epoch == fs->ehi)
  470. return b;
  471. oe = *e;
  472. /*
  473. * Copy on write.
  474. */
  475. if(e->tag == 0){
  476. assert(p->l.type == BtDir);
  477. e->tag = tagGen();
  478. e->flags |= VtEntryLocal;
  479. }
  480. addr = b->addr;
  481. b = blockCopy(b, e->tag, fs->ehi, fs->elo);
  482. if(b == nil)
  483. return nil;
  484. b->pc = getcallerpc(&p);
  485. assert(b->l.epoch == fs->ehi);
  486. blockDirty(b);
  487. memmove(score, b->score, VtScoreSize);
  488. if(p->l.type == BtDir){
  489. memmove(e->score, b->score, VtScoreSize);
  490. entryPack(e, p->data, index);
  491. blockDependency(p, b, index, nil, &oe);
  492. }else{
  493. memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
  494. memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
  495. blockDependency(p, b, index, oscore, nil);
  496. }
  497. blockDirty(p);
  498. if(addr != NilBlock)
  499. blockRemoveLink(p, addr, type, e->tag, 0);
  500. return b;
  501. }
  502. /*
  503. * Change the depth of the source r.
  504. * The entry e for r is contained in block p.
  505. */
  506. static int
  507. sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
  508. {
  509. Block *b, *bb;
  510. u32int tag;
  511. int type;
  512. Entry oe;
  513. assert(sourceIsLocked(r));
  514. assert(depth <= VtPointerDepth);
  515. type = entryType(e);
  516. b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
  517. if(b == nil)
  518. return 0;
  519. tag = e->tag;
  520. if(tag == 0)
  521. tag = tagGen();
  522. oe = *e;
  523. /*
  524. * Keep adding layers until we get to the right depth
  525. * or an error occurs.
  526. */
  527. while(e->depth < depth){
  528. bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
  529. if(bb == nil)
  530. break;
  531. //fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
  532. memmove(bb->data, b->score, VtScoreSize);
  533. memmove(e->score, bb->score, VtScoreSize);
  534. e->depth++;
  535. type++;
  536. e->tag = tag;
  537. e->flags |= VtEntryLocal;
  538. blockDependency(bb, b, 0, vtZeroScore, nil);
  539. blockPut(b);
  540. b = bb;
  541. blockDirty(b);
  542. }
  543. entryPack(e, p->data, r->offset % r->epb);
  544. blockDependency(p, b, r->offset % r->epb, nil, &oe);
  545. blockPut(b);
  546. blockDirty(p);
  547. return e->depth == depth;
  548. }
  549. static int
  550. sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
  551. {
  552. Block *b, *nb, *ob, *rb;
  553. u32int tag;
  554. int type, d;
  555. Entry oe;
  556. assert(sourceIsLocked(r));
  557. assert(depth <= VtPointerDepth);
  558. type = entryType(e);
  559. rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
  560. if(rb == nil)
  561. return 0;
  562. tag = e->tag;
  563. if(tag == 0)
  564. tag = tagGen();
  565. /*
  566. * Walk down to the new root block.
  567. * We may stop early, but something is better than nothing.
  568. */
  569. oe = *e;
  570. ob = nil;
  571. b = rb;
  572. /* BUG: explain type++. i think it is a real bug */
  573. for(d=e->depth; d > depth; d--, type++){
  574. nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
  575. if(nb == nil)
  576. break;
  577. if(ob!=nil && ob!=rb)
  578. blockPut(ob);
  579. ob = b;
  580. b = nb;
  581. }
  582. if(b == rb){
  583. blockPut(rb);
  584. return 0;
  585. }
  586. /*
  587. * Right now, e points at the root block rb, b is the new root block,
  588. * and ob points at b. To update:
  589. *
  590. * (i) change e to point at b
  591. * (ii) zero the pointer ob -> b
  592. * (iii) free the root block
  593. *
  594. * p (the block containing e) must be written before
  595. * anything else.
  596. */
  597. /* (i) */
  598. e->depth = d;
  599. /* might have been local and now global; reverse cannot happen */
  600. if(globalToLocal(b->score) == NilBlock)
  601. e->flags &= ~VtEntryLocal;
  602. memmove(e->score, b->score, VtScoreSize);
  603. entryPack(e, p->data, r->offset % r->epb);
  604. blockDependency(p, b, r->offset % r->epb, nil, &oe);
  605. blockDirty(p);
  606. /* (ii) */
  607. memmove(ob->data, vtZeroScore, VtScoreSize);
  608. blockDependency(ob, p, 0, b->score, nil);
  609. blockDirty(ob);
  610. /* (iii) */
  611. if(rb->addr != NilBlock)
  612. blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
  613. blockPut(rb);
  614. if(ob!=nil && ob!=rb)
  615. blockPut(ob);
  616. blockPut(b);
  617. return d == depth;
  618. }
  619. /*
  620. * Normally we return the block at the given number.
  621. * If early is set, we stop earlier in the tree. Setting early
  622. * to 1 gives us the block that contains the pointer to bn.
  623. */
  624. Block *
  625. _sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
  626. {
  627. Block *b, *bb;
  628. int index[VtPointerDepth+1];
  629. Entry e;
  630. int i, np;
  631. int m;
  632. assert(sourceIsLocked(r));
  633. assert(bn != NilBlock);
  634. /* mode for intermediate block */
  635. m = mode;
  636. if(m == OOverWrite)
  637. m = OReadWrite;
  638. b = sourceLoad(r, &e);
  639. if(b == nil)
  640. return nil;
  641. if(r->issnapshot && (e.flags & VtEntryNoArchive)){
  642. blockPut(b);
  643. vtSetError(ENotArchived);
  644. return nil;
  645. }
  646. if(tag){
  647. if(e.tag == 0)
  648. e.tag = tag;
  649. else if(e.tag != tag){
  650. fprint(2, "tag mismatch\n");
  651. vtSetError("tag mismatch");
  652. goto Err;
  653. }
  654. }
  655. np = e.psize/VtScoreSize;
  656. memset(index, 0, sizeof(index));
  657. for(i=0; bn > 0; i++){
  658. if(i >= VtPointerDepth){
  659. vtSetError(EBadAddr);
  660. goto Err;
  661. }
  662. index[i] = bn % np;
  663. bn /= np;
  664. }
  665. if(i > e.depth){
  666. if(mode == OReadOnly){
  667. vtSetError(EBadAddr);
  668. goto Err;
  669. }
  670. if(!sourceGrowDepth(r, b, &e, i))
  671. goto Err;
  672. }
  673. index[e.depth] = r->offset % r->epb;
  674. for(i=e.depth; i>=early; i--){
  675. bb = blockWalk(b, index[i], m, r->fs, &e);
  676. if(bb == nil)
  677. goto Err;
  678. blockPut(b);
  679. b = bb;
  680. }
  681. b->pc = getcallerpc(&r);
  682. return b;
  683. Err:
  684. blockPut(b);
  685. return nil;
  686. }
  687. Block*
  688. sourceBlock(Source *r, ulong bn, int mode)
  689. {
  690. Block *b;
  691. b = _sourceBlock(r, bn, mode, 0, 0);
  692. if(b)
  693. b->pc = getcallerpc(&r);
  694. return b;
  695. }
  696. void
  697. sourceClose(Source *r)
  698. {
  699. if(r == nil)
  700. return;
  701. vtLock(r->lk);
  702. r->ref--;
  703. if(r->ref){
  704. vtUnlock(r->lk);
  705. return;
  706. }
  707. assert(r->ref == 0);
  708. vtUnlock(r->lk);
  709. if(r->parent)
  710. sourceClose(r->parent);
  711. vtLockFree(r->lk);
  712. memset(r, ~0, sizeof(*r));
  713. vtMemFree(r);
  714. }
  715. /*
  716. * Retrieve the block containing the entry for r.
  717. * If a snapshot has happened, we might need
  718. * to get a new copy of the block. We avoid this
  719. * in the common case by caching the score for
  720. * the block and the last epoch in which it was valid.
  721. *
  722. * We use r->mode to tell the difference between active
  723. * file system sources (OReadWrite) and sources for the
  724. * snapshot file system (OReadOnly).
  725. */
  726. static Block*
  727. sourceLoadBlock(Source *r, int mode)
  728. {
  729. u32int addr;
  730. Block *b;
  731. switch(r->mode){
  732. default:
  733. assert(0);
  734. case OReadWrite:
  735. assert(r->mode == OReadWrite);
  736. /*
  737. * This needn't be true -- we might bump the low epoch
  738. * to reclaim some old blocks, but since this score is
  739. * OReadWrite, the blocks must all still be open, so none
  740. * are reclaimed. Thus it's okay that the epoch is so low.
  741. * Proceed.
  742. assert(r->epoch >= r->fs->elo);
  743. */
  744. if(r->epoch == r->fs->ehi){
  745. b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
  746. if(b == nil)
  747. return nil;
  748. assert(r->epoch == b->l.epoch);
  749. return b;
  750. }
  751. assert(r->parent != nil);
  752. if(!sourceLock(r->parent, OReadWrite))
  753. return nil;
  754. b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
  755. sourceUnlock(r->parent);
  756. if(b == nil)
  757. return nil;
  758. assert(b->l.epoch == r->fs->ehi);
  759. // fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
  760. memmove(r->score, b->score, VtScoreSize);
  761. r->scoreEpoch = b->l.epoch;
  762. r->tag = b->l.tag;
  763. r->epoch = r->fs->ehi;
  764. return b;
  765. case OReadOnly:
  766. addr = globalToLocal(r->score);
  767. if(addr == NilBlock)
  768. return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
  769. b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
  770. if(b)
  771. return b;
  772. /*
  773. * If it failed because the epochs don't match, the block has been
  774. * archived and reclaimed. Rewalk from the parent and get the
  775. * new pointer. This can't happen in the OReadWrite case
  776. * above because blocks in the current epoch don't get
  777. * reclaimed. The fact that we're OReadOnly means we're
  778. * a snapshot. (Or else the file system is read-only, but then
  779. * the archiver isn't going around deleting blocks.)
  780. */
  781. if(strcmp(vtGetError(), ELabelMismatch) == 0){
  782. if(!sourceLock(r->parent, OReadOnly))
  783. return nil;
  784. b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
  785. sourceUnlock(r->parent);
  786. if(b){
  787. fprint(2, "sourceAlloc: lost %V found %V\n",
  788. r->score, b->score);
  789. memmove(r->score, b->score, VtScoreSize);
  790. r->scoreEpoch = b->l.epoch;
  791. return b;
  792. }
  793. }
  794. return nil;
  795. }
  796. }
  797. int
  798. sourceLock(Source *r, int mode)
  799. {
  800. Block *b;
  801. if(mode == -1)
  802. mode = r->mode;
  803. b = sourceLoadBlock(r, mode);
  804. if(b == nil)
  805. return 0;
  806. /*
  807. * The fact that we are holding b serves as the
  808. * lock entitling us to write to r->b.
  809. */
  810. assert(r->b == nil);
  811. r->b = b;
  812. if(r->mode == OReadWrite)
  813. assert(r->epoch == r->b->l.epoch);
  814. return 1;
  815. }
  816. /*
  817. * Lock two (usually sibling) sources. This needs special care
  818. * because the Entries for both sources might be in the same block.
  819. * We also try to lock blocks in left-to-right order within the tree.
  820. */
  821. int
  822. sourceLock2(Source *r, Source *rr, int mode)
  823. {
  824. Block *b, *bb;
  825. if(rr == nil)
  826. return sourceLock(r, mode);
  827. if(mode == -1)
  828. mode = r->mode;
  829. if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
  830. b = sourceLoadBlock(r, mode);
  831. if(b == nil)
  832. return 0;
  833. if(memcmp(r->score, rr->score, VtScoreSize) != 0){
  834. memmove(rr->score, b->score, VtScoreSize);
  835. rr->scoreEpoch = b->l.epoch;
  836. rr->tag = b->l.tag;
  837. rr->epoch = rr->fs->ehi;
  838. }
  839. blockDupLock(b);
  840. bb = b;
  841. }else if(r->parent==rr->parent || r->offset > rr->offset){
  842. bb = sourceLoadBlock(rr, mode);
  843. b = sourceLoadBlock(r, mode);
  844. }else{
  845. b = sourceLoadBlock(r, mode);
  846. bb = sourceLoadBlock(rr, mode);
  847. }
  848. if(b == nil || bb == nil){
  849. if(b)
  850. blockPut(b);
  851. if(bb)
  852. blockPut(bb);
  853. return 0;
  854. }
  855. /*
  856. * The fact that we are holding b and bb serves
  857. * as the lock entitling us to write to r->b and rr->b.
  858. */
  859. r->b = b;
  860. rr->b = bb;
  861. return 1;
  862. }
  863. void
  864. sourceUnlock(Source *r)
  865. {
  866. Block *b;
  867. if(r->b == nil){
  868. fprint(2, "sourceUnlock: already unlocked\n");
  869. abort();
  870. }
  871. b = r->b;
  872. r->b = nil;
  873. blockPut(b);
  874. }
  875. static Block*
  876. sourceLoad(Source *r, Entry *e)
  877. {
  878. Block *b;
  879. assert(sourceIsLocked(r));
  880. b = r->b;
  881. if(!entryUnpack(e, b->data, r->offset % r->epb))
  882. return nil;
  883. if(e->gen != r->gen){
  884. vtSetError(ERemoved);
  885. return nil;
  886. }
  887. blockDupLock(b);
  888. return b;
  889. }
  890. static int
  891. sizeToDepth(uvlong s, int psize, int dsize)
  892. {
  893. int np;
  894. int d;
  895. /* determine pointer depth */
  896. np = psize/VtScoreSize;
  897. s = (s + dsize - 1)/dsize;
  898. for(d = 0; s > 1; d++)
  899. s = (s + np - 1)/np;
  900. return d;
  901. }
  902. static u32int
  903. tagGen(void)
  904. {
  905. u32int tag;
  906. for(;;){
  907. tag = lrand();
  908. if(tag >= UserTag)
  909. break;
  910. }
  911. return tag;
  912. }