source.c

#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "error.h"

static int	sizeToDepth(uvlong s, int psize, int dsize);
static u32int	tagGen(void);
static Block	*sourceLoad(Source *r, Entry *e);
static Block	*sourceLoadUnlocked(Source *r, Entry *e);
static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
static int	sourceShrinkSize(Source*, Entry*, uvlong);
static int	sourceGrowDepth(Source*, Block*, Entry*, int);

#define sourceIsLocked(r)	((r)->b != nil)
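
/*
 * Allocate a Source for the entry at the given offset in
 * directory block b, after validating the unpacked Entry.
 */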
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}
	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//	fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;
//	fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
	return r;

Bad:
	vtSetError(EBadEntry);
	return nil;
}
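
/*
 * Open the root source of a file system, rooted at block addr.
 * A writable root must be in the current epoch.
 */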
Source *
sourceRoot(Fs *fs, u32int addr, int mode)
{
	Source *r;
	Block *b;

	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
	if(b == nil)
		return nil;

	if(mode == OReadWrite && b->l.epoch != fs->ehi){
		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
		blockPut(b);
		vtSetError(EBadRoot);
		return nil;
	}

	r = sourceAlloc(fs, b, nil, 0, mode);
	blockPut(b);
	return r;
}
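
/*
 * Open the child source stored at the given entry offset
 * of the (locked) directory source r.
 */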
Source *
sourceOpen(Source *r, ulong offset, int mode)
{
	ulong bn;
	Block *b;

	assert(sourceIsLocked(r));
	if(r->mode == OReadWrite)
		assert(r->epoch == r->b->l.epoch);
	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	bn = offset/(r->dsize/VtEntrySize);

	b = sourceBlock(r, bn, mode);
	if(b == nil)
		return nil;
	r = sourceAlloc(r->fs, b, r, offset, mode);
	blockPut(b);
	return r;
}
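
/*
 * Create a new child source in directory source r, reusing an
 * inactive entry if one can be found, else extending the directory.
 */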
Source *
sourceCreate(Source *r, int dsize, int dir, u32int offset)
{
	int i;
	Block *b;
	u32int bn, size;
	Entry e;
	int epb;
	int psize;
	Source *rr;

	assert(sourceIsLocked(r));
	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;
	psize = (dsize/VtScoreSize)*VtScoreSize;

	size = sourceGetDirSize(r);
	if(offset == 0){
		/*
		 * look at a random block to see if we can find an empty entry
		 */
		offset = lnrand(size+1);
		offset -= offset % epb;
	}

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = sourceBlock(r, bn, OReadWrite);
		if(b == nil)
			return nil;
		for(i=offset%r->epb; i<epb; i++){
			entryUnpack(&e, b->data, i);
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				goto Found;
		}
		blockPut(b);
		if(offset == size){
			fprint(2, "sourceCreate: cannot happen\n");
			vtSetError("sourceCreate: cannot happen");
			return nil;
		}
		offset = size;
	}

Found:
	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	assert(psize && dsize);
	e.flags = VtEntryActive;
	if(dir)
		e.flags |= VtEntryDir;
	e.depth = 0;
	e.size = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	e.tag = 0;
	e.snap = 0;
	e.archive = 0;
	entryPack(&e, b->data, i);
	blockDirty(b);

	offset = bn*epb + i;
	if(offset+1 > size){
		if(!sourceSetDirSize(r, offset+1)){
			blockPut(b);
			return nil;
		}
	}

	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
	blockPut(b);
	return rr;
}
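
/*
 * Truncate (doremove==0) or remove (doremove!=0) the source r:
 * clear its entry and drop the link to its top block.
 */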
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag, 1);
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}

int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}

uvlong
sourceGetSize(Source *r)
{
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;
	blockPut(b);

	return e.size;
}
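
/*
 * Zero out the pointers (and tail data) that lie beyond the new
 * size.  Blocks that are not in the current epoch are left alone
 * rather than copied just to be zeroed.
 */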
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag, 1);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
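
/*
 * Set the size of source r, growing or shrinking the depth of
 * its block tree as needed before updating the entry.
 */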
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
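
/*
 * Directory sizes are stored as byte counts; sourceSetDirSize and
 * sourceGetDirSize convert between a byte size and a count of
 * directory entries.
 */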
int
sourceSetDirSize(Source *r, ulong ds)
{
	uvlong size;
	int epb;

	assert(sourceIsLocked(r));
	epb = r->dsize/VtEntrySize;

	size = (uvlong)r->dsize*(ds/epb);
	size += VtEntrySize*(ds%epb);
	return sourceSetSize(r, size);
}

ulong
sourceGetDirSize(Source *r)
{
	ulong ds;
	uvlong size;
	int epb;

	assert(sourceIsLocked(r));
	epb = r->dsize/VtEntrySize;

	size = sourceGetSize(r);
	ds = epb*(size/r->dsize);
	ds += (size%r->dsize)/VtEntrySize;
	return ds;
}
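
/*
 * Copy out the current Entry for r.
 */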
int
sourceGetEntry(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, e);
	if(b == nil)
		return 0;
	blockPut(b);

	return 1;
}

/*
 * Must be careful with this.  Doesn't record
 * dependencies, so don't introduce any!
 */
int
sourceSetEntry(Source *r, Entry *e)
{
	Block *b;
	Entry oe;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &oe);
	if(b == nil)
		return 0;
	entryPack(e, b->data, r->offset%r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
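
/*
 * Walk from parent block p to the child selected by index,
 * copying the child into the current epoch (copy on write)
 * when the walk is for writing.
 */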
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize], score[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b)
		b->pc = getcallerpc(&p);

	if(b == nil || mode == OReadOnly)
		return b;

	if(p->l.epoch != fs->ehi){
		fprint(2, "blockWalk: parent not writable\n");
		abort();
	}
	if(b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	b->pc = getcallerpc(&p);
	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	memmove(score, b->score, VtScoreSize);
	if(p->l.type == BtDir){
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag, 0);

	return b;
}
/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
//		fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		b = bb;
		blockDirty(b);
	}

	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
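
/*
 * Reduce the depth of the source r to depth by walking down
 * to the new root block and splicing it into the entry.
 */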
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	u32int tag;
	int type, d;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	ob = nil;
	b = rb;
	/* BUG: explain type++.  i think it is a real bug */
	for(d=e->depth; d > depth; d--, type++){
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	e->depth = d;
	/* might have been local and now global; reverse cannot happen */
	if(globalToLocal(b->score) == NilBlock)
		e->flags &= ~VtEntryLocal;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, 0, b->score, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);

	blockPut(rb);
	if(ob!=nil && ob!=rb)
		blockPut(ob);
	blockPut(b);

	return d == depth;
}
/*
 * Normally we return the block at the given number.
 * If early is set, we stop earlier in the tree.  Setting early
 * to 1 gives us the block that contains the pointer to bn.
 */
Block *
_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;
	if(r->mode == OReadOnly && (e.flags & VtEntryNoArchive)){
		blockPut(b);
		vtSetError(ENotArchived);
		return nil;
	}

	if(tag){
		if(e.tag == 0)
			e.tag = tag;
		else if(e.tag != tag){
			fprint(2, "tag mismatch\n");
			vtSetError("tag mismatch");
			goto Err;
		}
	}

	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	index[e.depth] = r->offset % r->epb;

	for(i=e.depth; i>=early; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	b->pc = getcallerpc(&r);
	return b;
Err:
	blockPut(b);
	return nil;
}

Block*
sourceBlock(Source *r, ulong bn, int mode)
{
	Block *b;

	b = _sourceBlock(r, bn, mode, 0, 0);
	if(b)
		b->pc = getcallerpc(&r);
	return b;
}
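
/*
 * Drop a reference to r, freeing it (and releasing its parent)
 * when the last reference goes away.
 */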
void
sourceClose(Source *r)
{
	if(r == nil)
		return;
	vtLock(r->lk);
	r->ref--;
	if(r->ref){
		vtUnlock(r->lk);
		return;
	}
	assert(r->ref == 0);
	vtUnlock(r->lk);
	if(r->parent)
		sourceClose(r->parent);
	vtLockFree(r->lk);
	memset(r, ~0, sizeof(*r));
	vtMemFree(r);
}
/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
	u32int addr;
	Block *b;

	switch(r->mode){
	default:
		assert(0);
	case OReadWrite:
		assert(r->mode == OReadWrite);
		/*
		 * This needn't be true -- we might bump the low epoch
		 * to reclaim some old blocks, but since this score is
		 * OReadWrite, the blocks must all still be open, so none
		 * are reclaimed.  Thus it's okay that the epoch is so low.
		 * Proceed.
		assert(r->epoch >= r->fs->elo);
		 */
		if(r->epoch == r->fs->ehi){
			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
			if(b == nil)
				return nil;
			assert(r->epoch == b->l.epoch);
			return b;
		}
		assert(r->parent != nil);
		if(!sourceLock(r->parent, OReadWrite))
			return nil;
		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
		sourceUnlock(r->parent);
		if(b == nil)
			return nil;
		assert(b->l.epoch == r->fs->ehi);
	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
		memmove(r->score, b->score, VtScoreSize);
		r->scoreEpoch = b->l.epoch;
		r->tag = b->l.tag;
		r->epoch = r->fs->ehi;
		return b;

	case OReadOnly:
		addr = globalToLocal(r->score);
		if(addr == NilBlock)
			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the OReadWrite case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're OReadOnly means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			if(!sourceLock(r->parent, OReadOnly))
				return nil;
			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
			sourceUnlock(r->parent);
			if(b){
				fprint(2, "sourceAlloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				r->scoreEpoch = b->l.epoch;
				return b;
			}
		}
		return nil;
	}
}
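
/*
 * Lock r by loading (and holding) the block that contains its entry.
 * A mode of -1 means use r's own mode.
 */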
int
sourceLock(Source *r, int mode)
{
	Block *b;

	if(mode == -1)
		mode = r->mode;

	b = sourceLoadBlock(r, mode);
	if(b == nil)
		return 0;
	/*
	 * The fact that we are holding b serves as the
	 * lock entitling us to write to r->b.
	 */
	assert(r->b == nil);
	r->b = b;
	if(r->mode == OReadWrite)
		assert(r->epoch == r->b->l.epoch);
	return 1;
}
/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
			memmove(rr->score, b->score, VtScoreSize);
			rr->scoreEpoch = b->l.epoch;
			rr->tag = b->l.tag;
			rr->epoch = rr->fs->ehi;
		}
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}
void
sourceUnlock(Source *r)
{
	Block *b;

	if(r->b == nil){
		fprint(2, "sourceUnlock: already unlocked\n");
		abort();
	}
	b = r->b;
	r->b = nil;
	blockPut(b);
}
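
/*
 * Unpack the entry for r from its (already locked) entry block,
 * checking that the generation still matches.
 */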
static Block*
sourceLoad(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = r->b;
	if(!entryUnpack(e, b->data, r->offset % r->epb))
		return nil;
	if(e->gen != r->gen){
		vtSetError(ERemoved);
		return nil;
	}
	blockDupLock(b);
	return b;
}
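
/*
 * Number of pointer-block levels needed for a file of s bytes
 * with the given pointer and data block sizes.
 */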
static int
sizeToDepth(uvlong s, int psize, int dsize)
{
	int np;
	int d;

	/* determine pointer depth */
	np = psize/VtScoreSize;
	s = (s + dsize - 1)/dsize;
	for(d = 0; s > 1; d++)
		s = (s + np - 1)/np;
	return d;
}
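
/*
 * Generate a random tag for local blocks; retry until the
 * value is at least UserTag.
 */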
static u32int
tagGen(void)
{
	u32int tag;

	for(;;){
		tag = lrand();
		if(tag >= UserTag)
			break;
	}
	return tag;
}