file.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

/*
 * Manage tree of VtFiles stored in the block cache.
 *
 * The single point of truth for the info about the VtFiles themselves
 * is the block data. Because of this, there is no explicit locking of
 * VtFile structures, and indeed there may be more than one VtFile
 * structure for a given Venti file. They synchronize through the
 * block cache.
 *
 * This is a bit simpler than fossil because there are no epochs
 * or tags or anything else. Just mutable local blocks and immutable
 * Venti blocks.
 */
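
/*
 * Illustrative sketch of a typical read path through this interface
 * (not a function in this library).  It assumes a VtCache c set up
 * elsewhere (see cache.c) and a local root directory block at address
 * addr; everything called below is defined in this file.
 *
 *	VtFile *root, *f;
 *	char buf[512];
 *
 *	root = vtfileroot(c, addr, VtORDWR);
 *	if(root == nil)
 *		sysfatal("vtfileroot: %r");
 *	if(vtfilelock(root, -1) < 0)
 *		sysfatal("vtfilelock: %r");
 *	f = vtfileopen(root, 0, VtOREAD);	open entry 0 of the directory
 *	vtfileunlock(root);
 *	if(f != nil && vtfilelock(f, VtOREAD) >= 0){
 *		vtfileread(f, buf, sizeof buf, 0);
 *		vtfileunlock(f);
 *	}
 *	vtfileclose(f);
 *	vtfileclose(root);
 */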
#include <u.h>
#include <libc.h>
#include <venti.h>

#define MaxBlock (1UL<<31)

static char ENotDir[] = "walk in non-directory";
static char ETooBig[] = "file too big";
/* static char EBadAddr[] = "bad address"; */
static char ELabelMismatch[] = "label mismatch";

static int sizetodepth(uint64_t s, int psize, int dsize);
static VtBlock *fileload(VtFile *r, VtEntry *e);
static int shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
static int shrinksize(VtFile*, VtEntry*, uint64_t);
static int growdepth(VtFile*, VtBlock*, VtEntry*, int);

#define ISLOCKED(r)	((r)->b != nil)
#define DEPTH(t)	((t)&VtTypeDepthMask)

static VtFile *
vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, uint32_t offset, int mode)
{
	int epb;
	uint32_t size;
	VtEntry e;
	VtFile *r;

	assert(p==nil || ISLOCKED(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	if(b->type != VtDirType){
		werrstr("bad block type %#uo", b->type);
		return nil;
	}

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(vtentryunpack(&e, b->data, offset % epb) < 0){
		fprint(2, "vtentryunpack failed: %r (%.*H)\n", VtEntrySize, b->data+VtEntrySize*(offset%epb));
		return nil;
	}
	if(!(e.flags & VtEntryActive)){
		werrstr("entry not active");
		return nil;
	}

	if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
			DEPTH(e.type), e.size, e.psize, e.dsize);
		werrstr("bad depth");
		return nil;
	}

	size = vtcacheblocksize(c);
	if(e.dsize > size || e.psize > size){
		werrstr("block sizes %ud, %ud bigger than cache block size %ud",
			e.psize, e.dsize, size);
		return nil;
	}

	r = vtmallocz(sizeof(VtFile));
	r->c = c;
	r->mode = mode;
	r->dsize = e.dsize;
	r->psize = e.psize;
	r->gen = e.gen;
	r->dir = (e.type & VtTypeBaseMask) == VtDirType;
	r->ref = 1;
	r->parent = p;
	if(p){
		qlock(&p->lk);
		assert(mode == VtOREAD || p->mode == VtORDWR);
		p->ref++;
		qunlock(&p->lk);
	}else{
		assert(b->addr != NilBlock);
		r->local = 1;
	}
	memmove(r->score, b->score, VtScoreSize);
	r->offset = offset;
	r->epb = epb;

	return r;
}

VtFile *
vtfileroot(VtCache *c, uint32_t addr, int mode)
{
	VtFile *r;
	VtBlock *b;

	b = vtcachelocal(c, addr, VtDirType);
	if(b == nil)
		return nil;
	r = vtfilealloc(c, b, nil, 0, mode);
	vtblockput(b);
	return r;
}

VtFile*
vtfileopenroot(VtCache *c, VtEntry *e)
{
	VtBlock *b;
	VtFile *f;

	b = vtcacheallocblock(c, VtDirType);
	if(b == nil)
		return nil;

	vtentrypack(e, b->data, 0);
	f = vtfilealloc(c, b, nil, 0, VtORDWR);
	vtblockput(b);
	return f;
}

VtFile *
vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
{
	VtEntry e;

	memset(&e, 0, sizeof e);
	e.flags = VtEntryActive;
	e.psize = psize;
	e.dsize = dsize;
	e.type = type;
	memmove(e.score, vtzeroscore, VtScoreSize);

	return vtfileopenroot(c, &e);
}

VtFile *
vtfileopen(VtFile *r, uint32_t offset, int mode)
{
	uint32_t bn;
	VtBlock *b;

	assert(ISLOCKED(r));
	if(!r->dir){
		werrstr(ENotDir);
		return nil;
	}

	bn = offset/(r->dsize/VtEntrySize);

	b = vtfileblock(r, bn, mode);
	if(b == nil)
		return nil;
	r = vtfilealloc(r->c, b, r, offset, mode);
	vtblockput(b);
	return r;
}
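
/*
 * Create a new entry in the directory file r: probe a random block
 * (or the caller-supplied slot in _vtfilecreate), fall back to the
 * end of the directory, activate a free entry with the given
 * psize/dsize/type, grow the directory size if needed, and return the
 * entry as a new VtFile.
 */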
VtFile*
vtfilecreate(VtFile *r, int psize, int dsize, int type)
{
	return _vtfilecreate(r, -1, psize, dsize, type);
}

VtFile*
_vtfilecreate(VtFile *r, int o, int psize, int dsize, int type)
{
	int i;
	VtBlock *b;
	uint32_t bn, size;
	VtEntry e;
	int epb;
	VtFile *rr;
	uint32_t offset;

	assert(ISLOCKED(r));
	assert(psize <= VtMaxLumpSize);
	assert(dsize <= VtMaxLumpSize);
	assert(type == VtDirType || type == VtDataType);

	if(!r->dir){
		werrstr(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;

	size = vtfilegetdirsize(r);
	/*
	 * look at a random block to see if we can find an empty entry
	 */
	if(o == -1){
		offset = lnrand(size+1);
		offset -= offset % epb;
	}else
		offset = o;

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = vtfileblock(r, bn, VtORDWR);
		if(b == nil)
			return nil;
		for(i=offset%r->epb; i<epb; i++){
			if(vtentryunpack(&e, b->data, i) < 0)
				continue;
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				goto Found;
		}
		vtblockput(b);
		if(offset == size){
			fprint(2, "vtfilecreate: cannot happen\n");
			werrstr("vtfilecreate: cannot happen");
			return nil;
		}
		offset = size;
	}

Found:
	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	e.flags = VtEntryActive;
	e.type = type;
	e.size = 0;
	memmove(e.score, vtzeroscore, VtScoreSize);
	vtentrypack(&e, b->data, i);

	offset = bn*epb + i;
	if(offset+1 > size){
		if(vtfilesetdirsize(r, offset+1) < 0){
			vtblockput(b);
			return nil;
		}
	}

	rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
	vtblockput(b);
	return rr;
}

static int
vtfilekill(VtFile *r, int doremove)
{
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, &e);
	if(b == nil)
		return -1;

	if(doremove==0 && e.size == 0){
		/* already truncated */
		vtblockput(b);
		return 0;
	}

	if(doremove){
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else
		e.flags &= ~VtEntryLocal;
	e.type = 0;
	e.size = 0;
	memmove(e.score, vtzeroscore, VtScoreSize);
	vtentrypack(&e, b->data, r->offset % r->epb);
	vtblockput(b);

	if(doremove){
		vtfileunlock(r);
		vtfileclose(r);
	}

	return 0;
}

int
vtfileremove(VtFile *r)
{
	return vtfilekill(r, 1);
}

int
vtfiletruncate(VtFile *r)
{
	return vtfilekill(r, 0);
}

uint64_t
vtfilegetsize(VtFile *r)
{
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, &e);
	if(b == nil)
		return ~(uint64_t)0;
	vtblockput(b);

	return e.size;
}

static int
shrinksize(VtFile *r, VtEntry *e, uint64_t size)
{
	int i, depth, type, isdir, ppb;
	uint64_t ptrsz;
	uint8_t score[VtScoreSize];
	VtBlock *b;

	b = vtcacheglobal(r->c, e->score, e->type);
	if(b == nil)
		return -1;

	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	type = e->type;
	depth = DEPTH(type);
	for(i=0; i+1<depth; i++)
		ptrsz *= ppb;

	isdir = r->dir;
	while(depth > 0){
		if(b->addr == NilBlock){
			/* not worth copying the block just so we can zero some of it */
			vtblockput(b);
			return -1;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++)
			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			vtblockput(b);
			return 0;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		vtblockput(b);
		b = vtcacheglobal(r->c, score, type);
		if(b == nil)
			return -1;
	}

	if(b->addr == NilBlock){
		vtblockput(b);
		return -1;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(depth==0 && !isdir && e->dsize > size)
		memset(b->data+size, 0, e->dsize-size);
	vtblockput(b);
	return 0;
}

int
vtfilesetsize(VtFile *r, uint64_t size)
{
	int depth, edepth;
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	if(size == 0)
		return vtfiletruncate(r);

	if(size > VtMaxFileSize || size > ((uint64_t)MaxBlock)*r->dsize){
		werrstr(ETooBig);
		return -1;
	}

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	/* quick out */
	if(e.size == size){
		vtblockput(b);
		return 0;
	}

	depth = sizetodepth(size, e.psize, e.dsize);
	edepth = DEPTH(e.type);
	if(depth < edepth){
		if(shrinkdepth(r, b, &e, depth) < 0){
			vtblockput(b);
			return -1;
		}
	}else if(depth > edepth){
		if(growdepth(r, b, &e, depth) < 0){
			vtblockput(b);
			return -1;
		}
	}

	if(size < e.size)
		shrinksize(r, &e, size);

	e.size = size;
	vtentrypack(&e, b->data, r->offset % r->epb);
	vtblockput(b);

	return 0;
}

int
vtfilesetdirsize(VtFile *r, uint32_t ds)
{
	uint64_t size;
	int epb;

	assert(ISLOCKED(r));
	epb = r->dsize/VtEntrySize;

	size = (uint64_t)r->dsize*(ds/epb);
	size += VtEntrySize*(ds%epb);
	return vtfilesetsize(r, size);
}

uint32_t
vtfilegetdirsize(VtFile *r)
{
	uint32_t ds;
	uint64_t size;
	int epb;

	assert(ISLOCKED(r));
	epb = r->dsize/VtEntrySize;

	size = vtfilegetsize(r);
	ds = epb*(size/r->dsize);
	ds += (size%r->dsize)/VtEntrySize;
	return ds;
}

int
vtfilegetentry(VtFile *r, VtEntry *e)
{
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, e);
	if(b == nil)
		return -1;
	vtblockput(b);

	return 0;
}

int
vtfilesetentry(VtFile *r, VtEntry *e)
{
	VtBlock *b;
	VtEntry ee;

	assert(ISLOCKED(r));
	b = fileload(r, &ee);
	if(b == nil)
		return -1;
	vtentrypack(e, b->data, r->offset % r->epb);
	vtblockput(b);
	return 0;
}
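
/*
 * Follow one pointer down the tree: from block p (or, when p is the
 * top-level VtDirType block, from the entry e) to the child selected
 * by index.  For write modes the returned child is made local
 * (mutable): an existing global child is copied (copy on write), and
 * in VtOWRITE mode a fresh block is allocated instead of reading the
 * old one, since it will be overwritten entirely.  The pointer in p
 * or in e is then updated to the new score.
 */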
static VtBlock *
blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
{
	VtBlock *b;
	int type;
	uint8_t *score;
	VtEntry oe;

	switch(p->type){
	case VtDataType:
		assert(0);
	case VtDirType:
		type = e->type;
		score = e->score;
		break;
	default:
		type = p->type - 1;
		score = p->data+index*VtScoreSize;
		break;
	}
	/*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */

	if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
		b = vtcacheallocblock(c, type);
		if(b)
			goto HaveCopy;
	}else
		b = vtcacheglobal(c, score, type);

	if(b == nil || mode == VtOREAD)
		return b;

	if(vtglobaltolocal(b->score) != NilBlock)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	e->flags |= VtEntryLocal;

	b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
	if(b == nil)
		return nil;

HaveCopy:
	if(p->type == VtDirType){
		memmove(e->score, b->score, VtScoreSize);
		vtentrypack(e, p->data, index);
	}else{
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
	}

	return b;
}

/*
 * Change the depth of the VtFile r.
 * The entry e for r is contained in block p.
 */
static int
growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
	VtBlock *b, *bb;
	VtEntry oe;

	assert(ISLOCKED(r));
	assert(depth <= VtPointerDepth);

	b = vtcacheglobal(r->c, e->score, e->type);
	if(b == nil)
		return -1;

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(DEPTH(e->type) < depth){
		bb = vtcacheallocblock(r->c, e->type+1);
		if(bb == nil)
			break;
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->type++;
		e->flags |= VtEntryLocal;
		vtblockput(b);
		b = bb;
	}

	vtentrypack(e, p->data, r->offset % r->epb);
	vtblockput(b);

	if(DEPTH(e->type) == depth)
		return 0;
	return -1;
}

static int
shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
	VtBlock *b, *nb, *ob, *rb;
	VtEntry oe;

	assert(ISLOCKED(r));
	assert(depth <= VtPointerDepth);

	rb = vtcacheglobal(r->c, e->score, e->type);
	if(rb == nil)
		return -1;

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	ob = nil;
	b = rb;
	for(; DEPTH(e->type) > depth; e->type--){
		nb = vtcacheglobal(r->c, b->data, e->type-1);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			vtblockput(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		vtblockput(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b. To update:
	 *
	 * (i) change e to point at b
	 * (ii) zero the pointer ob -> b
	 * (iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	memmove(e->score, b->score, VtScoreSize);
	vtentrypack(e, p->data, r->offset % r->epb);

	/* (ii) */
	memmove(ob->data, vtzeroscore, VtScoreSize);

	/* (iii) */
	vtblockput(rb);
	if(ob!=nil && ob!=rb)
		vtblockput(ob);
	vtblockput(b);

	if(DEPTH(e->type) == depth)
		return 0;
	return -1;
}
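
/*
 * Decompose a block number into per-level indices within the pointer
 * tree, least significant level first, and return the number of
 * levels needed.  For example, with psize 8192 a pointer block holds
 * np = 8192/VtScoreSize = 409 scores, so bn = 420 yields
 * index[0] = 11, index[1] = 1, and a return value of 2.
 */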
static int
mkindices(VtEntry *e, uint32_t bn, int *index)
{
	int i, np;

	memset(index, 0, (VtPointerDepth+1)*sizeof(int));

	np = e->psize/VtScoreSize;
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			werrstr("bad address 0x%lux", (uint32_t)bn);
			return -1;
		}
		index[i] = bn % np;
		bn /= np;
	}
	return i;
}

VtBlock *
vtfileblock(VtFile *r, uint32_t bn, int mode)
{
	VtBlock *b, *bb;
	int index[VtPointerDepth+1];
	VtEntry e;
	int i;
	int m;

	assert(ISLOCKED(r));
	assert(bn != NilBlock);

	b = fileload(r, &e);
	if(b == nil)
		return nil;

	i = mkindices(&e, bn, index);
	if(i < 0)
		goto Err;
	if(i > DEPTH(e.type)){
		if(mode == VtOREAD){
			werrstr("bad address 0x%lux", (uint32_t)bn);
			goto Err;
		}
		index[i] = 0;
		if(growdepth(r, b, &e, i) < 0)
			goto Err;
	}

	assert(b->type == VtDirType);

	index[DEPTH(e.type)] = r->offset % r->epb;

	/* mode for intermediate block */
	m = mode;
	if(m == VtOWRITE)
		m = VtORDWR;

	for(i=DEPTH(e.type); i>=0; i--){
		bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
		if(bb == nil)
			goto Err;
		vtblockput(b);
		b = bb;
	}
	b->pc = getcallerpc(&r);
	return b;
Err:
	vtblockput(b);
	return nil;
}

int
vtfileblockscore(VtFile *r, uint32_t bn, uint8_t score[VtScoreSize])
{
	VtBlock *b, *bb;
	int index[VtPointerDepth+1];
	VtEntry e;
	int i;

	assert(ISLOCKED(r));
	assert(bn != NilBlock);

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	if(DEPTH(e.type) == 0){
		memmove(score, e.score, VtScoreSize);
		vtblockput(b);
		return 0;
	}

	i = mkindices(&e, bn, index);
	if(i < 0){
		vtblockput(b);
		return -1;
	}
	if(i > DEPTH(e.type)){
		memmove(score, vtzeroscore, VtScoreSize);
		vtblockput(b);
		return 0;
	}

	index[DEPTH(e.type)] = r->offset % r->epb;

	for(i=DEPTH(e.type); i>=1; i--){
		bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
		if(bb == nil)
			goto Err;
		vtblockput(b);
		b = bb;
		if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
			break;
	}

	memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
	vtblockput(b);
	return 0;

Err:
	vtblockput(b);
	return -1;
}

void
vtfileincref(VtFile *r)
{
	qlock(&r->lk);
	r->ref++;
	qunlock(&r->lk);
}

void
vtfileclose(VtFile *r)
{
	if(r == nil)
		return;
	qlock(&r->lk);
	r->ref--;
	if(r->ref){
		qunlock(&r->lk);
		return;
	}
	assert(r->ref == 0);
	qunlock(&r->lk);
	if(r->parent)
		vtfileclose(r->parent);
	memset(r, ~0, sizeof(*r));
	vtfree(r);
}

/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block. We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system VtFiles (VtORDWR) and VtFiles for the
 * snapshot file system (VtOREAD).
 */
static VtBlock*
fileloadblock(VtFile *r, int mode)
{
	char e[ERRMAX];
	uint32_t addr;
	VtBlock *b;

	switch(r->mode){
	default:
		assert(0);

	case VtORDWR:
		assert(r->mode == VtORDWR);
		if(r->local == 1){
			b = vtcacheglobal(r->c, r->score, VtDirType);
			if(b == nil)
				return nil;
			b->pc = getcallerpc(&r);
			return b;
		}
		assert(r->parent != nil);
		if(vtfilelock(r->parent, VtORDWR) < 0)
			return nil;
		b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
		vtfileunlock(r->parent);
		if(b == nil)
			return nil;
		memmove(r->score, b->score, VtScoreSize);
		r->local = 1;
		return b;

	case VtOREAD:
		if(mode == VtORDWR){
			werrstr("read/write lock of read-only file");
			return nil;
		}
		addr = vtglobaltolocal(r->score);
		if(addr == NilBlock)
			return vtcacheglobal(r->c, r->score, VtDirType);

		b = vtcachelocal(r->c, addr, VtDirType);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed. Rewalk from the parent and get the
		 * new pointer. This can't happen in the VtORDWR case
		 * above because blocks in the current epoch don't get
		 * reclaimed. The fact that we're VtOREAD means we're
		 * a snapshot. (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		rerrstr(e, sizeof e);
		if(strcmp(e, ELabelMismatch) == 0){
			if(vtfilelock(r->parent, VtOREAD) < 0)
				return nil;
			b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
			vtfileunlock(r->parent);
			if(b){
				fprint(2, "vtfilealloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				return b;
			}
		}
		return nil;
	}
}

int
vtfilelock(VtFile *r, int mode)
{
	VtBlock *b;

	if(mode == -1)
		mode = r->mode;

	b = fileloadblock(r, mode);
	if(b == nil)
		return -1;
	/*
	 * The fact that we are holding b serves as the
	 * lock entitling us to write to r->b.
	 */
	assert(r->b == nil);
	r->b = b;
	b->pc = getcallerpc(&r);
	return 0;
}

/*
 * Lock two (usually sibling) VtFiles. This needs special care
 * because the Entries for both vtFiles might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
vtfilelock2(VtFile *r, VtFile *rr, int mode)
{
	VtBlock *b, *bb;

	if(rr == nil)
		return vtfilelock(r, mode);

	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = fileloadblock(r, mode);
		if(b == nil)
			return -1;
		vtblockduplock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		bb = fileloadblock(rr, mode);
		b = fileloadblock(r, mode);
	}else{
		b = fileloadblock(r, mode);
		bb = fileloadblock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			vtblockput(b);
		if(bb)
			vtblockput(bb);
		return -1;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	b->pc = getcallerpc(&r);
	bb->pc = getcallerpc(&r);
	return 0;
}

void
vtfileunlock(VtFile *r)
{
	VtBlock *b;

	if(r->b == nil){
		fprint(2, "vtfileunlock: already unlocked\n");
		abort();
	}
	b = r->b;
	r->b = nil;
	vtblockput(b);
}

static VtBlock*
fileload(VtFile *r, VtEntry *e)
{
	VtBlock *b;

	assert(ISLOCKED(r));
	b = r->b;
	if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
		return nil;
	vtblockduplock(b);
	return b;
}
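
/*
 * Worked example: with dsize = psize = 8192 (np = 409 pointers per
 * block), a 1 MB file needs 128 data blocks, reachable through one
 * level of pointer blocks, so sizetodepth returns 1.  A 16 GB file
 * needs 2^21 data blocks; two levels reach only 409*409 = 167,281
 * blocks, so sizetodepth returns 3.
 */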
static int
sizetodepth(uint64_t s, int psize, int dsize)
{
	int np;
	int d;

	/* determine pointer depth */
	np = psize/VtScoreSize;
	s = (s + dsize - 1)/dsize;
	for(d = 0; s > 1; d++)
		s = (s + np - 1)/np;
	return d;
}

int32_t
vtfileread(VtFile *f, void *data, int32_t count, int64_t offset)
{
	int frag;
	VtBlock *b;
	VtEntry e;

	assert(ISLOCKED(f));

	vtfilegetentry(f, &e);
	if(count == 0)
		return 0;
	if(count < 0 || offset < 0){
		werrstr("vtfileread: bad offset or count");
		return -1;
	}
	if(offset >= e.size)
		return 0;

	if(offset+count > e.size)
		count = e.size - offset;

	frag = offset % e.dsize;
	if(frag+count > e.dsize)
		count = e.dsize - frag;

	b = vtfileblock(f, offset/e.dsize, VtOREAD);
	if(b == nil)
		return -1;

	memmove(data, b->data+frag, count);
	vtblockput(b);
	return count;
}

static int32_t
filewrite1(VtFile *f, void *data, int32_t count, int64_t offset)
{
	int frag, m;
	VtBlock *b;
	VtEntry e;

	vtfilegetentry(f, &e);
	if(count < 0 || offset < 0){
		werrstr("vtfilewrite: bad offset or count");
		return -1;
	}

	frag = offset % e.dsize;
	if(frag+count > e.dsize)
		count = e.dsize - frag;

	m = VtORDWR;
	if(frag == 0 && count == e.dsize)
		m = VtOWRITE;

	b = vtfileblock(f, offset/e.dsize, m);
	if(b == nil)
		return -1;

	memmove(b->data+frag, data, count);
	if(m == VtOWRITE && frag+count < e.dsize)
		memset(b->data+frag+count, 0, e.dsize-frag-count);

	if(offset+count > e.size){
		vtfilegetentry(f, &e);
		e.size = offset+count;
		vtfilesetentry(f, &e);
	}

	vtblockput(b);
	return count;
}

int32_t
vtfilewrite(VtFile *f, void *data, int32_t count, int64_t offset)
{
	int32_t tot, m;

	assert(ISLOCKED(f));

	tot = 0;
	m = 0;
	while(tot < count){
		m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
		if(m <= 0)
			break;
		tot += m;
	}
	if(tot==0)
		return m;
	return tot;
}
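
/*
 * Write the local (cached) blocks in the tree rooted at score out to
 * Venti, children first, replacing each local score with the venti
 * score returned by the write.  Scores that are already global are
 * left alone, so only modified subtrees get written.
 */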
static int
flushblock(VtCache *c, VtBlock *bb, uint8_t score[VtScoreSize], int ppb,
	int epb, int type)
{
	uint32_t addr;
	VtBlock *b;
	VtEntry e;
	int i;

	addr = vtglobaltolocal(score);
	if(addr == NilBlock)
		return 0;

	if(bb){
		b = bb;
		if(memcmp(b->score, score, VtScoreSize) != 0)
			abort();
	}else
		if((b = vtcachelocal(c, addr, type)) == nil)
			return -1;

	switch(type){
	case VtDataType:
		break;

	case VtDirType:
		for(i=0; i<epb; i++){
			if(vtentryunpack(&e, b->data, i) < 0)
				goto Err;
			if(!(e.flags&VtEntryActive))
				continue;
			if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
					e.type) < 0)
				goto Err;
			vtentrypack(&e, b->data, i);
		}
		break;

	default:	/* VtPointerTypeX */
		for(i=0; i<ppb; i++){
			if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
				goto Err;
		}
		break;
	}

	if(vtblockwrite(b) < 0)
		goto Err;
	memmove(score, b->score, VtScoreSize);
	if(b != bb)
		vtblockput(b);
	return 0;

Err:
	if(b != bb)
		vtblockput(b);
	return -1;
}
int
vtfileflush(VtFile *f)
{
	int ret;
	VtBlock *b;
	VtEntry e;

	assert(ISLOCKED(f));
	b = fileload(f, &e);
	if(b == nil)
		return -1;
	if(!(e.flags&VtEntryLocal)){
		vtblockput(b);
		return 0;
	}

	ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
		e.type);
	if(ret < 0){
		vtblockput(b);
		return -1;
	}

	vtentrypack(&e, b->data, f->offset % f->epb);
	vtblockput(b);
	return 0;
}
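
/*
 * Flush everything that lies strictly before offset: data blocks that
 * are completely written, and pointer blocks all of whose entries are
 * finished.  A caller writing the file sequentially can use this to
 * push completed blocks to Venti without flushing the block it is
 * still filling.
 */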
int
vtfileflushbefore(VtFile *r, uint64_t offset)
{
	VtBlock *b, *bb;
	VtEntry e;
	int i, base, depth, ppb, epb, doflush;
	int index[VtPointerDepth+1], j, ret;
	VtBlock *bi[VtPointerDepth+2];
	uint8_t *score;

	assert(ISLOCKED(r));
	if(offset == 0)
		return 0;

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	/*
	 * compute path through tree for the last written byte and the next one.
	 */
	ret = -1;
	memset(bi, 0, sizeof bi);
	depth = DEPTH(e.type);
	bi[depth+1] = b;
	i = mkindices(&e, (offset-1)/e.dsize, index);
	if(i < 0)
		goto Err;
	if(i > depth)
		goto Err;
	ppb = e.psize / VtScoreSize;
	epb = e.dsize / VtEntrySize;

	/*
	 * load the blocks along the last written byte
	 */
	index[depth] = r->offset % r->epb;
	for(i=depth; i>=0; i--){
		bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
		if(bb == nil)
			goto Err;
		bi[i] = bb;
		b = bb;
	}
	ret = 0;

	/*
	 * walk up the path from leaf to root, flushing anything that
	 * has been finished.
	 */
	base = e.type&~VtTypeDepthMask;
	for(i=0; i<=depth; i++){
		doflush = 0;
		if(i == 0){
			/* leaf: data or dir block */
			if(offset%e.dsize == 0)
				doflush = 1;
		}else{
			/*
			 * interior node: pointer blocks.
			 * specifically, b = bi[i] is a block whose index[i-1]'th entry
			 * points at bi[i-1].
			 */
			b = bi[i];

			/*
			 * the index entries up to but not including index[i-1] point at
			 * finished blocks, so flush them for sure.
			 */
			for(j=0; j<index[i-1]; j++)
				if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
					goto Err;

			/*
			 * if index[i-1] is the last entry in the block and is global
			 * (i.e. the kid is flushed), then we can flush this block.
			 */
			if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
				doflush = 1;
		}
		if(doflush){
			if(i == depth)
				score = e.score;
			else
				score = bi[i+1]->data+index[i]*VtScoreSize;
			if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
				goto Err;
		}
	}

Err:
	/* top: entry. do this always so that the score is up-to-date */
	vtentrypack(&e, bi[depth+1]->data, index[depth]);
	for(i=0; i<nelem(bi); i++)
		if(bi[i])
			vtblockput(bi[i]);
	return ret;
}