file.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
/*
 * Manage tree of VtFiles stored in the block cache.
 *
 * The single point of truth for the info about the VtFiles themselves
 * is the block data. Because of this, there is no explicit locking of
 * VtFile structures, and indeed there may be more than one VtFile
 * structure for a given Venti file. They synchronize through the
 * block cache.
 *
 * This is a bit simpler than fossil because there are no epochs
 * or tags or anything else. Just mutable local blocks and immutable
 * Venti blocks.
 */
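/*
 * Typical usage (an illustrative sketch, not part of the original source;
 * the cache c, the root address addr, and the buffer buf/n are placeholders
 * supplied by the caller, and error checks are omitted):
 *
 *    f = vtfileroot(c, addr, VtORDWR);     open the root directory file
 *    vtfilelock(f, -1);                    lock before any I/O (-1 means use f->mode)
 *    vtfilewrite(f, buf, n, 0);            read and write while locked
 *    vtfileflush(f);                       push local blocks out to Venti
 *    vtfileunlock(f);
 *    vtfileclose(f);                       drop the reference
 */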
#include <u.h>
#include <libc.h>
#include <venti.h>

#define MaxBlock (1UL<<31)

static char ENotDir[] = "walk in non-directory";
static char ETooBig[] = "file too big";
/* static char EBadAddr[] = "bad address"; */
static char ELabelMismatch[] = "label mismatch";

static int sizetodepth(uint64_t s, int psize, int dsize);
static VtBlock *fileload(VtFile *r, VtEntry *e);
static int shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
static int shrinksize(VtFile*, VtEntry*, uint64_t);
static int growdepth(VtFile*, VtBlock*, VtEntry*, int);

#define ISLOCKED(r) ((r)->b != nil)
#define DEPTH(t) ((t)&VtTypeDepthMask)

static VtFile *
vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, uint32_t offset, int mode)
{
    int epb;
    uint32_t size;
    VtEntry e;
    VtFile *r;

    assert(p==nil || ISLOCKED(p));

    if(p == nil){
        assert(offset == 0);
        epb = 1;
    }else
        epb = p->dsize / VtEntrySize;

    if(b->type != VtDirType){
        werrstr("bad block type %#uo", b->type);
        return nil;
    }

    /*
     * a non-active entry is the only thing that
     * can legitimately happen here. all the others
     * get prints.
     */
    if(vtentryunpack(&e, b->data, offset % epb) < 0){
        fprint(2, "vtentryunpack failed: %r (%.*H)\n",
            VtEntrySize, b->data+VtEntrySize*(offset%epb));
        return nil;
    }
    if(!(e.flags & VtEntryActive)){
        werrstr("entry not active");
        return nil;
    }

    if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
        fprint(2, "depth %u size %llud psize %u dsize %u\n",
            DEPTH(e.type), e.size, e.psize, e.dsize);
        werrstr("bad depth");
        return nil;
    }

    size = vtcacheblocksize(c);
    if(e.dsize > size || e.psize > size){
        werrstr("block sizes %u, %u bigger than cache block size %u",
            e.psize, e.dsize, size);
        return nil;
    }

    r = vtmallocz(sizeof(VtFile));
    r->c = c;
    r->mode = mode;
    r->dsize = e.dsize;
    r->psize = e.psize;
    r->gen = e.gen;
    r->dir = (e.type & VtTypeBaseMask) == VtDirType;
    r->ref = 1;
    r->parent = p;
    if(p){
        qlock(&p->lk);
        assert(mode == VtOREAD || p->mode == VtORDWR);
        p->ref++;
        qunlock(&p->lk);
    }else{
        assert(b->addr != NilBlock);
        r->local = 1;
    }
    memmove(r->score, b->score, VtScoreSize);
    r->offset = offset;
    r->epb = epb;

    return r;
}
VtFile *
vtfileroot(VtCache *c, uint32_t addr, int mode)
{
    VtFile *r;
    VtBlock *b;

    b = vtcachelocal(c, addr, VtDirType);
    if(b == nil)
        return nil;
    r = vtfilealloc(c, b, nil, 0, mode);
    vtblockput(b);
    return r;
}

VtFile*
vtfileopenroot(VtCache *c, VtEntry *e)
{
    VtBlock *b;
    VtFile *f;

    b = vtcacheallocblock(c, VtDirType);
    if(b == nil)
        return nil;

    vtentrypack(e, b->data, 0);
    f = vtfilealloc(c, b, nil, 0, VtORDWR);
    vtblockput(b);
    return f;
}

VtFile *
vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
{
    VtEntry e;

    memset(&e, 0, sizeof e);
    e.flags = VtEntryActive;
    e.psize = psize;
    e.dsize = dsize;
    e.type = type;
    memmove(e.score, vtzeroscore, VtScoreSize);

    return vtfileopenroot(c, &e);
}

VtFile *
vtfileopen(VtFile *r, uint32_t offset, int mode)
{
    uint32_t bn;
    VtBlock *b;

    assert(ISLOCKED(r));
    if(!r->dir){
        werrstr(ENotDir);
        return nil;
    }

    bn = offset/(r->dsize/VtEntrySize);

    b = vtfileblock(r, bn, mode);
    if(b == nil)
        return nil;
    r = vtfilealloc(r->c, b, r, offset, mode);
    vtblockput(b);
    return r;
}

VtFile*
vtfilecreate(VtFile *r, int psize, int dsize, int type)
{
    return _vtfilecreate(r, -1, psize, dsize, type);
}

VtFile*
_vtfilecreate(VtFile *r, int o, int psize, int dsize, int type)
{
    int i;
    VtBlock *b;
    uint32_t bn, size;
    VtEntry e;
    int epb;
    VtFile *rr;
    uint32_t offset;

    assert(ISLOCKED(r));
    assert(psize <= VtMaxLumpSize);
    assert(dsize <= VtMaxLumpSize);
    assert(type == VtDirType || type == VtDataType);

    if(!r->dir){
        werrstr(ENotDir);
        return nil;
    }

    epb = r->dsize/VtEntrySize;
    size = vtfilegetdirsize(r);

    /*
     * look at a random block to see if we can find an empty entry
     */
    if(o == -1){
        offset = lnrand(size+1);
        offset -= offset % epb;
    }else
        offset = o;

    /* try the given block and then try the last block */
    for(;;){
        bn = offset/epb;
        b = vtfileblock(r, bn, VtORDWR);
        if(b == nil)
            return nil;
        for(i=offset%r->epb; i<epb; i++){
            if(vtentryunpack(&e, b->data, i) < 0)
                continue;
            if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
                goto Found;
        }
        vtblockput(b);
        if(offset == size){
            fprint(2, "vtfilecreate: cannot happen\n");
            werrstr("vtfilecreate: cannot happen");
            return nil;
        }
        offset = size;
    }

Found:
    /* found an entry - gen already set */
    e.psize = psize;
    e.dsize = dsize;
    e.flags = VtEntryActive;
    e.type = type;
    e.size = 0;
    memmove(e.score, vtzeroscore, VtScoreSize);
    vtentrypack(&e, b->data, i);

    offset = bn*epb + i;
    if(offset+1 > size){
        if(vtfilesetdirsize(r, offset+1) < 0){
            vtblockput(b);
            return nil;
        }
    }

    rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
    vtblockput(b);
    return rr;
}
static int
vtfilekill(VtFile *r, int doremove)
{
    VtEntry e;
    VtBlock *b;

    assert(ISLOCKED(r));
    b = fileload(r, &e);
    if(b == nil)
        return -1;

    if(doremove==0 && e.size == 0){
        /* already truncated */
        vtblockput(b);
        return 0;
    }

    if(doremove){
        if(e.gen != ~0)
            e.gen++;
        e.dsize = 0;
        e.psize = 0;
        e.flags = 0;
    }else
        e.flags &= ~VtEntryLocal;
    e.type = 0;
    e.size = 0;
    memmove(e.score, vtzeroscore, VtScoreSize);
    vtentrypack(&e, b->data, r->offset % r->epb);
    vtblockput(b);

    if(doremove){
        vtfileunlock(r);
        vtfileclose(r);
    }

    return 0;
}

int
vtfileremove(VtFile *r)
{
    return vtfilekill(r, 1);
}

int
vtfiletruncate(VtFile *r)
{
    return vtfilekill(r, 0);
}

uint64_t
vtfilegetsize(VtFile *r)
{
    VtEntry e;
    VtBlock *b;

    assert(ISLOCKED(r));
    b = fileload(r, &e);
    if(b == nil)
        return ~(uint64_t)0;
    vtblockput(b);

    return e.size;
}

static int
shrinksize(VtFile *r, VtEntry *e, uint64_t size)
{
    int i, depth, type, isdir, ppb;
    uint64_t ptrsz;
    uint8_t score[VtScoreSize];
    VtBlock *b;

    b = vtcacheglobal(r->c, e->score, e->type);
    if(b == nil)
        return -1;

    ptrsz = e->dsize;
    ppb = e->psize/VtScoreSize;
    type = e->type;
    depth = DEPTH(type);
    for(i=0; i+1<depth; i++)
        ptrsz *= ppb;

    isdir = r->dir;
    while(depth > 0){
        if(b->addr == NilBlock){
            /* not worth copying the block just so we can zero some of it */
            vtblockput(b);
            return -1;
        }

        /*
         * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
         */

        /* zero the pointers to unnecessary blocks */
        i = (size+ptrsz-1)/ptrsz;
        for(; i<ppb; i++)
            memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);

        /* recurse (go around again) on the partially necessary block */
        i = size/ptrsz;
        size = size%ptrsz;
        if(size == 0){
            vtblockput(b);
            return 0;
        }
        ptrsz /= ppb;
        type--;
        memmove(score, b->data+i*VtScoreSize, VtScoreSize);
        vtblockput(b);
        b = vtcacheglobal(r->c, score, type);
        if(b == nil)
            return -1;
    }

    if(b->addr == NilBlock){
        vtblockput(b);
        return -1;
    }

    /*
     * No one ever truncates BtDir blocks.
     */
    if(depth==0 && !isdir && e->dsize > size)
        memset(b->data+size, 0, e->dsize-size);
    vtblockput(b);
    return 0;
}
int
vtfilesetsize(VtFile *r, uint64_t size)
{
    int depth, edepth;
    VtEntry e;
    VtBlock *b;

    assert(ISLOCKED(r));
    if(size == 0)
        return vtfiletruncate(r);

    if(size > VtMaxFileSize || size > ((uint64_t)MaxBlock)*r->dsize){
        werrstr(ETooBig);
        return -1;
    }

    b = fileload(r, &e);
    if(b == nil)
        return -1;

    /* quick out */
    if(e.size == size){
        vtblockput(b);
        return 0;
    }

    depth = sizetodepth(size, e.psize, e.dsize);
    edepth = DEPTH(e.type);
    if(depth < edepth){
        if(shrinkdepth(r, b, &e, depth) < 0){
            vtblockput(b);
            return -1;
        }
    }else if(depth > edepth){
        if(growdepth(r, b, &e, depth) < 0){
            vtblockput(b);
            return -1;
        }
    }

    if(size < e.size)
        shrinksize(r, &e, size);

    e.size = size;
    vtentrypack(&e, b->data, r->offset % r->epb);
    vtblockput(b);

    return 0;
}

int
vtfilesetdirsize(VtFile *r, uint32_t ds)
{
    uint64_t size;
    int epb;

    assert(ISLOCKED(r));
    epb = r->dsize/VtEntrySize;

    size = (uint64_t)r->dsize*(ds/epb);
    size += VtEntrySize*(ds%epb);
    return vtfilesetsize(r, size);
}

uint32_t
vtfilegetdirsize(VtFile *r)
{
    uint32_t ds;
    uint64_t size;
    int epb;

    assert(ISLOCKED(r));
    epb = r->dsize/VtEntrySize;

    size = vtfilegetsize(r);
    ds = epb*(size/r->dsize);
    ds += (size%r->dsize)/VtEntrySize;
    return ds;
}

int
vtfilegetentry(VtFile *r, VtEntry *e)
{
    VtBlock *b;

    assert(ISLOCKED(r));
    b = fileload(r, e);
    if(b == nil)
        return -1;
    vtblockput(b);

    return 0;
}

int
vtfilesetentry(VtFile *r, VtEntry *e)
{
    VtBlock *b;
    VtEntry ee;

    assert(ISLOCKED(r));
    b = fileload(r, &ee);
    if(b == nil)
        return -1;
    vtentrypack(e, b->data, r->offset % r->epb);
    vtblockput(b);
    return 0;
}

static VtBlock *
blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
{
    VtBlock *b;
    int type;
    uint8_t *score;

    switch(p->type){
    case VtDataType:
        assert(0);

    case VtDirType:
        type = e->type;
        score = e->score;
        break;

    default:
        type = p->type - 1;
        score = p->data+index*VtScoreSize;
        break;
    }
    /*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */

    if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
        b = vtcacheallocblock(c, type);
        if(b)
            goto HaveCopy;
    }else
        b = vtcacheglobal(c, score, type);

    if(b == nil || mode == VtOREAD)
        return b;

    if(vtglobaltolocal(b->score) != NilBlock)
        return b;

    /*
     * Copy on write.
     */
    e->flags |= VtEntryLocal;

    b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
    if(b == nil)
        return nil;

HaveCopy:
    if(p->type == VtDirType){
        memmove(e->score, b->score, VtScoreSize);
        vtentrypack(e, p->data, index);
    }else{
        memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
    }
    return b;
}
/*
 * Change the depth of the VtFile r.
 * The entry e for r is contained in block p.
 */
static int
growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
    VtBlock *b, *bb;

    assert(ISLOCKED(r));
    assert(depth <= VtPointerDepth);

    b = vtcacheglobal(r->c, e->score, e->type);
    if(b == nil)
        return -1;

    /*
     * Keep adding layers until we get to the right depth
     * or an error occurs.
     */
    while(DEPTH(e->type) < depth){
        bb = vtcacheallocblock(r->c, e->type+1);
        if(bb == nil)
            break;
        memmove(bb->data, b->score, VtScoreSize);
        memmove(e->score, bb->score, VtScoreSize);
        e->type++;
        e->flags |= VtEntryLocal;
        vtblockput(b);
        b = bb;
    }

    vtentrypack(e, p->data, r->offset % r->epb);
    vtblockput(b);

    if(DEPTH(e->type) == depth)
        return 0;
    return -1;
}

static int
shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
    VtBlock *b, *nb, *ob, *rb;

    assert(ISLOCKED(r));
    assert(depth <= VtPointerDepth);

    rb = vtcacheglobal(r->c, e->score, e->type);
    if(rb == nil)
        return -1;

    /*
     * Walk down to the new root block.
     * We may stop early, but something is better than nothing.
     */
    ob = nil;
    b = rb;
    for(; DEPTH(e->type) > depth; e->type--){
        nb = vtcacheglobal(r->c, b->data, e->type-1);
        if(nb == nil)
            break;
        if(ob!=nil && ob!=rb)
            vtblockput(ob);
        ob = b;
        b = nb;
    }

    if(b == rb){
        vtblockput(rb);
        return 0;
    }

    /*
     * Right now, e points at the root block rb, b is the new root block,
     * and ob points at b. To update:
     *
     * (i) change e to point at b
     * (ii) zero the pointer ob -> b
     * (iii) free the root block
     *
     * p (the block containing e) must be written before
     * anything else.
     */

    /* (i) */
    memmove(e->score, b->score, VtScoreSize);
    vtentrypack(e, p->data, r->offset % r->epb);

    /* (ii) */
    memmove(ob->data, vtzeroscore, VtScoreSize);

    /* (iii) */
    vtblockput(rb);
    if(ob!=nil && ob!=rb)
        vtblockput(ob);
    vtblockput(b);

    if(DEPTH(e->type) == depth)
        return 0;
    return -1;
}

static int
mkindices(VtEntry *e, uint32_t bn, int *index)
{
    int i, np;

    memset(index, 0, (VtPointerDepth+1)*sizeof(int));

    np = e->psize/VtScoreSize;
    for(i=0; bn > 0; i++){
        if(i >= VtPointerDepth){
            werrstr("bad address 0x%lx", (uint32_t)bn);
            return -1;
        }
        index[i] = bn % np;
        bn /= np;
    }
    return i;
}
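/*
 * Fetch the bn'th data block of the file (copying on write for the
 * write modes): load the entry block, compute the per-level indices
 * with mkindices, grow the tree if bn lies beyond the current depth,
 * then walk one pointer block per level down to the data block.
 */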
VtBlock *
vtfileblock(VtFile *r, uint32_t bn, int mode)
{
    VtBlock *b, *bb;
    int index[VtPointerDepth+1];
    VtEntry e;
    int i;
    int m;

    assert(ISLOCKED(r));
    assert(bn != NilBlock);

    b = fileload(r, &e);
    if(b == nil)
        return nil;

    i = mkindices(&e, bn, index);
    if(i < 0)
        goto Err;
    if(i > DEPTH(e.type)){
        if(mode == VtOREAD){
            werrstr("bad address 0x%lx", (uint32_t)bn);
            goto Err;
        }
        index[i] = 0;
        if(growdepth(r, b, &e, i) < 0)
            goto Err;
    }

    assert(b->type == VtDirType);

    index[DEPTH(e.type)] = r->offset % r->epb;

    /* mode for intermediate block */
    m = mode;
    if(m == VtOWRITE)
        m = VtORDWR;

    for(i=DEPTH(e.type); i>=0; i--){
        bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
        if(bb == nil)
            goto Err;
        vtblockput(b);
        b = bb;
    }
    b->pc = getcallerpc(&r);
    return b;
Err:
    vtblockput(b);
    return nil;
}

int
vtfileblockscore(VtFile *r, uint32_t bn, uint8_t score[VtScoreSize])
{
    VtBlock *b, *bb;
    int index[VtPointerDepth+1];
    VtEntry e;
    int i;

    assert(ISLOCKED(r));
    assert(bn != NilBlock);

    b = fileload(r, &e);
    if(b == nil)
        return -1;

    if(DEPTH(e.type) == 0){
        memmove(score, e.score, VtScoreSize);
        vtblockput(b);
        return 0;
    }

    i = mkindices(&e, bn, index);
    if(i < 0){
        vtblockput(b);
        return -1;
    }
    if(i > DEPTH(e.type)){
        memmove(score, vtzeroscore, VtScoreSize);
        vtblockput(b);
        return 0;
    }

    index[DEPTH(e.type)] = r->offset % r->epb;

    for(i=DEPTH(e.type); i>=1; i--){
        bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
        if(bb == nil)
            goto Err;
        vtblockput(b);
        b = bb;
        if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
            break;
    }

    memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
    vtblockput(b);
    return 0;

Err:
    vtblockput(b);
    return -1;
}

void
vtfileincref(VtFile *r)
{
    qlock(&r->lk);
    r->ref++;
    qunlock(&r->lk);
}

void
vtfileclose(VtFile *r)
{
    if(r == nil)
        return;
    qlock(&r->lk);
    r->ref--;
    if(r->ref){
        qunlock(&r->lk);
        return;
    }
    assert(r->ref == 0);
    qunlock(&r->lk);
    if(r->parent)
        vtfileclose(r->parent);
    memset(r, ~0, sizeof(*r));
    vtfree(r);
}
/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block. We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system VtFiles (VtORDWR) and VtFiles for the
 * snapshot file system (VtOREAD).
 */
static VtBlock*
fileloadblock(VtFile *r, int mode)
{
    char e[ERRMAX];
    uint32_t addr;
    VtBlock *b;

    switch(r->mode){
    default:
        assert(0);

    case VtORDWR:
        assert(r->mode == VtORDWR);
        if(r->local == 1){
            b = vtcacheglobal(r->c, r->score, VtDirType);
            if(b == nil)
                return nil;
            b->pc = getcallerpc(&r);
            return b;
        }
        assert(r->parent != nil);
        if(vtfilelock(r->parent, VtORDWR) < 0)
            return nil;
        b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
        vtfileunlock(r->parent);
        if(b == nil)
            return nil;
        memmove(r->score, b->score, VtScoreSize);
        r->local = 1;
        return b;

    case VtOREAD:
        if(mode == VtORDWR){
            werrstr("read/write lock of read-only file");
            return nil;
        }
        addr = vtglobaltolocal(r->score);
        if(addr == NilBlock)
            return vtcacheglobal(r->c, r->score, VtDirType);

        b = vtcachelocal(r->c, addr, VtDirType);
        if(b)
            return b;

        /*
         * If it failed because the epochs don't match, the block has been
         * archived and reclaimed. Rewalk from the parent and get the
         * new pointer. This can't happen in the VtORDWR case
         * above because blocks in the current epoch don't get
         * reclaimed. The fact that we're VtOREAD means we're
         * a snapshot. (Or else the file system is read-only, but then
         * the archiver isn't going around deleting blocks.)
         */
        rerrstr(e, sizeof e);
        if(strcmp(e, ELabelMismatch) == 0){
            if(vtfilelock(r->parent, VtOREAD) < 0)
                return nil;
            b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
            vtfileunlock(r->parent);
            if(b){
                fprint(2, "vtfilealloc: lost %V found %V\n",
                    r->score, b->score);
                memmove(r->score, b->score, VtScoreSize);
                return b;
            }
        }
        return nil;
    }
}

int
vtfilelock(VtFile *r, int mode)
{
    VtBlock *b;

    if(mode == -1)
        mode = r->mode;

    b = fileloadblock(r, mode);
    if(b == nil)
        return -1;
    /*
     * The fact that we are holding b serves as the
     * lock entitling us to write to r->b.
     */
    assert(r->b == nil);
    r->b = b;
    b->pc = getcallerpc(&r);
    return 0;
}

/*
 * Lock two (usually sibling) VtFiles. This needs special care
 * because the Entries for both vtFiles might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
vtfilelock2(VtFile *r, VtFile *rr, int mode)
{
    VtBlock *b, *bb;

    if(rr == nil)
        return vtfilelock(r, mode);

    if(mode == -1)
        mode = r->mode;

    if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
        b = fileloadblock(r, mode);
        if(b == nil)
            return -1;
        vtblockduplock(b);
        bb = b;
    }else if(r->parent==rr->parent || r->offset > rr->offset){
        bb = fileloadblock(rr, mode);
        b = fileloadblock(r, mode);
    }else{
        b = fileloadblock(r, mode);
        bb = fileloadblock(rr, mode);
    }
    if(b == nil || bb == nil){
        if(b)
            vtblockput(b);
        if(bb)
            vtblockput(bb);
        return -1;
    }

    /*
     * The fact that we are holding b and bb serves
     * as the lock entitling us to write to r->b and rr->b.
     */
    r->b = b;
    rr->b = bb;
    b->pc = getcallerpc(&r);
    bb->pc = getcallerpc(&r);
    return 0;
}

void
vtfileunlock(VtFile *r)
{
    VtBlock *b;

    if(r->b == nil){
        fprint(2, "vtfileunlock: already unlocked\n");
        abort();
    }
    b = r->b;
    r->b = nil;
    vtblockput(b);
}

static VtBlock*
fileload(VtFile *r, VtEntry *e)
{
    VtBlock *b;

    assert(ISLOCKED(r));
    b = r->b;
    if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
        return nil;
    vtblockduplock(b);
    return b;
}
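/*
 * Compute the tree depth needed for a file of s bytes: d data blocks of
 * dsize bytes need ceil(d/np) pointer blocks at the next level, and so on
 * until a single block remains. For example (illustrative numbers only),
 * with psize = dsize = 8192 and VtScoreSize = 20, np = 8192/20 = 409;
 * a 10 MB file (10*1024*1024 bytes) needs 1280 data blocks, which fit under
 * 4 pointer blocks, which fit under one root pointer block, so the depth is 2.
 */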
static int
sizetodepth(uint64_t s, int psize, int dsize)
{
    int np;
    int d;

    /* determine pointer depth */
    np = psize/VtScoreSize;
    s = (s + dsize - 1)/dsize;
    for(d = 0; s > 1; d++)
        s = (s + np - 1)/np;
    return d;
}

int32_t
vtfileread(VtFile *f, void *data, int32_t count, int64_t offset)
{
    int frag;
    VtBlock *b;
    VtEntry e;

    assert(ISLOCKED(f));

    vtfilegetentry(f, &e);
    if(count == 0)
        return 0;
    if(count < 0 || offset < 0){
        werrstr("vtfileread: bad offset or count");
        return -1;
    }
    if(offset >= e.size)
        return 0;

    if(offset+count > e.size)
        count = e.size - offset;

    frag = offset % e.dsize;
    if(frag+count > e.dsize)
        count = e.dsize - frag;

    b = vtfileblock(f, offset/e.dsize, VtOREAD);
    if(b == nil)
        return -1;

    memmove(data, b->data+frag, count);
    vtblockput(b);
    return count;
}

static int32_t
filewrite1(VtFile *f, void *data, int32_t count, int64_t offset)
{
    int frag, m;
    VtBlock *b;
    VtEntry e;

    vtfilegetentry(f, &e);
    if(count < 0 || offset < 0){
        werrstr("vtfilewrite: bad offset or count");
        return -1;
    }

    frag = offset % e.dsize;
    if(frag+count > e.dsize)
        count = e.dsize - frag;

    m = VtORDWR;
    if(frag == 0 && count == e.dsize)
        m = VtOWRITE;

    b = vtfileblock(f, offset/e.dsize, m);
    if(b == nil)
        return -1;

    memmove(b->data+frag, data, count);
    if(m == VtOWRITE && frag+count < e.dsize)
        memset(b->data+frag+count, 0, e.dsize-frag-count);

    if(offset+count > e.size){
        vtfilegetentry(f, &e);
        e.size = offset+count;
        vtfilesetentry(f, &e);
    }

    vtblockput(b);
    return count;
}

int32_t
vtfilewrite(VtFile *f, void *data, int32_t count, int64_t offset)
{
    int32_t tot, m;

    assert(ISLOCKED(f));

    tot = 0;
    m = 0;
    while(tot < count){
        m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
        if(m <= 0)
            break;
        tot += m;
    }

    if(tot==0)
        return m;
    return tot;
}
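/*
 * Write a local (cached, not yet archived) block tree rooted at score
 * out to Venti: recurse into directory and pointer blocks first, write
 * this block, then replace the local score with the Venti score so the
 * parent can be rewritten in terms of global addresses.
 */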
static int
flushblock(VtCache *c, VtBlock *bb, uint8_t score[VtScoreSize], int ppb, int epb,
    int type)
{
    uint32_t addr;
    VtBlock *b;
    VtEntry e;
    int i;

    addr = vtglobaltolocal(score);
    if(addr == NilBlock)
        return 0;

    if(bb){
        b = bb;
        if(memcmp(b->score, score, VtScoreSize) != 0)
            abort();
    }else
        if((b = vtcachelocal(c, addr, type)) == nil)
            return -1;

    switch(type){
    case VtDataType:
        break;

    case VtDirType:
        for(i=0; i<epb; i++){
            if(vtentryunpack(&e, b->data, i) < 0)
                goto Err;
            if(!(e.flags&VtEntryActive))
                continue;
            if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
                e.type) < 0)
                goto Err;
            vtentrypack(&e, b->data, i);
        }
        break;

    default:	/* VtPointerTypeX */
        for(i=0; i<ppb; i++){
            if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
                goto Err;
        }
        break;
    }

    if(vtblockwrite(b) < 0)
        goto Err;
    memmove(score, b->score, VtScoreSize);
    if(b != bb)
        vtblockput(b);
    return 0;

Err:
    if(b != bb)
        vtblockput(b);
    return -1;
}

int
vtfileflush(VtFile *f)
{
    int ret;
    VtBlock *b;
    VtEntry e;

    assert(ISLOCKED(f));
    b = fileload(f, &e);
    if(!(e.flags&VtEntryLocal)){
        vtblockput(b);
        return 0;
    }

    ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
        e.type);
    if(ret < 0){
        vtblockput(b);
        return -1;
    }

    vtentrypack(&e, b->data, f->offset % f->epb);
    vtblockput(b);
    return 0;
}
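/*
 * Flush the part of the file that lies strictly before offset:
 * walk the path from the entry down to the block holding the last
 * written byte, flush every finished sibling to the left of that path,
 * and flush the path blocks themselves once all of their children are
 * global, so a caller writing sequentially can flush completed blocks
 * as it goes.
 */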
int
vtfileflushbefore(VtFile *r, uint64_t offset)
{
    VtBlock *b, *bb;
    VtEntry e;
    int i, base, depth, ppb, epb, doflush;
    int index[VtPointerDepth+1], j, ret;
    VtBlock *bi[VtPointerDepth+2];
    uint8_t *score;

    assert(ISLOCKED(r));
    if(offset == 0)
        return 0;

    b = fileload(r, &e);
    if(b == nil)
        return -1;

    /*
     * compute path through tree for the last written byte and the next one.
     */
    ret = -1;
    memset(bi, 0, sizeof bi);
    depth = DEPTH(e.type);
    bi[depth+1] = b;
    i = mkindices(&e, (offset-1)/e.dsize, index);
    if(i < 0)
        goto Err;
    if(i > depth)
        goto Err;
    ppb = e.psize / VtScoreSize;
    epb = e.dsize / VtEntrySize;

    /*
     * load the blocks along the last written byte
     */
    index[depth] = r->offset % r->epb;
    for(i=depth; i>=0; i--){
        bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
        if(bb == nil)
            goto Err;
        bi[i] = bb;
        b = bb;
    }
    ret = 0;

    /*
     * walk up the path from leaf to root, flushing anything that
     * has been finished.
     */
    base = e.type&~VtTypeDepthMask;
    for(i=0; i<=depth; i++){
        doflush = 0;
        if(i == 0){
            /* leaf: data or dir block */
            if(offset%e.dsize == 0)
                doflush = 1;
        }else{
            /*
             * interior node: pointer blocks.
             * specifically, b = bi[i] is a block whose index[i-1]'th entry
             * points at bi[i-1].
             */
            b = bi[i];

            /*
             * the index entries up to but not including index[i-1] point at
             * finished blocks, so flush them for sure.
             */
            for(j=0; j<index[i-1]; j++)
                if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
                    goto Err;

            /*
             * if index[i-1] is the last entry in the block and is global
             * (i.e. the kid is flushed), then we can flush this block.
             */
            if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
                doflush = 1;
        }
        if(doflush){
            if(i == depth)
                score = e.score;
            else
                score = bi[i+1]->data+index[i]*VtScoreSize;
            if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
                goto Err;
        }
    }

Err:
    /* top: entry. do this always so that the score is up-to-date */
    vtentrypack(&e, bi[depth+1]->data, index[depth]);
    for(i=0; i<nelem(bi); i++)
        if(bi[i])
            vtblockput(bi[i]);
    return ret;
}