write.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. #include "logfsos.h"
  2. #include "logfs.h"
  3. #include "local.h"
  4. typedef struct AllocState AllocState;
  5. struct AllocState {
  6. long oldblock;
  7. int markbad;
  8. };
  9. Pageset
  10. logfsdatapagemask(int pages, int base)
  11. {
  12. if(pages == BITSPERSET)
  13. return ~(Pageset)0;
  14. return (((Pageset)1 << pages) - 1) << (BITSPERSET - base - pages);
  15. }
  16. static Pageset
  17. fastgap(Pageset w, uint n)
  18. {
  19. Pageset s;
  20. //print("fastgap(0x%.8ux, %d)\n", w, n);
  21. if(w == 0 || n < 1 || n > BITSPERSET)
  22. return 0;
  23. /*
  24. # unroll the following loop 5 times:
  25. # while(n > 1){
  26. # s := n >> 1;
  27. # w &= w<<s;
  28. # n -= s;
  29. # }
  30. */
  31. s = n >> 1;
  32. w &= w << s;
  33. n -= s;
  34. s = n >> 1;
  35. w &= w << s;
  36. n -= s;
  37. s = n >> 1;
  38. w &= w << s;
  39. n -= s;
  40. s = n >> 1;
  41. w &= w << s;
  42. n -= s;
  43. s = n >> 1;
  44. if(BITSPERSET == 64){ /* extra time if 64 bits */
  45. w &= w << s;
  46. n -= s;
  47. s = n >> 1;
  48. }
  49. return w & (w << s);
  50. }
  51. static int
  52. nlz(Pageset x)
  53. {
  54. int n, c;
  55. if(x == 0)
  56. return BITSPERSET;
  57. if(x & PAGETOP)
  58. return 0;
  59. n = BITSPERSET;
  60. c = BITSPERSET/2;
  61. do {
  62. Pageset y;
  63. y = x >> c;
  64. if(y != 0) {
  65. n -= c;
  66. x = y;
  67. }
  68. } while((c >>= 1) != 0);
  69. return n - x;
  70. }
  71. static Pageset
  72. findgap(Pageset w, uint n)
  73. {
  74. Pageset m;
  75. do {
  76. m = fastgap(w, n);
  77. if(m)
  78. break;
  79. n--;
  80. } while(n);
  81. if(n == 0)
  82. return 0;
  83. return logfsdatapagemask(n, nlz(m));
  84. }
  85. static int
  86. bitcount(Pageset mask)
  87. {
  88. Pageset m;
  89. int rv;
  90. rv = 0;
  91. for(m = PAGETOP; m != 0; m >>= 1)
  92. if(mask & m)
  93. rv++;
  94. return rv;
  95. }
  96. static char *
  97. allocdatapages(LogfsServer *server, u32int count, int *countp, long *blockindexp, int *pagep, u32int *flashaddr, AllocState *state)
  98. {
  99. LogfsLowLevel *ll = server->ll;
  100. long b, blockindex;
  101. DataBlock *db;
  102. int pagebase;
  103. u32int pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
  104. u32int gapmask;
  105. long bestfreeblockindex;
  106. int bestfree;
  107. int pagesperblock = 1 << ll->l2pagesperblock;
  108. int apages;
  109. char *errmsg;
  110. int didsomething;
  111. state->oldblock = -1;
  112. state->markbad = 0;
  113. if(pages > pagesperblock)
  114. pages = pagesperblock;
  115. /*
  116. * fill in gaps first
  117. */
  118. bestfreeblockindex = -1;
  119. bestfree = 0;
  120. for(blockindex = 0; blockindex < server->ndatablocks; blockindex++) {
  121. db = server->datablock + blockindex;
  122. if(db->block < 0)
  123. continue;
  124. gapmask = findgap(db->free & ~db->dirty, pages);
  125. //print("blockindex %ld free 0x%.8ux dirty 0x%.8ux gapmask %.8ux\n", blockindex, db->free, db->dirty, gapmask);
  126. if(gapmask != 0) {
  127. /*
  128. * this is free and !dirty
  129. */
  130. b = db->block;
  131. USED(b);
  132. goto done;
  133. }
  134. else {
  135. int free = bitcount(db->free & logfsdatapagemask(pagesperblock, 0));
  136. if(free > 0 && (bestfreeblockindex < 0 || free > bestfree)) {
  137. bestfreeblockindex = blockindex;
  138. bestfree = free;
  139. }
  140. }
  141. }
  142. //print("out of space - need to clean up a data block\n");
  143. if(bestfreeblockindex >= 0) {
  144. //print("best block index %ld (%ld) %d bits\n", bestfreeblockindex, server->datablock[bestfreeblockindex].block, bestfree);
  145. /*
  146. * clean up data block
  147. */
  148. b = logfsfindfreeblock(ll, AllocReasonTransfer);
  149. while(b >= 0) {
  150. char *errmsg;
  151. LogfsLowLevelReadResult llrr;
  152. long oldblock;
  153. int markedbad;
  154. db = server->datablock + bestfreeblockindex;
  155. oldblock = db->block;
  156. errmsg = logfsservercopyactivedata(server, b, bestfreeblockindex, 0, &llrr, &markedbad);
  157. if(errmsg) {
  158. if(!markedbad)
  159. return errmsg;
  160. b = logfsfindfreeblock(ll, AllocReasonTransfer);
  161. }
  162. else {
  163. Pageset available;
  164. /*
  165. * if page0 is free, then we must ensure that we use it otherwise
  166. * in tagged storage such as nand, the block tag is not written
  167. * in all cases, it is safer to erase the block afterwards to
  168. * preserve the data for as long as possible (we could choose
  169. * to erase the old block now if page0 has already been copied)
  170. */
  171. blockindex = bestfreeblockindex;
  172. state->oldblock = oldblock;
  173. state->markbad = llrr != LogfsLowLevelReadResultOk;
  174. available = db->free & ~db->dirty;
  175. if(available & PAGETOP)
  176. available = logfsdatapagemask(nlz(~available), 0);
  177. gapmask = findgap(available, pages);
  178. goto done;
  179. }
  180. }
  181. }
  182. /*
  183. * use already erased blocks, so long as there are a few free
  184. */
  185. b = logfsfindfreeblock(ll, AllocReasonDataExtend);
  186. if(b >= 0) {
  187. useerased:
  188. for(blockindex = 0, db = server->datablock; blockindex < server->ndatablocks; blockindex++, db++)
  189. if(db->block < 0)
  190. break;
  191. if(blockindex == server->ndatablocks)
  192. server->ndatablocks++;
  193. db->path = mkdatapath(blockindex, 0);
  194. db->block = b;
  195. (*ll->setblocktag)(ll, b, LogfsTdata);
  196. (*ll->setblockpath)(ll, b, db->path);
  197. db->free = logfsdatapagemask(pagesperblock, 0);
  198. db->dirty = 0;
  199. gapmask = db->free;
  200. goto done;
  201. }
  202. /*
  203. * last resort; try to steal from log
  204. */
  205. //print("last resort\n");
  206. errmsg = logfsserverlogsweep(server, 0, &didsomething);
  207. if(errmsg)
  208. return errmsg;
  209. if(didsomething) {
  210. /*
  211. * this can only create whole free blocks, so...
  212. */
  213. //print("findfree after last resort\n");
  214. b = logfsfindfreeblock(ll, AllocReasonDataExtend);
  215. if(b >= 0) {
  216. //print("*********************************************************\n");
  217. goto useerased;
  218. }
  219. }
  220. *countp = 0;
  221. return nil;
  222. done:
  223. /*
  224. * common finish - needs gapmask, blockindex, db
  225. */
  226. apages = bitcount(gapmask);
  227. pagebase = nlz(gapmask);
  228. if(apages > pages)
  229. apages = pages;
  230. gapmask = logfsdatapagemask(apages, pagebase);
  231. if(server->trace > 1)
  232. print("allocdatapages: block %ld(%ld) pages %d mask 0x%.8ux pagebase %d apages %d\n",
  233. blockindex, db->block, pages, gapmask, pagebase, apages);
  234. db->free &= ~gapmask;
  235. db->dirty |= gapmask;
  236. *pagep = pagebase;
  237. *blockindexp = blockindex;
  238. *flashaddr = logfsspo2flashaddr(server, blockindex, pagebase, 0);
  239. *countp = apages << ll->l2pagesize;
  240. if(*countp > count)
  241. *countp = count;
  242. return nil;
  243. }
  244. typedef struct Page {
  245. u32int pageaddr;
  246. int ref;
  247. } Page;
  248. typedef struct DataStructure {
  249. LogfsServer *server;
  250. int nentries;
  251. int maxentries;
  252. Page *array;
  253. } DataStructure;
  254. static int
  255. deltapage(DataStructure *ds, u32int pageaddr, int add, int delta)
  256. {
  257. int i;
  258. for(i = 0; i < ds->nentries; i++)
  259. if(ds->array[i].pageaddr == pageaddr) {
  260. ds->array[i].ref += delta;
  261. return 1;
  262. }
  263. if(!add)
  264. return 1;
  265. if(ds->maxentries == 0) {
  266. ds->array = logfsrealloc(nil, sizeof(Page) * 100);
  267. if(ds->array == nil)
  268. return 0;
  269. ds->maxentries = 100;
  270. }
  271. else if(ds->nentries >= ds->maxentries) {
  272. void *a = logfsrealloc(ds->array, ds->maxentries * 2 * sizeof(Page));
  273. if(a == nil)
  274. return 0;
  275. ds->array = a;
  276. ds->maxentries *= 2;
  277. }
  278. ds->array[ds->nentries].pageaddr = pageaddr;
  279. ds->array[ds->nentries++].ref = delta;
  280. return 1;
  281. }
  282. /*
  283. * only called for data addresses
  284. */
  285. static int
  286. deltapages(DataStructure *ds, LogfsLowLevel *ll, u32int baseflashaddr, int range, int add, int delta)
  287. {
  288. long seq;
  289. int page, offset;
  290. int pages;
  291. u32int pageaddr;
  292. int x;
  293. //print("deltapages(%ud, %ud, %d, %d)\n", baseflashaddr, limitflashaddr, add, delta);
  294. logfsflashaddr2spo(ds->server, baseflashaddr, &seq, &page, &offset);
  295. pages = (offset + range + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
  296. pageaddr = (seq << ll->l2pagesperblock) + page;
  297. for(x = 0; x < pages; x++, pageaddr++)
  298. if(!deltapage(ds, pageaddr, add, delta))
  299. return 0;
  300. return 1;
  301. }
  302. static int
  303. findpageset(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
  304. {
  305. DataStructure *ds = magic;
  306. LogfsLowLevel *ll;
  307. u32int flashaddr;
  308. u32int range;
  309. u32int residue;
  310. if(e == nil || (e->flashaddr & LogAddr) != 0)
  311. return 1;
  312. ll = ds->server->ll;
  313. //print("baseoffset %ud limitoffset %ud min %ud max %ud\n", baseoffset, limitoffset, e->min, e->max);
  314. flashaddr = e->flashaddr;
  315. if(extentoffset)
  316. if(!deltapages(ds, ll, flashaddr, extentoffset, 1, 1))
  317. return -1;
  318. flashaddr += extentoffset;
  319. range = limitoffset - baseoffset;
  320. if(!deltapages(ds, ll, flashaddr, range, 1, -1))
  321. return -1;
  322. flashaddr += range;
  323. residue = e->max - e->min - (extentoffset + range);
  324. if(residue)
  325. if(!deltapages(ds, ll, flashaddr, residue, 1, 1))
  326. return -1;
  327. return 1;
  328. }
  329. static int
  330. addpagereferences(void *magic, Extent *e, int hole)
  331. {
  332. DataStructure *ds = magic;
  333. if(hole || (e->flashaddr & LogAddr) != 0)
  334. return 1;
  335. return deltapages(ds, ds->server->ll, e->flashaddr, e->max - e->min, 0, 1) ? 1 : -1;
  336. }
  337. static char *
  338. zappages(LogfsServer *server, Entry *e, u32int min, u32int max)
  339. {
  340. DataStructure ds;
  341. long seq;
  342. int x, rv, page;
  343. Page *p;
  344. if(min >= e->u.file.length)
  345. return nil; /* no checks necessary */
  346. if(min == 0 && max >= e->u.file.length) {
  347. /* replacing entire file */
  348. logfsextentlistwalk(e->u.file.extent, logfsunconditionallymarkfreeanddirty, server);
  349. return nil;
  350. }
  351. /* hard after that - this will need to be improved */
  352. /*
  353. * current algorithm
  354. * build a list of all pages referenced by the extents being removed, and count the
  355. * number of references
  356. * then subtract the number of references to each page in entire file
  357. * any pages with a reference count == 0 can be removed
  358. */
  359. ds.server = server;
  360. ds.nentries = 0;
  361. ds.maxentries = 0;
  362. ds.array = nil;
  363. rv = logfsextentlistwalkrange(e->u.file.extent, findpageset, &ds, min, max);
  364. if(rv < 0 || ds.nentries == 0)
  365. goto Out;
  366. if(server->trace > 1){
  367. print("pass 1\n");
  368. for(x = 0; x < ds.nentries; x++){
  369. p = &ds.array[x];
  370. seq = p->pageaddr >> server->ll->l2pagesperblock;
  371. page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
  372. print("block %lud page %ud ref %d\n", seq, page, p->ref);
  373. }
  374. print("pass 2\n");
  375. }
  376. rv = logfsextentlistwalk(e->u.file.extent, addpagereferences, &ds);
  377. if(rv >= 0){
  378. for(x = 0; x < ds.nentries; x++){
  379. p = &ds.array[x];
  380. seq = p->pageaddr >> server->ll->l2pagesperblock;
  381. page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
  382. if(server->trace > 1)
  383. print("block %lud page %ud ref %d\n", seq, page, p->ref);
  384. if(p->ref == 0)
  385. logfsfreedatapages(server, seq, logfsdatapagemask(1, page));
  386. }
  387. }
  388. Out:
  389. logfsfreemem(ds.array);
  390. return rv < 0 ? Enomem : nil;
  391. }
  392. static void
  393. disposeofoldblock(LogfsServer *server, AllocState *state)
  394. {
  395. if(state->oldblock >= 0) {
  396. if(server->testflags & LogfsTestDontFettleDataBlock) {
  397. /* take the block out of commission */
  398. (*server->ll->setblocktag)(server->ll, state->oldblock, LogfsTworse);
  399. server->testflags &= ~LogfsTestDontFettleDataBlock;
  400. }
  401. else {
  402. if(state->markbad)
  403. (*server->ll->markblockbad)(server->ll, state->oldblock);
  404. else
  405. logfsbootfettleblock(server->lb, state->oldblock, LogfsTnone, ~0, nil);
  406. }
  407. state->oldblock = -1;
  408. }
  409. }
  410. char *
  411. logfsserverwrite(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int *rcount)
  412. {
  413. Fid *f;
  414. Entry *e;
  415. u32int now;
  416. char *muid;
  417. int muidlen;
  418. LogfsLowLevel *ll = server->ll;
  419. if(server->trace > 1)
  420. print("logfsserverwrite(%ud, %ud, %ud)\n", fid, offset, count);
  421. f = logfsfidmapfindentry(server->fidmap, fid);
  422. if(f == nil)
  423. return logfsebadfid;
  424. if(f->openmode < 0)
  425. return logfsefidnotopen;
  426. if((f->openmode & 3) == OREAD)
  427. return logfseaccess;
  428. e = f->entry;
  429. if(e->deadandgone)
  430. return Eio;
  431. if(e->qid.type & QTDIR)
  432. return Eperm;
  433. if(e->perm & DMAPPEND)
  434. offset = e->u.file.length;
  435. now = logfsnow();
  436. *rcount = count;
  437. muid = logfsisfindidfromname(server->is, f->uname);
  438. muidlen = strlen(muid);
  439. while(count) {
  440. Extent extent;
  441. int thistime;
  442. char *errmsg;
  443. thistime = lognicesizeforwrite(server, 1, count, muidlen);
  444. if(thistime == 0) {
  445. int p;
  446. u32int n;
  447. long blockindex;
  448. int pagebase;
  449. AllocState state;
  450. int pagesize = 1 << ll->l2pagesize;
  451. reallocate:
  452. errmsg = allocdatapages(server, count, &thistime, &blockindex, &pagebase, &extent.flashaddr, &state);
  453. if(errmsg)
  454. return errmsg;
  455. if(thistime == 0)
  456. return logfselogfull;
  457. for(p = pagebase, n = 0; n < thistime; p++, n += pagesize) {
  458. u32int mask;
  459. DataBlock *db = server->datablock + blockindex;
  460. errmsg = (*ll->writepage)(ll, buf + n, db->block, p);
  461. if(errmsg) {
  462. if(strcmp(errmsg, Eio) != 0) {
  463. /*
  464. * something horrid happened down below
  465. * recover without writing any more than we have to
  466. */
  467. if(p != 0) {
  468. /*
  469. * page 0 was either written already, or has been written in this loop
  470. * thus the block referenced is valid on the media. all we need to do
  471. * is lose the old block, mark the written pages as free (so they can
  472. * be scavenged), and don't bother with the log message
  473. */
  474. disposeofoldblock(server, &state);
  475. mask = logfsdatapagemask(p - pagebase - 1, pagebase);
  476. db->free |= mask;
  477. db->dirty |= mask;
  478. return errmsg;
  479. }
  480. /*
  481. * page 0 failed to write (so nothing written at all)
  482. * this is either an entirely free block (no erased block in savestate),
  483. * or a copy of a scavenged block (erased block in savestate)
  484. */
  485. if(state.oldblock < 0) {
  486. /*
  487. * newly selected erased block (blockindex == server->ndatablocks - 1)
  488. * mark it bad, lose it from the datablock table
  489. */
  490. (*ll->markblockbad)(ll, db->block);
  491. db->block = -1;
  492. if(blockindex == server->ndatablocks - 1)
  493. server->ndatablocks--;
  494. return errmsg;
  495. }
  496. /*
  497. * page 0 of a data scavenge copy
  498. * mark it bad, restore state (old block)
  499. */
  500. (*ll->markblockbad)(ll, db->block);
  501. db->block = state.oldblock;
  502. return errmsg;
  503. }
  504. /*
  505. * write error on target block
  506. *
  507. * if it is a replacement (state saved)
  508. * mark the new block bad, restore state and try again
  509. *
  510. * if it is not replaced (no state saved)
  511. * replace block, and try again
  512. */
  513. if(state.oldblock >= 0) {
  514. (*ll->markblockbad)(ll, db->block);
  515. db->block = state.oldblock;
  516. }
  517. else {
  518. errmsg = logfsserverreplacedatablock(server, blockindex);
  519. if(errmsg)
  520. return errmsg;
  521. }
  522. goto reallocate;
  523. }
  524. mask = logfsdatapagemask(1, p);
  525. db->free &= ~mask;
  526. db->dirty |= mask;
  527. }
  528. /* well, we managed to write the data out */
  529. errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
  530. muid, nil, &extent.flashaddr);
  531. /*
  532. * now we can dispose of the original data block, if any
  533. * this is regardless of whether we succeeded in writing a log message, as
  534. * if this block is not erased, there will be a duplicate
  535. */
  536. disposeofoldblock(server, &state);
  537. }
  538. else {
  539. if(thistime > count)
  540. thistime = count;
  541. errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
  542. muid, buf, &extent.flashaddr);
  543. }
  544. /*
  545. * here if we failed to write the log message
  546. */
  547. if(errmsg)
  548. return errmsg;
  549. if(server->trace > 1)
  550. print("logfsserverwrite: %d bytes at flashaddr 0x%.8ux\n", thistime, extent.flashaddr);
  551. extent.min = offset;
  552. extent.max = offset + thistime;
  553. errmsg = zappages(server, e, extent.min, extent.max);
  554. if(errmsg)
  555. return errmsg;
  556. errmsg = logfsextentlistinsert(e->u.file.extent, &extent, nil);
  557. if(errmsg)
  558. return errmsg;
  559. e->muid = muid;
  560. e->mtime = now;
  561. offset += thistime;
  562. if(e->u.file.length < offset)
  563. e->u.file.length = offset;
  564. count -= thistime;
  565. buf += thistime;
  566. e->qid.vers++;
  567. }
  568. return nil;
  569. }