icachewrite.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * Write the dirty icache entries to disk. Random seeks are
  11. * so expensive that it makes sense to wait until we have
  12. * a lot and then just make a sequential pass over the disk.
  13. */
  14. #include "stdinc.h"
  15. #include "dat.h"
  16. #include "fns.h"
  17. static void icachewriteproc(void*);
  18. static void icachewritecoord(void*);
  19. static IEntry *iesort(IEntry*);
  20. int icachesleeptime = 1000; /* milliseconds */
  21. int minicachesleeptime = 0;
  22. enum
  23. {
  24. Bufsize = 8*1024*1024
  25. };
  26. typedef struct IWrite IWrite;
  27. struct IWrite
  28. {
  29. Round round;
  30. AState as;
  31. };
  32. static IWrite iwrite;
  33. void
  34. initicachewrite(void)
  35. {
  36. int i;
  37. Index *ix;
  38. initround(&iwrite.round, "icache", 120*60*1000);
  39. ix = mainindex;
  40. for(i=0; i<ix->nsects; i++){
  41. ix->sects[i]->writechan = chancreate(sizeof(uint32_t), 1);
  42. ix->sects[i]->writedonechan = chancreate(sizeof(uint32_t), 1);
  43. vtproc(icachewriteproc, ix->sects[i]);
  44. }
  45. vtproc(icachewritecoord, nil);
  46. vtproc(delaykickroundproc, &iwrite.round);
  47. }
  48. static uint64_t
  49. ie2diskaddr(Index *ix, ISect *is, IEntry *ie)
  50. {
  51. uint64_t bucket, addr;
  52. bucket = hashbits(ie->score, 32)/ix->div;
  53. addr = is->blockbase + ((bucket - is->start) << is->blocklog);
  54. return addr;
  55. }
  56. static IEntry*
  57. nextchunk(Index *ix, ISect *is, IEntry **pie, uint64_t *paddr, uint *pnbuf)
  58. {
  59. uint64_t addr, naddr;
  60. uint nbuf;
  61. int bsize;
  62. IEntry *iefirst, *ie, **l;
  63. bsize = 1<<is->blocklog;
  64. iefirst = *pie;
  65. addr = ie2diskaddr(ix, is, iefirst);
  66. nbuf = 0;
  67. for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){
  68. naddr = ie2diskaddr(ix, is, ie);
  69. if(naddr - addr >= Bufsize)
  70. break;
  71. nbuf = naddr - addr;
  72. }
  73. nbuf += bsize;
  74. *l = nil;
  75. *pie = ie;
  76. *paddr = addr;
  77. *pnbuf = nbuf;
  78. return iefirst;
  79. }
  80. static int
  81. icachewritesect(Index *ix, ISect *is, uint8_t *buf)
  82. {
  83. int err, i, werr, h, bsize, t;
  84. uint32_t lo, hi;
  85. uint64_t addr, naddr;
  86. uint nbuf, off;
  87. DBlock *b;
  88. IBucket ib;
  89. IEntry *ie, *iedirty, **l, *chunk;
  90. lo = is->start * ix->div;
  91. if(TWID32/ix->div < is->stop)
  92. hi = TWID32;
  93. else
  94. hi = is->stop * ix->div - 1;
  95. trace(TraceProc, "icachewritesect enter %u %u %llu",
  96. lo, hi, iwrite.as.aa);
  97. iedirty = icachedirty(lo, hi, iwrite.as.aa);
  98. iedirty = iesort(iedirty);
  99. bsize = 1 << is->blocklog;
  100. err = 0;
  101. while(iedirty){
  102. disksched();
  103. while((t = icachesleeptime) == SleepForever){
  104. sleep(1000);
  105. disksched();
  106. }
  107. if(t < minicachesleeptime)
  108. t = minicachesleeptime;
  109. if(t > 0)
  110. sleep(t);
  111. trace(TraceProc, "icachewritesect nextchunk");
  112. chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
  113. trace(TraceProc, "icachewritesect readpart 0x%llux+0x%x",
  114. addr, nbuf);
  115. if(readpart(is->part, addr, buf, nbuf) < 0){
  116. fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
  117. "readpart: %r\n", argv0, is->part->name, addr);
  118. err = -1;
  119. continue;
  120. }
  121. trace(TraceProc, "icachewritesect updatebuf");
  122. addstat(StatIsectReadBytes, nbuf);
  123. addstat(StatIsectRead, 1);
  124. for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
  125. again:
  126. naddr = ie2diskaddr(ix, is, ie);
  127. off = naddr - addr;
  128. if(off+bsize > nbuf){
  129. fprint(2, "%s: whoops! addr=0x%llux nbuf=%u "
  130. "addr+nbuf=0x%llux naddr=0x%llux\n",
  131. argv0, addr, nbuf, addr+nbuf, naddr);
  132. assert(off+bsize <= nbuf);
  133. }
  134. unpackibucket(&ib, buf+off, is->bucketmagic);
  135. if(okibucket(&ib, is) < 0){
  136. fprint(2, "%s: bad bucket XXX\n", argv0);
  137. goto skipit;
  138. }
  139. trace(TraceProc, "icachewritesect add %V at 0x%llux",
  140. ie->score, naddr);
  141. h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
  142. if(h & 1){
  143. h ^= 1;
  144. packientry(ie, &ib.data[h]);
  145. }else if(ib.n < is->buckmax){
  146. memmove(&ib.data[h + IEntrySize], &ib.data[h],
  147. ib.n*IEntrySize - h);
  148. ib.n++;
  149. packientry(ie, &ib.data[h]);
  150. }else{
  151. fprint(2, "%s: bucket overflow XXX\n", argv0);
  152. skipit:
  153. err = -1;
  154. *l = ie->nextdirty;
  155. ie = *l;
  156. if(ie)
  157. goto again;
  158. else
  159. break;
  160. }
  161. packibucket(&ib, buf+off, is->bucketmagic);
  162. }
  163. diskaccess(1);
  164. trace(TraceProc, "icachewritesect writepart", addr, nbuf);
  165. werr = 0;
  166. if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
  167. werr = -1;
  168. for(i=0; i<nbuf; i+=bsize){
  169. if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
  170. memmove(b->data, buf+i, bsize);
  171. putdblock(b);
  172. }
  173. }
  174. if(werr < 0){
  175. fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
  176. "writepart: %r\n", argv0, is->part->name, addr);
  177. err = -1;
  178. continue;
  179. }
  180. addstat(StatIsectWriteBytes, nbuf);
  181. addstat(StatIsectWrite, 1);
  182. icacheclean(chunk);
  183. }
  184. trace(TraceProc, "icachewritesect done");
  185. return err;
  186. }
  187. static void
  188. icachewriteproc(void *v)
  189. {
  190. int ret;
  191. uint bsize;
  192. ISect *is;
  193. Index *ix;
  194. uint8_t *buf;
  195. ix = mainindex;
  196. is = v;
  197. threadsetname("icachewriteproc:%s", is->part->name);
  198. bsize = 1<<is->blocklog;
  199. buf = emalloc(Bufsize+bsize);
  200. buf = (uint8_t*)(((uintptr)buf+bsize-1)&~(uintptr)(bsize-1));
  201. for(;;){
  202. trace(TraceProc, "icachewriteproc recv");
  203. recv(is->writechan, 0);
  204. trace(TraceWork, "start");
  205. ret = icachewritesect(ix, is, buf);
  206. trace(TraceProc, "icachewriteproc send");
  207. trace(TraceWork, "finish");
  208. sendul(is->writedonechan, ret);
  209. }
  210. }
  211. static void
  212. icachewritecoord(void *v)
  213. {
  214. int i, err;
  215. Index *ix;
  216. AState as;
  217. USED(v);
  218. threadsetname("icachewritecoord");
  219. ix = mainindex;
  220. iwrite.as = icachestate();
  221. for(;;){
  222. trace(TraceProc, "icachewritecoord sleep");
  223. waitforkick(&iwrite.round);
  224. trace(TraceWork, "start");
  225. as = icachestate();
  226. if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
  227. /* will not be able to do anything more than last flush - kick disk */
  228. trace(TraceProc, "icachewritecoord kick dcache");
  229. kickdcache();
  230. trace(TraceProc, "icachewritecoord kicked dcache");
  231. goto SkipWork; /* won't do anything; don't bother rewriting bloom filter */
  232. }
  233. iwrite.as = as;
  234. trace(TraceProc, "icachewritecoord start flush");
  235. if(iwrite.as.arena){
  236. for(i=0; i<ix->nsects; i++)
  237. send(ix->sects[i]->writechan, 0);
  238. if(ix->bloom)
  239. send(ix->bloom->writechan, 0);
  240. err = 0;
  241. for(i=0; i<ix->nsects; i++)
  242. err |= recvul(ix->sects[i]->writedonechan);
  243. if(ix->bloom)
  244. err |= recvul(ix->bloom->writedonechan);
  245. trace(TraceProc, "icachewritecoord donewrite err=%d", err);
  246. if(err == 0){
  247. setatailstate(&iwrite.as);
  248. }
  249. }
  250. SkipWork:
  251. icacheclean(nil); /* wake up anyone waiting */
  252. trace(TraceWork, "finish");
  253. addstat(StatIcacheFlush, 1);
  254. }
  255. }
  256. void
  257. flushicache(void)
  258. {
  259. trace(TraceProc, "flushicache enter");
  260. kickround(&iwrite.round, 1);
  261. trace(TraceProc, "flushicache exit");
  262. }
  263. void
  264. kickicache(void)
  265. {
  266. kickround(&iwrite.round, 0);
  267. }
  268. void
  269. delaykickicache(void)
  270. {
  271. delaykickround(&iwrite.round);
  272. }
  273. static IEntry*
  274. iesort(IEntry *ie)
  275. {
  276. int cmp;
  277. IEntry **l;
  278. IEntry *ie1, *ie2, *sorted;
  279. if(ie == nil || ie->nextdirty == nil)
  280. return ie;
  281. /* split the lists */
  282. ie1 = ie;
  283. ie2 = ie;
  284. if(ie2)
  285. ie2 = ie2->nextdirty;
  286. if(ie2)
  287. ie2 = ie2->nextdirty;
  288. while(ie1 && ie2){
  289. ie1 = ie1->nextdirty;
  290. ie2 = ie2->nextdirty;
  291. if(ie2)
  292. ie2 = ie2->nextdirty;
  293. }
  294. if(ie1){
  295. ie2 = ie1->nextdirty;
  296. ie1->nextdirty = nil;
  297. }
  298. /* sort the lists */
  299. ie1 = iesort(ie);
  300. ie2 = iesort(ie2);
  301. /* merge the lists */
  302. sorted = nil;
  303. l = &sorted;
  304. cmp = 0;
  305. while(ie1 || ie2){
  306. if(ie1 && ie2)
  307. cmp = scorecmp(ie1->score, ie2->score);
  308. if(ie1==nil || (ie2 && cmp > 0)){
  309. *l = ie2;
  310. l = &ie2->nextdirty;
  311. ie2 = ie2->nextdirty;
  312. }else{
  313. *l = ie1;
  314. l = &ie1->nextdirty;
  315. ie1 = ie1->nextdirty;
  316. }
  317. }
  318. *l = nil;
  319. return sorted;
  320. }