/* disk.c */
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "stdinc.h"
  10. #include "dat.h"
  11. #include "fns.h"
  12. #include "error.h"
  13. static void diskThread(void *a);
  14. enum {
  15. /*
  16. * disable measurement since it gets alignment faults on BG
  17. * and the guts used to be commented out.
  18. */
  19. Timing = 0, /* flag */
  20. QueueSize = 100, /* maximum block to queue */
  21. };
  22. struct Disk {
  23. VtLock *lk;
  24. int ref;
  25. int fd;
  26. Header h;
  27. VtRendez *flow;
  28. VtRendez *starve;
  29. VtRendez *flush;
  30. VtRendez *die;
  31. int nqueue;
  32. Block *cur; /* block to do on current scan */
  33. Block *next; /* blocks to do next scan */
  34. };
  35. /* keep in sync with Part* enum in dat.h */
  36. static char *partname[] = {
  37. [PartError] = "error",
  38. [PartSuper] = "super",
  39. [PartLabel] = "label",
  40. [PartData] = "data",
  41. [PartVenti] = "venti",
  42. };
  43. Disk *
  44. diskAlloc(int fd)
  45. {
  46. uint8_t buf[HeaderSize];
  47. Header h;
  48. Disk *disk;
  49. if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){
  50. vtSetError("short read: %r");
  51. vtOSError();
  52. return nil;
  53. }
  54. if(!headerUnpack(&h, buf)){
  55. vtSetError("bad disk header");
  56. return nil;
  57. }
  58. disk = vtMemAllocZ(sizeof(Disk));
  59. disk->lk = vtLockAlloc();
  60. disk->starve = vtRendezAlloc(disk->lk);
  61. disk->flow = vtRendezAlloc(disk->lk);
  62. disk->flush = vtRendezAlloc(disk->lk);
  63. disk->fd = fd;
  64. disk->h = h;
  65. disk->ref = 2;
  66. vtThread(diskThread, disk);
  67. return disk;
  68. }
  69. void
  70. diskFree(Disk *disk)
  71. {
  72. diskFlush(disk);
  73. /* kill slave */
  74. vtLock(disk->lk);
  75. disk->die = vtRendezAlloc(disk->lk);
  76. vtWakeup(disk->starve);
  77. while(disk->ref > 1)
  78. vtSleep(disk->die);
  79. vtUnlock(disk->lk);
  80. vtRendezFree(disk->flow);
  81. vtRendezFree(disk->starve);
  82. vtRendezFree(disk->die);
  83. vtLockFree(disk->lk);
  84. close(disk->fd);
  85. vtMemFree(disk);
  86. }
  87. static uint32_t
  88. partStart(Disk *disk, int part)
  89. {
  90. switch(part){
  91. default:
  92. assert(0);
  93. case PartSuper:
  94. return disk->h.super;
  95. case PartLabel:
  96. return disk->h.label;
  97. case PartData:
  98. return disk->h.data;
  99. }
  100. }
  101. static uint32_t
  102. partEnd(Disk *disk, int part)
  103. {
  104. switch(part){
  105. default:
  106. assert(0);
  107. case PartSuper:
  108. return disk->h.super+1;
  109. case PartLabel:
  110. return disk->h.data;
  111. case PartData:
  112. return disk->h.end;
  113. }
  114. }
  115. int
  116. diskReadRaw(Disk *disk, int part, uint32_t addr, uint8_t *buf)
  117. {
  118. uint32_t start, end;
  119. uint64_t offset;
  120. int n, nn;
  121. start = partStart(disk, part);
  122. end = partEnd(disk, part);
  123. //print("Inicio: %u Fin: %u\n", start, end);
  124. //print("Diferencia: %u >= %d\n", addr, end-start);
  125. if(addr >= end-start){
  126. vtSetError(EBadAddr);
  127. return 0;
  128. }
  129. offset = ((uint64_t)(addr + start))*disk->h.blockSize;
  130. //print("Offset: %u Blocksize: %u \n",offset, disk->h.blockSize);
  131. n = disk->h.blockSize;
  132. while(n > 0){
  133. nn = pread(disk->fd, buf, n, offset);
  134. if(nn < 0){
  135. vtOSError();
  136. return 0;
  137. }
  138. if(nn == 0){
  139. vtSetError("eof reading disk");
  140. return 0;
  141. }
  142. n -= nn;
  143. offset += nn;
  144. buf += nn;
  145. }
  146. return 1;
  147. }
  148. int
  149. diskWriteRaw(Disk *disk, int part, uint32_t addr, uint8_t *buf)
  150. {
  151. uint32_t start, end;
  152. uint64_t offset;
  153. int n;
  154. start = partStart(disk, part);
  155. end = partEnd(disk, part);
  156. if(addr >= end - start){
  157. vtSetError(EBadAddr);
  158. return 0;
  159. }
  160. offset = ((uint64_t)(addr + start))*disk->h.blockSize;
  161. n = pwrite(disk->fd, buf, disk->h.blockSize, offset);
  162. if(n < 0){
  163. vtOSError();
  164. return 0;
  165. }
  166. if(n < disk->h.blockSize) {
  167. vtSetError("short write");
  168. return 0;
  169. }
  170. return 1;
  171. }
  172. static void
  173. diskQueue(Disk *disk, Block *b)
  174. {
  175. Block **bp, *bb;
  176. vtLock(disk->lk);
  177. while(disk->nqueue >= QueueSize)
  178. vtSleep(disk->flow);
  179. if(disk->cur == nil || b->addr > disk->cur->addr)
  180. bp = &disk->cur;
  181. else
  182. bp = &disk->next;
  183. for(bb=*bp; bb; bb=*bp){
  184. if(b->addr < bb->addr)
  185. break;
  186. bp = &bb->ionext;
  187. }
  188. b->ionext = bb;
  189. *bp = b;
  190. if(disk->nqueue == 0)
  191. vtWakeup(disk->starve);
  192. disk->nqueue++;
  193. vtUnlock(disk->lk);
  194. }
  195. void
  196. diskRead(Disk *disk, Block *b)
  197. {
  198. assert(b->iostate == BioEmpty || b->iostate == BioLabel);
  199. blockSetIOState(b, BioReading);
  200. diskQueue(disk, b);
  201. }
  202. void
  203. diskWrite(Disk *disk, Block *b)
  204. {
  205. assert(b->nlock == 1);
  206. assert(b->iostate == BioDirty);
  207. blockSetIOState(b, BioWriting);
  208. diskQueue(disk, b);
  209. }
  210. void
  211. diskWriteAndWait(Disk *disk, Block *b)
  212. {
  213. int nlock;
  214. /*
  215. * If b->nlock > 1, the block is aliased within
  216. * a single thread. That thread is us.
  217. * DiskWrite does some funny stuff with VtLock
  218. * and blockPut that basically assumes b->nlock==1.
  219. * We humor diskWrite by temporarily setting
  220. * nlock to 1. This needs to be revisited.
  221. */
  222. nlock = b->nlock;
  223. if(nlock > 1)
  224. b->nlock = 1;
  225. diskWrite(disk, b);
  226. while(b->iostate != BioClean)
  227. vtSleep(b->ioready);
  228. b->nlock = nlock;
  229. }
  230. int
  231. diskBlockSize(Disk *disk)
  232. {
  233. return disk->h.blockSize; /* immuttable */
  234. }
  235. int
  236. diskFlush(Disk *disk)
  237. {
  238. Dir dir;
  239. vtLock(disk->lk);
  240. while(disk->nqueue > 0)
  241. vtSleep(disk->flush);
  242. vtUnlock(disk->lk);
  243. /* there really should be a cleaner interface to flush an fd */
  244. nulldir(&dir);
  245. if(dirfwstat(disk->fd, &dir) < 0){
  246. vtOSError();
  247. return 0;
  248. }
  249. return 1;
  250. }
  251. uint32_t
  252. diskSize(Disk *disk, int part)
  253. {
  254. return partEnd(disk, part) - partStart(disk, part);
  255. }
  256. static uintptr
  257. mypc(int x)
  258. {
  259. return getcallerpc();
  260. }
  261. static char *
  262. disk2file(Disk *disk)
  263. {
  264. static char buf[256];
  265. if (fd2path(disk->fd, buf, sizeof buf) < 0)
  266. strncpy(buf, "GOK", sizeof buf);
  267. return buf;
  268. }
  269. static void
  270. diskThread(void *a)
  271. {
  272. Disk *disk = a;
  273. Block *b;
  274. uint8_t *buf, *p;
  275. double t;
  276. int nio;
  277. vtThreadSetName("disk");
  278. //fprint(2, "diskThread %d\n", getpid());
  279. buf = vtMemAlloc(disk->h.blockSize);
  280. vtLock(disk->lk);
  281. if (Timing) {
  282. nio = 0;
  283. t = -nsec();
  284. }
  285. for(;;){
  286. while(disk->nqueue == 0){
  287. if (Timing) {
  288. t += nsec();
  289. if(nio >= 10000){
  290. fprint(2, "disk: io=%d at %.3fms\n",
  291. nio, t*1e-6/nio);
  292. nio = 0;
  293. t = 0;
  294. }
  295. }
  296. if(disk->die != nil)
  297. goto Done;
  298. vtSleep(disk->starve);
  299. if (Timing)
  300. t -= nsec();
  301. }
  302. assert(disk->cur != nil || disk->next != nil);
  303. if(disk->cur == nil){
  304. disk->cur = disk->next;
  305. disk->next = nil;
  306. }
  307. b = disk->cur;
  308. disk->cur = b->ionext;
  309. vtUnlock(disk->lk);
  310. /*
  311. * no one should hold onto blocking in the
  312. * reading or writing state, so this lock should
  313. * not cause deadlock.
  314. */
  315. if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
  316. bwatchLock(b);
  317. vtLock(b->lk);
  318. b->pc = mypc(0);
  319. assert(b->nlock == 1);
  320. switch(b->iostate){
  321. default:
  322. abort();
  323. case BioReading:
  324. if(!diskReadRaw(disk, b->part, b->addr, b->data)){
  325. fprint(2, "fossil: diskReadRaw failed: %s: "
  326. "score %V: part=%s block %u: %r\n",
  327. disk2file(disk), b->score,
  328. partname[b->part], b->addr);
  329. blockSetIOState(b, BioReadError);
  330. }else
  331. blockSetIOState(b, BioClean);
  332. break;
  333. case BioWriting:
  334. p = blockRollback(b, buf);
  335. /* NB: ctime result ends with a newline */
  336. if(!diskWriteRaw(disk, b->part, b->addr, p)){
  337. fprint(2, "fossil: diskWriteRaw failed: %s: "
  338. "score %V: date %s part=%s block %u: %r\n",
  339. disk2file(disk), b->score,
  340. ctime(time(0)),
  341. partname[b->part], b->addr);
  342. break;
  343. }
  344. if(p != buf)
  345. blockSetIOState(b, BioClean);
  346. else
  347. blockSetIOState(b, BioDirty);
  348. break;
  349. }
  350. blockPut(b); /* remove extra reference, unlock */
  351. vtLock(disk->lk);
  352. disk->nqueue--;
  353. if(disk->nqueue == QueueSize-1)
  354. vtWakeup(disk->flow);
  355. if(disk->nqueue == 0)
  356. vtWakeup(disk->flush);
  357. if(Timing)
  358. nio++;
  359. }
  360. Done:
  361. //fprint(2, "diskThread done\n");
  362. disk->ref--;
  363. vtWakeup(disk->die);
  364. vtUnlock(disk->lk);
  365. vtMemFree(buf);
  366. }