disk.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. #include "error.h"
  /* Entry point of the per-disk I/O thread; defined later in this file. */
  static void diskThread(void *a);

  enum {
      /* diskQueue sleeps while this many blocks are already queued */
      QueueSize = 100,
  };
  9. struct Disk {
  10. VtLock *lk;
  11. int ref;
  12. int fd;
  13. Header h;
  14. VtRendez *flow;
  15. VtRendez *starve;
  16. VtRendez *flush;
  17. VtRendez *die;
  18. int nqueue;
  19. Block *cur; /* block to do on current scan */
  20. Block *next; /* blocks to do next scan */
  21. };
  22. Disk *
  23. diskAlloc(int fd)
  24. {
  25. u8int buf[HeaderSize];
  26. Header h;
  27. Disk *disk;
  28. if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){
  29. vtSetError("short read: %r");
  30. vtOSError();
  31. return nil;
  32. }
  33. if(!headerUnpack(&h, buf)){
  34. vtSetError("bad disk header");
  35. return nil;
  36. }
  37. disk = vtMemAllocZ(sizeof(Disk));
  38. disk->lk = vtLockAlloc();
  39. disk->starve = vtRendezAlloc(disk->lk);
  40. disk->flow = vtRendezAlloc(disk->lk);
  41. disk->flush = vtRendezAlloc(disk->lk);
  42. disk->fd = fd;
  43. disk->h = h;
  44. disk->ref = 2;
  45. vtThread(diskThread, disk);
  46. return disk;
  47. }
  48. void
  49. diskFree(Disk *disk)
  50. {
  51. diskFlush(disk);
  52. /* kill slave */
  53. vtLock(disk->lk);
  54. disk->die = vtRendezAlloc(disk->lk);
  55. vtWakeup(disk->starve);
  56. while(disk->ref > 1)
  57. vtSleep(disk->die);
  58. vtUnlock(disk->lk);
  59. vtRendezFree(disk->flow);
  60. vtRendezFree(disk->starve);
  61. vtRendezFree(disk->die);
  62. vtLockFree(disk->lk);
  63. close(disk->fd);
  64. vtMemFree(disk);
  65. }
  66. static u32int
  67. partStart(Disk *disk, int part)
  68. {
  69. switch(part){
  70. default:
  71. assert(0);
  72. case PartSuper:
  73. return disk->h.super;
  74. case PartLabel:
  75. return disk->h.label;
  76. case PartData:
  77. return disk->h.data;
  78. }
  79. }
  80. static u32int
  81. partEnd(Disk *disk, int part)
  82. {
  83. switch(part){
  84. default:
  85. assert(0);
  86. case PartSuper:
  87. return disk->h.super+1;
  88. case PartLabel:
  89. return disk->h.data;
  90. case PartData:
  91. return disk->h.end;
  92. }
  93. }
  94. int
  95. diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf)
  96. {
  97. ulong start, end;
  98. u64int offset;
  99. int n, nn;
  100. start = partStart(disk, part);
  101. end = partEnd(disk, part);
  102. if(addr >= end-start){
  103. vtSetError(EBadAddr);
  104. return 0;
  105. }
  106. offset = ((u64int)(addr + start))*disk->h.blockSize;
  107. n = disk->h.blockSize;
  108. while(n > 0){
  109. nn = pread(disk->fd, buf, n, offset);
  110. if(nn < 0){
  111. vtOSError();
  112. return 0;
  113. }
  114. if(nn == 0){
  115. vtSetError("eof reading disk");
  116. return 0;
  117. }
  118. n -= nn;
  119. offset += nn;
  120. buf += nn;
  121. }
  122. return 1;
  123. }
  124. int
  125. diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf)
  126. {
  127. ulong start, end;
  128. u64int offset;
  129. int n;
  130. start = partStart(disk, part);
  131. end = partEnd(disk, part);
  132. if(addr >= end-start){
  133. vtSetError(EBadAddr);
  134. return 0;
  135. }
  136. offset = ((u64int)(addr + start))*disk->h.blockSize;
  137. n = pwrite(disk->fd, buf, disk->h.blockSize, offset);
  138. if(n < 0){
  139. vtOSError();
  140. return 0;
  141. }
  142. if(n < disk->h.blockSize) {
  143. vtSetError("short write");
  144. return 0;
  145. }
  146. return 1;
  147. }
  148. static void
  149. diskQueue(Disk *disk, Block *b)
  150. {
  151. Block **bp, *bb;
  152. vtLock(disk->lk);
  153. while(disk->nqueue >= QueueSize)
  154. vtSleep(disk->flow);
  155. if(disk->cur == nil || b->addr > disk->cur->addr)
  156. bp = &disk->cur;
  157. else
  158. bp = &disk->next;
  159. for(bb=*bp; bb; bb=*bp){
  160. if(b->addr < bb->addr)
  161. break;
  162. bp = &bb->ionext;
  163. }
  164. b->ionext = bb;
  165. *bp = b;
  166. if(disk->nqueue == 0)
  167. vtWakeup(disk->starve);
  168. disk->nqueue++;
  169. vtUnlock(disk->lk);
  170. }
  171. void
  172. diskRead(Disk *disk, Block *b)
  173. {
  174. assert(b->iostate == BioEmpty || b->iostate == BioLabel);
  175. blockSetIOState(b, BioReading);
  176. diskQueue(disk, b);
  177. }
  178. void
  179. diskWrite(Disk *disk, Block *b)
  180. {
  181. assert(b->nlock == 1);
  182. assert(b->iostate == BioDirty);
  183. blockSetIOState(b, BioWriting);
  184. diskQueue(disk, b);
  185. }
  186. void
  187. diskWriteAndWait(Disk *disk, Block *b)
  188. {
  189. int nlock;
  190. /*
  191. * If b->nlock > 1, the block is aliased within
  192. * a single thread. That thread is us.
  193. * DiskWrite does some funny stuff with VtLock
  194. * and blockPut that basically assumes b->nlock==1.
  195. * We humor diskWrite by temporarily setting
  196. * nlock to 1. This needs to be revisited.
  197. */
  198. nlock = b->nlock;
  199. if(nlock > 1)
  200. b->nlock = 1;
  201. diskWrite(disk, b);
  202. while(b->iostate != BioClean)
  203. vtSleep(b->ioready);
  204. b->nlock = nlock;
  205. }
  206. int
  207. diskBlockSize(Disk *disk)
  208. {
  209. return disk->h.blockSize; /* immuttable */
  210. }
  211. int
  212. diskFlush(Disk *disk)
  213. {
  214. Dir dir;
  215. vtLock(disk->lk);
  216. while(disk->nqueue > 0)
  217. vtSleep(disk->flush);
  218. vtUnlock(disk->lk);
  219. /* there really should be a cleaner interface to flush an fd */
  220. nulldir(&dir);
  221. if(dirfwstat(disk->fd, &dir) < 0){
  222. vtOSError();
  223. return 0;
  224. }
  225. return 1;
  226. }
  227. u32int
  228. diskSize(Disk *disk, int part)
  229. {
  230. return partEnd(disk, part) - partStart(disk, part);
  231. }
  232. static uintptr
  233. mypc(int x)
  234. {
  235. return getcallerpc(&x);
  236. }
  237. static void
  238. diskThread(void *a)
  239. {
  240. Disk *disk = a;
  241. Block *b;
  242. uchar *buf, *p;
  243. double t;
  244. int nio;
  245. vtThreadSetName("disk");
  246. //fprint(2, "diskThread %d\n", getpid());
  247. buf = vtMemAlloc(disk->h.blockSize);
  248. vtLock(disk->lk);
  249. nio = 0;
  250. t = -nsec();
  251. for(;;){
  252. while(disk->nqueue == 0){
  253. t += nsec();
  254. //if(nio >= 10000){
  255. //fprint(2, "disk: io=%d at %.3fms\n", nio, t*1e-6/nio);
  256. //nio = 0;
  257. //t = 0.;
  258. //}
  259. if(disk->die != nil)
  260. goto Done;
  261. vtSleep(disk->starve);
  262. t -= nsec();
  263. }
  264. assert(disk->cur != nil || disk->next != nil);
  265. if(disk->cur == nil){
  266. disk->cur = disk->next;
  267. disk->next = nil;
  268. }
  269. b = disk->cur;
  270. disk->cur = b->ionext;
  271. vtUnlock(disk->lk);
  272. /*
  273. * no one should hold onto blocking in the
  274. * reading or writing state, so this lock should
  275. * not cause deadlock.
  276. */
  277. if(0)fprint(2, "diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
  278. bwatchLock(b);
  279. vtLock(b->lk);
  280. b->pc = mypc(0);
  281. assert(b->nlock == 1);
  282. switch(b->iostate){
  283. default:
  284. abort();
  285. case BioReading:
  286. if(!diskReadRaw(disk, b->part, b->addr, b->data)){
  287. fprint(2, "diskReadRaw failed: part=%d addr=%ux: %r\n", b->part, b->addr);
  288. blockSetIOState(b, BioReadError);
  289. }else
  290. blockSetIOState(b, BioClean);
  291. break;
  292. case BioWriting:
  293. p = blockRollback(b, buf);
  294. if(!diskWriteRaw(disk, b->part, b->addr, p)){
  295. fprint(2, "diskWriteRaw failed: date=%s part=%d addr=%ux: %r\n", ctime(times(0)), b->part, b->addr);
  296. break;
  297. }
  298. if(p != buf)
  299. blockSetIOState(b, BioClean);
  300. else
  301. blockSetIOState(b, BioDirty);
  302. break;
  303. }
  304. blockPut(b); /* remove extra reference, unlock */
  305. vtLock(disk->lk);
  306. disk->nqueue--;
  307. if(disk->nqueue == QueueSize-1)
  308. vtWakeup(disk->flow);
  309. if(disk->nqueue == 0)
  310. vtWakeup(disk->flush);
  311. nio++;
  312. }
  313. Done:
  314. //fprint(2, "diskThread done\n");
  315. disk->ref--;
  316. vtWakeup(disk->die);
  317. vtUnlock(disk->lk);
  318. vtMemFree(buf);
  319. }