disk.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. #include "error.h"
  5. static void diskThread(void *a);
  6. enum {
  7. QueueSize = 100, /* maximum block to queue */
  8. };
  9. struct Disk {
  10. VtLock *lk;
  11. int ref;
  12. int fd;
  13. Header h;
  14. VtRendez *flow;
  15. VtRendez *starve;
  16. VtRendez *flush;
  17. VtRendez *die;
  18. int nqueue;
  19. Block *cur; /* block to do on current scan */
  20. Block *next; /* blocks to do next scan */
  21. };
  22. /* keep in sync with Part* enum in dat.h */
  23. static char *partname[] = {
  24. [PartError] "error",
  25. [PartSuper] "super",
  26. [PartLabel] "label",
  27. [PartData] "data",
  28. [PartVenti] "venti",
  29. };
  30. Disk *
  31. diskAlloc(int fd)
  32. {
  33. u8int buf[HeaderSize];
  34. Header h;
  35. Disk *disk;
  36. if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){
  37. vtSetError("short read: %r");
  38. vtOSError();
  39. return nil;
  40. }
  41. if(!headerUnpack(&h, buf)){
  42. vtSetError("bad disk header");
  43. return nil;
  44. }
  45. disk = vtMemAllocZ(sizeof(Disk));
  46. disk->lk = vtLockAlloc();
  47. disk->starve = vtRendezAlloc(disk->lk);
  48. disk->flow = vtRendezAlloc(disk->lk);
  49. disk->flush = vtRendezAlloc(disk->lk);
  50. disk->fd = fd;
  51. disk->h = h;
  52. disk->ref = 2;
  53. vtThread(diskThread, disk);
  54. return disk;
  55. }
  56. void
  57. diskFree(Disk *disk)
  58. {
  59. diskFlush(disk);
  60. /* kill slave */
  61. vtLock(disk->lk);
  62. disk->die = vtRendezAlloc(disk->lk);
  63. vtWakeup(disk->starve);
  64. while(disk->ref > 1)
  65. vtSleep(disk->die);
  66. vtUnlock(disk->lk);
  67. vtRendezFree(disk->flow);
  68. vtRendezFree(disk->starve);
  69. vtRendezFree(disk->die);
  70. vtLockFree(disk->lk);
  71. close(disk->fd);
  72. vtMemFree(disk);
  73. }
  74. static u32int
  75. partStart(Disk *disk, int part)
  76. {
  77. switch(part){
  78. default:
  79. assert(0);
  80. case PartSuper:
  81. return disk->h.super;
  82. case PartLabel:
  83. return disk->h.label;
  84. case PartData:
  85. return disk->h.data;
  86. }
  87. }
  88. static u32int
  89. partEnd(Disk *disk, int part)
  90. {
  91. switch(part){
  92. default:
  93. assert(0);
  94. case PartSuper:
  95. return disk->h.super+1;
  96. case PartLabel:
  97. return disk->h.data;
  98. case PartData:
  99. return disk->h.end;
  100. }
  101. }
  102. int
  103. diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf)
  104. {
  105. ulong start, end;
  106. u64int offset;
  107. int n, nn;
  108. start = partStart(disk, part);
  109. end = partEnd(disk, part);
  110. if(addr >= end-start){
  111. vtSetError(EBadAddr);
  112. return 0;
  113. }
  114. offset = ((u64int)(addr + start))*disk->h.blockSize;
  115. n = disk->h.blockSize;
  116. while(n > 0){
  117. nn = pread(disk->fd, buf, n, offset);
  118. if(nn < 0){
  119. vtOSError();
  120. return 0;
  121. }
  122. if(nn == 0){
  123. vtSetError("eof reading disk");
  124. return 0;
  125. }
  126. n -= nn;
  127. offset += nn;
  128. buf += nn;
  129. }
  130. return 1;
  131. }
  132. int
  133. diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf)
  134. {
  135. ulong start, end;
  136. u64int offset;
  137. int n;
  138. start = partStart(disk, part);
  139. end = partEnd(disk, part);
  140. if(addr >= end - start){
  141. vtSetError(EBadAddr);
  142. return 0;
  143. }
  144. offset = ((u64int)(addr + start))*disk->h.blockSize;
  145. n = pwrite(disk->fd, buf, disk->h.blockSize, offset);
  146. if(n < 0){
  147. vtOSError();
  148. return 0;
  149. }
  150. if(n < disk->h.blockSize) {
  151. vtSetError("short write");
  152. return 0;
  153. }
  154. return 1;
  155. }
  156. static void
  157. diskQueue(Disk *disk, Block *b)
  158. {
  159. Block **bp, *bb;
  160. vtLock(disk->lk);
  161. while(disk->nqueue >= QueueSize)
  162. vtSleep(disk->flow);
  163. if(disk->cur == nil || b->addr > disk->cur->addr)
  164. bp = &disk->cur;
  165. else
  166. bp = &disk->next;
  167. for(bb=*bp; bb; bb=*bp){
  168. if(b->addr < bb->addr)
  169. break;
  170. bp = &bb->ionext;
  171. }
  172. b->ionext = bb;
  173. *bp = b;
  174. if(disk->nqueue == 0)
  175. vtWakeup(disk->starve);
  176. disk->nqueue++;
  177. vtUnlock(disk->lk);
  178. }
  179. void
  180. diskRead(Disk *disk, Block *b)
  181. {
  182. assert(b->iostate == BioEmpty || b->iostate == BioLabel);
  183. blockSetIOState(b, BioReading);
  184. diskQueue(disk, b);
  185. }
  186. void
  187. diskWrite(Disk *disk, Block *b)
  188. {
  189. assert(b->nlock == 1);
  190. assert(b->iostate == BioDirty);
  191. blockSetIOState(b, BioWriting);
  192. diskQueue(disk, b);
  193. }
  194. void
  195. diskWriteAndWait(Disk *disk, Block *b)
  196. {
  197. int nlock;
  198. /*
  199. * If b->nlock > 1, the block is aliased within
  200. * a single thread. That thread is us.
  201. * DiskWrite does some funny stuff with VtLock
  202. * and blockPut that basically assumes b->nlock==1.
  203. * We humor diskWrite by temporarily setting
  204. * nlock to 1. This needs to be revisited.
  205. */
  206. nlock = b->nlock;
  207. if(nlock > 1)
  208. b->nlock = 1;
  209. diskWrite(disk, b);
  210. while(b->iostate != BioClean)
  211. vtSleep(b->ioready);
  212. b->nlock = nlock;
  213. }
  214. int
  215. diskBlockSize(Disk *disk)
  216. {
  217. return disk->h.blockSize; /* immuttable */
  218. }
  219. int
  220. diskFlush(Disk *disk)
  221. {
  222. Dir dir;
  223. vtLock(disk->lk);
  224. while(disk->nqueue > 0)
  225. vtSleep(disk->flush);
  226. vtUnlock(disk->lk);
  227. /* there really should be a cleaner interface to flush an fd */
  228. nulldir(&dir);
  229. if(dirfwstat(disk->fd, &dir) < 0){
  230. vtOSError();
  231. return 0;
  232. }
  233. return 1;
  234. }
  235. u32int
  236. diskSize(Disk *disk, int part)
  237. {
  238. return partEnd(disk, part) - partStart(disk, part);
  239. }
  240. static uintptr
  241. mypc(int x)
  242. {
  243. return getcallerpc(&x);
  244. }
  245. static char *
  246. disk2file(Disk *disk)
  247. {
  248. static char buf[256];
  249. if (fd2path(disk->fd, buf, sizeof buf) < 0)
  250. strncpy(buf, "GOK", sizeof buf);
  251. return buf;
  252. }
  253. static void
  254. diskThread(void *a)
  255. {
  256. Disk *disk = a;
  257. Block *b;
  258. uchar *buf, *p;
  259. double t;
  260. int nio;
  261. vtThreadSetName("disk");
  262. //fprint(2, "diskThread %d\n", getpid());
  263. buf = vtMemAlloc(disk->h.blockSize);
  264. vtLock(disk->lk);
  265. nio = 0;
  266. t = -nsec();
  267. for(;;){
  268. while(disk->nqueue == 0){
  269. t += nsec();
  270. //if(nio >= 10000){
  271. //fprint(2, "disk: io=%d at %.3fms\n", nio, t*1e-6/nio);
  272. //nio = 0;
  273. //t = 0;
  274. //}
  275. if(disk->die != nil)
  276. goto Done;
  277. vtSleep(disk->starve);
  278. t -= nsec();
  279. }
  280. assert(disk->cur != nil || disk->next != nil);
  281. if(disk->cur == nil){
  282. disk->cur = disk->next;
  283. disk->next = nil;
  284. }
  285. b = disk->cur;
  286. disk->cur = b->ionext;
  287. vtUnlock(disk->lk);
  288. /*
  289. * no one should hold onto blocking in the
  290. * reading or writing state, so this lock should
  291. * not cause deadlock.
  292. */
  293. if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
  294. bwatchLock(b);
  295. vtLock(b->lk);
  296. b->pc = mypc(0);
  297. assert(b->nlock == 1);
  298. switch(b->iostate){
  299. default:
  300. abort();
  301. case BioReading:
  302. if(!diskReadRaw(disk, b->part, b->addr, b->data)){
  303. fprint(2, "fossil: diskReadRaw failed: %s: "
  304. "score %V: part=%s block %ud: %r\n",
  305. disk2file(disk), b->score,
  306. partname[b->part], b->addr);
  307. blockSetIOState(b, BioReadError);
  308. }else
  309. blockSetIOState(b, BioClean);
  310. break;
  311. case BioWriting:
  312. p = blockRollback(b, buf);
  313. /* NB: ctime result ends with a newline */
  314. if(!diskWriteRaw(disk, b->part, b->addr, p)){
  315. fprint(2, "fossil: diskWriteRaw failed: %s: "
  316. "score %V: date %s part=%s block %ud: %r\n",
  317. disk2file(disk), b->score,
  318. ctime(time(0)),
  319. partname[b->part], b->addr);
  320. break;
  321. }
  322. if(p != buf)
  323. blockSetIOState(b, BioClean);
  324. else
  325. blockSetIOState(b, BioDirty);
  326. break;
  327. }
  328. blockPut(b); /* remove extra reference, unlock */
  329. vtLock(disk->lk);
  330. disk->nqueue--;
  331. if(disk->nqueue == QueueSize-1)
  332. vtWakeup(disk->flow);
  333. if(disk->nqueue == 0)
  334. vtWakeup(disk->flush);
  335. nio++;
  336. }
  337. Done:
  338. //fprint(2, "diskThread done\n");
  339. disk->ref--;
  340. vtWakeup(disk->die);
  341. vtUnlock(disk->lk);
  342. vtMemFree(buf);
  343. }