devfs.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699
  1. /*
  2. * File system devices.
  3. * Follows device config in Ken's file server.
  4. * Builds mirrors, concatenations, interleavings, and partitions
  5. * of devices out of other (inner) devices.
  6. */
  7. #include "u.h"
  8. #include "../port/lib.h"
  9. #include "mem.h"
  10. #include "dat.h"
  11. #include "fns.h"
  12. #include "io.h"
  13. #include "ureg.h"
  14. #include "../port/error.h"
  15. enum {
  16. Fmirror, /* mirror of others */
  17. Fcat, /* catenation of others */
  18. Finter, /* interleaving of others */
  19. Fpart, /* part of others */
  20. Fclear, /* start over */
  21. Blksize = 8*1024, /* for Finter only */
  22. Qtop = 0, /* top dir (contains "fs") */
  23. Qdir, /* actual dir */
  24. Qctl, /* ctl file */
  25. Qfirst, /* first fs file */
  26. Iswrite = 0,
  27. Isread,
  28. /* tunable parameters */
  29. Maxconf = 4*1024, /* max length for config */
  30. Ndevs = 32, /* max. inner devs per command */
  31. Nfsdevs = 128, /* max. created devs, total */
  32. };
  33. #define Cfgstr "fsdev:\n"
  34. typedef struct Inner Inner;
  35. struct Inner
  36. {
  37. char *iname; /* inner device name */
  38. vlong isize; /* size of inner device */
  39. Chan *idev; /* inner device */
  40. };
  41. typedef struct Fsdev Fsdev;
  42. struct Fsdev
  43. {
  44. int type;
  45. char *name; /* name for this fsdev */
  46. vlong size; /* min(inner[X].isize) */
  47. vlong start; /* start address (for Fpart) */
  48. int ndevs; /* number of inner devices */
  49. Inner inner[Ndevs];
  50. };
  51. extern Dev fsdevtab; /* forward */
  52. /*
  53. * Once configured, a fsdev is never removed. The name of those
  54. * configured is never nil. We have no locks here.
  55. */
  56. static Fsdev fsdev[Nfsdevs];
  57. static Qid tqid = {Qtop, 0, QTDIR};
  58. static Qid dqid = {Qdir, 0, QTDIR};
  59. static Qid cqid = {Qctl, 0, 0};
  60. static Cmdtab configs[] = {
  61. Fmirror,"mirror", 0,
  62. Fcat, "cat", 0,
  63. Finter, "inter", 0,
  64. Fpart, "part", 5,
  65. Fclear, "clear", 1,
  66. };
  67. static char confstr[Maxconf];
  68. static int configed;
  69. static Fsdev*
  70. path2dev(int i, int mustexist)
  71. {
  72. if (i < 0 || i >= nelem(fsdev))
  73. error("bug: bad index in devfsdev");
  74. if (mustexist && fsdev[i].name == nil)
  75. error(Enonexist);
  76. if (fsdev[i].name == nil)
  77. return nil;
  78. else
  79. return &fsdev[i];
  80. }
  81. static Fsdev*
  82. devalloc(void)
  83. {
  84. int i;
  85. for (i = 0; i < nelem(fsdev); i++)
  86. if (fsdev[i].name == nil)
  87. break;
  88. if (i == nelem(fsdev))
  89. error(Enodev);
  90. return &fsdev[i];
  91. }
  92. static void
  93. setdsize(Fsdev* mp)
  94. {
  95. int i;
  96. long l;
  97. uchar buf[128]; /* old DIRLEN plus a little should be plenty */
  98. Dir d;
  99. Inner *in;
  100. if (mp->type != Fpart){
  101. mp->start= 0;
  102. mp->size = 0;
  103. }
  104. for (i = 0; i < mp->ndevs; i++){
  105. in = &mp->inner[i];
  106. l = devtab[in->idev->type]->stat(in->idev, buf, sizeof buf);
  107. convM2D(buf, l, &d, nil);
  108. in->isize = d.length;
  109. switch(mp->type){
  110. case Fmirror:
  111. if (mp->size == 0 || mp->size > d.length)
  112. mp->size = d.length;
  113. break;
  114. case Fcat:
  115. mp->size += d.length;
  116. break;
  117. case Finter:
  118. /* truncate to multiple of Blksize */
  119. d.length &= ~(Blksize-1);
  120. in->isize = d.length;
  121. mp->size += d.length;
  122. break;
  123. case Fpart:
  124. /* should raise errors here? */
  125. if (mp->start > d.length)
  126. mp->start = d.length;
  127. if (d.length < mp->start + mp->size)
  128. mp->size = d.length - mp->start;
  129. break;
  130. }
  131. }
  132. }
  133. static void
  134. mpshut(Fsdev *mp)
  135. {
  136. int i;
  137. char *nm;
  138. nm = mp->name;
  139. mp->name = nil; /* prevent others from using this. */
  140. if (nm)
  141. free(nm);
  142. for (i = 0; i < mp->ndevs; i++){
  143. if (mp->inner[i].idev != nil)
  144. cclose(mp->inner[i].idev);
  145. if (mp->inner[i].iname)
  146. free(mp->inner[i].iname);
  147. }
  148. memset(mp, 0, sizeof *mp);
  149. }
  150. static void
  151. mconfig(char* a, long n) /* "name idev0 idev1" */
  152. {
  153. int i;
  154. vlong size, start;
  155. char *c, *oldc;
  156. Cmdbuf *cb;
  157. Cmdtab *ct;
  158. Fsdev *mp;
  159. Inner *inprv;
  160. static QLock lck;
  161. size = 0;
  162. start = 0;
  163. if (confstr[0] == 0)
  164. seprint(confstr, confstr + sizeof confstr, Cfgstr);
  165. mp = nil;
  166. cb = nil;
  167. oldc = confstr + strlen(confstr);
  168. if (*a == '\0' || *a == '#' || *a == '\n')
  169. return;
  170. qlock(&lck);
  171. if (waserror()){
  172. *oldc = 0;
  173. if (mp != nil)
  174. mpshut(mp);
  175. qunlock(&lck);
  176. if (cb)
  177. free(cb);
  178. nexterror();
  179. }
  180. cb = parsecmd(a, n);
  181. c = oldc;
  182. for (i = 0; i < cb->nf; i++)
  183. c = seprint(c, confstr + sizeof confstr, "%s ", cb->f[i]);
  184. if (c > confstr)
  185. c[-1] = '\n';
  186. ct = lookupcmd(cb, configs, nelem(configs));
  187. cb->f++; /* skip command */
  188. cb->nf--;
  189. if (cb->nf < 0) /* nothing to see here, move along */
  190. ct->index = -1;
  191. switch (ct->index) {
  192. case Fpart:
  193. if (cb->nf < 4)
  194. error("too few fields in fs config");
  195. start = strtoll(cb->f[2], nil, 10);
  196. size = strtoll(cb->f[3], nil, 10);
  197. cb->nf -= 2;
  198. break;
  199. case Fclear:
  200. for (mp = fsdev; mp < fsdev + nelem(fsdev); mp++)
  201. mpshut(mp);
  202. *confstr = '\0';
  203. /* FALL THROUGH */
  204. case -1:
  205. poperror();
  206. qunlock(&lck);
  207. free(cb);
  208. return;
  209. }
  210. if (cb->nf < 2)
  211. error("too few fields in fs config");
  212. /* reject name if already in use */
  213. for (i = 0; i < nelem(fsdev); i++)
  214. if (fsdev[i].name != nil && strcmp(fsdev[i].name, cb->f[0])==0)
  215. error(Eexist);
  216. if (cb->nf - 1 > Ndevs)
  217. error("too many devices; fix #k: increase Ndevs");
  218. for (i = 0; i < cb->nf; i++)
  219. validname(cb->f[i], (i != 0));
  220. mp = devalloc();
  221. mp->type = ct->index;
  222. if (mp->type == Fpart){
  223. mp->start = start;
  224. mp->size = size;
  225. }
  226. kstrdup(&mp->name, cb->f[0]);
  227. for (i = 1; i < cb->nf; i++){
  228. inprv = &mp->inner[i-1];
  229. kstrdup(&inprv->iname, cb->f[i]);
  230. inprv->idev = namec(inprv->iname, Aopen, ORDWR, 0);
  231. if (inprv->idev == nil) {
  232. free(mp->name);
  233. mp->name = nil; /* free mp */
  234. error(Egreg);
  235. }
  236. mp->ndevs++;
  237. }
  238. setdsize(mp);
  239. configed = 1;
  240. poperror();
  241. qunlock(&lck);
  242. free(cb);
  243. }
  244. static void
  245. rdconf(void)
  246. {
  247. int mustrd;
  248. char *c, *e, *p, *s;
  249. Chan *cc;
  250. Chan **ccp;
  251. s = getconf("fsconfig");
  252. if (s == nil){
  253. mustrd = 0;
  254. s = "/dev/sdC0/fscfg";
  255. } else
  256. mustrd = 1;
  257. ccp = &cc;
  258. *ccp = nil;
  259. c = nil;
  260. if (waserror()){
  261. configed = 1;
  262. if (*ccp != nil)
  263. cclose(*ccp);
  264. if (c)
  265. free(c);
  266. if (!mustrd)
  267. return;
  268. nexterror();
  269. }
  270. *ccp = namec(s, Aopen, OREAD, 0);
  271. devtab[(*ccp)->type]->read(*ccp, confstr, sizeof confstr, 0);
  272. cclose(*ccp);
  273. *ccp = nil;
  274. if (strncmp(confstr, Cfgstr, strlen(Cfgstr)) != 0)
  275. error("bad #k config, first line must be: 'fsdev:\\n'");
  276. kstrdup(&c, confstr + strlen(Cfgstr));
  277. memset(confstr, 0, sizeof confstr);
  278. for (p = c; p != nil && *p != 0; p = e){
  279. e = strchr(p, '\n');
  280. if (e == nil)
  281. e = p + strlen(p);
  282. if (e == p) {
  283. e++;
  284. continue;
  285. }
  286. mconfig(p, e - p);
  287. }
  288. poperror();
  289. }
  290. static int
  291. mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
  292. {
  293. Qid qid;
  294. Fsdev *mp;
  295. if (c->qid.path == Qtop)
  296. switch(i){
  297. case DEVDOTDOT:
  298. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
  299. return 1;
  300. case 0:
  301. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
  302. return 1;
  303. default:
  304. return -1;
  305. }
  306. if (c->qid.path != Qdir)
  307. switch(i){
  308. case DEVDOTDOT:
  309. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
  310. return 1;
  311. default:
  312. return -1;
  313. }
  314. switch(i){
  315. case DEVDOTDOT:
  316. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
  317. return 1;
  318. case 0:
  319. devdir(c, cqid, "ctl", 0, eve, 0664, dp);
  320. return 1;
  321. }
  322. i--; /* for ctl */
  323. qid.path = Qfirst + i;
  324. qid.vers = 0;
  325. qid.type = 0;
  326. mp = path2dev(i, 0);
  327. if (mp == nil)
  328. return -1;
  329. kstrcpy(up->genbuf, mp->name, sizeof(up->genbuf));
  330. devdir(c, qid, up->genbuf, mp->size, eve, 0664, dp);
  331. return 1;
  332. }
  333. static Chan*
  334. mattach(char *spec)
  335. {
  336. return devattach(fsdevtab.dc, spec);
  337. }
  338. static Walkqid*
  339. mwalk(Chan *c, Chan *nc, char **name, int nname)
  340. {
  341. if (!configed)
  342. rdconf();
  343. return devwalk(c, nc, name, nname, 0, 0, mgen);
  344. }
  345. static int
  346. mstat(Chan *c, uchar *db, int n)
  347. {
  348. Dir d;
  349. Fsdev *mp;
  350. int p;
  351. p = c->qid.path;
  352. memset(&d, 0, sizeof d);
  353. switch(p){
  354. case Qtop:
  355. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
  356. break;
  357. case Qdir:
  358. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, &d);
  359. break;
  360. case Qctl:
  361. devdir(c, cqid, "ctl", 0, eve, 0664, &d);
  362. break;
  363. default:
  364. mp = path2dev(p - Qfirst, 1);
  365. devdir(c, c->qid, mp->name, mp->size, eve, 0664, &d);
  366. }
  367. n = convD2M(&d, db, n);
  368. if (n == 0)
  369. error(Ebadarg);
  370. return n;
  371. }
  372. static Chan*
  373. mopen(Chan *c, int omode)
  374. {
  375. // TODO: call devopen()?
  376. if((c->qid.type & QTDIR) && omode != OREAD)
  377. error(Eperm);
  378. // if (c->flag & COPEN)
  379. // return c;
  380. c->mode = openmode(omode & ~OTRUNC);
  381. c->flag |= COPEN;
  382. c->offset = 0;
  383. return c;
  384. }
  385. static void
  386. mclose(Chan*)
  387. {
  388. /* that's easy */
  389. }
  390. static long
  391. io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
  392. {
  393. long wl;
  394. Chan *mc = in->idev;
  395. if (waserror()) {
  396. print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
  397. in->iname, off, l, mp->name, (isread? "read": "write"),
  398. (up && up->errstr? up->errstr: ""));
  399. nexterror();
  400. }
  401. if (isread) {
  402. wl = devtab[mc->type]->read(mc, a, l, off);
  403. if (wl != l)
  404. error("#k: short read");
  405. } else {
  406. wl = devtab[mc->type]->write(mc, a, l, off);
  407. if (wl != l)
  408. error("#k: write error");
  409. }
  410. poperror();
  411. return wl;
  412. }
  413. static long
  414. catio(Fsdev *mp, int isread, void *a, long n, vlong off)
  415. {
  416. int i;
  417. long l, wl, res;
  418. Inner *in;
  419. // print("catio %d %p %ld %lld\n", isread, a, n, off);
  420. res = n;
  421. for (i = 0; n >= 0 && i < mp->ndevs ; i++){
  422. in = &mp->inner[i];
  423. if (off > in->isize){
  424. off -= in->isize;
  425. continue; /* not there yet */
  426. }
  427. if (off + n > in->isize)
  428. l = in->isize - off;
  429. else
  430. l = n;
  431. // print("\tdev %d %p %ld %lld\n", i, a, l, off);
  432. wl = io(mp, in, isread, a, l, off);
  433. assert(wl == l);
  434. a = (char*)a + l;
  435. off = 0;
  436. n -= l;
  437. }
  438. // print("\tres %ld\n", res - n);
  439. return res - n;
  440. }
  441. static long
  442. interio(Fsdev *mp, int isread, void *a, long n, vlong off)
  443. {
  444. int i;
  445. long boff, res, l, wl, wsz;
  446. vlong woff, blk, mblk;
  447. Inner *in;
  448. blk = off / Blksize;
  449. boff = off % Blksize;
  450. wsz = Blksize - boff;
  451. res = n;
  452. while(n > 0){
  453. mblk = blk / mp->ndevs;
  454. i = blk % mp->ndevs;
  455. woff = mblk*Blksize + boff;
  456. if (n > wsz)
  457. l = wsz;
  458. else
  459. l = n;
  460. in = &mp->inner[i];
  461. wl = io(mp, in, isread, a, l, woff);
  462. if (wl != l || l == 0)
  463. error(Eio);
  464. a = (char*)a + l;
  465. n -= l;
  466. blk++;
  467. boff = 0;
  468. wsz = Blksize;
  469. }
  470. return res;
  471. }
  472. static long
  473. mread(Chan *c, void *a, long n, vlong off)
  474. {
  475. int i, retry;
  476. long l, res;
  477. Fsdev *mp;
  478. Inner *in;
  479. if (c->qid.type & QTDIR)
  480. return devdirread(c, a, n, 0, 0, mgen);
  481. if (c->qid.path == Qctl)
  482. return readstr((long)off, a, n, confstr + strlen(Cfgstr));
  483. i = c->qid.path - Qfirst;
  484. mp = path2dev(i, 1);
  485. if (off >= mp->size)
  486. return 0;
  487. if (off + n > mp->size)
  488. n = mp->size - off;
  489. if (n == 0)
  490. return 0;
  491. res = -1;
  492. switch(mp->type){
  493. case Fcat:
  494. res = catio(mp, Isread, a, n, off);
  495. break;
  496. case Finter:
  497. res = interio(mp, Isread, a, n, off);
  498. break;
  499. case Fpart:
  500. in = &mp->inner[0];
  501. res = io(mp, in, Isread, a, n, mp->start + off);
  502. assert(res == n);
  503. break;
  504. case Fmirror:
  505. retry = 0;
  506. do {
  507. if (retry > 0) {
  508. print("#k/%s: retry %d read for byte %,lld "
  509. "count %ld: %s\n", mp->name, retry, off,
  510. n, (up && up->errstr? up->errstr: ""));
  511. tsleep(&up->sleep, return0, 0, 2000);
  512. }
  513. for (i = 0; i < mp->ndevs; i++){
  514. if (waserror())
  515. continue;
  516. in = &mp->inner[i];
  517. l = io(mp, in, Isread, a, n, off);
  518. poperror();
  519. if (l >= 0){
  520. res = l;
  521. break; /* read a good copy */
  522. }
  523. }
  524. } while (i == mp->ndevs && ++retry < 2);
  525. if (i == mp->ndevs) {
  526. /* no mirror had a good copy of the block */
  527. print("#k/%s: byte %,lld count %ld: CAN'T READ "
  528. "from mirror: %s\n", mp->name, off, n,
  529. (up && up->errstr? up->errstr: ""));
  530. error(Eio);
  531. } else if (retry > 0)
  532. print("#k/%s: byte %,lld count %ld: retry read OK "
  533. "from mirror: %s\n", mp->name, off, n,
  534. (up && up->errstr? up->errstr: ""));
  535. break;
  536. }
  537. return res;
  538. }
  539. static long
  540. mwrite(Chan *c, void *a, long n, vlong off)
  541. {
  542. int i, allbad, anybad, retry;
  543. long l, res;
  544. Fsdev *mp;
  545. Inner *in;
  546. if (c->qid.type & QTDIR)
  547. error(Eperm);
  548. if (c->qid.path == Qctl){
  549. mconfig(a, n);
  550. return n;
  551. }
  552. mp = path2dev(c->qid.path - Qfirst, 1);
  553. if (off >= mp->size)
  554. return 0;
  555. if (off + n > mp->size)
  556. n = mp->size - off;
  557. if (n == 0)
  558. return 0;
  559. res = n;
  560. switch(mp->type){
  561. case Fcat:
  562. res = catio(mp, Iswrite, a, n, off);
  563. break;
  564. case Finter:
  565. res = interio(mp, Iswrite, a, n, off);
  566. break;
  567. case Fpart:
  568. in = &mp->inner[0];
  569. res = io(mp, in, Iswrite, a, n, mp->start + off);
  570. if (res > n)
  571. res = n;
  572. break;
  573. case Fmirror:
  574. retry = 0;
  575. do {
  576. if (retry > 0) {
  577. print("#k/%s: retry %d write for byte %,lld "
  578. "count %ld: %s\n", mp->name, retry, off,
  579. n, (up && up->errstr? up->errstr: ""));
  580. tsleep(&up->sleep, return0, 0, 2000);
  581. }
  582. allbad = 1;
  583. anybad = 0;
  584. for (i = mp->ndevs - 1; i >= 0; i--){
  585. if (waserror()) {
  586. anybad = 1;
  587. continue;
  588. }
  589. in = &mp->inner[i];
  590. l = io(mp, in, Iswrite, a, n, off);
  591. poperror();
  592. if (res > l)
  593. res = l; /* shortest OK write */
  594. if (l == n)
  595. allbad = 0; /* wrote a good copy */
  596. else
  597. anybad = 1;
  598. }
  599. } while (anybad && ++retry < 2);
  600. if (allbad) {
  601. /* no mirror took a good copy of the block */
  602. print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
  603. "to mirror: %s\n", mp->name, off, n,
  604. (up && up->errstr? up->errstr: ""));
  605. error(Eio);
  606. } else if (retry > 0)
  607. print("#k/%s: byte %,lld count %ld: retry wrote OK "
  608. "to mirror: %s\n", mp->name, off, n,
  609. (up && up->errstr? up->errstr: ""));
  610. break;
  611. }
  612. return res;
  613. }
  614. Dev fsdevtab = {
  615. 'k',
  616. "devfs",
  617. devreset,
  618. devinit,
  619. devshutdown,
  620. mattach,
  621. mwalk,
  622. mstat,
  623. mopen,
  624. devcreate,
  625. mclose,
  626. mread,
  627. devbread,
  628. mwrite,
  629. devbwrite,
  630. devremove,
  631. devwstat,
  632. devpower,
  633. devconfig,
  634. };