devfs.c 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
/*
 * File system devices.
 * Follows device config in Ken's file server.
 * Builds mirrors, concatenations, interleavings, and partitions
 * of devices out of other (inner) devices.
 */
  7. #include "u.h"
  8. #include "../port/lib.h"
  9. #include "mem.h"
  10. #include "dat.h"
  11. #include "fns.h"
  12. #include "io.h"
  13. #include "ureg.h"
  14. #include "../port/error.h"
  15. enum {
  16. Fmirror, /* mirror of others */
  17. Fcat, /* catenation of others */
  18. Finter, /* interleaving of others */
  19. Fpart, /* part of other */
  20. Fclear, /* start over */
  21. Blksize = 8*1024, /* for Finter only */
  22. Qtop = 0, /* top dir (contains "fs") */
  23. Qdir, /* actual dir */
  24. Qctl, /* ctl file */
  25. Qfirst, /* first fs file */
  26. Iswrite = 0,
  27. Isread,
  28. Optional = 0,
  29. Mustexist,
  30. /* tunable parameters */
  31. Maxconf = 4*1024, /* max length for config */
  32. Ndevs = 32, /* max. inner devs per command */
  33. Nfsdevs = 128, /* max. created devs, total */
  34. Maxretries = 3, /* max. retries of i/o errors */
  35. Retrypause = 5000, /* ms. to pause between retries */
  36. };
  37. #define Cfgstr "fsdev:\n"
  38. typedef struct Inner Inner;
  39. struct Inner
  40. {
  41. char *iname; /* inner device name */
  42. vlong isize; /* size of inner device */
  43. Chan *idev; /* inner device */
  44. };
  45. typedef struct Fsdev Fsdev;
  46. struct Fsdev
  47. {
  48. int type;
  49. char *name; /* name for this fsdev */
  50. vlong size; /* min(inner[X].isize) */
  51. vlong start; /* start address (for Fpart) */
  52. int ndevs; /* number of inner devices */
  53. Inner inner[Ndevs];
  54. };
  55. extern Dev fsdevtab; /* forward */
  56. /*
  57. * Once configured, a fsdev is never removed. The name of those
  58. * configured is never nil. We have no locks here.
  59. */
  60. static Fsdev fsdev[Nfsdevs]; /* internal representation of config */
  61. static char confstr[Maxconf]; /* textual configuration */
  62. static Qid tqid = {Qtop, 0, QTDIR};
  63. static Qid dqid = {Qdir, 0, QTDIR};
  64. static Qid cqid = {Qctl, 0, 0};
  65. static Cmdtab configs[] = {
  66. Fmirror,"mirror", 0,
  67. Fcat, "cat", 0,
  68. Finter, "inter", 0,
  69. Fpart, "part", 5,
  70. Fclear, "clear", 1,
  71. };
  72. static Fsdev*
  73. path2dev(int i, int mustexist)
  74. {
  75. if (i < 0 || i >= nelem(fsdev))
  76. error("bug: bad index in devfsdev");
  77. if (mustexist && fsdev[i].name == nil)
  78. error(Enonexist);
  79. if (fsdev[i].name == nil)
  80. return nil;
  81. else
  82. return &fsdev[i];
  83. }
  84. static Fsdev*
  85. devalloc(void)
  86. {
  87. int i;
  88. for (i = 0; i < nelem(fsdev); i++)
  89. if (fsdev[i].name == nil)
  90. break;
  91. if (i == nelem(fsdev))
  92. error(Enodev);
  93. return &fsdev[i];
  94. }
  95. static void
  96. setdsize(Fsdev* mp)
  97. {
  98. int i;
  99. long l;
  100. vlong inlen;
  101. uchar buf[128]; /* old DIRLEN plus a little should be plenty */
  102. Dir d;
  103. Inner *in;
  104. if (mp->type != Fpart){
  105. mp->start = 0;
  106. mp->size = 0;
  107. }
  108. for (i = 0; i < mp->ndevs; i++){
  109. in = &mp->inner[i];
  110. l = devtab[in->idev->type]->stat(in->idev, buf, sizeof buf);
  111. convM2D(buf, l, &d, nil);
  112. inlen = d.length;
  113. in->isize = inlen;
  114. switch(mp->type){
  115. case Finter:
  116. /* truncate to multiple of Blksize */
  117. inlen &= ~(Blksize-1);
  118. in->isize = inlen;
  119. /* fall through */
  120. case Fmirror:
  121. /* use size of smallest inner device */
  122. if (mp->size == 0 || mp->size > inlen)
  123. mp->size = inlen;
  124. break;
  125. case Fcat:
  126. mp->size += inlen;
  127. break;
  128. case Fpart:
  129. /* should raise errors here? */
  130. if (mp->start > inlen) {
  131. print("#k/%s: partition start truncated from "
  132. "%lld to %lld bytes\n", mp->name,
  133. mp->start, inlen);
  134. mp->start = inlen; /* empty partition */
  135. }
  136. /* truncate partition to keep it within inner device */
  137. if (inlen < mp->start + mp->size) {
  138. print("#k/%s: partition truncated from "
  139. "%lld to %lld bytes\n", mp->name,
  140. mp->size, inlen - mp->start);
  141. mp->size = inlen - mp->start;
  142. }
  143. break;
  144. }
  145. }
  146. if(mp->type == Finter)
  147. mp->size *= mp->ndevs;
  148. }
  149. static void
  150. mpshut(Fsdev *mp)
  151. {
  152. int i;
  153. char *nm;
  154. nm = mp->name;
  155. mp->name = nil; /* prevent others from using this. */
  156. if (nm)
  157. free(nm);
  158. for (i = 0; i < mp->ndevs; i++){
  159. if (mp->inner[i].idev != nil)
  160. cclose(mp->inner[i].idev);
  161. if (mp->inner[i].iname)
  162. free(mp->inner[i].iname);
  163. }
  164. memset(mp, 0, sizeof *mp);
  165. }
  166. /*
  167. * process a single line of configuration,
  168. * often of the form "cmd newname idev0 idev1".
  169. */
  170. static void
  171. mconfig(char* a, long n)
  172. {
  173. int i;
  174. vlong size, start;
  175. char *c, *oldc;
  176. Cmdbuf *cb;
  177. Cmdtab *ct;
  178. Fsdev *mp;
  179. Inner *inprv;
  180. static QLock lck;
  181. /* ignore comments & empty lines */
  182. if (*a == '\0' || *a == '#' || *a == '\n')
  183. return;
  184. size = 0;
  185. start = 0;
  186. /* insert header if config is empty */
  187. if (confstr[0] == 0)
  188. seprint(confstr, confstr + sizeof confstr, Cfgstr);
  189. mp = nil;
  190. cb = nil;
  191. oldc = confstr + strlen(confstr);
  192. qlock(&lck);
  193. if (waserror()){
  194. *oldc = 0;
  195. if (mp != nil)
  196. mpshut(mp);
  197. qunlock(&lck);
  198. if (cb)
  199. free(cb);
  200. nexterror();
  201. }
  202. /* append this command after parsing to confstr */
  203. cb = parsecmd(a, n);
  204. c = oldc;
  205. for (i = 0; i < cb->nf; i++)
  206. c = seprint(c, confstr + sizeof confstr - 1, "%s ", cb->f[i]);
  207. if (c > oldc) {
  208. c[-1] = '\n';
  209. c[0] = '\0';
  210. }
  211. /* lookup command, execute special cases */
  212. ct = lookupcmd(cb, configs, nelem(configs));
  213. cb->f++; /* skip command */
  214. cb->nf--;
  215. if (cb->nf < 0) /* nothing to see here, move along */
  216. ct->index = -1;
  217. switch (ct->index) {
  218. case Fpart:
  219. if (cb->nf < 4)
  220. error("too few fields in fs config");
  221. start = strtoll(cb->f[2], nil, 10);
  222. size = strtoll(cb->f[3], nil, 10);
  223. cb->nf -= 2;
  224. break;
  225. case Fclear:
  226. /* clear both internal & textual representations of config */
  227. for (mp = fsdev; mp < fsdev + nelem(fsdev); mp++)
  228. mpshut(mp);
  229. *confstr = '\0';
  230. /* FALL THROUGH */
  231. case -1:
  232. poperror();
  233. qunlock(&lck);
  234. free(cb);
  235. return;
  236. }
  237. if (cb->nf < 2)
  238. error("too few fields in fs config");
  239. else if (cb->nf - 1 > Ndevs)
  240. error("too many devices; fix #k: increase Ndevs");
  241. /* reject new name if already in use, validate old ones */
  242. for (i = 0; i < nelem(fsdev); i++)
  243. if (fsdev[i].name != nil && strcmp(fsdev[i].name, cb->f[0])==0)
  244. error(Eexist);
  245. for (i = 0; i < cb->nf; i++)
  246. validname(cb->f[i], (i != 0));
  247. /* populate new Fsdev with parsed command */
  248. mp = devalloc();
  249. mp->type = ct->index;
  250. if (mp->type == Fpart){
  251. mp->start = start;
  252. mp->size = size;
  253. }
  254. kstrdup(&mp->name, cb->f[0]);
  255. if (waserror()){
  256. mpshut(mp);
  257. nexterror();
  258. }
  259. for (i = 1; i < cb->nf; i++){
  260. inprv = &mp->inner[i-1];
  261. kstrdup(&inprv->iname, cb->f[i]);
  262. inprv->idev = namec(inprv->iname, Aopen, ORDWR, 0);
  263. if (inprv->idev == nil)
  264. error(Egreg);
  265. mp->ndevs++;
  266. }
  267. poperror();
  268. setdsize(mp);
  269. poperror();
  270. qunlock(&lck);
  271. free(cb);
  272. }
  273. static void
  274. rdconf(void)
  275. {
  276. int mustrd;
  277. char *c, *e, *p, *s;
  278. Chan *cc;
  279. static int configed;
  280. /* only read config file once */
  281. if (configed)
  282. return;
  283. configed = 1;
  284. /* identify the config file */
  285. s = getconf("fsconfig");
  286. if (s == nil){
  287. mustrd = 0;
  288. s = "/dev/sdC0/fscfg";
  289. } else
  290. mustrd = 1;
  291. /* read it */
  292. cc = nil;
  293. c = nil;
  294. if (waserror()){
  295. if (cc != nil)
  296. cclose(cc);
  297. if (c)
  298. free(c);
  299. if (!mustrd)
  300. return;
  301. nexterror();
  302. }
  303. cc = namec(s, Aopen, OREAD, 0);
  304. devtab[cc->type]->read(cc, confstr, sizeof confstr, 0);
  305. cclose(cc);
  306. cc = nil;
  307. /* validate, copy and erase config; mconfig will repopulate confstr */
  308. if (strncmp(confstr, Cfgstr, strlen(Cfgstr)) != 0)
  309. error("bad #k config, first line must be: 'fsdev:\\n'");
  310. kstrdup(&c, confstr + strlen(Cfgstr));
  311. memset(confstr, 0, sizeof confstr);
  312. /* process config copy one line at a time */
  313. for (p = c; p != nil && *p != '\0'; p = e){
  314. e = strchr(p, '\n');
  315. if (e == nil)
  316. e = p + strlen(p);
  317. else
  318. e++;
  319. mconfig(p, e - p);
  320. }
  321. USED(cc); /* until now, can be used in waserror clause */
  322. poperror();
  323. }
  324. static int
  325. mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
  326. {
  327. Qid qid;
  328. Fsdev *mp;
  329. if (c->qid.path == Qtop)
  330. switch(i){
  331. case DEVDOTDOT:
  332. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
  333. return 1;
  334. case 0:
  335. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
  336. return 1;
  337. default:
  338. return -1;
  339. }
  340. if (c->qid.path != Qdir)
  341. switch(i){
  342. case DEVDOTDOT:
  343. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
  344. return 1;
  345. default:
  346. return -1;
  347. }
  348. switch(i){
  349. case DEVDOTDOT:
  350. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
  351. return 1;
  352. case 0:
  353. devdir(c, cqid, "ctl", 0, eve, 0664, dp);
  354. return 1;
  355. }
  356. i--; /* for ctl */
  357. qid.path = Qfirst + i;
  358. qid.vers = 0;
  359. qid.type = 0;
  360. mp = path2dev(i, Optional);
  361. if (mp == nil)
  362. return -1;
  363. kstrcpy(up->genbuf, mp->name, sizeof(up->genbuf));
  364. devdir(c, qid, up->genbuf, mp->size, eve, 0664, dp);
  365. return 1;
  366. }
  367. static Chan*
  368. mattach(char *spec)
  369. {
  370. return devattach(fsdevtab.dc, spec);
  371. }
  372. static Walkqid*
  373. mwalk(Chan *c, Chan *nc, char **name, int nname)
  374. {
  375. rdconf();
  376. return devwalk(c, nc, name, nname, 0, 0, mgen);
  377. }
  378. static int
  379. mstat(Chan *c, uchar *db, int n)
  380. {
  381. Dir d;
  382. Fsdev *mp;
  383. int p;
  384. p = c->qid.path;
  385. memset(&d, 0, sizeof d);
  386. switch(p){
  387. case Qtop:
  388. devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
  389. break;
  390. case Qdir:
  391. devdir(c, dqid, "fs", 0, eve, DMDIR|0775, &d);
  392. break;
  393. case Qctl:
  394. devdir(c, cqid, "ctl", 0, eve, 0664, &d);
  395. break;
  396. default:
  397. mp = path2dev(p - Qfirst, Mustexist);
  398. devdir(c, c->qid, mp->name, mp->size, eve, 0664, &d);
  399. }
  400. n = convD2M(&d, db, n);
  401. if (n == 0)
  402. error(Ebadarg);
  403. return n;
  404. }
  405. static Chan*
  406. mopen(Chan *c, int omode)
  407. {
  408. // TODO: call devopen()?
  409. if((c->qid.type & QTDIR) && omode != OREAD)
  410. error(Eperm);
  411. // if (c->flag & COPEN)
  412. // return c;
  413. c->mode = openmode(omode & ~OTRUNC);
  414. c->flag |= COPEN;
  415. c->offset = 0;
  416. return c;
  417. }
  418. static void
  419. mclose(Chan*)
  420. {
  421. /* that's easy */
  422. }
  423. static long
  424. io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
  425. {
  426. long wl;
  427. Chan *mc = in->idev;
  428. if (waserror()) {
  429. print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
  430. in->iname, off, l, mp->name, (isread? "read": "write"),
  431. (up && up->errstr? up->errstr: ""));
  432. nexterror();
  433. }
  434. if (isread)
  435. wl = devtab[mc->type]->read(mc, a, l, off);
  436. else
  437. wl = devtab[mc->type]->write(mc, a, l, off);
  438. poperror();
  439. return wl;
  440. }
  441. /* NB: a transfer could span multiple inner devices */
  442. static long
  443. catio(Fsdev *mp, int isread, void *a, long n, vlong off)
  444. {
  445. int i;
  446. long l, res;
  447. Inner *in;
  448. // print("catio %d %p %ld %lld\n", isread, a, n, off);
  449. res = n;
  450. for (i = 0; n > 0 && i < mp->ndevs; i++){
  451. in = &mp->inner[i];
  452. if (off >= in->isize){
  453. off -= in->isize;
  454. continue; /* not there yet */
  455. }
  456. if (off + n > in->isize)
  457. l = in->isize - off;
  458. else
  459. l = n;
  460. // print("\tdev %d %p %ld %lld\n", i, a, l, off);
  461. if (io(mp, in, isread, a, l, off) != l)
  462. error(Eio);
  463. a = (char*)a + l;
  464. off = 0;
  465. n -= l;
  466. }
  467. // print("\tres %ld\n", res - n);
  468. return res - n;
  469. }
  470. static long
  471. interio(Fsdev *mp, int isread, void *a, long n, vlong off)
  472. {
  473. int i;
  474. long boff, res, l, wl, wsz;
  475. vlong woff, blk, mblk;
  476. blk = off / Blksize;
  477. boff = off % Blksize;
  478. wsz = Blksize - boff;
  479. res = n;
  480. while(n > 0){
  481. mblk = blk / mp->ndevs;
  482. i = blk % mp->ndevs;
  483. woff = mblk*Blksize + boff;
  484. if (n > wsz)
  485. l = wsz;
  486. else
  487. l = n;
  488. wl = io(mp, &mp->inner[i], isread, a, l, woff);
  489. if (wl != l)
  490. error(Eio);
  491. blk++;
  492. boff = 0;
  493. wsz = Blksize;
  494. a = (char*)a + l;
  495. n -= l;
  496. }
  497. return res;
  498. }
  499. static long
  500. mread(Chan *c, void *a, long n, vlong off)
  501. {
  502. int i, retry;
  503. long l, res;
  504. Fsdev *mp;
  505. if (c->qid.type & QTDIR)
  506. return devdirread(c, a, n, 0, 0, mgen);
  507. if (c->qid.path == Qctl) {
  508. i = strlen(Cfgstr);
  509. if (strlen(confstr) >= i) /* skip header if present */
  510. return readstr((long)off, a, n, confstr + i);
  511. else
  512. return readstr((long)off, a, n, confstr);
  513. }
  514. i = c->qid.path - Qfirst;
  515. mp = path2dev(i, Mustexist);
  516. if (off >= mp->size)
  517. return 0;
  518. if (off + n > mp->size)
  519. n = mp->size - off;
  520. if (n == 0)
  521. return 0;
  522. res = -1;
  523. switch(mp->type){
  524. case Fcat:
  525. res = catio(mp, Isread, a, n, off);
  526. break;
  527. case Finter:
  528. res = interio(mp, Isread, a, n, off);
  529. break;
  530. case Fpart:
  531. res = io(mp, &mp->inner[0], Isread, a, n, mp->start + off);
  532. break;
  533. case Fmirror:
  534. retry = 0;
  535. do {
  536. if (retry > 0) {
  537. print("#k/%s: retry %d read for byte %,lld "
  538. "count %ld: %s\n", mp->name, retry, off,
  539. n, (up && up->errstr? up->errstr: ""));
  540. /*
  541. * pause before retrying in case it's due to
  542. * a transient bus or controller problem.
  543. */
  544. tsleep(&up->sleep, return0, 0, Retrypause);
  545. }
  546. for (i = 0; i < mp->ndevs; i++){
  547. if (waserror())
  548. continue;
  549. l = io(mp, &mp->inner[i], Isread, a, n, off);
  550. poperror();
  551. if (l >= 0){
  552. res = l;
  553. break; /* read a good copy */
  554. }
  555. }
  556. } while (i == mp->ndevs && ++retry <= Maxretries);
  557. if (retry > Maxretries) {
  558. /* no mirror had a good copy of the block */
  559. print("#k/%s: byte %,lld count %ld: CAN'T READ "
  560. "from mirror: %s\n", mp->name, off, n,
  561. (up && up->errstr? up->errstr: ""));
  562. error(Eio);
  563. } else if (retry > 0)
  564. print("#k/%s: byte %,lld count %ld: retry read OK "
  565. "from mirror: %s\n", mp->name, off, n,
  566. (up && up->errstr? up->errstr: ""));
  567. break;
  568. }
  569. return res;
  570. }
  571. static long
  572. mwrite(Chan *c, void *a, long n, vlong off)
  573. {
  574. int i, allbad, anybad, retry;
  575. long l, res;
  576. Fsdev *mp;
  577. if (c->qid.type & QTDIR)
  578. error(Eperm);
  579. if (c->qid.path == Qctl){
  580. mconfig(a, n);
  581. return n;
  582. }
  583. mp = path2dev(c->qid.path - Qfirst, Mustexist);
  584. if (off >= mp->size)
  585. return 0;
  586. if (off + n > mp->size)
  587. n = mp->size - off;
  588. if (n == 0)
  589. return 0;
  590. res = n;
  591. switch(mp->type){
  592. case Fcat:
  593. res = catio(mp, Iswrite, a, n, off);
  594. break;
  595. case Finter:
  596. res = interio(mp, Iswrite, a, n, off);
  597. break;
  598. case Fpart:
  599. res = io(mp, &mp->inner[0], Iswrite, a, n, mp->start + off);
  600. if (res != n)
  601. error(Eio);
  602. break;
  603. case Fmirror:
  604. retry = 0;
  605. do {
  606. if (retry > 0) {
  607. print("#k/%s: retry %d write for byte %,lld "
  608. "count %ld: %s\n", mp->name, retry, off,
  609. n, (up && up->errstr? up->errstr: ""));
  610. /*
  611. * pause before retrying in case it's due to
  612. * a transient bus or controller problem.
  613. */
  614. tsleep(&up->sleep, return0, 0, Retrypause);
  615. }
  616. allbad = 1;
  617. anybad = 0;
  618. for (i = mp->ndevs - 1; i >= 0; i--){
  619. if (waserror()) {
  620. anybad = 1;
  621. continue;
  622. }
  623. l = io(mp, &mp->inner[i], Iswrite, a, n, off);
  624. poperror();
  625. if (l == n)
  626. allbad = 0; /* wrote a good copy */
  627. else
  628. anybad = 1;
  629. }
  630. } while (anybad && ++retry <= Maxretries);
  631. if (allbad) {
  632. /* no mirror took a good copy of the block */
  633. print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
  634. "to mirror: %s\n", mp->name, off, n,
  635. (up && up->errstr? up->errstr: ""));
  636. error(Eio);
  637. } else if (retry > 0)
  638. print("#k/%s: byte %,lld count %ld: retry wrote OK "
  639. "to mirror: %s\n", mp->name, off, n,
  640. (up && up->errstr? up->errstr: ""));
  641. break;
  642. }
  643. return res;
  644. }
  645. Dev fsdevtab = {
  646. 'k',
  647. "devfs",
  648. devreset,
  649. devinit,
  650. devshutdown,
  651. mattach,
  652. mwalk,
  653. mstat,
  654. mopen,
  655. devcreate,
  656. mclose,
  657. mread,
  658. devbread,
  659. mwrite,
  660. devbwrite,
  661. devremove,
  662. devwstat,
  663. devpower,
  664. devconfig,
  665. };