fault.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. #undef DBG
  16. #define DBG \
  17. if(0) \
  18. print
  19. char *faulttypes[] = {
  20. [FT_WRITE] = "write",
  21. [FT_READ] = "read",
  22. [FT_EXEC] = "exec"};
  23. /*
  24. * Fault calls fixfault which ends up calling newpage, which
  25. * might fail to allocate a page for the right color. So, we
  26. * might enter a loop and retry forever.
  27. * We first try with the desired color, and then with any
  28. * other one, if we failed for some time.
  29. */
  30. int
  31. fault(usize addr, usize pc, int ftype)
  32. {
  33. Proc *up = externup();
  34. Segment *s;
  35. char *sps;
  36. int i, color;
  37. if(up->nlocks)
  38. print("%s fault nlocks %d addr %p pc %p\n",
  39. faulttypes[ftype],
  40. up->nlocks,
  41. addr, pc);
  42. sps = up->psstate;
  43. up->psstate = "Fault";
  44. machp()->pfault++;
  45. spllo();
  46. for(i = 0;; i++){
  47. s = seg(up, addr, 1); /* leaves s->lk qlocked if seg != nil */
  48. if(s == nil){
  49. //iprint("fault seg is nil\n");
  50. goto fail;
  51. }
  52. //iprint("%s fault seg for %p is %p base %p top %p\n", faulttypes[ftype], addr, s, s->base, s->top);
  53. if(ftype == FT_READ && (s->type & SG_READ) == 0)
  54. goto fail;
  55. if(ftype == FT_WRITE && (s->type & SG_WRITE) == 0)
  56. goto fail;
  57. if(ftype == FT_EXEC && (s->type & SG_EXEC) == 0)
  58. goto fail;
  59. color = s->color;
  60. if(i > 3)
  61. color = -1;
  62. if(fixfault(s, addr, ftype, 1, color) == 0)
  63. break;
  64. /*
  65. * See the comment in newpage that describes
  66. * how to get here.
  67. */
  68. if(i > 0 && (i % 1000) == 0)
  69. print("fault: tried %d times\n", i);
  70. }
  71. splhi();
  72. up->psstate = sps;
  73. return 0;
  74. fail:
  75. if(s != nil){
  76. qunlock(&s->lk);
  77. print("%s fault fail %s(%c%c%c) pid %d addr 0x%p pc 0x%p\n",
  78. faulttypes[ftype],
  79. segtypes[s->type & SG_TYPE],
  80. (s->type & SG_READ) != 0 ? 'r' : '-',
  81. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  82. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  83. up->pid, addr, pc);
  84. } else {
  85. print("%s fault fail, no segment, pid %d addr 0x%p pc 0x%p\n",
  86. faulttypes[ftype],
  87. up->pid, addr, pc);
  88. }
  89. splhi();
  90. up->psstate = sps;
  91. return -1;
  92. }
  93. static void
  94. faulterror(char *s, Chan *c, int freemem)
  95. {
  96. Proc *up = externup();
  97. char buf[ERRMAX];
  98. if(c && c->path){
  99. snprint(buf, sizeof buf, "%s accessing %s: %s", s, c->path->s, up->errstr);
  100. s = buf;
  101. }
  102. if(up->nerrlab){
  103. postnote(up, 1, s, NDebug);
  104. error(s);
  105. }
  106. pexit(s, freemem);
  107. }
  108. int
  109. fixfault(Segment *s, usize addr, int ftype, int dommuput, int color)
  110. {
  111. Proc *up = externup();
  112. int stype;
  113. int ref;
  114. Pte **p, *etp;
  115. usize soff;
  116. u64 pgsz;
  117. u32 mmuattr;
  118. Page **pg, *lkp, *new;
  119. Page *(*fn)(Segment *, usize);
  120. pgsz = sys->pgsz[s->pgszi];
  121. addr &= ~(pgsz - 1);
  122. soff = addr - s->base;
  123. p = &s->map[soff / PTEMAPMEM];
  124. if(*p == 0)
  125. *p = ptealloc(s);
  126. etp = *p;
  127. pg = &etp->pages[(soff & (PTEMAPMEM - 1)) / pgsz];
  128. stype = s->type & SG_TYPE;
  129. if(pg < etp->first)
  130. etp->first = pg;
  131. if(pg > etp->last)
  132. etp->last = pg;
  133. mmuattr = 0;
  134. switch(stype){
  135. default:
  136. panic("fault");
  137. break;
  138. case SG_BSS:
  139. case SG_SHARED: /* Zero fill on demand */
  140. case SG_STACK:
  141. if(*pg == 0){
  142. new = newpage(1, &s, addr, pgsz, color);
  143. if(s == 0)
  144. return -1;
  145. *pg = new;
  146. }
  147. goto common;
  148. case SG_MMAP:
  149. print("MMAP fault: req is %p, \n", up->req);
  150. if(pagedout(*pg) && up->req){
  151. print("Fault in mmap'ed page\n");
  152. // hazardous.
  153. char f[34];
  154. snprint(f, sizeof(f), "W%016x%016x", addr, pgsz);
  155. if(qwrite(up->req, f, sizeof(f)) != sizeof(f))
  156. error("can't write mmap request");
  157. /* read in answer here. */
  158. error("not reading answer yet");
  159. }
  160. error("No mmap support yet");
  161. goto common;
  162. case SG_LOAD:
  163. case SG_DATA:
  164. case SG_TEXT: /* Demand load */
  165. if(pagedout(*pg))
  166. pio(s, addr, soff, pg, color);
  167. common: /* Demand load/pagein/copy on write */
  168. if(ftype != FT_WRITE){
  169. /* never copy a non-writeable seg */
  170. if((s->type & SG_WRITE) == 0){
  171. mmuattr = PTERONLY | PTEVALID;
  172. if((s->type & SG_EXEC) == 0)
  173. mmuattr |= PTENOEXEC;
  174. (*pg)->modref = PG_REF;
  175. break;
  176. }
  177. /* delay copy if we are the only user (copy on write when it happens) */
  178. if(conf.copymode == 0 && s->r.ref == 1){
  179. mmuattr = PTERONLY | PTEVALID;
  180. if((s->type & SG_EXEC) == 0)
  181. mmuattr |= PTENOEXEC;
  182. (*pg)->modref |= PG_REF;
  183. break;
  184. }
  185. }
  186. if((s->type & SG_WRITE) == 0)
  187. error("fixfault: write on read-only\n");
  188. if((s->type & SG_TYPE) != SG_SHARED){
  189. lkp = *pg;
  190. lock(&lkp->l);
  191. ref = lkp->ref;
  192. if(ref > 1) { /* page is shared but segment is not: copy for write */
  193. int pgref = lkp->ref;
  194. unlock(&lkp->l);
  195. DBG("fixfault %d: copy on %s, %s(%c%c%c) 0x%p segref %d pgref %d\n",
  196. up->pid,
  197. faulttypes[ftype],
  198. segtypes[stype],
  199. (s->type & SG_READ) != 0 ? 'r' : '-',
  200. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  201. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  202. addr,
  203. s->r.ref,
  204. pgref);
  205. // No need to zero here as it is copied
  206. // over.
  207. new = newpage(0, &s, addr, pgsz, color);
  208. if(s == 0)
  209. return -1;
  210. *pg = new;
  211. copypage(lkp, *pg);
  212. putpage(lkp);
  213. } else { /* write: don't dirty the image cache */
  214. if(lkp->image != nil)
  215. duppage(lkp);
  216. unlock(&lkp->l);
  217. }
  218. }
  219. mmuattr = PTEVALID | PTEWRITE;
  220. if((s->type & SG_EXEC) == 0)
  221. mmuattr |= PTENOEXEC;
  222. (*pg)->modref = PG_MOD | PG_REF;
  223. break;
  224. case SG_PHYSICAL:
  225. if(*pg == 0){
  226. fn = s->pseg->pgalloc;
  227. if(fn)
  228. *pg = (*fn)(s, addr);
  229. else {
  230. new = smalloc(sizeof(Page));
  231. new->va = addr;
  232. new->pa = s->pseg->pa + (addr - s->base);
  233. new->ref = 1;
  234. new->pgszi = s->pseg->pgszi;
  235. *pg = new;
  236. }
  237. }
  238. mmuattr = PTEVALID;
  239. if((s->pseg->attr & SG_WRITE) != 0)
  240. mmuattr |= PTEWRITE;
  241. if((s->pseg->attr & SG_CACHED) == 0)
  242. mmuattr |= PTEUNCACHED;
  243. if((s->type & SG_EXEC) == 0)
  244. mmuattr |= PTENOEXEC;
  245. (*pg)->modref = PG_MOD | PG_REF;
  246. break;
  247. }
  248. qunlock(&s->lk);
  249. if(dommuput){
  250. assert(segppn(s, (*pg)->pa) == (*pg)->pa);
  251. mmuput(addr, *pg, mmuattr);
  252. }
  253. return 0;
  254. }
/*
 * Demand-load the page at addr (offset soff within segment s) into the
 * page slot *p.
 *
 * Called with s->lk held.  The lock is dropped around the allocation
 * and the device read and taken again before *p is updated, so another
 * process may fill the slot in the meantime.  Only loading from the
 * segment's image is supported: a non-nil *p on entry panics with
 * "no swap".  color is passed through to newpage.
 *
 * Returns with s->lk held.  A failed or short read ends in
 * faulterror(Eioload, ...), which is reached with s->lk released.
 */
void
pio(Segment *s, usize addr, u32 soff, Page **p, int color)
{
	Proc *up = externup();
	Page *newpg;
	KMap *k;
	Chan *c;
	int n, ask;
	u64 pgsz;
	char *kaddr;
	u32 daddr, doff = 0;
	Page *loadrec;

	loadrec = *p;
	daddr = ask = 0;
	c = nil;
	pgsz = sys->pgsz[s->pgszi];
	if(loadrec == nil) { /* from a text/data image */
		/* file offset of this page, and where file data starts within page 0 */
		daddr = s->ldseg.pg0fileoff + soff;
		doff = s->ldseg.pg0off;

		if(soff < doff + s->ldseg.filesz){
			/* file-backed bytes remaining from soff, capped at one page */
			ask = doff + s->ldseg.filesz - soff;
			if(ask > pgsz)
				ask = pgsz;
			/* only the first page of the segment keeps the in-page offset */
			if(soff > 0)
				doff = 0;

			/* reuse the page if the image cache already holds it */
			newpg = lookpage(s->image, daddr + doff);
			if(newpg != nil){
				*p = newpg;
				return;
			}
		} else {
			// zero fill
			ask = 0;
			doff = 0;
		}

		c = s->image->c;
	} else {
		panic("no swap");
	}

	/* drop the segment lock while allocating and reading */
	qunlock(&s->lk);

	// For plan 9 a.out format the amount of data
	// we read covered the page; the first parameter
	// of newpage here was 0 -- "don't zero".
	// It is now 1 -- "do zero" because ELF only covers
	// part of the page.
	newpg = newpage(1, nil, addr, pgsz, color);

	if(ask > doff){
		/* there is file data to read into the new page */
		k = kmap(newpg);
		kaddr = (char *)VA(k);

		/*
		 * On an error: if it was Eintr, re-arm the handler and
		 * go on to (re)issue the read; otherwise release the
		 * mapping and the page and report the load failure.
		 */
		while(waserror()){
			if(strcmp(up->errstr, Eintr) == 0)
				continue;
			kunmap(k);
			putpage(newpg);
			faulterror(Eioload, c, 0);
		}

		DBG(
			"pio %d %s(%c%c%c) addr+doff 0x%p daddr+doff 0x%x ask-doff %d\n",
			up->pid, segtypes[s->type & SG_TYPE],
			(s->type & SG_READ) != 0 ? 'r' : '-',
			(s->type & SG_WRITE) != 0 ? 'w' : '-',
			(s->type & SG_EXEC) != 0 ? 'x' : '-',
			addr + doff, daddr + doff, ask - doff);

		/* a short read counts as a load failure */
		n = c->dev->read(c, kaddr + doff, ask - doff, daddr + doff);
		if(n != ask - doff)
			faulterror(Eioload, c, 0);

		poperror();
		kunmap(k);
	}

	qlock(&s->lk);
	if(loadrec == nil) { /* This is demand load */
		/*
		 * race, another proc may have gotten here first while
		 * s->lk was unlocked
		 */
		if(*p == nil){
			// put it to page cache if there was i/o for it
			if(ask > doff){
				newpg->daddr = daddr + doff;
				cachepage(newpg, s->image);
			}
			*p = newpg;
		} else {
			/* lost the race: keep the other page, give ours back */
			print("racing on demand load\n");
			putpage(newpg);
		}
	} else {
		panic("no swap");
	}

	/* mark the page's cachectl entries PG_TXTFLUSH when the segment asks for it */
	if(s->flushme)
		memset((*p)->cachectl, PG_TXTFLUSH, sizeof((*p)->cachectl));
}
  347. /*
  348. * Called only in a system call
  349. */
  350. int
  351. okaddr(usize addr, i32 len, int write)
  352. {
  353. Proc *up = externup();
  354. Segment *s;
  355. if(len >= 0){
  356. for(;;){
  357. s = seg(up, addr, 0);
  358. if(s == 0 || (write && (s->type & SG_WRITE) == 0))
  359. break;
  360. if(addr + len > s->top){
  361. len -= s->top - addr;
  362. addr = s->top;
  363. continue;
  364. }
  365. return 1;
  366. }
  367. }
  368. return 0;
  369. }
  370. void *
  371. validaddr(void *addr, i32 len, int write)
  372. {
  373. if(!okaddr(PTR2UINT(addr), len, write)){
  374. pprint("suicide: invalid address %#p/%ld in sys call pc=%#p\n",
  375. addr, len, userpc(nil));
  376. pexit("Suicide", 0);
  377. }
  378. return UINT2PTR(addr);
  379. }
  380. /*
  381. * &s[0] is known to be a valid address.
  382. * Assume 2M pages, so it works for both 2M and 1G pages.
  383. * Note this won't work for 4*KiB pages!
  384. */
  385. void *
  386. vmemchr(const void *s, int c, u32 n)
  387. {
  388. int m;
  389. usize a;
  390. void *t;
  391. a = PTR2UINT(s);
  392. while(ROUNDUP(a, BIGPGSZ) != ROUNDUP(a + n - 1, BIGPGSZ)){
  393. /* spans pages; handle this page */
  394. m = BIGPGSZ - (a & (BIGPGSZ - 1));
  395. t = memchr(UINT2PTR(a), c, m);
  396. if(t)
  397. return t;
  398. a += m;
  399. n -= m;
  400. /* N.B. You're either going to find the character
  401. * or validaddr will error() and bounce you way back
  402. * up the call chain. That's why there's no worry about
  403. * returning NULL.
  404. */
  405. if((a & KZERO) != KZERO)
  406. validaddr(UINT2PTR(a), 1, 0);
  407. }
  408. /* fits in one page */
  409. t = memchr(UINT2PTR(a), c, n);
  410. if(t == nil)
  411. error("Bogus string");
  412. return t;
  413. }
  414. Segment *
  415. seg(Proc *p, usize addr, int dolock)
  416. {
  417. Segment **s, **et, *n;
  418. et = &p->seg[NSEG];
  419. for(s = p->seg; s < et; s++){
  420. n = *s;
  421. if(n == 0)
  422. continue;
  423. if(addr >= n->base && addr < n->top){
  424. if(dolock == 0)
  425. return n;
  426. qlock(&n->lk);
  427. if(addr >= n->base && addr < n->top)
  428. return n;
  429. qunlock(&n->lk);
  430. }
  431. }
  432. return 0;
  433. }