fault.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. #undef DBG
  16. #define DBG if(0)print
  17. char *faulttypes[] = {
  18. [FT_WRITE] = "write",
  19. [FT_READ] = "read",
  20. [FT_EXEC] = "exec"
  21. };
  22. /*
  23. * Fault calls fixfault which ends up calling newpage, which
  24. * might fail to allocate a page for the right color. So, we
  25. * might enter a loop and retry forever.
  26. * We first try with the desired color, and then with any
  27. * other one, if we failed for some time.
  28. */
  29. int
  30. fault(uintptr_t addr, uintptr_t pc, int ftype)
  31. {
  32. Proc *up = externup();
  33. Segment *s;
  34. char *sps;
  35. int i, color;
  36. if(up->nlocks)
  37. print("%s fault nlocks %d addr %p pc %p\n",
  38. faulttypes[ftype],
  39. up->nlocks,
  40. addr, pc);
  41. sps = up->psstate;
  42. up->psstate = "Fault";
  43. machp()->pfault++;
  44. spllo();
  45. for(i = 0;; i++) {
  46. s = seg(up, addr, 1); /* leaves s->lk qlocked if seg != nil */
  47. //print("%s fault seg for %p is %p base %p top %p\n", faulttypes[ftype], addr, s, s->base, s->top);
  48. if(s == nil)
  49. goto fail;
  50. if(ftype == FT_READ && (s->type&SG_READ) == 0)
  51. goto fail;
  52. if(ftype == FT_WRITE && (s->type&SG_WRITE) == 0)
  53. goto fail;
  54. if(ftype == FT_EXEC && (s->type&SG_EXEC) == 0)
  55. goto fail;
  56. color = s->color;
  57. if(i > 3)
  58. color = -1;
  59. if(fixfault(s, addr, ftype, 1, color) == 0)
  60. break;
  61. /*
  62. * See the comment in newpage that describes
  63. * how to get here.
  64. */
  65. if(i > 0 && (i%1000) == 0)
  66. print("fault: tried %d times\n", i);
  67. }
  68. splhi();
  69. up->psstate = sps;
  70. return 0;
  71. fail:
  72. if(s != nil){
  73. qunlock(&s->lk);
  74. print("%s fault fail %s(%c%c%c) pid %d addr 0x%p pc 0x%p\n",
  75. faulttypes[ftype],
  76. segtypes[s->type & SG_TYPE],
  77. (s->type & SG_READ) != 0 ? 'r' : '-',
  78. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  79. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  80. up->pid, addr, pc);
  81. } else {
  82. print("%s fault fail, no segment, pid %d addr 0x%p pc 0x%p\n",
  83. faulttypes[ftype],
  84. up->pid, addr, pc);
  85. }
  86. splhi();
  87. up->psstate = sps;
  88. return -1;
  89. }
  90. static void
  91. faulterror(char *s, Chan *c, int freemem)
  92. {
  93. Proc *up = externup();
  94. char buf[ERRMAX];
  95. if(c && c->path){
  96. snprint(buf, sizeof buf, "%s accessing %s: %s", s, c->path->s, up->errstr);
  97. s = buf;
  98. }
  99. if(up->nerrlab) {
  100. postnote(up, 1, s, NDebug);
  101. error(s);
  102. }
  103. pexit(s, freemem);
  104. }
  105. int
  106. fixfault(Segment *s, uintptr_t addr, int ftype, int dommuput, int color)
  107. {
  108. Proc *up = externup();
  109. int stype;
  110. int ref;
  111. Pte **p, *etp;
  112. uintptr_t soff;
  113. uintmem pgsz;
  114. uint mmuattr;
  115. Page **pg, *lkp, *new;
  116. Page *(*fn)(Segment*, uintptr_t);
  117. pgsz = sys->pgsz[s->pgszi];
  118. addr &= ~(pgsz-1);
  119. soff = addr-s->base;
  120. p = &s->map[soff/PTEMAPMEM];
  121. if(*p == 0)
  122. *p = ptealloc(s);
  123. etp = *p;
  124. pg = &etp->pages[(soff&(PTEMAPMEM-1))/pgsz];
  125. stype = s->type&SG_TYPE;
  126. if(pg < etp->first)
  127. etp->first = pg;
  128. if(pg > etp->last)
  129. etp->last = pg;
  130. mmuattr = 0;
  131. switch(stype) {
  132. default:
  133. panic("fault");
  134. break;
  135. case SG_BSS:
  136. case SG_SHARED: /* Zero fill on demand */
  137. case SG_STACK:
  138. if(*pg == 0) {
  139. new = newpage(1, &s, addr, pgsz, color);
  140. if(s == 0)
  141. return -1;
  142. *pg = new;
  143. }
  144. goto common;
  145. case SG_MMAP:
  146. print("MMAP fault: req is %p, \n", up->req);
  147. if(pagedout(*pg) && up->req) {
  148. print("Fault in mmap'ed page\n");
  149. // hazardous.
  150. char f[34];
  151. snprint(f, sizeof(f), "W%016x%016x", addr, pgsz);
  152. if (qwrite(up->req, f, sizeof(f)) != sizeof(f))
  153. error("can't write mmap request");
  154. /* read in answer here. */
  155. error("not reading answer yet");
  156. }
  157. error("No mmap support yet");
  158. goto common;
  159. case SG_LOAD:
  160. case SG_DATA:
  161. case SG_TEXT: /* Demand load */
  162. if(pagedout(*pg))
  163. pio(s, addr, soff, pg, color);
  164. common: /* Demand load/pagein/copy on write */
  165. if(ftype != FT_WRITE){
  166. /* never copy a non-writeable seg */
  167. if((s->type & SG_WRITE) == 0){
  168. mmuattr = PTERONLY|PTEVALID;
  169. if((s->type & SG_EXEC) == 0)
  170. mmuattr |= PTENOEXEC;
  171. (*pg)->modref = PG_REF;
  172. break;
  173. }
  174. /* delay copy if we are the only user (copy on write when it happens) */
  175. if(conf.copymode == 0 && s->r.ref == 1) {
  176. mmuattr = PTERONLY|PTEVALID;
  177. if((s->type & SG_EXEC) == 0)
  178. mmuattr |= PTENOEXEC;
  179. (*pg)->modref |= PG_REF;
  180. break;
  181. }
  182. }
  183. if((s->type & SG_WRITE) == 0)
  184. error("fixfault: write on read-only\n");
  185. if((s->type & SG_TYPE) != SG_SHARED){
  186. lkp = *pg;
  187. lock(&lkp->l);
  188. ref = lkp->ref;
  189. if(ref > 1) { /* page is shared but segment is not: copy for write */
  190. int pgref = lkp->ref;
  191. unlock(&lkp->l);
  192. DBG("fixfault %d: copy on %s, %s(%c%c%c) 0x%p segref %d pgref %d\n",
  193. up->pid,
  194. faulttypes[ftype],
  195. segtypes[stype],
  196. (s->type & SG_READ) != 0 ? 'r' : '-',
  197. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  198. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  199. addr,
  200. s->r.ref,
  201. pgref
  202. );
  203. // No need to zero here as it is copied
  204. // over.
  205. new = newpage(0, &s, addr, pgsz, color);
  206. if(s == 0)
  207. return -1;
  208. *pg = new;
  209. copypage(lkp, *pg);
  210. putpage(lkp);
  211. } else { /* write: don't dirty the image cache */
  212. if(lkp->image != nil)
  213. duppage(lkp);
  214. unlock(&lkp->l);
  215. }
  216. }
  217. mmuattr = PTEVALID|PTEWRITE;
  218. if((s->type & SG_EXEC) == 0)
  219. mmuattr |= PTENOEXEC;
  220. (*pg)->modref = PG_MOD|PG_REF;
  221. break;
  222. case SG_PHYSICAL:
  223. if(*pg == 0) {
  224. fn = s->pseg->pgalloc;
  225. if(fn)
  226. *pg = (*fn)(s, addr);
  227. else {
  228. new = smalloc(sizeof(Page));
  229. new->va = addr;
  230. new->pa = s->pseg->pa+(addr-s->base);
  231. new->ref = 1;
  232. new->pgszi = s->pseg->pgszi;
  233. *pg = new;
  234. }
  235. }
  236. mmuattr = PTEVALID;
  237. if((s->pseg->attr & SG_WRITE) != 0)
  238. mmuattr |= PTEWRITE;
  239. if((s->pseg->attr & SG_CACHED) == 0)
  240. mmuattr |= PTEUNCACHED;
  241. if((s->type & SG_EXEC) == 0)
  242. mmuattr |= PTENOEXEC;
  243. (*pg)->modref = PG_MOD|PG_REF;
  244. break;
  245. }
  246. qunlock(&s->lk);
  247. if(dommuput){
  248. assert(segppn(s, (*pg)->pa) == (*pg)->pa);
  249. mmuput(addr, *pg, mmuattr);
  250. }
  251. return 0;
  252. }
  253. void
  254. pio(Segment *s, uintptr_t addr, uint32_t soff, Page **p, int color)
  255. {
  256. Proc *up = externup();
  257. Page *newpg;
  258. KMap *k;
  259. Chan *c;
  260. int n, ask;
  261. uintmem pgsz;
  262. char *kaddr;
  263. uint32_t daddr, doff = 0;
  264. Page *loadrec;
  265. loadrec = *p;
  266. daddr = ask = 0;
  267. c = nil;
  268. pgsz = sys->pgsz[s->pgszi];
  269. if(loadrec == nil) { /* from a text/data image */
  270. daddr = s->ldseg.pg0fileoff + soff;
  271. doff = s->ldseg.pg0off;
  272. if(soff < doff+s->ldseg.filesz){
  273. ask = doff+s->ldseg.filesz - soff;
  274. if(ask > pgsz)
  275. ask = pgsz;
  276. if(soff > 0)
  277. doff = 0;
  278. newpg = lookpage(s->image, daddr+doff);
  279. if(newpg != nil) {
  280. *p = newpg;
  281. return;
  282. }
  283. } else {
  284. // zero fill
  285. ask = 0;
  286. doff = 0;
  287. }
  288. c = s->image->c;
  289. } else {
  290. panic("no swap");
  291. }
  292. qunlock(&s->lk);
  293. // For plan 9 a.out format the amount of data
  294. // we read covered the page; the first parameter
  295. // of newpage here was 0 -- "don't zero".
  296. // It is now 1 -- "do zero" because ELF only covers
  297. // part of the page.
  298. newpg = newpage(1, nil, addr, pgsz, color);
  299. if(ask > doff){
  300. k = kmap(newpg);
  301. kaddr = (char*)VA(k);
  302. while(waserror()) {
  303. if(strcmp(up->errstr, Eintr) == 0)
  304. continue;
  305. kunmap(k);
  306. putpage(newpg);
  307. faulterror(Eioload, c, 0);
  308. }
  309. DBG(
  310. "pio %d %s(%c%c%c) addr+doff 0x%p daddr+doff 0x%x ask-doff %d\n",
  311. up->pid, segtypes[s->type & SG_TYPE],
  312. (s->type & SG_READ) != 0 ? 'r' : '-',
  313. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  314. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  315. addr+doff, daddr+doff, ask-doff
  316. );
  317. n = c->dev->read(c, kaddr+doff, ask-doff, daddr+doff);
  318. if(n != ask-doff)
  319. faulterror(Eioload, c, 0);
  320. poperror();
  321. kunmap(k);
  322. }
  323. qlock(&s->lk);
  324. if(loadrec == nil) { /* This is demand load */
  325. /*
  326. * race, another proc may have gotten here first while
  327. * s->lk was unlocked
  328. */
  329. if(*p == nil) {
  330. // put it to page cache if there was i/o for it
  331. if(ask > doff){
  332. newpg->daddr = daddr+doff;
  333. cachepage(newpg, s->image);
  334. }
  335. *p = newpg;
  336. } else {
  337. print("racing on demand load\n");
  338. putpage(newpg);
  339. }
  340. } else {
  341. panic("no swap");
  342. }
  343. if(s->flushme)
  344. memset((*p)->cachectl, PG_TXTFLUSH, sizeof((*p)->cachectl));
  345. }
  346. /*
  347. * Called only in a system call
  348. */
  349. int
  350. okaddr(uintptr_t addr, int32_t len, int write)
  351. {
  352. Proc *up = externup();
  353. Segment *s;
  354. if(len >= 0) {
  355. for(;;) {
  356. s = seg(up, addr, 0);
  357. if(s == 0 || (write && (s->type&SG_WRITE) == 0))
  358. break;
  359. if(addr+len > s->top) {
  360. len -= s->top - addr;
  361. addr = s->top;
  362. continue;
  363. }
  364. return 1;
  365. }
  366. }
  367. return 0;
  368. }
  369. void*
  370. validaddr(void* addr, int32_t len, int write)
  371. {
  372. if(!okaddr(PTR2UINT(addr), len, write)){
  373. pprint("suicide: invalid address %#p/%ld in sys call pc=%#p\n",
  374. addr, len, userpc(nil));
  375. pexit("Suicide", 0);
  376. }
  377. return UINT2PTR(addr);
  378. }
  379. /*
  380. * &s[0] is known to be a valid address.
  381. * Assume 2M pages, so it works for both 2M and 1G pages.
  382. * Note this won't work for 4*KiB pages!
  383. */
  384. void*
  385. vmemchr(void *s, int c, int n)
  386. {
  387. int m;
  388. uintptr_t a;
  389. char *t;
  390. a = PTR2UINT(s);
  391. while(ROUNDUP(a, BIGPGSZ) != ROUNDUP(a+n-1, BIGPGSZ)){
  392. /* spans pages; handle this page */
  393. m = BIGPGSZ - (a & (BIGPGSZ-1));
  394. // t = memchr(UINT2PTR(a), c, m);
  395. for(t = UINT2PTR(a); m > 0; m--, t++)
  396. if (*t == c)
  397. break;
  398. if(*t == c)
  399. return t;
  400. a += m;
  401. n -= m;
  402. /* N.B. You're either going to find the character
  403. * or validaddr will error() and bounce you way back
  404. * up the call chain. That's why there's no worry about
  405. * returning NULL.
  406. */
  407. if((a & KZERO) != KZERO)
  408. validaddr(UINT2PTR(a), 1, 0);
  409. }
  410. /* fits in one page */
  411. for(t = UINT2PTR(a); n > 0; n--, t++)
  412. if (*t == c)
  413. break;
  414. if(*t != c)
  415. error("Bogus string");
  416. return t;
  417. }
  418. Segment*
  419. seg(Proc *p, uintptr_t addr, int dolock)
  420. {
  421. Segment **s, **et, *n;
  422. et = &p->seg[NSEG];
  423. for(s = p->seg; s < et; s++) {
  424. n = *s;
  425. if(n == 0)
  426. continue;
  427. if(addr >= n->base && addr < n->top) {
  428. if(dolock == 0)
  429. return n;
  430. qlock(&n->lk);
  431. if(addr >= n->base && addr < n->top)
  432. return n;
  433. qunlock(&n->lk);
  434. }
  435. }
  436. return 0;
  437. }