/*
 * Memory mappings. Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load). The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0. All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts. The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree. The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap. The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap. The total amount (256M)
 * could probably be reduced somewhat if desired. The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M). Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory. It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages. The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them. For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define vpt ((ulong*)VPT)
#define VPTX(va)	(((ulong)(va))>>12)
#define vpd (vpt+VPTX(VPT))
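
/*
 * Illustrative sketch only; examplepte is not part of the kernel and is
 * never called. Once mmuinit has installed the recursive pdb entry at
 * PDX(VPT), the page-table entry covering any virtual address can be
 * read directly through the VPT window, and the corresponding page
 * directory entry through vpd.
 */
static ulong
examplepte(ulong va)
{
	if(!(vpd[PDX(va)] & PTEVALID))
		return 0;		/* no second-level table for va */
	return vpt[VPTX(va)];		/* the PTE that maps va */
}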

void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#.8lux kmap=%#.8ux\n",
		VPT, (ulong)vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page. Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard). Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3). Since kernel memory mappings
 * are never removed, this is safe. (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
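
/*
 * A sketch of the full flush described above, should kernel mappings
 * ever need to be removed. It is not used anywhere in this file and
 * assumes a putcr4 counterpart to getcr4; bit 7 of CR4 is PGE.
 *
 *	cr4 = getcr4();
 *	putcr4(cr4 & ~0x80);	- PGE off: the next CR3 load flushes global entries too
 *	putcr3(m->tss->cr3);	- reload CR3, flushing the TLB
 *	putcr4(cr4);		- turn PGE back on
 */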

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(m->tss->cr3);
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting. Clear all the pde entries for user-space
 * memory mappings and device mappings. Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	tss->cr3 = pdb;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	int s;
	Page *page, *next;
	ulong *pdb;

	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
		if(up->nkmap)
			panic("mmurelease: nkmap %d\n", up->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		s = splhi();
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		splx(s);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	page = mmupdballoc();
	s = splhi();
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	mmuflushtlb(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault. pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0)
			page = newpage(0, 0, 0);
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level. This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){
	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE. The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */
static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(pa == 0){
		print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux\n",
			va, size, getcallerpc(&va));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 */
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}
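
/*
 * Illustrative use of vmap/vunmap, not called anywhere in this kernel:
 * map a hypothetical device's register window, read its first 32-bit
 * register, and drop the mapping again. Real drivers normally vmap once
 * at init time and keep the mapping for the life of the driver.
 */
static ulong
examplereadreg(ulong pa, int len)
{
	ulong *reg, val;

	reg = vmap(pa, len);
	if(reg == nil)
		return 0;
	val = reg[0];		/* first register in the window */
	vunmap(reg, len);
	return val;
}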

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pae, pgsz, *pte, *table;
	ulong flag;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	pae = pa + size;
	while(pa < pae){
		table = &pdb[PDX(va)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va, pa, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && pa%(4*MB) == 0 && va%(4*MB) == 0 && (pae >= pa+4*MB)){
			*table = pa|PTESIZE|flag|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va, pa, *pte);
			*pte = pa|flag|PTEVALID;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
	}
	return 0;
}
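
/*
 * For example (illustrative numbers only): a 16M frame buffer whose
 * physical address and chosen va are both 4M-aligned consumes just four
 * PTESIZE directory entries above, rather than 4096 individual 4K PTEs;
 * if either address is misaligned, the loop falls back to mmuwalk and
 * fills second-level tables one page at a time.
 */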

/*
 * Remove mappings. Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB) & ~(4*MB-1);	/* advance to the next 4MB boundary */
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * We only copy pdb entries, and they never go away,
 * so no locking is needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}

/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack). The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		s = spllo();
		up->kmaptable = newpage(0, 0, 0);
		splx(s);
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		memset(kpt, 0, BY2PG);

		/* might as well finish the job */
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	up->nkmap--;
	vpt[VPTX(va)] = 0;
	flushpg(va);
}
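
/*
 * Illustrative use of kmap/kunmap, not called anywhere in this kernel:
 * the usual pattern is a short-lived mapping around a copy or clear,
 * here zeroing the frame behind a Page.
 */
static void
examplezeropage(Page *p)
{
	KMap *k;

	k = kmap(p);
	memset((void*)k, 0, BY2PG);
	kunmap(k);
}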

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%.8lux: *%.8lux=%.8lux (vpt=%.8lux index=%.8lux)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpunmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#.8lux", va, getcallerpc(&va));
	return va-KZERO;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print)
		iprint("%d pages in mach pdbpools\n", n);
}

void
checkfault(ulong addr, ulong pc)
{
	ulong *a;
	int i;

	print("user fault: addr=%.8lux pc=%.8lux\n", addr, pc);
	if(!(vpd[PDX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux)\n", vpd[PDX(addr)]);
	else if(!(vpt[VPTX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);
	else
		print("addr mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);

	if(!(vpd[PDX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux)\n", vpd[PDX(pc)]);
	else if(!(vpt[VPTX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
	else{
		print("pc mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
		if(PPN(pc) == PPN(pc+4))	/* not crossing into an unmapped page */
			print("*pc: %.8lux\n", *(ulong*)pc);
		a = (ulong*)PPN(pc);
		for(i=0; i<WD2PG; i++)
			if(a[i] != 0)
				break;
		if(i == WD2PG)
			print("pc's page is all zeros\n");
		else{
			for(i=0; i<256/4; i+=8){
				print("%.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux\n",
					PPN(pc)+i*4, a[i], a[i+1], a[i+2], a[i+3],
					a[i+4], a[i+5], a[i+6], a[i+7]);
			}
		}
	}
}