/*
 * Memory mappings.  Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load).  The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0.  All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts.  The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree.  The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap.  The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap.  The total amount (256M)
 * could probably be reduced somewhat if desired.  The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M).  Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory.  It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages.  The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them.  For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */
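/*
 * For concreteness, with KZERO at 0xF0000000 (the example above) this
 * window runs from 0xE0000000 up to KZERO: 4M of VPT plus 4M of KMAP
 * plus 248M of VMAP makes up the full 256M.  The exact placement of
 * the three pieces is given by the VPT, KMAP and VMAP constants in mem.h.
 */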
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};
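/*
 * A note on the encoding above (a sketch of the ia32 descriptor format,
 * not something defined in this file): each Segdesc is two 32-bit words.
 * Word 0 holds limit[15:0] and base[15:0]; word 1 holds base[23:16],
 * the access and flag bits, limit[19:16] (the 0xF<<16 above) and
 * base[31:24].  The code and data descriptors use base 0 with limit
 * 0xFFFFF and SEGG (4K granularity), i.e. the full 4GB, so segmentation
 * does no translation; only the TSS descriptor carries a real base.
 */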
static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))
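/*
 * Why these macros work (a sketch): every va in the 4M window starting
 * at VPT has PDX(va) == PDX(VPT), and mmuinit points that pdb slot back
 * at the pdb itself.  The hardware walk therefore uses the pdb as the
 * page table for that window, so the page reached for va is the page
 * table that maps va.  Hence vpt[VPTX(va)] (VPTX(va) = va>>12) is the
 * second-level PTE for va, and vpd = vpt+VPTX(VPT) is the pdb seen
 * through the window, so vpd[PDX(va)] is the first-level PDE for va.
 */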
void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}
void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#.8lux kmap=%#.8ux\n",
		VPT, (ulong)vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	/* lgdt and lidt take a 6-byte pseudo-descriptor: 16-bit limit, then 32-bit base */
	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb),  (ulong)m + BY2PG);
	ltr(TSSSEL);
}
/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3).  Since kernel memory mappings
 * are never removed, this is safe.  (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
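/*
 * A minimal sketch of the "full flush" mentioned above, assuming the
 * usual putcr4/getcr4 helpers and CR4.PGE in bit 7; the kernel does not
 * need this today, since kernel mappings are never removed:
 *
 *	putcr4(getcr4() & ~0x80);	// clear PGE: next CR3 write flushes everything
 *	putcr3(getcr3());		// flush, including PTEGLOBAL entries
 *	putcr4(getcr4() | 0x80);	// re-enable global pages
 */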
/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}
/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}
/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}
static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}
/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *	switch to the prototype pdb for this processor (m->pdb);
 *	call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *	onto the process' free list (proc->mmufree).  This has the side-effect of
 *	cleaning any user entries in the pdb (proc->mmupdb);
 *	if there's a pdb put it in the cache of pre-initialised pdb's
 *	for this processor (m->pdbpool) or on the process' free list;
 *	finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d\n", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}
/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}
/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.  - rsc
	 */
	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0)
			page = newpage(0, 0, 0);
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %.8lux %.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}
/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}
/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early.  Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(pa == 0){
		print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}
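/*
 * Typical driver usage is roughly the following sketch, with pa and len
 * standing for a device's physical register window (e.g. a PCI BAR);
 * the names here are illustrative, not from this file:
 *
 *	ulong *regs;
 *
 *	regs = vmap(pa, len);
 *	if(regs == nil)
 *		error(Enomem);
 *	... poke at regs ...
 *	vunmap(regs, len);
 */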
static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}
/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}
/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux\n",
			va, size, getcallerpc(&va));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 */
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}
/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}
/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}
/*
 * Handle a fault by bringing vmap up to date.
 * We only copy pdb entries, and they never go away,
 * so no locking is needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}
/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack).  The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define	kpt (vpt+VPTX(KMAP))
#define	NKPT (KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}
void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}
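/*
 * Typical usage is roughly the following sketch.  On this port a KMap*
 * is simply the mapped virtual address (which is what the usual VA()
 * macro relies on), so the page can be touched through the pointer
 * directly:
 *
 *	KMap *k;
 *
 *	k = kmap(pg);
 *	memset((void*)k, 0, BY2PG);
 *	kunmap(k);
 */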
/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%.8lux: *%.8lux=%.8lux (vpt=%.8lux index=%.8lux)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}
void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}
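/*
 * Typical usage is the pattern already followed by mmupdballoc and
 * mmuptefree above (a sketch); the caller must hold splhi across the
 * pair, since the mapping is per-processor:
 *
 *	pdb = tmpmap(page);
 *	... edit the page through pdb ...
 *	tmpunmap(pdb);
 */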
/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#.8lux", va, getcallerpc(&va));
	return va-KZERO;
}
/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}
void
checkfault(ulong addr, ulong pc)
{
	ulong *a;
	int i;

	print("user fault: addr=%.8lux pc=%.8lux\n", addr, pc);
	if(!(vpd[PDX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux)\n", vpd[PDX(addr)]);
	else if(!(vpt[VPTX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);
	else
		print("addr mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);

	if(!(vpd[PDX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux)\n", vpd[PDX(pc)]);
	else if(!(vpt[VPTX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
	else{
		print("pc mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
		if(PPN(pc) == PPN(pc+4))	/* not crossing into an unmapped page */
			print("*pc: %.8lux\n", *(ulong*)pc);
		a = (ulong*)PPN(pc);
		for(i=0; i<WD2PG; i++)
			if(a[i] != 0)
				break;
		if(i == WD2PG)
			print("pc's page is all zeros\n");
		else{
			for(i=0; i<256/4; i+=8){
				print("%.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux\n",
					PPN(pc)+i*4, a[i], a[i+1], a[i+2], a[i+3],
					a[i+4], a[i+5], a[i+6], a[i+7]);
			}
		}
	}
}