mmu.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"

#include "amd64.h"

/*
 * To do:
 *	mmukmapsync grot for >1 processor;
 *	mmuptcopy (PteSHARED trick?);
 */

#define PML4X(va) (PTLX(va, 3))
#define PML3X(va) (PTLX(va, 2))
#define PML2X(va) (PTLX(va, 1))
#define PML1X(va) (PTLX(va, 0))

#define PGSZHUGE (PGLSZ(2))
#define PGSZLARGE (PGLSZ(1))

#define PPNHUGE(x) ((x) & ~(PteNX | (PGSZHUGE - 1)))
#define PPNLARGE(x) ((x) & ~(PteNX | (PGSZLARGE - 1)))
#define PPN(x) ((x) & ~(PteNX | (PGSZ - 1)))

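/*
 * Assuming the usual amd64 definitions of PTLX and PGLSZ (9 index bits
 * per level above a 4KiB base page), the macros above decompose a
 * virtual address as follows:
 *	PML4X(va) = bits 47-39	(level 3, 512GiB per slot)
 *	PML3X(va) = bits 38-30	(level 2, 1GiB per slot = PGSZHUGE)
 *	PML2X(va) = bits 29-21	(level 1, 2MiB per slot = PGSZLARGE)
 *	PML1X(va) = bits 20-12	(level 0, 4KiB per slot = PGSZ)
 * For example, va 0x0000008080604005 has PML4X 1, PML3X 2, PML2X 3,
 * PML1X 4 and page offset 0x5.  The PPN* macros mask off PteNX and the
 * in-page offset for the corresponding page size.
 */
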
void
mmukflushtlb(void)
{
	cr3put(machp()->MMU.pml4->pa);
}

// Switch to the kernel page table, dropping the user portion of the
// address space and flushing the TLB.
void
mmuflushtlb(void)
{
	machp()->tlbpurge++;
	cr3put(machp()->MMU.pml4->pa);
}

void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	//print("mmuflush: up = %#P\n", up);
	mmuswitch(up);
	splx(pl);
}

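/*
 * Detach all user page-table pages from proc: zero the user half of the
 * root table and move the level 3..1 pages from the in-use list
 * (root->next) onto the preallocated list (root->prev) for reuse.
 */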
static void
mmuptpunmap(Proc *proc)
{
	Page *page, *next;

	memset(UINT2PTR(proc->MMU.root->va), 0, PTSZ / 2);
	for(next = nil, page = proc->MMU.root->next; page != nil; page = next){
		next = page->next;
		page->daddr = 0;
		memset(UINT2PTR(page->va), 0, PTSZ);
		page->next = proc->MMU.root->prev;
		proc->MMU.root->prev = page;
	}
	proc->MMU.root->next = nil;
}

static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

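/*
 * Debugging: recursively print the present entries of the page-table
 * page at physical address pa, which sits at level lvl of the tree.
 * Kernel (non-PteU) mappings are noted but not descended into.
 */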
void
dumpptepg(int lvl, usize pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = KADDR(pa);
	for(i = 0; i < PTSZ / sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);

			/* skip kernel mappings */
			if((pte[i] & PteU) == 0){
				tabs(tab + 1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl - 1, PPN(pte[i]));
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p pml4 %#P is pa %#llx\n", p, p->MMU.root->pa, p->MMU.root->pa);
	for(i = 3; i > 0; i--){
		print("page table pages at level %d:\n", i);
		for(pg = p->MMU.root->next; pg != nil; pg = pg->next){
			if(pg->daddr != i)
				continue;
			print("\tpg %#p = va %#llx pa %#llx"
				" daddr %#lx next %#p prev %#p\n",
				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
		}
	}
	if(0)
		dumpptepg(4, machp()->MMU.pml4->pa);
}

void
dumpmmuwalk(const PTE *pml4, usize addr)
{
	int l;
	const PTE *pte;

	print("cpu%d: pml4 %#p\n", machp()->machno, pml4);
	if((l = mmuwalk(pml4, addr, 3, &pte)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 2, &pte)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 1, &pte)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 0, &pte)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
}

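/*
 * Allocate a page-table-sized, PTSZ-aligned block for kernel page
 * tables.  The first branch deliberately over-allocates and returns the
 * middle page so the live table has slack on either side (see the XXX
 * note below); the disabled branch is a simple static pool.
 */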
static void *
allocapage(void)
{
	void *p;

	if(1){
		// XXX: Something is blowing away page tables.
		// Reserve some space around them for whatever
		// is messing things up....
		const int npage = 3;
		char *pp = mallocalign(npage * PTSZ, PTSZ, 0, 0);
		assert(pp != nil);
		p = pp + (npage / 2) * PTSZ;
	}else{
		static alignas(4096) unsigned char alloc[16 * MiB];
		static usize offset = 0;
		if(offset >= sizeof(alloc))
			return nil;
		p = alloc + offset;
		offset += PTSZ;
	}
	return p;
}

static Page mmuptpfreelist;

static Page *
mmuptpalloc(void)
{
	void *va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	page = mmuptpfreelist.next;
	if(page != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
	}
	unlock(&mmuptpfreelist.l);

	if(page == nil){
		if((page = malloc(sizeof(Page))) == nil)
			panic("mmuptpalloc: Page alloc failed\n");
		//if((va = allocapage()) == nil)
		if((va = mallocalign(4096, 4096, 0, 0)) == nil)
			panic("mmuptpalloc: page table page alloc failed\n");
		page->va = PTR2UINT(va);
		page->pa = PADDR(va);
		page->ref = 0;
	}
	if(page->pa == 0)
		panic("mmuptpalloc: free page with pa == 0");
	if(page->ref++ != 0)
		panic("mmuptpalloc ref\n");
	page->prev = nil;
	page->next = nil;
	memset(UINT2PTR(page->va), 0, PTSZ);

	return page;
}

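/*
 * Install proc's address space on this CPU: if the process asked for a
 * TLB flush (newtlb), drop its stale user page-table pages first (but
 * not while an AC may still be using them), then point the TSS kernel
 * stack at proc's kstack and load CR3 with the process root table.
 */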
void
mmuswitch(Proc *proc)
{
	Mpl pl;

	//print("mmuswitch: proc = %#P\n", proc);
	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpunmap(proc);
		proc->newtlb = 0;
	}
	tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack + KSTACK)));
	cr3put(proc->MMU.root->pa);
	splx(pl);
}

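/*
 * Tear down proc's MMU state on exit: unmap everything, return all of
 * its page-table pages (and finally the root table itself) to
 * mmuptpfreelist, then switch this CPU back to the kernel page table.
 */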
void
mmurelease(Proc *proc)
{
	Page *page, *next;
	int freed = 0;

	mmuptpunmap(proc);
	for(next = nil, page = proc->MMU.root->prev; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->prev = nil;
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		unlock(&mmuptpfreelist.l);
		freed = 1;
	}
	lock(&mmuptpfreelist.l);
	if(--proc->MMU.root->ref)
		panic("mmurelease: proc->MMU.root->ref %d\n", proc->MMU.root->ref);
	proc->MMU.root->prev = nil;
	proc->MMU.root->next = mmuptpfreelist.next;
	mmuptpfreelist.next = proc->MMU.root;
	mmuptpfreelist.ref++;
	unlock(&mmuptpfreelist.l);
	if(freed && pga.rend.l.p)
		wakeup(&pga.rend);
	tssrsp0(machp(), STACKALIGN(machp()->stack + MACHSTKSZ));
	cr3put(machp()->MMU.pml4->pa);
}

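/*
 * Debugging: verify that virtual address a is reachable through pml4,
 * walking from level 3 down and panicking with the partial walk if any
 * level along the way is not present.
 */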
static void
checkpte(const PTE *pml4, u64 ppn, void *a)
{
	int l;
	const PTE *pte;
	u64 addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(pml4, addr, 3, &pte)) < 0 || (*pte & PteP) == 0)
		goto Panic;
	s = seprint(buf, buf + sizeof buf,
		"check3: l%d pte %#p = %llx\n",
		l, pte, pte ? *pte : ~0);
	if((l = mmuwalk(pml4, addr, 2, &pte)) < 0 || (*pte & PteP) == 0)
		goto Panic;
	s = seprint(s, buf + sizeof buf,
		"check2: l%d pte %#p = %llx\n",
		l, pte, pte ? *pte : ~0);
	if(*pte & PtePS)
		return;
	if((l = mmuwalk(pml4, addr, 1, &pte)) < 0 || (*pte & PteP) == 0)
		goto Panic;
	seprint(s, buf + sizeof buf,
		"check1: l%d pte %#p = %llx\n",
		l, pte, pte ? *pte : ~0);
	return;
Panic:
	seprint(s, buf + sizeof buf,
		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
		l, a, ppn, KADDR(ppn), pte, pte ? *pte : ~0);
	panic("%s", buf);
}

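/*
 * Translate the portable PTE* attribute bits used by the port code into
 * amd64 hardware PTE bits; any other bit is a caller error.
 */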
static u64
pteflags(u32 attr)
{
	u64 flags;

	flags = 0;
	if(attr & ~(PTEVALID | PTEWRITE | PTERONLY | PTEUSER | PTEUNCACHED | PTENOEXEC))
		panic("pteflags: wrong attr bits: %#x\n", attr);
	if(attr & PTEVALID)
		flags |= PteP;
	if(attr & PTEWRITE)
		flags |= PteRW;
	if(attr & PTEUSER)
		flags |= PteU;
	if(attr & PTEUNCACHED)
		flags |= PtePCD;
	if(attr & PTENOEXEC)
		flags |= PteNX;
	return flags;
}

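/*
 * Take a page-table page for proc p at the given level: pull one from
 * the process's preallocated list (root->prev), refilling that list
 * from mmuptpalloc if it is empty, move it onto the in-use list
 * (root->next), and return an intermediate PTE pointing at it.
 */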
static PTE
allocptpage(Proc *p, int level)
{
	Page *page;

	if(p->MMU.root->prev == nil)
		p->MMU.root->prev = mmuptpalloc();
	assert(p->MMU.root->prev != nil);
	page = p->MMU.root->prev;
	p->MMU.root->prev = page->next;
	page->daddr = level;
	page->next = p->MMU.root->next;
	p->MMU.root->next = page;
	return PPN(page->pa) | PteU | PteRW | PteP;
}

/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
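/*
 * In other words, a summary of the walk below (assuming the standard
 * amd64 translation scheme):
 *	4KiB page:  PML4E -> PML3E -> PML2E -> PML1E = entry
 *	2MiB page:  PML4E -> PML3E -> PML2E = entry | PtePS
 *	1GiB page:  PML4E -> PML3E = entry | PtePS
 * Intermediate table pages are allocated on demand with allocptpage.
 */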
void
mmuput(usize va, Page *pg, u32 attr)
{
	Proc *up;
	int pgsz;
	PTE *pml4, *p4e, *p3e, *p2e, *p1e, *ptp;
	PTE entry;
	Mpl pl;

	pl = splhi();
	up = externup();
	if(up == nil)
		panic("mmuput: no process");
	if(pg->pa == 0)
		panic("mmuput: zero pa");
	if(va >= KZERO)
		panic("mmuput: kernel page\n");
	if(pg->pgszi < 0)
		panic("mmuput: page size index out of bounds (%d)\n", pg->pgszi);
	pgsz = sys->pgsz[pg->pgszi];
	if(pg->pa & (pgsz - 1))
		panic("mmumappage: pa offset non zero: %#llx\n", pg->pa);
	if(0x0000800000000000 <= va && va < KZERO)
		panic("mmumappage: va %#P is non-canonical", va);
	DBG("cpu%d: up %#p mmuput %#p %#P %#x %d\n",
		machp()->machno, up, va, pg->pa, attr, pgsz);
	entry = pg->pa | pteflags(attr) | PteU;
	pml4 = UINT2PTR(up->MMU.root->va);
	p4e = &pml4[PML4X(va)];
	if(p4e == nil)
		panic("mmuput: PML4 is nil");
	if(*p4e == 0)
		*p4e = allocptpage(up, 3);
	ptp = KADDR(PPN(*p4e));
	p3e = &ptp[PML3X(va)];
	if(p3e == nil)
		panic("mmuput: PML3 is nil");
	if(pgsz == 1 * GiB){
		*p3e = entry | PtePS;
		splx(pl);
		DBG("cpu%d: up %#p new 1GiB pte %#p = %#llx\n",
			machp()->machno, up, p3e, *p3e);
		return;
	}
	if(*p3e == 0)
		*p3e = allocptpage(up, 2);
	ptp = KADDR(PPN(*p3e));
	p2e = &ptp[PML2X(va)];
	if(p2e == nil)
		panic("mmuput: PML2 is nil");
	if(pgsz == 2 * MiB){
		*p2e = entry | PtePS;
		splx(pl);
		DBG("cpu%d: up %#p new 2MiB pte %#p = %#llx\n",
			machp()->machno, up, p2e, *p2e);
		return;
	}
	if(*p2e == 0)
		*p2e = allocptpage(up, 1);
	ptp = KADDR(PPN(*p2e));
	p1e = &ptp[PML1X(va)];
	if(p1e == nil)
		panic("mmuput: PML1 is nil");
	*p1e = entry;
	invlpg(va); /* only if old entry valid? */
	splx(pl);
	DBG("cpu%d: up %#p new pte %#p = %#llx\n",
		machp()->machno, up, p1e, *p1e);
	if(DBGFLG)
		checkpte(pml4, PPN(pg->pa), p1e);
}

static Lock vmaplock;

int
mmukmapsync(u64 va)
{
	USED(va);
	return 0;
}

static PTE *
mmukpmap4(PTE *pml4, usize va)
{
	PTE p4e = pml4[PML4X(va)];

	if((p4e & PteP) == 0)
		panic("mmukphysmap: PML4E for va %#P is missing", va);
	return KADDR(PPN(p4e));
}

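/*
 * Map one step of a kernel physical mapping at the PML3 (1GiB) level.
 * If pa/va/size allow it and the slot is empty or already a huge page,
 * install a 1GiB mapping and return nil to tell the caller this step is
 * done.  Otherwise make sure a PML2 exists, splintering an existing
 * 1GiB mapping into 512 2MiB entries if necessary, and return it so the
 * caller can continue at the next level down.
 */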
static PTE *
mmukpmap3(PTE *pml3, u64 pa, usize va, PTE attr, usize size)
{
	PTE p3e = pml3[PML3X(va)];

	// Suitable for a huge page?
	if(ALIGNED(pa, PGSZHUGE) &&
	   ALIGNED(va, PGSZHUGE) &&
	   size >= PGSZHUGE &&
	   (p3e == 0 || (p3e & PtePS) == PtePS)){
		if((p3e & PteP) != 0 && PPNHUGE(p3e) != pa)
			panic("mmukphysmap: remapping kernel direct address at va %#P (old PML3E %#P, new %#P)",
				va, p3e, pa | attr | PtePS | PteP);
		pml3[PML3X(va)] = pa | attr | PtePS | PteP;
		return nil;
	}else if((p3e & (PtePS | PteP)) == (PtePS | PteP)){
		PTE *pml2 = allocapage();
		if(pml2 == nil)
			panic("mmukphysmap: cannot allocate PML2 to splinter");
		PTE entry = p3e & ~(PteD | PteA);
		for(int i = 0; i < PTSZ / sizeof(PTE); i++)
			pml2[i] = entry + i * PGSZLARGE;
		p3e = PADDR(pml2) | PteRW | PteP;
		pml3[PML3X(va)] = p3e;
	}else if((p3e & PteP) == 0){
		PTE *pml2 = allocapage();
		if(pml2 == nil)
			panic("mmukphysmap: cannot allocate PML2");
		p3e = PADDR(pml2) | PteRW | PteP;
		pml3[PML3X(va)] = p3e;
	}
	return KADDR(PPN(p3e));
}

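/*
 * Same as above, one level down: install a 2MiB large page when
 * possible (returning nil), otherwise ensure a PML1 exists, splintering
 * an existing 2MiB mapping into 512 4KiB entries if needed, and return
 * it for the final level.
 */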
static PTE *
mmukpmap2(PTE *pml2, u64 pa, usize va, PTE attr, usize size)
{
	PTE p2e = pml2[PML2X(va)];

	// Suitable for a large page?
	if(ALIGNED(pa, PGSZLARGE) &&
	   ALIGNED(va, PGSZLARGE) &&
	   size >= PGSZLARGE &&
	   ((p2e & PteP) == 0 || (p2e & (PtePS | PteP)) == (PtePS | PteP))){
		if((p2e & PteP) != 0 && PPNLARGE(p2e) != pa)
			panic("mmukphysmap: remapping kernel direct address at va %#P (old PML2E %#P, new %#P)",
				va, p2e, pa | attr | PtePS | PteP);
		pml2[PML2X(va)] = pa | attr | PtePS | PteP;
		return nil;
	}else if((p2e & (PtePS | PteP)) == (PtePS | PteP)){
		PTE *pml1 = allocapage();
		if(pml1 == nil)
			panic("mmukphysmap: cannot allocate PML1 to splinter");
		PTE entry = p2e & ~(PtePS | PteD | PteA);
		for(int i = 0; i < PTSZ / sizeof(PTE); i++)
			pml1[i] = entry + i * PGSZ;
		p2e = PADDR(pml1) | PteRW | PteP;
		pml2[PML2X(va)] = p2e;
	}else if((p2e & PteP) == 0){
		PTE *pml1 = allocapage();
		if(pml1 == nil)
			panic("mmukphysmap: cannot allocate PML1");
		p2e = PPN(PADDR(pml1)) | PteRW | PteP;
		pml2[PML2X(va)] = p2e;
	}
	return KADDR(PPN(p2e));
}

static void
mmukpmap1(PTE *pml1, u64 pa, usize va, PTE attr)
{
	PTE p1e = pml1[PML1X(va)];

	if((p1e & PteP) == PteP && PPN(p1e) != pa)
		panic("mmukphysmap: remapping kernel direct address at va %#P (pml1 %#P old %#P new %#P)",
			va, pml1, p1e, pa | attr | PteP);
	pml1[PML1X(va)] = pa | attr | PteP;
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 * Called only after the va range is known to be unoccupied.
 */
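/*
 * Each iteration maps one page at the largest granularity the helpers
 * will accept: mmukpmap3/mmukpmap2 return nil when they installed a
 * 1GiB or 2MiB mapping, and pgsz records how far pa/va advance on the
 * next pass.
 */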
void
mmukphysmap(PTE *pml4, u64 pa, PTE attr, usize size)
{
	usize pgsz = 0;
	Mpl pl;

	if(pa >= PHYSADDRSIZE || (pa + size) >= PHYSADDRSIZE)
		panic("mapping nonexistent physical address");
	pl = splhi();
	for(u64 pae = pa + size; pa < pae; size -= pgsz, pa += pgsz){
		usize va = (usize)KADDR(pa);
		invlpg(va);
		PTE *pml3 = mmukpmap4(pml4, va);
		assert(pml3 != nil);
		PTE *pml2 = mmukpmap3(pml3, pa, va, attr, size);
		if(pml2 == nil){
			pgsz = PGSZHUGE;
			continue;
		}
		PTE *pml1 = mmukpmap2(pml2, pa, va, attr, size);
		if(pml1 == nil){
			pgsz = PGSZLARGE;
			continue;
		}
		mmukpmap1(pml1, pa, va, attr);
		pgsz = PGSZ;
	}
	splx(pl);
}

/*
 * KZERO maps low memory.
 * Thus, almost everything in physical memory is already mapped, but
 * there are things that fall in the gap, mostly MMIO regions, where
 * in particular we would like to disable caching.
 * vmap() is required to access them.
 */
void *
vmap(usize pa, usize size)
{
	usize va;
	usize o, sz;

	DBG("vmap(%#p, %lu) pc=%#p\n", pa, size, getcallerpc());
	if(machp()->machno != 0 && DBGFLG)
		print("vmap: machp()->machno != 0\n");
	/*
	 * This is incomplete; the checks are not comprehensive
	 * enough.
	 * Sometimes the request is for an already-mapped piece
	 * of low memory, in which case just return a good value
	 * and hope that a corresponding vunmap of the address
	 * will have the same address.
	 * To do this properly will require keeping track of the
	 * mappings; perhaps something like kmap, but kmap probably
	 * can't be used early enough for some of the uses.
	 */
	if(pa + size < 1ull * MiB)
		return KADDR(pa);
	if(pa < 1ull * MiB)
		return nil;
	/*
	 * Might be asking for less than a page.
	 * This should have a smaller granularity if
	 * the page size is large.
	 */
	o = pa % PGSZ;
	pa -= o;
	sz = ROUNDUP(size + o, PGSZ);
	if(pa == 0){
		print("vmap(0, %lu) pc=%#p\n", size, getcallerpc());
		return nil;
	}
	ilock(&vmaplock);
	va = (usize)KADDR(pa);
	mmukphysmap(sys->pml4, pa, PteNX | PtePCD | PteRW, sz);
	iunlock(&vmaplock);
	DBG("vmap(%#p, %lu) => %#p\n", pa + o, size, va + o);
	return UINT2PTR(va + o);
}

void
vunmap(void *v, usize size)
{
	usize va;

	DBG("vunmap(%#p, %lu)\n", v, size);
	if(machp()->machno != 0)
		DBG("vunmap: machp()->machno != 0\n");
	/*
	 * See the comments above in vmap.
	 */
	va = PTR2UINT(v);
	if(va >= KZERO && va + size < KZERO + 1ull * MiB)
		return;
	/*
	 * Here will have to deal with releasing any
	 * resources used for the allocation (e.g. page table
	 * pages).
	 */
	DBG("vunmap(%#p, %lu)\n", v, size);
}

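/*
 * Walk pml4 for va down to the requested level (3 = PML4E .. 0 = PML1E).
 * *ret is set to the deepest entry examined; the return value is the
 * level at which the walk stopped (because the requested level was
 * reached or a PtePS leaf was found), or -1 if an entry along the way
 * is not present.
 */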
int
mmuwalk(const PTE *pml4, usize va, int level, const PTE **ret)
{
	Mpl pl;

	if(DBGFLG > 1)
		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
	if(pml4 == nil)
		panic("mmuwalk with nil pml4");
	pl = splhi();
	const PTE *p4e = &pml4[PML4X(va)];
	*ret = p4e;
	if((*p4e & PteP) == 0){
		splx(pl);
		return -1;
	}
	if(level == 3){
		splx(pl);
		return 3;
	}
	const PTE *pml3 = KADDR(PPN(*p4e));
	const PTE *p3e = &pml3[PML3X(va)];
	*ret = p3e;
	if((*p3e & PteP) == 0){
		splx(pl);
		return -1;
	}
	if(level == 2 || (*p3e & PtePS) == PtePS){
		splx(pl);
		return 2;
	}
	const PTE *pml2 = KADDR(PPN(*p3e));
	const PTE *p2e = &pml2[PML2X(va)];
	*ret = p2e;
	if((*p2e & PteP) == 0){
		splx(pl);
		return -1;
	}
	if(level == 1 || (*p2e & PtePS) == PtePS){
		splx(pl);
		return 1;
	}
	const PTE *pml1 = KADDR(PPN(*p2e));
	const PTE *p1e = &pml1[PML1X(va)];
	*ret = p1e;
	if(level == 0 && (*p1e & PteP) == PteP){
		splx(pl);
		return 0;
	}
	splx(pl);
	return -1;
}

usize
mmuphysaddr(const PTE *pml4, usize va)
{
	int l;
	const PTE *pte;
	u64 mask, pa;

	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or usize?
	 * cross: Since it is unknown whether a mapping
	 * for the virtual address exists in the given
	 * address space, usize is more appropriate.
	 */
	l = mmuwalk(pml4, va, 0, &pte);
	DBG("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;
	mask = PGLSZ(l) - 1;
	pa = (*pte & ~(PteNX | mask)) + (va & mask);
	DBG("physaddr: l %d pte %#P va %#p pa %#llx\n", l, *pte, va, pa);
	return pa;
}

Page mach0pml4;

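/*
 * Per-CPU MMU setup.  CPU0 keeps using the boot page table (sys->pml4);
 * the other CPUs copy the kernel part of it into a per-mach table
 * (apparently placed just past the mach stack, a kludge as noted below)
 * and switch to that.  All CPUs enable no-execute support via Efer.Nxe.
 */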
void
mmuinit(void)
{
	u8 *p;
	Page *page;
	u64 r;

	archmmu();
	DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.pml4, sys->npgsz);
	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;
		memmove(p + PTSZ / 2, &sys->pml4[PTSZ / (2 * sizeof(PTE))], PTSZ / 4 + PTSZ / 8);
		machp()->MMU.pml4 = &machp()->MMU.pml4kludge;
		machp()->MMU.pml4->va = PTR2UINT(p);
		machp()->MMU.pml4->pa = PADDR(p);
		machp()->MMU.pml4->daddr = 0;

		r = rdmsr(Efer);
		r |= Nxe;
		wrmsr(Efer, r);
		cr3put(machp()->MMU.pml4->pa);
		DBG("m %#p pml4 %#p\n", machp(), machp()->MMU.pml4);
		return;
	}

	page = &mach0pml4;
	page->va = PTR2UINT(sys->pml4);
	page->pa = PADDR(sys->pml4);
	machp()->MMU.pml4 = page;
	cr3put(cr3get());

	r = rdmsr(Efer);
	r |= Nxe;
	wrmsr(Efer, r);
	print("mmuinit: KZERO %#p end %#p\n", KZERO, end);
}

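/*
 * Give a new process its own root page table: allocate a page-table
 * page and copy the kernel mappings from the current CPU's pml4 into
 * its upper half, leaving the user half empty.
 */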
void
mmuprocinit(Proc *p)
{
	Page *pg = mmuptpalloc();

	memmove(UINT2PTR(pg->va + PTSZ / 2), UINT2PTR(machp()->MMU.pml4->va + PTSZ / 2), PTSZ / 4 + PTSZ / 8);
	p->MMU.root = pg;
}