/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"

#include "amd64.h"

/*
 * To do:
 *	PteNX;
 *	mmukmapsync grot for >1 processor;
 *	replace vmap with newer version (no PDMAP);
 *	mmuptcopy (PteSHARED trick?);
 *	calculate and map up to TMFM (conf crap);
 */

#define TMFM	(64*MiB)	/* kernel memory */

#define PPN(x)	((x)&~(PGSZ-1))
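
/*
 * Flush this CPU's TLB: clear the user entries recorded in the
 * per-mach PML4 (daddr counts how many low slots are in use) and
 * reload CR3. The argument is unused; it appears to be kept only
 * for interface compatibility.
 */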
void
mmuflushtlb(uint64_t u)
{
	machp()->tlbpurge++;
	if(machp()->MMU.pml4->daddr){
		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
		machp()->MMU.pml4->daddr = 0;
	}
	cr3put(machp()->MMU.pml4->pa);
}
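
/*
 * Flush the TLB for the current process: mark it as needing fresh
 * page tables and let mmuswitch() rebuild them at splhi.
 */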
void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pl);
}
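
/*
 * Move a process's page-table pages for levels 1..3 onto its level-0
 * free list, clearing the parent entries that pointed at them. When
 * 'clear' is set, the contents of lower-level pages are zeroed as well
 * so they can be reused directly.
 */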
static void
mmuptpfree(Proc* proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	for(l = 1; l < 4; l++){
		last = &proc->MMU.mmuptp[l];
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next){
			//what is right here? 2 or 1?
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}

	machp()->MMU.pml4->daddr = 0;
}

static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

void
dumpptepg(int lvl, uintptr_t pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = UINT2PTR(KADDR(pa));
	for(i = 0; i < PTSZ/sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#ullx\n", lvl, pa, i, pte[i]);

			/* skip kernel mappings */
			if((pte[i]&PteU) == 0){
				tabs(tab+1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl-1, PPN(pte[i]));
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p\n", p);
	for(i = 3; i > 0; i--){
		print("mmuptp[%d]:\n", i);
		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
			print("\tpg %#p = va %#ullx pa %#ullx"
				" daddr %#ulx next %#p prev %#p\n",
				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
	}
	print("pml4 %#ullx\n", machp()->MMU.pml4->pa);
	if(0)dumpptepg(4, machp()->MMU.pml4->pa);
}

void
dumpmmuwalk(uint64_t addr)
{
	int l;
	PTE *pte, *pml4;

	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
}

static Page mmuptpfreelist;

static Page*
mmuptpalloc(void)
{
	void* va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	if((page = mmuptpfreelist.next) != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
		unlock(&mmuptpfreelist.l);

		if(page->ref++ != 0)
			panic("mmuptpalloc ref\n");
		page->prev = page->next = nil;
		memset(UINT2PTR(page->va), 0, PTSZ);

		if(page->pa == 0)
			panic("mmuptpalloc: free page with pa == 0");
		return page;
	}
	unlock(&mmuptpfreelist.l);

	if((page = malloc(sizeof(Page))) == nil){
		print("mmuptpalloc Page\n");
		return nil;
	}
	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
		print("mmuptpalloc va\n");
		free(page);
		return nil;
	}

	page->va = PTR2UINT(va);
	page->pa = PADDR(va);
	page->ref = 1;

	if(page->pa == 0)
		panic("mmuptpalloc: no pa");
	return page;
}
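
/*
 * Install proc's user page tables on this CPU: install the process's
 * level-3 pages into the per-mach PML4, point the TSS at the process
 * kernel stack, and reload CR3. If newtlb is set, the old tables are
 * first released via mmuptpfree() (unless the process is running in
 * an AC).
 */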
void
mmuswitch(Proc* proc)
{
	PTE *pte;
	Page *page;
	Mpl pl;

	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpfree(proc, 1);
		proc->newtlb = 0;
	}

	if(machp()->MMU.pml4->daddr){
		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
		machp()->MMU.pml4->daddr = 0;
	}

	pte = UINT2PTR(machp()->MMU.pml4->va);
	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
		pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP;
		if(page->daddr >= machp()->MMU.pml4->daddr)
			machp()->MMU.pml4->daddr = page->daddr+1;
		page->prev = machp()->MMU.pml4;
	}

	tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack+KSTACK)));
	cr3put(machp()->MMU.pml4->pa);
	splx(pl);
}
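
/*
 * Called when a process exits: return all of its page-table pages to
 * the global mmuptpfreelist, point the TSS back at the mach stack and
 * reload CR3 with the bare per-mach PML4.
 */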
void
mmurelease(Proc* proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);

	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist.l);
	}
	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
		wakeup(&pga.rend);
	proc->MMU.mmuptp[0] = nil;

	tssrsp0(machp(), STACKALIGN(machp()->stack+MACHSTKSZ));
	cr3put(machp()->MMU.pml4->pa);
}

static void
checkpte(uintmem ppn, void *a)
{
	int l;
	PTE *pte, *pml4;
	uint64_t addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(buf, buf+sizeof buf,
		"check3: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(s, buf+sizeof buf,
		"check2: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	if(*pte&PtePS)
		return;
	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	seprint(s, buf+sizeof buf,
		"check1: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	return;

Panic:
	seprint(s, buf+sizeof buf,
		"checkpte: l%d addr %#p ppn %#ullx kaddr %#p pte %#p = %llux",
		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
	print("%s\n", buf);
	seprint(buf, buf+sizeof buf, "start %#ullx unused %#ullx"
		" unmap %#ullx end %#ullx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}

static void
mmuptpcheck(Proc *proc)
{
	int lvl, npgs, i;
	Page *lp, *p, *pgs[16], *fp;
	uint idx[16];

	if(proc == nil)
		return;
	lp = machp()->MMU.pml4;
	for(lvl = 3; lvl >= 2; lvl--){
		npgs = 0;
		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
				if(fp == p){
					dumpmmu(proc);
					panic("ptpcheck: using free page");
				}
			for(i = 0; i < npgs; i++){
				if(pgs[i] == p){
					dumpmmu(proc);
					panic("ptpcheck: dup page");
				}
				if(idx[i] == p->daddr){
					dumpmmu(proc);
					panic("ptcheck: dup daddr");
				}
			}
			if(npgs >= nelem(pgs))
				panic("ptpcheck: pgs is too small");
			idx[npgs] = p->daddr;
			pgs[npgs++] = p;
			if(lvl == 3 && p->prev != lp){
				dumpmmu(proc);
				panic("ptpcheck: wrong prev");
			}
		}
	}
	npgs = 0;
	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
		for(i = 0; i < npgs; i++)
			if(pgs[i] == fp)
				panic("ptpcheck: dup free page");
		pgs[npgs++] = fp;
	}
}
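
/*
 * Translate portable PTE attribute bits (PTEVALID, PTEWRITE, PTEUSER,
 * PTEUNCACHED, PTENOEXEC) into amd64 hardware bits. For example,
 * pteflags(PTEVALID|PTEWRITE) yields PteP|PteRW.
 */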
static uintmem
pteflags(uint attr)
{
	uintmem flags;

	flags = 0;
	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
		panic("mmuput: wrong attr bits: %#ux\n", attr);
	if(attr&PTEVALID)
		flags |= PteP;
	if(attr&PTEWRITE)
		flags |= PteRW;
	if(attr&PTEUSER)
		flags |= PteU;
	if(attr&PTEUNCACHED)
		flags |= PtePCD;
	if(attr&PTENOEXEC)
		flags |= PteNX;

	return flags;
}

/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
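/*
 * Sketch of the walk done below (illustrative, not normative): for a
 * 2*MiB user page at va, mmuput() starts at PML4 slot PTLX(va, 3),
 * descends through level 2 and stops at level 1, where the leaf PTE
 * gets PtePS set; intermediate table pages come from the process's
 * mmuptp[0] free list or are freshly allocated by mmuptpalloc().
 */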
void
mmuput(uintptr_t va, Page *pg, uint attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	uintmem pa, ppn;
	char buf[80];

	ppn = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#ux\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	assert(pg->pgszi >= 0);
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz-1))
		panic("mmuput: pa offset non zero: %#ullx\n", pa);
	pa |= pteflags(attr);

	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 3);

	pte = UINT2PTR(machp()->MMU.pml4->va);
	pte += x;
	prev = machp()->MMU.pml4;

	for(lvl = 3; lvl >= 0; lvl--){
		if(user){
			if(pgsz == 2*MiB && lvl == 1)	/* use 2M */
				break;
			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x){
				if(*pte == 0){
					print("mmu: jmk and nemo had fun\n");
					*pte = PPN(page->pa)|PteU|PteRW|PteP;
				}
				break;
			}

		if(page == nil){
			if(up->MMU.mmuptp[0] == nil)
				page = mmuptpalloc();
			else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = PPN(page->pa)|PteU|PteRW|PteP;
			if(lvl == 3 && x >= machp()->MMU.pml4->daddr)
				machp()->MMU.pml4->daddr = x+1;
		}
		x = PTLX(va, lvl-1);

		ppn = PPN(*pte);
		if(ppn == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);

		pte = UINT2PTR(KADDR(ppn));
		pte += x;
		prev = page;
	}

	if(DBGFLG)
		checkpte(ppn, pte);
	*pte = pa|PteU;

	if(user)
		switch(pgsz){
		case 2*MiB:
		case 1*GiB:
			*pte |= PtePS;
			break;
		default:
			panic("mmuput: user pages must be 2M or 1G");
		}
	splx(pl);

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llux\n",
			machp()->machno, up, pte, pte?*pte:~0);
		print("%s", buf);
	}

	invlpg(va);	/* only if old entry valid? */
}

#if 0
static Lock mmukmaplock;
#endif
static Lock vmaplock;

#define PML4X(v)	PTLX((v), 3)
#define PDPX(v)		PTLX((v), 2)
#define PDX(v)		PTLX((v), 1)
#define PTX(v)		PTLX((v), 0)

int
mmukmapsync(uint64_t va)
{
	USED(va);

	return 0;
}

static PTE
pdeget(uintptr_t va)
{
	PTE *pdp;

	if(va < 0xffffffffc0000000ull)
		panic("pdeget(%#p)", va);

	pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096);

	return pdp[PDX(va)];
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 * Called only after the va range is known to be unoccupied.
 */
static int
pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
{
	uintptr_t pae;
	PTE *pd, *pde, *pt, *pte;
	int pdx, pgsz;
	Page *pg;

	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);

	for(pae = pa + size; pa < pae; pa += pgsz){
		pdx = PDX(va);
		pde = &pd[pdx];

		/*
		 * Check if it can be mapped using a big page,
		 * i.e. is big enough and starts on a suitable boundary.
		 * Assume processor can do it.
		 */
		if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
			assert(*pde == 0);
			*pde = pa|attr|PtePS|PteP;
			pgsz = PGLSZ(1);
		}
		else{
			if(*pde == 0){
				pg = mmuptpalloc();
				assert(pg != nil && pg->pa != 0);
				*pde = pg->pa|PteRW|PteP;
				memset((PTE*)(PDMAP+pdx*4096), 0, 4096);
			}
			assert(*pde != 0);

			pt = (PTE*)(PDMAP+pdx*4096);
			pte = &pt[PTX(va)];
			assert(!(*pte & PteP));

			*pte = pa|attr|PteP;
			pgsz = PGLSZ(0);
		}
		va += pgsz;
	}

	return 0;
}
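
/*
 * Scan n PTEs for a run of 'count' consecutive empty entries;
 * return the index of the first entry in the run, or -1 if none.
 */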
static int
findhole(PTE* a, int n, int count)
{
	int have, i;

	have = 0;
	for(i = 0; i < n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}

	return -1;
}

/*
 * Look for free space in the vmap.
 */
static uintptr_t
vmapalloc(usize size)
{
	int i, n, o;
	PTE *pd, *pt;
	int pdsz, ptsz;

	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
	pd += PDX(VMAP);
	pdsz = VMAPSZ/PGLSZ(1);

	/*
	 * Look directly in the PD entries if the size is
	 * larger than the range mapped by a single entry.
	 */
	if(size >= PGLSZ(1)){
		n = HOWMANY(size, PGLSZ(1));
		if((o = findhole(pd, pdsz, n)) != -1)
			return VMAP + o*PGLSZ(1);
		return 0;
	}

	/*
	 * Size is smaller than that mapped by a single PD entry.
	 * Look for an already mapped PT page that has room.
	 */
	n = HOWMANY(size, PGLSZ(0));
	ptsz = PGLSZ(0)/sizeof(PTE);

	for(i = 0; i < pdsz; i++){
		if(!(pd[i] & PteP) || (pd[i] & PtePS))
			continue;

		pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
		if((o = findhole(pt, ptsz, n)) != -1)
			return VMAP + i*PGLSZ(1) + o*PGLSZ(0);
	}

	/*
	 * Nothing suitable, start using a new PD entry.
	 */
	if((o = findhole(pd, pdsz, 1)) != -1)
		return VMAP + o*PGLSZ(1);

	return 0;
}

/*
 * KSEG0 maps low memory.
 * KSEG2 maps almost all memory, but starting at an address determined
 * by the address space map (see asm.c).
 * Thus, almost everything in physical memory is already mapped, but
 * there are things that fall in the gap
 * (acpi tables, device memory-mapped registers, etc.)
 * for those things, we also want to disable caching.
 * vmap() is required to access them.
 */
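/*
 * Typical use (illustrative only; identifiers here are hypothetical):
 * a driver maps its registers once and unmaps them when done, e.g.
 *
 *	regs = vmap(pa, len);
 *	if(regs == nil)
 *		return -1;	(error handling up to the caller)
 *	...
 *	vunmap(regs, len);
 *
 * The mapping is made uncached and writable (PtePCD|PteRW).
 */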
void*
vmap(uintptr_t pa, usize size)
{
	uintptr_t va;
	usize o, sz;

	DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa));

	if(machp()->machno != 0)
		panic("vmap");

	/*
	 * This is incomplete; the checks are not comprehensive
	 * enough.
	 * Sometimes the request is for an already-mapped piece
	 * of low memory, in which case just return a good value
	 * and hope that a corresponding vunmap of the address
	 * will have the same address.
	 * To do this properly will require keeping track of the
	 * mappings; perhaps something like kmap, but kmap probably
	 * can't be used early enough for some of the uses.
	 */
	if(pa+size < 1ull*MiB)
		return KADDR(pa);
	if(pa < 1ull*MiB)
		return nil;

	/*
	 * Might be asking for less than a page.
	 * This should have a smaller granularity if
	 * the page size is large.
	 */
	o = pa & ((1<<PGSHFT)-1);
	pa -= o;
	sz = ROUNDUP(size+o, PGSZ);

	if(pa == 0){
		print("vmap(0, %lud) pc=%#p\n", size, getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
		iunlock(&vmaplock);
		return nil;
	}
	iunlock(&vmaplock);

	DBG("vmap(%#p, %lud) => %#p\n", pa+o, size, va+o);

	return UINT2PTR(va + o);
}

void
vunmap(void* v, usize size)
{
	uintptr_t va;

	DBG("vunmap(%#p, %lud)\n", v, size);

	if(machp()->machno != 0)
		panic("vunmap");

	/*
	 * See the comments above in vmap.
	 */
	va = PTR2UINT(v);
	if(va >= KZERO && va+size < KZERO+1ull*MiB)
		return;

	/*
	 * Here will have to deal with releasing any
	 * resources used for the allocation (e.g. page table
	 * pages).
	 */
	DBG("vunmap(%#p, %lud)\n", v, size);
}
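
/*
 * Walk the page-table tree rooted at pml4 down towards 'level'
 * (3 = PML4, 0 = PT). Return the level actually reached and set
 * *ret to the PTE found there; the walk stops early at a missing
 * entry (unless 'alloc' is supplied to create intermediate tables)
 * or at a large-page (PtePS) entry.
 */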
int
mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
	uint64_t (*alloc)(usize))
{
	int l;
	uintmem pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1)
		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
	pte = &pml4[PTLX(va, 3)];
	for(l = 3; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0)
				return -1;
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa|PteRW|PteP;
		}
		else if(*pte & PtePS)
			break;
		pte = UINT2PTR(KADDR(PPN(*pte)));
		pte += PTLX(va, l-1);
	}
	*ret = pte;
	splx(pl);

	return l;
}

uintmem
mmuphysaddr(uintptr_t va)
{
	int l;
	PTE *pte;
	uintmem mask, pa;

	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or uintptr?
	 */
	l = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &pte, nil);
	DBG("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;

	mask = PGLSZ(l)-1;
	pa = (*pte & ~mask) + (va & mask);
	DBG("physaddr: l %d va %#p pa %#llux\n", l, va, pa);

	return pa;
}

Page mach0pml4;
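
/*
 * Per-CPU MMU initialisation. CPU0 adopts the PML4 built by the
 * bootstrap code and sets up the kernel memory-allocator limits;
 * the other CPUs copy CPU0's PML4 to a per-CPU copy above their
 * mach stack (a kludge until each mach has its own page table).
 * All CPUs enable the no-execute (Nxe) bit in the Efer MSR.
 */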
void
mmuinit(void)
{
	uint8_t *p;
	Page *page;
	uint64_t o, pa, r, sz;

	archmmu();
	DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.pml4, sys->npgsz);

	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;

		memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
		machp()->MMU.pml4 = &machp()->MMU.pml4kludge;
		machp()->MMU.pml4->va = PTR2UINT(p);
		machp()->MMU.pml4->pa = PADDR(p);
		machp()->MMU.pml4->daddr = mach0pml4.daddr;	/* # of user mappings in pml4 */

		r = rdmsr(Efer);
		r |= Nxe;
		wrmsr(Efer, r);
		cr3put(machp()->MMU.pml4->pa);
		DBG("m %#p pml4 %#p\n", machp(), machp()->MMU.pml4);
		return;
	}

	page = &mach0pml4;
	page->pa = cr3get();
	page->va = PTR2UINT(KADDR(page->pa));

	machp()->MMU.pml4 = page;

	r = rdmsr(Efer);
	r |= Nxe;
	wrmsr(Efer, r);

	/*
	 * Set up the various kernel memory allocator limits:
	 * pmstart/pmend bound the unused physical memory;
	 * vmstart/vmend bound the total possible virtual memory
	 * used by the kernel;
	 * vmunused is the highest virtual address currently mapped
	 * and used by the kernel;
	 * vmunmapped is the highest virtual address currently
	 * mapped by the kernel.
	 * Vmunused can be bumped up to vmunmapped before more
	 * physical memory needs to be allocated and mapped.
	 *
	 * This is set up here so meminit can map appropriately.
	 */
	o = sys->pmstart;
	sz = ROUNDUP(o, 4*MiB) - o;
	pa = asmalloc(0, sz, 1, 0);
	if(pa != o)
		panic("mmuinit: pa %#llux memstart %#llux\n", pa, o);
	sys->pmstart += sz;

	sys->vmstart = KSEG0;
	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
	sys->vmunmapped = sys->vmstart + o + sz;
	sys->vmend = sys->vmstart + TMFM;

	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);

	/*
	 * Set up the map for PD entry access by inserting
	 * the relevant PDP entry into the PD. It's equivalent
	 * to PADDR(sys->pd)|PteRW|PteP.
	 */
	sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA);
	print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]);
	assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP));

	dumpmmuwalk(KZERO);

	mmuphysaddr(PTR2UINT(end));
}