mmu.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "amd64.h"
  15. /*
  16. * To do:
  17. * PteNX;
  18. * mmukmapsync grot for >1 processor;
  19. * replace vmap with newer version (no PDMAP);
  20. * mmuptcopy (PteSHARED trick?);
  21. * calculate and map up to TMFM (conf crap);
  22. */
/* Size of the kernel virtual window: mmuinit sets sys->vmend to sys->vmstart + TMFM. */
#define TMFM (64*MiB) /* kernel memory */
/* Clear the low (PGSZ-1) bits of x, i.e. round x down to a PGSZ boundary. */
#define PPN(x) ((x)&~(PGSZ-1))
  25. void
  26. mmuflushtlb(uint64_t u)
  27. {
  28. Proc *up = machp()->externup;
  29. m->tlbpurge++;
  30. if(machp()->pml4->daddr){
  31. memset(UINT2PTR(machp()->pml4->va), 0, machp()->pml4->daddr*sizeof(PTE));
  32. machp()->pml4->daddr = 0;
  33. }
  34. cr3put(machp()->pml4->pa);
  35. }
  36. void
  37. mmuflush(void)
  38. {
  39. Proc *up = machp()->externup;
  40. Mpl pl;
  41. pl = splhi();
  42. up->newtlb = 1;
  43. mmuswitch(up);
  44. splx(pl);
  45. }
  46. static void
  47. mmuptpfree(Proc* proc, int clear)
  48. {
  49. Proc *up = machp()->externup;
  50. int l;
  51. PTE *pte;
  52. Page **last, *page;
  53. for(l = 1; l < 4; l++){
  54. last = &proc->mmuptp[l];
  55. if(*last == nil)
  56. continue;
  57. for(page = *last; page != nil; page = page->next){
  58. //what is right here? 2 or 1?
  59. if(l <= 2 && clear)
  60. memset(UINT2PTR(page->va), 0, PTSZ);
  61. pte = UINT2PTR(page->prev->va);
  62. pte[page->daddr] = 0;
  63. last = &page->next;
  64. }
  65. *last = proc->mmuptp[0];
  66. proc->mmuptp[0] = proc->mmuptp[l];
  67. proc->mmuptp[l] = nil;
  68. }
  69. machp()->pml4->daddr = 0;
  70. }
  71. static void
  72. tabs(int n)
  73. {
  74. int i;
  75. for(i = 0; i < n; i++)
  76. print(" ");
  77. }
  78. void
  79. dumpptepg(int lvl, uintptr_t pa)
  80. {
  81. PTE *pte;
  82. int tab, i;
  83. tab = 4 - lvl;
  84. pte = UINT2PTR(KADDR(pa));
  85. for(i = 0; i < PTSZ/sizeof(PTE); i++)
  86. if(pte[i] & PteP){
  87. tabs(tab);
  88. print("l%d %#p[%#05x]: %#ullx\n", lvl, pa, i, pte[i]);
  89. /* skip kernel mappings */
  90. if((pte[i]&PteU) == 0){
  91. tabs(tab+1);
  92. print("...kern...\n");
  93. continue;
  94. }
  95. if(lvl > 2)
  96. dumpptepg(lvl-1, PPN(pte[i]));
  97. }
  98. }
  99. void
  100. dumpmmu(Proc *p)
  101. {
  102. Proc *up = machp()->externup;
  103. int i;
  104. Page *pg;
  105. print("proc %#p\n", p);
  106. for(i = 3; i > 0; i--){
  107. print("mmuptp[%d]:\n", i);
  108. for(pg = p->mmuptp[i]; pg != nil; pg = pg->next)
  109. print("\tpg %#p = va %#ullx pa %#ullx"
  110. " daddr %#ulx next %#p prev %#p\n",
  111. pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
  112. }
  113. print("pml4 %#ullx\n", machp()->pml4->pa);
  114. if(0)dumpptepg(4, machp()->pml4->pa);
  115. }
  116. void
  117. dumpmmuwalk(uint64_t addr)
  118. {
  119. Proc *up = machp()->externup;
  120. int l;
  121. PTE *pte, *pml4;
  122. pml4 = UINT2PTR(machp()->pml4->va);
  123. if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
  124. print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
  125. if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
  126. print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
  127. if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
  128. print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
  129. if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
  130. print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
  131. }
  132. static Page mmuptpfreelist;
  133. static Page*
  134. mmuptpalloc(void)
  135. {
  136. void* va;
  137. Page *page;
  138. /*
  139. * Do not really need a whole Page structure,
  140. * but it makes testing this out a lot easier.
  141. * Could keep a cache and free excess.
  142. * Have to maintain any fiction for pexit?
  143. */
  144. lock(&mmuptpfreelist);
  145. if((page = mmuptpfreelist.next) != nil){
  146. mmuptpfreelist.next = page->next;
  147. mmuptpfreelist.ref--;
  148. unlock(&mmuptpfreelist);
  149. if(page->ref++ != 0)
  150. panic("mmuptpalloc ref\n");
  151. page->prev = page->next = nil;
  152. memset(UINT2PTR(page->va), 0, PTSZ);
  153. if(page->pa == 0)
  154. panic("mmuptpalloc: free page with pa == 0");
  155. return page;
  156. }
  157. unlock(&mmuptpfreelist);
  158. if((page = malloc(sizeof(Page))) == nil){
  159. print("mmuptpalloc Page\n");
  160. return nil;
  161. }
  162. if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
  163. print("mmuptpalloc va\n");
  164. free(page);
  165. return nil;
  166. }
  167. page->va = PTR2UINT(va);
  168. page->pa = PADDR(va);
  169. page->ref = 1;
  170. if(page->pa == 0)
  171. panic("mmuptpalloc: no pa");
  172. return page;
  173. }
  174. void
  175. mmuswitch(Proc* proc)
  176. {
  177. Proc *up = machp()->externup;
  178. PTE *pte;
  179. Page *page;
  180. Mpl pl;
  181. pl = splhi();
  182. if(proc->newtlb){
  183. /*
  184. * NIX: We cannot clear our page tables if they are going to
  185. * be used in the AC
  186. */
  187. if(proc->ac == nil)
  188. mmuptpfree(proc, 1);
  189. proc->newtlb = 0;
  190. }
  191. if(machp()->pml4->daddr){
  192. memset(UINT2PTR(machp()->pml4->va), 0, machp()->pml4->daddr*sizeof(PTE));
  193. machp()->pml4->daddr = 0;
  194. }
  195. pte = UINT2PTR(machp()->pml4->va);
  196. for(page = proc->mmuptp[3]; page != nil; page = page->next){
  197. pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP;
  198. if(page->daddr >= machp()->pml4->daddr)
  199. machp()->pml4->daddr = page->daddr+1;
  200. page->prev = machp()->pml4;
  201. }
  202. tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack+KSTACK)));
  203. cr3put(machp()->pml4->pa);
  204. splx(pl);
  205. }
  206. void
  207. mmurelease(Proc* proc)
  208. {
  209. Proc *up = machp()->externup;
  210. Page *page, *next;
  211. mmuptpfree(proc, 0);
  212. for(page = proc->mmuptp[0]; page != nil; page = next){
  213. next = page->next;
  214. if(--page->ref)
  215. panic("mmurelease: page->ref %d\n", page->ref);
  216. lock(&mmuptpfreelist);
  217. page->next = mmuptpfreelist.next;
  218. mmuptpfreelist.next = page;
  219. mmuptpfreelist.ref++;
  220. page->prev = nil;
  221. unlock(&mmuptpfreelist);
  222. }
  223. if(proc->mmuptp[0] && pga.r.p)
  224. wakeup(&pga.r);
  225. proc->mmuptp[0] = nil;
  226. tssrsp0(m, STACKALIGN(m->stack+MACHSTKSZ));
  227. cr3put(machp()->pml4->pa);
  228. }
  229. static void
  230. checkpte(uintmem ppn, void *a)
  231. {
  232. Proc *up = machp()->externup;
  233. int l;
  234. PTE *pte, *pml4;
  235. uint64_t addr;
  236. char buf[240], *s;
  237. addr = PTR2UINT(a);
  238. pml4 = UINT2PTR(machp()->pml4->va);
  239. pte = 0;
  240. s = buf;
  241. *s = 0;
  242. if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
  243. goto Panic;
  244. s = seprint(buf, buf+sizeof buf,
  245. "check3: l%d pte %#p = %llux\n",
  246. l, pte, pte?*pte:~0);
  247. if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
  248. goto Panic;
  249. s = seprint(s, buf+sizeof buf,
  250. "check2: l%d pte %#p = %llux\n",
  251. l, pte, pte?*pte:~0);
  252. if(*pte&PtePS)
  253. return;
  254. if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
  255. goto Panic;
  256. seprint(s, buf+sizeof buf,
  257. "check1: l%d pte %#p = %llux\n",
  258. l, pte, pte?*pte:~0);
  259. return;
  260. Panic:
  261. seprint(s, buf+sizeof buf,
  262. "checkpte: l%d addr %#p ppn %#ullx kaddr %#p pte %#p = %llux",
  263. l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
  264. print("%s\n", buf);
  265. seprint(buf, buf+sizeof buf, "start %#ullx unused %#ullx"
  266. " unmap %#ullx end %#ullx\n",
  267. sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
  268. panic("%s", buf);
  269. }
  270. static void
  271. mmuptpcheck(Proc *proc)
  272. {
  273. Proc *up = machp()->externup;
  274. int lvl, npgs, i;
  275. Page *lp, *p, *pgs[16], *fp;
  276. uint idx[16];
  277. if(proc == nil)
  278. return;
  279. lp = machp()->pml4;
  280. for(lvl = 3; lvl >= 2; lvl--){
  281. npgs = 0;
  282. for(p = proc->mmuptp[lvl]; p != nil; p = p->next){
  283. for(fp = proc->mmuptp[0]; fp != nil; fp = fp->next)
  284. if(fp == p){
  285. dumpmmu(proc);
  286. panic("ptpcheck: using free page");
  287. }
  288. for(i = 0; i < npgs; i++){
  289. if(pgs[i] == p){
  290. dumpmmu(proc);
  291. panic("ptpcheck: dup page");
  292. }
  293. if(idx[i] == p->daddr){
  294. dumpmmu(proc);
  295. panic("ptcheck: dup daddr");
  296. }
  297. }
  298. if(npgs >= nelem(pgs))
  299. panic("ptpcheck: pgs is too small");
  300. idx[npgs] = p->daddr;
  301. pgs[npgs++] = p;
  302. if(lvl == 3 && p->prev != lp){
  303. dumpmmu(proc);
  304. panic("ptpcheck: wrong prev");
  305. }
  306. }
  307. }
  308. npgs = 0;
  309. for(fp = proc->mmuptp[0]; fp != nil; fp = fp->next){
  310. for(i = 0; i < npgs; i++)
  311. if(pgs[i] == fp)
  312. panic("ptpcheck: dup free page");
  313. pgs[npgs++] = fp;
  314. }
  315. }
  316. static uint
  317. pteflags(uint attr)
  318. {
  319. uint flags;
  320. flags = 0;
  321. if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED))
  322. panic("mmuput: wrong attr bits: %#ux\n", attr);
  323. if(attr&PTEVALID)
  324. flags |= PteP;
  325. if(attr&PTEWRITE)
  326. flags |= PteRW;
  327. if(attr&PTEUSER)
  328. flags |= PteU;
  329. if(attr&PTEUNCACHED)
  330. flags |= PtePCD;
  331. return flags;
  332. }
  333. /*
  334. * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
  335. * For the user, it can be either 2*MiB or 1*GiB pages.
  336. * For 2*MiB pages, we use three levels, not four.
  337. * For 1*GiB pages, we use two levels.
  338. */
  339. void
  340. mmuput(uintptr_t va, Page *pg, uint attr)
  341. {
  342. Proc *up = machp()->externup;
  343. int lvl, user, x, pgsz;
  344. PTE *pte;
  345. Page *page, *prev;
  346. Mpl pl;
  347. uintmem pa, ppn;
  348. char buf[80];
  349. ppn = 0;
  350. pa = pg->pa;
  351. if(pa == 0)
  352. panic("mmuput: zero pa");
  353. if(DBGFLG){
  354. snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#ux\n",
  355. machp()->machno, up, va, pa, attr);
  356. print("%s", buf);
  357. }
  358. assert(pg->pgszi >= 0);
  359. pgsz = machp()->pgsz[pg->pgszi];
  360. if(pa & (pgsz-1))
  361. panic("mmuput: pa offset non zero: %#ullx\n", pa);
  362. pa |= pteflags(attr);
  363. pl = splhi();
  364. if(DBGFLG)
  365. mmuptpcheck(up);
  366. user = (va < KZERO);
  367. x = PTLX(va, 3);
  368. pte = UINT2PTR(machp()->pml4->va);
  369. pte += x;
  370. prev = machp()->pml4;
  371. for(lvl = 3; lvl >= 0; lvl--){
  372. if(user){
  373. if(pgsz == 2*MiB && lvl == 1) /* use 2M */
  374. break;
  375. if(pgsz == 1ull*GiB && lvl == 2) /* use 1G */
  376. break;
  377. }
  378. for(page = up->mmuptp[lvl]; page != nil; page = page->next)
  379. if(page->prev == prev && page->daddr == x){
  380. if(*pte == 0){
  381. print("mmu: jmk and nemo had fun\n");
  382. *pte = PPN(page->pa)|PteU|PteRW|PteP;
  383. }
  384. break;
  385. }
  386. if(page == nil){
  387. if(up->mmuptp[0] == nil)
  388. page = mmuptpalloc();
  389. else {
  390. page = up->mmuptp[0];
  391. up->mmuptp[0] = page->next;
  392. }
  393. page->daddr = x;
  394. page->next = up->mmuptp[lvl];
  395. up->mmuptp[lvl] = page;
  396. page->prev = prev;
  397. *pte = PPN(page->pa)|PteU|PteRW|PteP;
  398. if(lvl == 3 && x >= machp()->pml4->daddr)
  399. machp()->pml4->daddr = x+1;
  400. }
  401. x = PTLX(va, lvl-1);
  402. ppn = PPN(*pte);
  403. if(ppn == 0)
  404. panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);
  405. pte = UINT2PTR(KADDR(ppn));
  406. pte += x;
  407. prev = page;
  408. }
  409. if(DBGFLG)
  410. checkpte(ppn, pte);
  411. *pte = pa|PteU;
  412. if(user)
  413. switch(pgsz){
  414. case 2*MiB:
  415. case 1*GiB:
  416. *pte |= PtePS;
  417. break;
  418. default:
  419. panic("mmuput: user pages must be 2M or 1G");
  420. }
  421. splx(pl);
  422. if(DBGFLG){
  423. snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llux\n",
  424. machp()->machno, up, pte, pte?*pte:~0);
  425. print("%s", buf);
  426. }
  427. invlpg(va); /* only if old entry valid? */
  428. }
#if 0
static Lock mmukmaplock;
#endif
/* Held by vmap() around the search for free space and the page-table update. */
static Lock vmaplock;
/*
 * Shorthands for PTLX at each level, 3 (PML4) down to 0 (PT),
 * going by the names used here.
 */
#define PML4X(v) PTLX((v), 3)
#define PDPX(v) PTLX((v), 2)
#define PDX(v) PTLX((v), 1)
#define PTX(v) PTLX((v), 0)
  437. int
  438. mmukmapsync(uint64_t va)
  439. {
  440. USED(va);
  441. return 0;
  442. }
  443. static PTE
  444. pdeget(uintptr_t va)
  445. {
  446. PTE *pdp;
  447. if(va < 0xffffffffc0000000ull)
  448. panic("pdeget(%#p)", va);
  449. pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096);
  450. return pdp[PDX(va)];
  451. }
  452. /*
  453. * Add kernel mappings for pa -> va for a section of size bytes.
  454. * Called only after the va range is known to be unoccupied.
  455. */
  456. static int
  457. pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
  458. {
  459. uintptr_t pae;
  460. PTE *pd, *pde, *pt, *pte;
  461. int pdx, pgsz;
  462. Page *pg;
  463. pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
  464. for(pae = pa + size; pa < pae; pa += pgsz){
  465. pdx = PDX(va);
  466. pde = &pd[pdx];
  467. /*
  468. * Check if it can be mapped using a big page,
  469. * i.e. is big enough and starts on a suitable boundary.
  470. * Assume processor can do it.
  471. */
  472. if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
  473. assert(*pde == 0);
  474. *pde = pa|attr|PtePS|PteP;
  475. pgsz = PGLSZ(1);
  476. }
  477. else{
  478. if(*pde == 0){
  479. pg = mmuptpalloc();
  480. assert(pg != nil && pg->pa != 0);
  481. *pde = pg->pa|PteRW|PteP;
  482. memset((PTE*)(PDMAP+pdx*4096), 0, 4096);
  483. }
  484. assert(*pde != 0);
  485. pt = (PTE*)(PDMAP+pdx*4096);
  486. pte = &pt[PTX(va)];
  487. assert(!(*pte & PteP));
  488. *pte = pa|attr|PteP;
  489. pgsz = PGLSZ(0);
  490. }
  491. va += pgsz;
  492. }
  493. return 0;
  494. }
  495. static int
  496. findhole(PTE* a, int n, int count)
  497. {
  498. int have, i;
  499. have = 0;
  500. for(i = 0; i < n; i++){
  501. if(a[i] == 0)
  502. have++;
  503. else
  504. have = 0;
  505. if(have >= count)
  506. return i+1 - have;
  507. }
  508. return -1;
  509. }
  510. /*
  511. * Look for free space in the vmap.
  512. */
  513. static uintptr_t
  514. vmapalloc(usize size)
  515. {
  516. int i, n, o;
  517. PTE *pd, *pt;
  518. int pdsz, ptsz;
  519. pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
  520. pd += PDX(VMAP);
  521. pdsz = VMAPSZ/PGLSZ(1);
  522. /*
  523. * Look directly in the PD entries if the size is
  524. * larger than the range mapped by a single entry.
  525. */
  526. if(size >= PGLSZ(1)){
  527. n = HOWMANY(size, PGLSZ(1));
  528. if((o = findhole(pd, pdsz, n)) != -1)
  529. return VMAP + o*PGLSZ(1);
  530. return 0;
  531. }
  532. /*
  533. * Size is smaller than that mapped by a single PD entry.
  534. * Look for an already mapped PT page that has room.
  535. */
  536. n = HOWMANY(size, PGLSZ(0));
  537. ptsz = PGLSZ(0)/sizeof(PTE);
  538. for(i = 0; i < pdsz; i++){
  539. if(!(pd[i] & PteP) || (pd[i] & PtePS))
  540. continue;
  541. pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
  542. if((o = findhole(pt, ptsz, n)) != -1)
  543. return VMAP + i*PGLSZ(1) + o*PGLSZ(0);
  544. }
  545. /*
  546. * Nothing suitable, start using a new PD entry.
  547. */
  548. if((o = findhole(pd, pdsz, 1)) != -1)
  549. return VMAP + o*PGLSZ(1);
  550. return 0;
  551. }
  552. /*
  553. * KSEG0 maps low memory.
  554. * KSEG2 maps almost all memory, but starting at an address determined
  555. * by the address space map (see asm.c).
  556. * Thus, almost everything in physical memory is already mapped, but
  557. * there are things that fall in the gap
  558. * (acpi tables, device memory-mapped registers, etc.)
  559. * for those things, we also want to disable caching.
  560. * vmap() is required to access them.
  561. */
  562. void*
  563. vmap(uintptr_t pa, usize size)
  564. {
  565. Proc *up = machp()->externup;
  566. uintptr_t va;
  567. usize o, sz;
  568. DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa));
  569. if(machp()->machno != 0)
  570. panic("vmap");
  571. /*
  572. * This is incomplete; the checks are not comprehensive
  573. * enough.
  574. * Sometimes the request is for an already-mapped piece
  575. * of low memory, in which case just return a good value
  576. * and hope that a corresponding vunmap of the address
  577. * will have the same address.
  578. * To do this properly will require keeping track of the
  579. * mappings; perhaps something like kmap, but kmap probably
  580. * can't be used early enough for some of the uses.
  581. */
  582. if(pa+size < 1ull*MiB)
  583. return KADDR(pa);
  584. if(pa < 1ull*MiB)
  585. return nil;
  586. /*
  587. * Might be asking for less than a page.
  588. * This should have a smaller granularity if
  589. * the page size is large.
  590. */
  591. o = pa & ((1<<PGSHFT)-1);
  592. pa -= o;
  593. sz = ROUNDUP(size+o, PGSZ);
  594. if(pa == 0){
  595. print("vmap(0, %lud) pc=%#p\n", size, getcallerpc(&pa));
  596. return nil;
  597. }
  598. ilock(&vmaplock);
  599. if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
  600. iunlock(&vmaplock);
  601. return nil;
  602. }
  603. iunlock(&vmaplock);
  604. DBG("vmap(%#p, %lud) => %#p\n", pa+o, size, va+o);
  605. return UINT2PTR(va + o);
  606. }
  607. void
  608. vunmap(void* v, usize size)
  609. {
  610. Proc *up = machp()->externup;
  611. uintptr_t va;
  612. DBG("vunmap(%#p, %lud)\n", v, size);
  613. if(machp()->machno != 0)
  614. panic("vunmap");
  615. /*
  616. * See the comments above in vmap.
  617. */
  618. va = PTR2UINT(v);
  619. if(va >= KZERO && va+size < KZERO+1ull*MiB)
  620. return;
  621. /*
  622. * Here will have to deal with releasing any
  623. * resources used for the allocation (e.g. page table
  624. * pages).
  625. */
  626. DBG("vunmap(%#p, %lud)\n", v, size);
  627. }
  628. int
  629. mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
  630. uint64_t (*alloc)(usize))
  631. {
  632. Proc *up = machp()->externup;
  633. int l;
  634. uintmem pa;
  635. PTE *pte;
  636. Mpl pl;
  637. pl = splhi();
  638. if(DBGFLG > 1)
  639. DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
  640. pte = &pml4[PTLX(va, 3)];
  641. for(l = 3; l >= 0; l--){
  642. if(l == level)
  643. break;
  644. if(!(*pte & PteP)){
  645. if(alloc == nil)
  646. break;
  647. pa = alloc(PTSZ);
  648. if(pa == ~0)
  649. return -1;
  650. memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
  651. *pte = pa|PteRW|PteP;
  652. }
  653. else if(*pte & PtePS)
  654. break;
  655. pte = UINT2PTR(KADDR(PPN(*pte)));
  656. pte += PTLX(va, l-1);
  657. }
  658. *ret = pte;
  659. splx(pl);
  660. return l;
  661. }
  662. uintmem
  663. mmuphysaddr(uintptr_t va)
  664. {
  665. Proc *up = machp()->externup;
  666. int l;
  667. PTE *pte;
  668. uintmem mask, pa;
  669. /*
  670. * Given a VA, find the PA.
  671. * This is probably not the right interface,
  672. * but will do as an experiment. Usual
  673. * question, should va be void* or uintptr?
  674. */
  675. l = mmuwalk(UINT2PTR(machp()->pml4->va), va, 0, &pte, nil);
  676. DBG("physaddr: va %#p l %d\n", va, l);
  677. if(l < 0)
  678. return ~0;
  679. mask = PGLSZ(l)-1;
  680. pa = (*pte & ~mask) + (va & mask);
  681. DBG("physaddr: l %d va %#p pa %#llux\n", l, va, pa);
  682. return pa;
  683. }
  684. Page mach0pml4;
  685. void
  686. mmuinit(void)
  687. {
  688. Proc *up = machp()->externup;
  689. uint8_t *p;
  690. Page *page;
  691. uint64_t o, pa, r, sz;
  692. archmmu();
  693. DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, m, machp()->pml4, m->npgsz);
  694. if(machp()->machno != 0){
  695. /* NIX: KLUDGE: Has to go when each mach is using
  696. * its own page table
  697. */
  698. p = UINT2PTR(m->stack);
  699. p += MACHSTKSZ;
  700. memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
  701. machp()->pml4 = &machp()->pml4kludge;
  702. machp()->pml4->va = PTR2UINT(p);
  703. machp()->pml4->pa = PADDR(p);
  704. machp()->pml4->daddr = mach0pml4.daddr; /* # of user mappings in pml4 */
  705. r = rdmsr(Efer);
  706. r |= Nxe;
  707. wrmsr(Efer, r);
  708. cr3put(machp()->pml4->pa);
  709. DBG("m %#p pml4 %#p\n", m, machp()->pml4);
  710. return;
  711. }
  712. page = &mach0pml4;
  713. page->pa = cr3get();
  714. page->va = PTR2UINT(KADDR(page->pa));
  715. machp()->pml4 = page;
  716. r = rdmsr(Efer);
  717. r |= Nxe;
  718. wrmsr(Efer, r);
  719. /*
  720. * Set up the various kernel memory allocator limits:
  721. * pmstart/pmend bound the unused physical memory;
  722. * vmstart/vmend bound the total possible virtual memory
  723. * used by the kernel;
  724. * vmunused is the highest virtual address currently mapped
  725. * and used by the kernel;
  726. * vmunmapped is the highest virtual address currently
  727. * mapped by the kernel.
  728. * Vmunused can be bumped up to vmunmapped before more
  729. * physical memory needs to be allocated and mapped.
  730. *
  731. * This is set up here so meminit can map appropriately.
  732. */
  733. o = sys->pmstart;
  734. sz = ROUNDUP(o, 4*MiB) - o;
  735. pa = asmalloc(0, sz, 1, 0);
  736. if(pa != o)
  737. panic("mmuinit: pa %#llux memstart %#llux\n", pa, o);
  738. sys->pmstart += sz;
  739. sys->vmstart = KSEG0;
  740. sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
  741. sys->vmunmapped = sys->vmstart + o + sz;
  742. sys->vmend = sys->vmstart + TMFM;
  743. print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
  744. sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
  745. /*
  746. * Set up the map for PD entry access by inserting
  747. * the relevant PDP entry into the PD. It's equivalent
  748. * to PADDR(sys->pd)|PteRW|PteP.
  749. *
  750. */
  751. sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA);
  752. print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]);
  753. assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP));
  754. dumpmmuwalk(KZERO);
  755. mmuphysaddr(PTR2UINT(end));
  756. }