mmu.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "encoding.h"
#include "mmu.h"

/*
 * To do:
 *	PteNX;
 *	mmukmapsync grot for >1 processor;
 *	replace vmap with newer version (no PDMAP);
 *	mmuptcopy (PteSHARED trick?);
 *	calculate and map up to TMFM (conf crap);
 */
#define TMFM		(64*MiB)		/* kernel memory */

#define PPN(x)		((x)&~(PGSZ-1))

#if 0
/* Print the page table structures to the console */
void print_page_table(void) {
	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
}
#endif

void flush_tlb(void)
{
	asm volatile("sfence.vm");
}
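
/*
 * Helpers for building RISC-V page-table entries:
 * pte_ppn extracts the physical page number from an entry,
 * ptd_create builds a non-leaf (pointer) entry with only the
 * valid bit set, and pte_create builds a leaf entry that is
 * always readable, plus write/execute taken from prot and the
 * user bit when requested.
 */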
size_t pte_ppn(uint64_t pte)
{
	return pte >> PTE_PPN_SHIFT;
}

uint64_t ptd_create(uintptr_t ppn)
{
	return (ppn << PTE_PPN_SHIFT) | PTE_V;
}

uint64_t pte_create(uintptr_t ppn, int prot, int user)
{
	uint64_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;

	if (prot & PTE_W)
		pte |= PTE_W;
	if (prot & PTE_X)
		pte |= PTE_X;
	if (user)
		pte |= PTE_U;
	return pte;
}
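
/*
 * Install a new root page table: convert the root's physical
 * address to a page number and write it to the sptbr CSR.
 */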
void
rootput(uintptr_t root)
{
	uintptr_t ptbr = root >> RISCV_PGSHIFT;

	write_csr(sptbr, ptbr);
}
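
/*
 * Zero the user entries of this machine's root table (daddr
 * counts how many are in use) and reinstall the root.
 */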
void
mmuflushtlb(uint64_t u)
{
	machp()->tlbpurge++;
	if(machp()->MMU.pml4->daddr){
		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
		machp()->MMU.pml4->daddr = 0;
	}
	rootput((uintptr_t)machp()->MMU.pml4->pa);
}
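
/*
 * Flush the current process's mappings: mark newtlb and let
 * mmuswitch rebuild them at splhi.
 */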
void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pl);
}
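
/*
 * Move proc's page-table pages from levels 1-3 onto its
 * level-0 free list, clearing the parent entries that pointed
 * at them; if clear is set the lower-level pages themselves
 * are zeroed for reuse.
 */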
static void
mmuptpfree(Proc* proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	for(l = 1; l < 4; l++){
		last = &proc->MMU.mmuptp[l];
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next){
			//what is right here? 2 or 1?
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}

	machp()->MMU.pml4->daddr = 0;
}

static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

void
dumpptepg(int lvl, uintptr_t pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = UINT2PTR(KADDR(pa));
	for(i = 0; i < PTSZ/sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);

			/* skip kernel mappings */
			if((pte[i]&PteU) == 0){
				tabs(tab+1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl-1, PPN(pte[i]));
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p\n", p);
	for(i = 3; i > 0; i--){
		print("mmuptp[%d]:\n", i);
		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
			print("\tpg %#p = va %#llx pa %#llx"
				" daddr %#lx next %#p prev %#p\n",
				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
	}
	print("pml4 %#llx\n", machp()->MMU.pml4->pa);
	if(0)dumpptepg(4, machp()->MMU.pml4->pa);
}

void
dumpmmuwalk(uint64_t addr)
{
	int l;
	PTE *pte, *pml4;

	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
	if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
}

static Page mmuptpfreelist;
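
/*
 * Allocate a page to hold a page table, preferring a page from
 * mmuptpfreelist; pages taken from the free list are rezeroed.
 */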
static Page*
mmuptpalloc(void)
{
	void* va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	if((page = mmuptpfreelist.next) != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
		unlock(&mmuptpfreelist.l);

		if(page->ref++ != 0)
			panic("mmuptpalloc ref\n");
		page->prev = page->next = nil;
		memset(UINT2PTR(page->va), 0, PTSZ);

		if(page->pa == 0)
			panic("mmuptpalloc: free page with pa == 0");
		return page;
	}
	unlock(&mmuptpfreelist.l);

	if((page = malloc(sizeof(Page))) == nil){
		print("mmuptpalloc Page\n");
		return nil;
	}
	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
		print("mmuptpalloc va\n");
		free(page);
		return nil;
	}

	page->va = PTR2UINT(va);
	page->pa = PADDR(va);
	page->ref = 1;

	if(page->pa == 0)
		panic("mmuptpalloc: no pa");
	return page;
}
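
/*
 * Make proc's address space current: install its top-level
 * page-table pages into this machine's root table, track how
 * many user slots are in use in daddr, and load the root.
 */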
void
mmuswitch(Proc* proc)
{
	PTE *pte;
	Page *page;
	Mpl pl;

	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpfree(proc, 1);
		proc->newtlb = 0;
	}

	if(machp()->MMU.pml4->daddr){
		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
		machp()->MMU.pml4->daddr = 0;
	}

	pte = UINT2PTR(machp()->MMU.pml4->va);
	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
		pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP;
		if(page->daddr >= machp()->MMU.pml4->daddr)
			machp()->MMU.pml4->daddr = page->daddr+1;
		page->prev = machp()->MMU.pml4;
	}

	//tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack+KSTACK)));
	rootput((uintptr_t)machp()->MMU.pml4->pa);
	splx(pl);
}
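
/*
 * Release proc's page-table pages back to mmuptpfreelist when
 * the process goes away.  The x86 tssrsp0 call has not been
 * ported, hence the panic near the end.
 */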
void
mmurelease(Proc* proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);

	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist.l);
	}
	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
		wakeup(&pga.rend);
	proc->MMU.mmuptp[0] = nil;

	panic("tssrsp0");
	//tssrsp0(machp(), STACKALIGN(machp()->stack+MACHSTKSZ));
	rootput(machp()->MMU.pml4->pa);
}
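
/*
 * Debug check: walk the tables for address a and panic with a
 * trace of each level if the expected mapping is not present.
 */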
static void
checkpte(uintmem ppn, void *a)
{
	int l;
	PTE *pte, *pml4;
	uint64_t addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(buf, buf+sizeof buf,
		"check3: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(s, buf+sizeof buf,
		"check2: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	if(*pte&PtePS)
		return;
	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	seprint(s, buf+sizeof buf,
		"check1: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	return;

Panic:
	seprint(s, buf+sizeof buf,
		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
	print("%s\n", buf);
	seprint(buf, buf+sizeof buf, "start %#llx unused %#llx"
		" unmap %#llx end %#llx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}
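
/*
 * Sanity-check proc's page-table page lists: no page may be
 * both in use and on the free list, no duplicate pages or
 * daddr slots within a level, and level-3 pages must point
 * back at the machine's root.
 */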
static void
mmuptpcheck(Proc *proc)
{
	int lvl, npgs, i;
	Page *lp, *p, *pgs[16], *fp;
	uint idx[16];

	if(proc == nil)
		return;
	lp = machp()->MMU.pml4;
	for(lvl = 3; lvl >= 2; lvl--){
		npgs = 0;
		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
				if(fp == p){
					dumpmmu(proc);
					panic("ptpcheck: using free page");
				}
			for(i = 0; i < npgs; i++){
				if(pgs[i] == p){
					dumpmmu(proc);
					panic("ptpcheck: dup page");
				}
				if(idx[i] == p->daddr){
					dumpmmu(proc);
					panic("ptcheck: dup daddr");
				}
			}
			if(npgs >= nelem(pgs))
				panic("ptpcheck: pgs is too small");
			idx[npgs] = p->daddr;
			pgs[npgs++] = p;
			if(lvl == 3 && p->prev != lp){
				dumpmmu(proc);
				panic("ptpcheck: wrong prev");
			}
		}
	}
	npgs = 0;
	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
		for(i = 0; i < npgs; i++)
			if(pgs[i] == fp)
				panic("ptpcheck: dup free page");
		pgs[npgs++] = fp;
	}
}
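
/*
 * Translate portable PTE* attribute bits into machine PTE
 * flags; unexpected bits are a panic.
 */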
static uintmem
pteflags(uint attr)
{
	uintmem flags;

	flags = 0;
	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
		panic("mmuput: wrong attr bits: %#x\n", attr);
	if(attr&PTEVALID)
		flags |= PteP;
	if(attr&PTEWRITE)
		flags |= PteRW;
	if(attr&PTEUSER)
		flags |= PteU;
	if(attr&PTEUNCACHED)
		flags |= PtePCD;
	if(attr&PTENOEXEC)
		flags |= PteNX;
	return flags;
}

void
invlpg(uintptr_t _)
{
	panic("invlpage");
}

/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
void
mmuput(uintptr_t va, Page *pg, uint attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	uintmem pa, ppn;
	char buf[80];

	ppn = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	assert(pg->pgszi >= 0);
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz-1))
		panic("mmuput: pa offset non zero: %#llx\n", pa);
	pa |= pteflags(attr);

	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 3);

	pte = UINT2PTR(machp()->MMU.pml4->va);
	pte += x;
	prev = machp()->MMU.pml4;

	for(lvl = 3; lvl >= 0; lvl--){
		if(user){
			if(pgsz == 2*MiB && lvl == 1)	/* use 2M */
				break;
			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x){
				if(*pte == 0){
					print("mmu: jmk and nemo had fun\n");
					*pte = PPN(page->pa)|PteU|PteRW|PteP;
				}
				break;
			}

		if(page == nil){
			if(up->MMU.mmuptp[0] == nil)
				page = mmuptpalloc();
			else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = PPN(page->pa)|PteU|PteRW|PteP;
			if(lvl == 3 && x >= machp()->MMU.pml4->daddr)
				machp()->MMU.pml4->daddr = x+1;
		}
		x = PTLX(va, lvl-1);

		ppn = PPN(*pte);
		if(ppn == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);

		pte = UINT2PTR(KADDR(ppn));
		pte += x;
		prev = page;
	}

	if(DBGFLG)
		checkpte(ppn, pte);
	*pte = pa|PteU;

	if(user)
		switch(pgsz){
		case 2*MiB:
		case 1*GiB:
			*pte |= PtePS;
			break;
		default:
			panic("mmuput: user pages must be 2M or 1G");
		}
	splx(pl);

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
			machp()->machno, up, pte, pte?*pte:~0);
		print("%s", buf);
	}

	invlpg(va);			/* only if old entry valid? */
}

#if 0
static Lock mmukmaplock;
#endif
static Lock vmaplock;

#define PML4X(v)	PTLX((v), 3)
#define PDPX(v)		PTLX((v), 2)
#define PDX(v)		PTLX((v), 1)
#define PTX(v)		PTLX((v), 0)

int
mmukmapsync(uint64_t va)
{
	USED(va);

	return 0;
}

#if 0
static PTE
pdeget(uintptr_t va)
{
	PTE *pdp;

	if(va < 0xffffffffc0000000ull)
		panic("pdeget(%#p)", va);

	pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096);

	return pdp[PDX(va)];
}
#endif

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 * Called only after the va range is known to be unoccupied.
 */
static int
pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
{
	uintptr_t pae;
	PTE *pd, *pde, *pt, *pte;
	int pdx, pgsz;
	Page *pg;

	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);

	for(pae = pa + size; pa < pae; pa += pgsz){
		pdx = PDX(va);
		pde = &pd[pdx];

		/*
		 * Check if it can be mapped using a big page,
		 * i.e. is big enough and starts on a suitable boundary.
		 * Assume processor can do it.
		 */
		if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
			assert(*pde == 0);
			*pde = pa|attr|PtePS|PteP;
			pgsz = PGLSZ(1);
		}
		else{
			if(*pde == 0){
				pg = mmuptpalloc();
				assert(pg != nil && pg->pa != 0);
				*pde = pg->pa|PteRW|PteP;
				memset((PTE*)(PDMAP+pdx*4096), 0, 4096);
			}
			assert(*pde != 0);

			pt = (PTE*)(PDMAP+pdx*4096);
			pte = &pt[PTX(va)];
			assert(!(*pte & PteP));

			*pte = pa|attr|PteP;
			pgsz = PGLSZ(0);
		}
		va += pgsz;
	}

	return 0;
}
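
/*
 * Find a run of count consecutive empty entries in the first n
 * slots of a; return the index of the start of the run, or -1.
 */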
static int
findhole(PTE* a, int n, int count)
{
	int have, i;

	have = 0;
	for(i = 0; i < n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}

	return -1;
}

/*
 * Look for free space in the vmap.
 */
static uintptr_t
vmapalloc(usize size)
{
	int i, n, o;
	PTE *pd, *pt;
	int pdsz, ptsz;

	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
	pd += PDX(VMAP);
	pdsz = VMAPSZ/PGLSZ(1);

	/*
	 * Look directly in the PD entries if the size is
	 * larger than the range mapped by a single entry.
	 */
	if(size >= PGLSZ(1)){
		n = HOWMANY(size, PGLSZ(1));
		if((o = findhole(pd, pdsz, n)) != -1)
			return VMAP + o*PGLSZ(1);
		return 0;
	}

	/*
	 * Size is smaller than that mapped by a single PD entry.
	 * Look for an already mapped PT page that has room.
	 */
	n = HOWMANY(size, PGLSZ(0));
	ptsz = PGLSZ(0)/sizeof(PTE);
	for(i = 0; i < pdsz; i++){
		if(!(pd[i] & PteP) || (pd[i] & PtePS))
			continue;

		pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
		if((o = findhole(pt, ptsz, n)) != -1)
			return VMAP + i*PGLSZ(1) + o*PGLSZ(0);
	}

	/*
	 * Nothing suitable, start using a new PD entry.
	 */
	if((o = findhole(pd, pdsz, 1)) != -1)
		return VMAP + o*PGLSZ(1);

	return 0;
}

/*
 * KSEG0 maps low memory.
 * KSEG2 maps almost all memory, but starting at an address determined
 * by the address space map (see asm.c).
 * Thus, almost everything in physical memory is already mapped, but
 * there are things that fall in the gap
 * (acpi tables, device memory-mapped registers, etc.)
 * for those things, we also want to disable caching.
 * vmap() is required to access them.
 */
void*
vmap(uintptr_t pa, usize size)
{
	uintptr_t va;
	usize o, sz;

	DBG("vmap(%#p, %lu) pc=%#p\n", pa, size, getcallerpc());

	if(machp()->machno != 0)
		print("vmap: machp()->machno != 0");

	/*
	 * This is incomplete; the checks are not comprehensive
	 * enough.
	 * Sometimes the request is for an already-mapped piece
	 * of low memory, in which case just return a good value
	 * and hope that a corresponding vunmap of the address
	 * will have the same address.
	 * To do this properly will require keeping track of the
	 * mappings; perhaps something like kmap, but kmap probably
	 * can't be used early enough for some of the uses.
	 */
	if(pa+size < 1ull*MiB)
		return KADDR(pa);
	if(pa < 1ull*MiB)
		return nil;

	/*
	 * Might be asking for less than a page.
	 * This should have a smaller granularity if
	 * the page size is large.
	 */
	o = pa & ((1<<PGSHFT)-1);
	pa -= o;
	sz = ROUNDUP(size+o, PGSZ);

	if(pa == 0){
		print("vmap(0, %lu) pc=%#p\n", size, getcallerpc());
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
		iunlock(&vmaplock);
		return nil;
	}
	iunlock(&vmaplock);

	DBG("vmap(%#p, %lu) => %#p\n", pa+o, size, va+o);

	return UINT2PTR(va + o);
}

void
vunmap(void* v, usize size)
{
	uintptr_t va;

	DBG("vunmap(%#p, %lu)\n", v, size);

	if(machp()->machno != 0)
		print("vunmap: machp()->machno != 0");

	/*
	 * See the comments above in vmap.
	 */
	va = PTR2UINT(v);
	if(va >= KZERO && va+size < KZERO+1ull*MiB)
		return;

	/*
	 * Here will have to deal with releasing any
	 * resources used for the allocation (e.g. page table
	 * pages).
	 */
	DBG("vunmap(%#p, %lu)\n", v, size);
}
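
/*
 * Walk the page table rooted at pml4 down to the given level
 * and return the PTE there via ret.  Missing intermediate
 * tables are allocated with alloc when one is supplied;
 * otherwise the walk stops early.  The return value is the
 * level actually reached, or -1 if allocation failed.
 */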
int
mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
	uint64_t (*alloc)(usize))
{
	int l;
	uintmem pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1)
		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
	pte = &pml4[PTLX(va, 3)];
	for(l = 3; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0)
				return -1;
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa|PteRW|PteP;
		}
		else if(*pte & PtePS)
			break;

		pte = UINT2PTR(KADDR(PPN(*pte)));
		pte += PTLX(va, l-1);
	}
	*ret = pte;
	splx(pl);

	return l;
}

uintmem
mmuphysaddr(uintptr_t va)
{
	int l;
	PTE *pte;
	uintmem mask, pa;

	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or uintptr?
	 */
	l = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &pte, nil);
	DBG("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;

	mask = PGLSZ(l)-1;
	pa = (*pte & ~mask) + (va & mask);
	DBG("physaddr: l %d va %#p pa %#llx\n", l, va, pa);

	return pa;
}

Page mach0pml4;

void
mmuinit(void)
{
	panic("mmuinit");
#if 0
	uint8_t *p;
	Page *page;
	uint64_t o, pa, r, sz;

	archmmu();
	DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.pml4, sys->npgsz);

	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;

		memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
		machp()->MMU.pml4 = &machp()->MMU.pml4kludge;
		machp()->MMU.pml4->va = PTR2UINT(p);
		machp()->MMU.pml4->pa = PADDR(p);
		machp()->MMU.pml4->daddr = mach0pml4.daddr;	/* # of user mappings in pml4 */

		r = rdmsr(Efer);
		r |= Nxe;
		wrmsr(Efer, r);
		rootput(machp()->MMU.pml4->pa);
		DBG("m %#p pml4 %#p\n", machp(), machp()->MMU.pml4);
		return;
	}

	page = &mach0pml4;
	page->pa = read_csr(sptbr);
	page->va = PTR2UINT(KADDR(page->pa));

	machp()->MMU.pml4 = page;

	r = rdmsr(Efer);
	r |= Nxe;
	wrmsr(Efer, r);

	/*
	 * Set up the various kernel memory allocator limits:
	 * pmstart/pmend bound the unused physical memory;
	 * vmstart/vmend bound the total possible virtual memory
	 * used by the kernel;
	 * vmunused is the highest virtual address currently mapped
	 * and used by the kernel;
	 * vmunmapped is the highest virtual address currently
	 * mapped by the kernel.
	 * Vmunused can be bumped up to vmunmapped before more
	 * physical memory needs to be allocated and mapped.
	 *
	 * This is set up here so meminit can map appropriately.
	 */
	o = sys->pmstart;
	sz = ROUNDUP(o, 4*MiB) - o;
	pa = asmalloc(0, sz, 1, 0);
	if(pa != o)
		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
	sys->pmstart += sz;

	sys->vmstart = KSEG0;
	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
	sys->vmunmapped = sys->vmstart + o + sz;
	sys->vmend = sys->vmstart + TMFM;

	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);

	/*
	 * Set up the map for PD entry access by inserting
	 * the relevant PDP entry into the PD. It's equivalent
	 * to PADDR(sys->pd)|PteRW|PteP.
	 */
	sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA);
	print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]);
	assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP));

	dumpmmuwalk(KZERO);

	mmuphysaddr(PTR2UINT(end));
#endif
}