/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "encoding.h"
#include "mmu.h"

#undef DBGFLG
#define DBGFLG 0

/* This gets pretty messy. RV64 has *at least* two modes:
 * 4-level and 3-level page tables. And people wonder why
 * I like soft TLB so much. Anyway, for now, not sure
 * how to handle it.
 * Would be cool to work out a way to Do The Right Thing
 * without regard to page size, so that's what I'm going to
 * try to do.
 */
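
/* For reference (a sketch from the RISC-V privileged spec, not from this
 * file): Sv39 resolves a 39-bit VA with three 9-bit index levels, Sv48 a
 * 48-bit VA with four. Either way, each level indexes 512 8-byte PTEs in a
 * 4 KiB page:
 *
 *	Sv39 VA: | VPN[2] 9 | VPN[1] 9 | VPN[0] 9 | offset 12 |
 *	Sv48 VA: | VPN[3] 9 | VPN[2] 9 | VPN[1] 9 | VPN[0] 9 | offset 12 |
 */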
void msg(char *);

/*
 * To do:
 * PteNX;
 * mmukmapsync grot for >1 processor;
 * mmuptcopy (PteSHARED trick?);
 * calculate and map up to TMFM (conf crap);
 */

/* strike off 2M so it won't wrap to 0. Sleazy. */
#define TMFM (2*GiB-2*MiB) /* kernel memory */

#define PPN(x) ((x)&~(PGSZ-1))
#define PTE2PPN(p) ((p)>>10)
#define PTE2PA(p) (((p)>>10)<<12)
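
/* Worked example (illustrative; the value is an assumption): in the RISC-V
 * PTE format the PPN starts at bit 10, so a PTE of 0x20000401 carries
 * PPN 0x80001 and maps physical page 0x80001000:
 *
 *	PTE2PPN(0x20000401) == 0x20000401 >> 10         == 0x80001
 *	PTE2PA(0x20000401)  == (0x20000401 >> 10) << 12 == 0x80001000
 *
 * Note the asymmetry with PPN(x), which masks a *physical address* down to
 * its page frame, not a PTE.
 */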

#if 0
/* Print the page table structures to the console */
void print_page_table(void) {
	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
}
#endif

void flush_tlb(void)
{
	asm volatile("sfence.vm");
}

size_t pte_ppn(uint64_t pte)
{
	return pte >> PTE_PPN_SHIFT;
}

uint64_t ptd_create(uintptr_t ppn)
{
	return (ppn << PTE_PPN_SHIFT) | PTE_V;
}

uint64_t pte_create(uintptr_t ppn, int prot, int user)
{
	uint64_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;

	if (prot & PTE_W)
		pte |= PTE_W;
	if (prot & PTE_X)
		pte |= PTE_X;
	if (user)
		pte |= PTE_U;
	return pte;
}
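
/* Usage sketch (illustrative, not called anywhere in this file): build a
 * user read/write leaf PTE for physical page number 0x80200, and a non-leaf
 * pointer to the same frame. ptd_create sets only PTE_V, so the result has
 * R=W=X clear, which RISC-V defines as "pointer to next-level table".
 */
#if 0
	uint64_t leaf = pte_create(0x80200, PTE_W, 1);	/* V|R|W|U leaf */
	uint64_t ptd  = ptd_create(0x80200);		/* V only: next-level pointer */
#endif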

void
rootput(uintptr_t root)
{
	uintptr_t ptbr = root >> RISCV_PGSHIFT;

	write_csr(sptbr, ptbr);
}

void
mmuflushtlb(void)
{
	machp()->tlbpurge++;
	if(machp()->MMU.root->daddr){
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr*sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}
	rootput((uintptr_t) machp()->MMU.root->pa);
}

void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pl);
}

static void
mmuptpfree(Proc* proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	for(l = 1; l < 4; l++){
		last = &proc->MMU.mmuptp[l];
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next){
			//what is right here? 2 or 1?
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}
	machp()->MMU.root->daddr = 0;
}

static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

void
dumpptepg(int lvl, uintptr_t pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = UINT2PTR(KADDR(pa));
	for(i = 0; i < PTSZ/sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);

			/* skip kernel mappings */
			if((pte[i]&PteU) == 0){
				tabs(tab+1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl-1, PTE2PA(pte[i]));	/* PTE2PA, not PPN: the PPN sits at bit 10 of a PTE */
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p\n", p);
	for(i = 3; i > 0; i--){
		print("mmuptp[%d]:\n", i);
		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
			print("\tpg %#p = va %#llx pa %#llx"
				" daddr %#lx next %#p prev %#p\n",
				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
	}
	print("root %#llx\n", machp()->MMU.root->pa);
	if(0)dumpptepg(4, machp()->MMU.root->pa);
}

void
dumpmmuwalk(uint64_t addr)
{
	int l;
	PTE *pte, *root;

	root = UINT2PTR(machp()->MMU.root->va);
	print("root is %p\n", root);
	if((l = mmuwalk(root, addr, 2, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 1, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 0, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if (PTE2PA(*pte) != 0)
		hexdump(KADDR(PTE2PA(*pte)), 32);
}

static Page mmuptpfreelist;

static Page*
mmuptpalloc(void)
{
	void* va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	if((page = mmuptpfreelist.next) != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
		unlock(&mmuptpfreelist.l);

		if(page->ref++ != 0)
			panic("mmuptpalloc ref\n");
		page->prev = page->next = nil;
		memset(UINT2PTR(page->va), 0, PTSZ);

		if(page->pa == 0)
			panic("mmuptpalloc: free page with pa == 0");
		return page;
	}
	unlock(&mmuptpfreelist.l);

	if((page = malloc(sizeof(Page))) == nil){
		print("mmuptpalloc Page\n");
		return nil;
	}
	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
		print("mmuptpalloc va\n");
		free(page);
		return nil;
	}
	page->va = PTR2UINT(va);
	page->pa = PADDR(va);
	page->ref = 1;

	if(page->pa == 0)
		panic("mmuptpalloc: no pa");
	return page;
}

void
mmuswitch(Proc* proc)
{
	PTE *pte;
	Page *page;
	Mpl pl;

	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpfree(proc, 1);
		proc->newtlb = 0;
	}

	/* daddr is the number of user PTEs in use in the root. */
	if(machp()->MMU.root->daddr){
		print("memset(%p, 0, %d)\n", UINT2PTR(machp()->MMU.root->va), (int)(machp()->MMU.root->daddr*sizeof(PTE)));
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr*sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}

	pte = UINT2PTR(machp()->MMU.root->va);
	if (0)print("pte %p\n", pte);

	/* N.B. On RISC-V, we DO NOT SET any of the X, R, W bits at this level,
	 * since these entries point to page table pages one level down. Also,
	 * these are explicitly user-level pages, so PteU is set. */
	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
		pte[page->daddr] = PPN(page->pa)|PteU|PteP;
		if(page->daddr >= machp()->MMU.root->daddr)
			machp()->MMU.root->daddr = page->daddr+1;
		page->prev = machp()->MMU.root;
	}

	if (0)print("rootput %p\n", (void *)(uintptr_t) machp()->MMU.root->pa);
	rootput((uintptr_t) machp()->MMU.root->pa);
	if (0)print("splx\n");
	splx(pl);
}
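
/* Sketch of the rule the loop above relies on (from the RISC-V privileged
 * spec, not specific to this file): an entry with V set and R=W=X all clear
 * is a pointer to the next-level table; setting any of R/W/X makes it a
 * leaf. PteRW and PteX are the names this file already uses for those bits.
 */
#if 0
	int ispointer = (pte & PteP) && (pte & (PteRW|PteX)) == 0;
#endif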

void
mmurelease(Proc* proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);

	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist.l);
	}
	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
		wakeup(&pga.rend);
	proc->MMU.mmuptp[0] = nil;

	rootput(machp()->MMU.root->pa);
}

static void
checkpte(uintmem ppn, void *a)
{
	int l;
	PTE *pte, *root;
	uint64_t addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	root = UINT2PTR(machp()->MMU.root->va);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(root, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(s, buf+sizeof buf,
		"check2: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	if(*pte&PteFinal)
		return;
	if((l = mmuwalk(root, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	seprint(s, buf+sizeof buf,
		"check1: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	return;
Panic:
	seprint(s, buf+sizeof buf,
		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
	print("%s\n", buf);
	seprint(buf, buf+sizeof buf, "start %#llx unused %#llx"
		" unmap %#llx end %#llx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}

static void
mmuptpcheck(Proc *proc)
{
	int lvl, npgs, i;
	Page *lp, *p, *pgs[16], *fp;
	uint idx[16];

	if(proc == nil)
		return;
	lp = machp()->MMU.root;
	for(lvl = 3; lvl >= 2; lvl--){
		npgs = 0;
		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
				if(fp == p){
					dumpmmu(proc);
					panic("ptpcheck: using free page");
				}
			for(i = 0; i < npgs; i++){
				if(pgs[i] == p){
					dumpmmu(proc);
					panic("ptpcheck: dup page");
				}
				if(idx[i] == p->daddr){
					dumpmmu(proc);
					panic("ptpcheck: dup daddr");
				}
			}
			if(npgs >= nelem(pgs))
				panic("ptpcheck: pgs is too small");
			idx[npgs] = p->daddr;
			pgs[npgs++] = p;
			if(lvl == 3 && p->prev != lp){
				dumpmmu(proc);
				panic("ptpcheck: wrong prev");
			}
		}
	}
	npgs = 0;
	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
		for(i = 0; i < npgs; i++)
			if(pgs[i] == fp)
				panic("ptpcheck: dup free page");
		pgs[npgs++] = fp;
	}
}

static uintmem
pteflags(uint attr)
{
	uintmem flags;

	flags = 0;
	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
		panic("mmuput: wrong attr bits: %#x\n", attr);
	if(attr&PTEVALID)
		flags |= PteP;
	if(attr&PTEWRITE)
		flags |= PteRW;
	if(attr&PTEUSER)
		flags |= PteU;
	/* Can't do this -- what do we do?
	if(attr&PTEUNCACHED)
		flags |= PtePCD;
	*/
	if(attr&PTENOEXEC)
		flags &= ~PteX;
	return flags;
}

void
invlpg(uintptr_t _)
{
	// TODO
	if (0) print("invlpg is not implemented, continuing anyway (addr is %p)\n", _);
}

/*
 * pg->pgszi indicates the page size in sys->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
void
mmuput(uintptr_t va, Page *pg, uint attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	uintmem pa, ppage;
	char buf[80];

	if (DBGFLG) {
		print("mmuput: %p\n", va);
		dumpmmuwalk(va);
		print("now try the put\n");
	}
	ppage = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");
	if(va == 0)
		panic("mmuput: zero va");
	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	if (pg->pgszi < 0) {
		print("mmuput(%p, %p, 0x%x): bad pgszi %d for pa %p\n",
			va, pg, attr, pg->pgszi, pa);
		assert(pg->pgszi >= 0);
	}
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz-1))
		panic("mmuput: pa offset non zero: %#llx\n", pa);
	pa |= pteflags(attr);

	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 2);
	if (0) print("user is %d, index for %p is 0x%x, ", user, va, x);

	pte = UINT2PTR(machp()->MMU.root->va);
	pte += x;
	prev = machp()->MMU.root;
	if (DBGFLG) print("starting PTE at l2 is %p\n", pte);

	for(lvl = 2; lvl >= 0; lvl--){
		if(user){
			if(pgsz == 2*MiB && lvl == 1) /* use 2M */
				break;
			if(pgsz == 1ull*GiB && lvl == 2) /* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x){
				if(*pte == 0){
					print("mmu: jmk and nemo had fun\n");
					*pte = (PPN(page->pa)>>2)|PteP;
					if (DBGFLG) print("level %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, pa);
				}
				break;
			}
		if(page == nil){
			if(up->MMU.mmuptp[0] == nil) {
				page = mmuptpalloc();
				if (DBGFLG) print("\tallocated page %p\n", page);
			} else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
				if (DBGFLG) print("\treused page %p\n", page);
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = (PPN(page->pa)>>2)|PteP;
			if (DBGFLG) print("\tlevel %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, PPN(page->pa));
			if(lvl == 2 && x >= machp()->MMU.root->daddr)
				machp()->MMU.root->daddr = x+1;
		}
		x = PTLX(va, lvl-1);
		if (DBGFLG) print("\tptlx(%p,%d) is %p\n", va, lvl-1, x);
		ppage = PTE2PA(*pte);
		if (DBGFLG) print("\tpa for pte %p val 0x%llx ppage %p\n", pte, *pte, ppage);
		if(ppage == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);
		pte = UINT2PTR(KADDR(ppage));
		pte += x;
		if (DBGFLG) print("\tpte for next iteration is %p\n", pte);
		prev = page;
	}
	if (DBGFLG) print("\tAFTER LOOP pte %p val 0x%llx ppn %p\n", pte, *pte, pa);
	if(DBGFLG)
		checkpte(ppage, pte);
	*pte = (pa>>2)|PteU;
	if (DBGFLG) print("\tAFTER SET pte %p val 0x%llx ppn %p\n", pte, *pte, pa);
	if(user)
		switch(pgsz){
		case 2*MiB:
		case 1*GiB:
			*pte |= attr | PteFinal | PteP | 0x1f;
			if (DBGFLG) print("\tUSER PAGE pte %p val 0x%llx\n", pte, *pte);
			break;
		default:
			panic("\tmmuput: user pages must be 2M or 1G");
		}
	splx(pl);

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
			machp()->machno, up, pte, pte?*pte:~0);
		print("%s", buf);
	}
	invlpg(va); /* only if old entry valid? */
	//dumpmmuwalk(va);
	//hexdump((void *)va, 16);
	if (DBGFLG) print("returning from mmuput\n");
}
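
/* Usage sketch (hypothetical values; full Page setup elided): map a 2 MiB
 * user page at va 0x200000 with a valid, writable, user PTE. mmuput stops
 * one level early for 2 MiB pages and writes a PteFinal leaf there. The
 * pgszi value assumes sys->pgsz[1] == 2*MiB, which this file does not
 * guarantee.
 */
#if 0
	Page pg = { .pa = 0x80400000, .pgszi = 1 };
	mmuput(0x200000, &pg, PTEVALID|PTEWRITE|PTEUSER);
#endif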

#if 0
static Lock mmukmaplock;
#endif

#define PML4X(v) PTLX((v), 3)
#define PDPX(v) PTLX((v), 2)
#define PDX(v) PTLX((v), 1)
#define PTX(v) PTLX((v), 0)

int
mmukmapsync(uint64_t va)
{
	USED(va);
	return 0;
}

// findKSeg2 finds kseg2, i.e., the lowest virtual
// address mapped by firmware. We need to know this so we can
// correctly and easily compute KADDR and PADDR.
// TODO: actually do it.
// It is *possible* that we'll be able to pick this up from
// the configstring.
void *
findKSeg2(void)
{
	// return the Sv39 address that we know coreboot
	// set up.
	return (void *)(~0ULL<<38);
}
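
/* For the curious: ~0ULL<<38 is 0xFFFFFFC000000000, the bottom of the upper
 * half of the Sv39 address space (the top 256 GiB), since Sv39 VAs
 * sign-extend bit 38. This is an observation about the constant above, not
 * something the code checks.
 */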

/* mmuwalk will walk the page tables as far as we ask (level)
 * or as far as possible (you might hit a tera/giga/mega PTE).
 * If it gets a valid PTE it will return it in ret; test for
 * validity by testing PteP. To see how far it got, check
 * the return value. */
int
mmuwalk(PTE* root, uintptr_t va, int level, PTE** ret,
	uint64_t (*alloc)(usize))
{
	int l;
	uintmem pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1) {
		print("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
		print("PTLX(%p, 2) is 0x%x\n", va, PTLX(va, 2));
		print("root is %p\n", root);
	}
	pte = &root[PTLX(va, 2)];
	if(DBGFLG > 1) {
		print("pte is %p\n", pte);
		print("*pte is %#llx\n", *pte);
	}
	for(l = 2; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0){
				splx(pl);
				return -1;
			}
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa|PteRW|PteP;
		}
		else if(*pte & PteFinal)
			break;
		pte = UINT2PTR(KADDR((*pte&~0x3ff)<<2)); // PPN(*pte)
		if (DBGFLG > 1)
			print("pte is %p: ", pte);
		pte += PTLX(va, l-1);
		if (DBGFLG > 1)
			print("and pte after index is %p\n", pte);
	}
	*ret = pte;
	splx(pl);
	return l;
}
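
/* Usage sketch (illustrative; the va is an assumption): resolve a VA to its
 * leaf PTE without allocating (nil alloc), then check how far the walk got
 * and whether the entry it stopped at is valid.
 */
#if 0
	PTE *pte;
	uintptr_t va = 0x200000;
	int l = mmuwalk(UINT2PTR(machp()->MMU.root->va), va, 0, &pte, nil);
	if(l >= 0 && (*pte & PteP))
		print("l%d pte %#llx -> pa %#llx\n", l, *pte, PTE2PA(*pte));
#endif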

uintmem
mmuphysaddr(uintptr_t va)
{
	int l;
	PTE *pte;
	uint64_t ppn;
	uintmem mask, pa;

	msg("mmuphysaddr\n");
	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or uintptr?
	 */
	print("machp() %p\n", machp());
	print("machp()->MMU.root %p\n", machp()->MMU.root);
	print("... va %p\n", machp()->MMU.root->va);
	l = mmuwalk(UINT2PTR(machp()->MMU.root->va), va, 0, &pte, nil);
	print("pte is %p *pte is 0x%llx\n", pte, *pte);
	print("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;
	ppn = (*pte & ~0x3ff) << 2;
	print("PPN from PTE is %llx\n", ppn);
	mask = PGLSZ(l)-1;
	pa = (ppn & ~mask) + (va & mask);
	print("physaddr: mask is %llx, ~mask %llx, ppn & ~mask %llx\n", mask, ~mask, ppn & ~mask);
	print("physaddr: RESULT: l %d va %#p pa %#llx\n", l, va, pa);
	return pa;
}
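
/* Worked example of the arithmetic above (values are assumptions): for a
 * 4 KiB mapping (l == 0) with *pte == 0x20000401,
 *
 *	ppn  = (0x20000401 & ~0x3ff) << 2 = 0x80001000
 *	mask = PGLSZ(0)-1                 = 0xfff
 *	pa   = 0x80001000 + (va & 0xfff)
 *
 * i.e. drop the 10 flag/permission bits, shift the PPN into place, and add
 * back the in-page offset from the VA.
 */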

/* To accommodate the weirdness of the rv64 modes, we're going to leave it as a 4
 * level PT, and fake up the PML4 with one entry when it's 3 levels. Later, we want
 * to be smarter, but a lot of our code is pretty wired to assume 4 level PT and I'm
 * not wanting to just rip it all out. */
void
mmuinit(void)
{
	uint8_t *p;
	uint64_t o, pa, sz, n;

	n = archmmu();
	print("%lld page sizes\n", n);
	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);

	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;
		panic("not yet");
#if 0
		memmove(p, UINT2PTR(mach0root.va), PTSZ);
		machp()->MMU.root = &machp()->MMU.root;
		machp()->MMU.root->va = PTR2UINT(p);
		machp()->MMU.root->pa = PADDR(p);
		machp()->MMU.root->daddr = mach0root.daddr; /* # of user mappings in root */
		rootput(machp()->MMU.root->pa);
		print("m %#p root %#p\n", machp(), machp()->MMU.root);
#endif
		return;
	}

	machp()->MMU.root = &sys->root;

	uintptr_t PhysicalRoot = read_csr(sptbr)<<12;
	PTE *root = KADDR(PhysicalRoot);
	print("Physical root is 0x%llx and root is %p\n", PhysicalRoot, root);
	PTE *KzeroPTE;
	/* As it happens, at this point, we don't know the number of page table levels.
	 * But a walk to "level 4" will work even if it's only 3, and we can use that
	 * information to know what to do. Further, KSEG0 is the last 2M so this will
	 * get us the last PTE on either an L3 or L2 pte page */
	int l;
	if((l = mmuwalk(root, KSEG0, 2, &KzeroPTE, nil)) < 0) {
		panic("Can't walk to PtePML2");
	}
	print("KzeroPTE is %p\n", KzeroPTE);
	int PTLevels = (*KzeroPTE>>9)&3;
	switch(PTLevels) {
	default:
		panic("unsupported number of page table levels: %d", PTLevels);
		break;
	case 0:
		machp()->MMU.root->pa = PhysicalRoot;
		print("root is 0x%llx\n", machp()->MMU.root->pa);
		machp()->MMU.root->va = (uintptr_t) KADDR(machp()->MMU.root->pa);
		break;
	}

	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);

	/*
	 * Set up the various kernel memory allocator limits:
	 * pmstart/pmend bound the unused physical memory;
	 * vmstart/vmend bound the total possible virtual memory
	 * used by the kernel;
	 * vmunused is the highest virtual address currently mapped
	 * and used by the kernel;
	 * vmunmapped is the highest virtual address currently
	 * mapped by the kernel.
	 * Vmunused can be bumped up to vmunmapped before more
	 * physical memory needs to be allocated and mapped.
	 *
	 * This is set up here so meminit can map appropriately.
	 */
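	/* The resulting ordering, as a picture (kernel VA grows downward in
	 * the list; a sketch of the invariant, not code):
	 *
	 *	vmstart (== KSEG0)
	 *	   ... mapped, in use ...
	 *	vmunused
	 *	   ... mapped, not yet used ...
	 *	vmunmapped
	 *	   ... unmapped ...
	 *	vmend (== vmstart + TMFM)
	 */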
	o = sys->pmstart;
	print("sys->pmstart is %#llx\n", o);
	sz = ROUNDUP(o, 4*MiB) - o;
	print("Size is 0x%llx\n", sz);
	pa = asmalloc(0, sz, 1, 0);
	if(pa != o)
		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
	sys->pmstart += sz;

	sys->vmstart = KSEG0;
	print("Going to set vmunused to %p + 0x%llx\n", sys->vmstart, ROUNDUP(o, 4*KiB));
	/* more issues with arithmetic since physmem is at 80000000 */
	o &= 0x7fffffff;
	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
	sys->vmend = sys->vmstart + TMFM;

	// On amd64, this was set to just the end of the kernel, because
	// only that much was mapped, and also vmap required a lot of
	// free *address space* (not memory, *address space*) for the
	// vmap functions. vmap was a hack we intended to remove.
	// It's still there. But we can get rid of it on riscv.
	// There's lots more to do but at least vmap is gone,
	// as is the PDMAP hack, which was also supposed to
	// be temporary.
	// TODO: We get much further now but still
	// die in meminit(). When that's fixed remove
	// this TODO.
	sys->vmunmapped = sys->vmend;

	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	dumpmmuwalk(KZERO);
	mmuphysaddr(PTR2UINT(end));
}