/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "encoding.h"
#include "mmu.h"

#undef DBGFLG
#define DBGFLG 0

/* this gets pretty messy. RV64 has *at least* two modes:
 * 4 level and 3 level page tables. And people wonder why
 * I like soft TLB so much. Anyway, for now, not sure
 * how to handle it.
 * Would be cool to work out a way to Do The Right Thing
 * without regard to page size, so that's what I'm going to
 * try to do.
 */
void msg(char *);

/*
 * To do:
 *	PteNX;
 *	mmukmapsync grot for >1 processor;
 *	mmuptcopy (PteSHARED trick?);
 *	calculate and map up to TMFM (conf crap);
 */

/* strike off 2M so it won't wrap to 0. Sleazy. */
#define TMFM		(2*GiB-2*MiB)		/* kernel memory */

#define PPN(x)		((x)&~(PGSZ-1))
#define PTE2PPN(p)	((p)>>10)
#define PTE2PA(p)	(((p)>>10)<<12)
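/*
 * Worked example (an illustration, not used by the code): Sv39 PTEs keep
 * the physical page number starting at bit 10, so for a page at
 * PA 0x80200000 the PPN is 0x80200000>>12 = 0x80200 and the PTE's address
 * field is 0x80200<<10; PTE2PA reverses this: (pte>>10)<<12 == 0x80200000.
 */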
#if 0
/* Print the page table structures to the console */
void print_page_table(void) {
	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
}
#endif
void flush_tlb(void)
{
	asm volatile("sfence.vm");
}

size_t pte_ppn(uint64_t pte)
{
	return pte >> PTE_PPN_SHIFT;
}

uint64_t ptd_create(uintptr_t ppn)
{
	return (ppn << PTE_PPN_SHIFT) | PTE_V;
}

uint64_t pte_create(uintptr_t ppn, int prot, int user)
{
	uint64_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;
	if (prot & PTE_W)
		pte |= PTE_W;
	if (prot & PTE_X)
		pte |= PTE_X;
	if (user)
		pte |= PTE_U;
	return pte;
}
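/*
 * A minimal usage sketch (illustrative only; pte_example is hypothetical,
 * and PTE_PPN_SHIFT and the PTE_* flags come from encoding.h): build a
 * user read/write leaf and a directory entry pointing at a page-table page.
 */
#if 0
static void
pte_example(uintptr_t leaf_pa, uintptr_t ptp_pa)
{
	/* leaf: readable (implied), writable, user-accessible */
	uint64_t leaf = pte_create(leaf_pa >> RISCV_PGSHIFT, PTE_W, 1);
	/* directory: valid pointer to the next-level page-table page */
	uint64_t dir = ptd_create(ptp_pa >> RISCV_PGSHIFT);
	print("leaf %#llx dir %#llx\n", leaf, dir);
}
#endif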
void
rootput(uintptr_t root)
{
	Proc *up = externup();
	uintptr_t ptbr = root >> RISCV_PGSHIFT;

	if (0) print("rootput %p pid %d\n", root, up ? up->pid : -1);
	write_csr(sptbr, ptbr);
}

void
mmuflushtlb(void)
{
	machp()->tlbpurge++;
	if(machp()->MMU.root->daddr){
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr*sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}
	rootput((uintptr_t) machp()->MMU.root->pa);
}

void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pl);
}
static void
mmuptpfree(Proc* proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	if (0) print("MMUPTPFREE: proc %p, pid %d\n", proc, proc->pid);
	for(l = 1; l < 3; l++){
		last = &proc->MMU.mmuptp[l];
		if (0) print("%s: level %d: last is %p\n", __func__, l, *last);
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next){
			//what is right here? 2 or 1?
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}
	machp()->MMU.root->daddr = 0;
}
static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

void
dumpptepg(int lvl, uintptr_t pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = UINT2PTR(KADDR(pa));
	for(i = 0; i < PTSZ/sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);

			/* skip kernel mappings */
			if((pte[i]&PteU) == 0){
				tabs(tab+1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl-1, PPN(pte[i]));
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p, pid %d\n", p, p->pid);
	for(i = 3; i > 0; i--){
		print("mmuptp[%d]:\n", i);
		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
			print("\tpg %#p = va %#llx pa %#llx"
				" daddr %#lx next %#p prev %#p\n",
				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
	}
	print("root %#llx\n", machp()->MMU.root->pa);
}

void
dumpmmuwalk(uint64_t addr)
{
	int l;
	PTE *pte, *root;

	root = UINT2PTR(machp()->MMU.root->va);
	print("root is %p\n", root);
	if((l = mmuwalk(root, addr, 2, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 1, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 0, &pte, nil)) >= 0) {
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if (PTE2PA(*pte) != 0)
		hexdump(KADDR(PTE2PA(*pte)), 32);
}
static Page mmuptpfreelist;

static Page*
mmuptpalloc(void)
{
	void* va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	if((page = mmuptpfreelist.next) != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
		unlock(&mmuptpfreelist.l);

		if(page->ref++ != 0)
			panic("mmuptpalloc ref\n");
		page->prev = page->next = nil;
		memset(UINT2PTR(page->va), 0, PTSZ);

		if(page->pa == 0)
			panic("mmuptpalloc: free page with pa == 0");
		return page;
	}
	unlock(&mmuptpfreelist.l);

	if((page = malloc(sizeof(Page))) == nil){
		if (0) print("mmuptpalloc Page\n");
		return nil;
	}
	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
		if (0) print("mmuptpalloc va\n");
		free(page);
		return nil;
	}

	page->va = PTR2UINT(va);
	page->pa = PADDR(va);
	page->ref = 1;

	if(page->pa == 0)
		panic("mmuptpalloc: no pa");
	return page;
}
void
mmuswitch(Proc* proc)
{
	PTE *pte;
	Page *page;
	Mpl pl;

	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpfree(proc, 1);
		proc->newtlb = 0;
	}

	/* daddr is the number of user PTEs in use in the root. */
	if(machp()->MMU.root->daddr){
		if (0) print("MMUSWITCH: memset(%p, 0, %d)\n", UINT2PTR(machp()->MMU.root->va), (int)(machp()->MMU.root->daddr*sizeof(PTE)));
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr*sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}

	pte = UINT2PTR(machp()->MMU.root->va);
	if (0) print("pte %p\n", pte);

	/* N.B. On RISC-V, we DO NOT SET any of the X, R, or W bits at this
	 * level, since these entries point to page-table pages one level
	 * down. These are explicitly user-level pages, so PteU is set. */
	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
		if (0) print("MMUSWITCH: mmuptp[3]? page->pa is %p\n", page->pa);
		pte[page->daddr] = PPN(page->pa)|PteU|PteP;
		if(page->daddr >= machp()->MMU.root->daddr)
			machp()->MMU.root->daddr = page->daddr+1;
		page->prev = machp()->MMU.root;
	}

	if (0) print("rootput %p\n", (void *)(uintptr_t) machp()->MMU.root->pa);
	rootput((uintptr_t) machp()->MMU.root->pa);
	if (0) print("splx\n");
	splx(pl);
}
void
mmurelease(Proc* proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);
	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist.l);
	}
	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
		wakeup(&pga.rend);
	proc->MMU.mmuptp[0] = nil;

	rootput(machp()->MMU.root->pa);
}
static void
checkpte(uintmem ppn, void *a)
{
	int l;
	PTE *pte, *root;
	uint64_t addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	root = UINT2PTR(machp()->MMU.root->va);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(root, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(s, buf+sizeof buf,
		"check2: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	if(*pte&PteFinal)
		return;
	if((l = mmuwalk(root, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	seprint(s, buf+sizeof buf,
		"check1: l%d pte %#p = %llx\n",
		l, pte, pte?*pte:~0);
	return;
Panic:
	seprint(s, buf+sizeof buf,
		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
	print("%s\n", buf);
	seprint(buf, buf+sizeof buf, "start %#llx unused %#llx"
		" unmap %#llx end %#llx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}
static void
mmuptpcheck(Proc *proc)
{
	int lvl, npgs, i;
	Page *lp, *p, *pgs[16], *fp;
	uint idx[16];

	if(proc == nil)
		return;
	lp = machp()->MMU.root;
	for(lvl = 3; lvl >= 2; lvl--){
		npgs = 0;
		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
				if(fp == p){
					dumpmmu(proc);
					panic("ptpcheck: using free page");
				}
			for(i = 0; i < npgs; i++){
				if(pgs[i] == p){
					dumpmmu(proc);
					panic("ptpcheck: dup page");
				}
				if(idx[i] == p->daddr){
					dumpmmu(proc);
					panic("ptpcheck: dup daddr");
				}
			}
			if(npgs >= nelem(pgs))
				panic("ptpcheck: pgs is too small");
			idx[npgs] = p->daddr;
			pgs[npgs++] = p;
			if(lvl == 3 && p->prev != lp){
				dumpmmu(proc);
				panic("ptpcheck: wrong prev");
			}
		}
	}
	npgs = 0;
	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
		for(i = 0; i < npgs; i++)
			if(pgs[i] == fp)
				panic("ptpcheck: dup free page");
		pgs[npgs++] = fp;
	}
}
static uintmem
pteflags(uint attr)
{
	uintmem flags;

	flags = 0;
	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
		panic("mmuput: wrong attr bits: %#x\n", attr);
	if(attr&PTEVALID)
		flags |= PteP;
	if(attr&PTEWRITE)
		flags |= PteRW;
	if(attr&PTEUSER)
		flags |= PteU;
	/* Can't do this -- what do we do?
	if(attr&PTEUNCACHED)
		flags |= PtePCD;
	*/
	if(attr&PTENOEXEC)
		flags &= ~PteX;
	return flags;
}
void
invlpg(uintptr_t _)
{
	// TODO
	if (0) print("invlpg is not implemented, continuing anyway (addr is %p)\n", _);
}
/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
void
mmuput(uintptr_t va, Page *pg, uint attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	uintmem pa, ppage;
	char buf[80];
	uint64_t pteattr = 0;

	/* set the PTE attribute bits based on attr. */
	if (attr & PTEVALID) {
		pteattr = PTE_V | PTE_R | PTE_X;
		if (attr & PTENOEXEC)
			pteattr &= ~PTE_X;
		if (attr & PTEWRITE)
			pteattr |= PTE_W;
	}

	if (DBGFLG) {
		print("mmuput: va %p, pa %p, attr 0x%x\n", va, pg->pa, attr);
		dumpmmuwalk(va);
		print("now try the put\n");
	}
	ppage = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");
	if(va == 0)
		panic("mmuput: zero va");
	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	if (pg->pgszi < 0) {
		print("mmuput(%p, %p, 0x%x): bad pgszi %d for pa %p\n",
			va, pg, attr, pg->pgszi, pa);
		assert(pg->pgszi >= 0);
	}
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz-1))
		panic("mmuput: pa offset non zero: %#llx\n", pa);
	pa |= pteflags(attr);

	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 2);
	if (DBGFLG) print("user is %d, index for %p is 0x%x, ", user, va, x);
	pte = UINT2PTR(machp()->MMU.root->va);
	pte += x;
	prev = machp()->MMU.root;
	if (DBGFLG) print("starting PTE at l2 is %p\n", pte);
	for(lvl = 2; lvl >= 0; lvl--){
		if(user){
			if(pgsz == 2*MiB && lvl == 1)	/* use 2M */
				break;
			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x){
				if(*pte == 0){
					print("mmu: jmk and nemo had fun\n");
					*pte = (PPN(page->pa)>>2)|PteP;
					if (DBGFLG) print("level %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, pa);
				}
				break;
			}

		if(page == nil){
			if(up->MMU.mmuptp[0] == nil) {
				page = mmuptpalloc();
				if (DBGFLG) print("\tallocated page %p\n", page);
			} else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
				if (DBGFLG) print("\tReused page %p\n", page);
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = (PPN(page->pa)>>2)|PteP;
			if (DBGFLG) print("\tlevel %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, PPN(page->pa));
			if(lvl == 2 && x >= machp()->MMU.root->daddr)
				machp()->MMU.root->daddr = x+1;
		}
		x = PTLX(va, lvl-1);
		if (DBGFLG) print("\tptlx(%p,%d) is 0x%x\n", va, lvl-1, x);

		ppage = PTE2PA(*pte);
		if (DBGFLG) print("\tpa for pte %p val 0x%llx ppage %p\n", pte, *pte, ppage);
		if(ppage == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);

		pte = UINT2PTR(KADDR(ppage));
		pte += x;
		if (DBGFLG) print("\tpte for next iteration is %p\n", pte);
		prev = page;
	}

	if (DBGFLG) print("\tAFTER LOOP pte %p val 0x%llx ppn %p\n", pte, *pte, pa);
	if(DBGFLG)
		checkpte(ppage, pte);
	*pte = (pa>>2)|PteU;
	if (DBGFLG) print("\tAFTER SET pte %p val 0x%llx ppn %p\n", pte, *pte, pa);
	if(user)
		switch(pgsz){
		case 2*MiB:
		case 1*GiB:
			*pte |= pteattr | PteFinal | PteP;
			if (DBGFLG) print("\tUSER PAGE pte %p val 0x%llx\n", pte, *pte);
			break;
		default:
			panic("\tmmuput: user pages must be 2M or 1G");
		}
	splx(pl);

	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
			machp()->machno, up, pte, pte?*pte:~0);
		print("%s", buf);
	}

	invlpg(va);			/* only if old entry valid? */
	//dumpmmuwalk(va);
	//hexdump((void *)va, 16);
	if (DBGFLG) print("returning from mmuput\n");
}
#if 0
static Lock mmukmaplock;
#endif

#define PML4X(v)	PTLX((v), 3)
#define PDPX(v)		PTLX((v), 2)
#define PDX(v)		PTLX((v), 1)
#define PTX(v)		PTLX((v), 0)
int
mmukmapsync(uint64_t va)
{
	USED(va);
	return 0;
}
// findKSeg2 finds kseg2, i.e., the lowest virtual
// address mapped by firmware. We need to know this so we can
// correctly and easily compute KADDR and PADDR.
// TODO: actually do it.
// It is *possible* that we'll be able to pick this up from
// the configstring.
void *
findKSeg2(void)
{
	// return the Sv39 address that we know coreboot
	// set up.
	return (void *)(~0ULL<<38);
}
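/*
 * Illustrative arithmetic: ~0ULL<<38 is 0xffffffc000000000, the base of
 * the top 256 GiB region of the sign-extended Sv39 virtual address space.
 */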
/* mmuwalk will walk the page tables as far as we ask (level)
 * or as far as possible (you might hit a tera/giga/mega PTE).
 * If it gets a valid PTE it will return it in ret; test for
 * validity by testing PteP. To see how far it got, check
 * the return value. */
int
mmuwalk(PTE* root, uintptr_t va, int level, PTE** ret,
	uint64_t (*alloc)(usize))
{
	int l;
	uintmem pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1) {
		print("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
		print("PTLX(%p, 2) is 0x%x\n", va, PTLX(va, 2));
		print("root is %p\n", root);
	}
	pte = &root[PTLX(va, 2)];
	if(DBGFLG > 1) {
		print("pte is %p\n", pte);
		print("*pte is %p\n", *pte);
	}
	for(l = 2; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0){
				splx(pl);
				return -1;
			}
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa|PteRW|PteP;
		}
		else if(*pte & PteFinal)
			break;
		pte = UINT2PTR(KADDR((*pte&~0x3ff)<<2));	/* i.e. PPN(*pte) */
		if (DBGFLG > 1)
			print("pte is %p: ", pte);
		pte += PTLX(va, l-1);
		if (DBGFLG > 1)
			print("and pte after index is %p\n", pte);
	}
	*ret = pte;
	splx(pl);
	return l;
}
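/*
 * A minimal usage sketch (illustrative only; mmuwalk_example is
 * hypothetical): look up the leaf PTE for a virtual address without
 * allocating missing levels, then recover the physical address.
 */
#if 0
static void
mmuwalk_example(uintptr_t va)
{
	PTE *pte;
	int l;

	/* walk to level 0; nil alloc means stop at the first hole */
	l = mmuwalk(UINT2PTR(machp()->MMU.root->va), va, 0, &pte, nil);
	if(l >= 0 && (*pte & PteP))
		print("va %#p -> pa %#llx at level %d\n", va, PTE2PA(*pte), l);
}
#endif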
uintmem
mmuphysaddr(uintptr_t va)
{
	int l;
	PTE *pte;
	uint64_t ppn;
	uintmem mask, pa;

	msg("mmuphysaddr\n");
	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or uintptr?
	 */
	print("machp() %p\n", machp());
	print("machp()->MMU.root %p\n", machp()->MMU.root);
	print("... va %p\n", machp()->MMU.root->va);
	l = mmuwalk(UINT2PTR(machp()->MMU.root->va), va, 0, &pte, nil);
	print("pte is %p *pte is 0x%llx\n", pte, *pte);
	print("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;
	ppn = (*pte & ~0x3ff) << 2;
	print("PPN from PTE is %llx\n", ppn);
	mask = PGLSZ(l)-1;
	pa = (ppn & ~mask) + (va & mask);
	print("physaddr: mask is %llx, ~mask %llx, ppn & ~mask %llx\n", mask, ~mask, ppn & ~mask);
	print("physaddr: RESULT: l %d va %#p pa %#llx\n", l, va, pa);
	return pa;
}
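/*
 * Worked example (an illustration, assuming PGLSZ(0) is 4 KiB): for a
 * leaf found at level 0, mask = PGLSZ(0)-1 = 0xfff, so
 * pa = (ppn & ~0xfff) + (va & 0xfff): the page base from the PTE plus
 * the byte offset from va.
 */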
/* to accommodate the weirdness of the rv64 modes, we're going to leave it as a 4
 * level PT, and fake up the PML4 with one entry when it's 3 levels. Later, we want
 * to be smarter, but a lot of our code is pretty wired to assume a 4 level PT and I'm
 * not wanting to just rip it all out. */
void
mmuinit(void)
{
	uint8_t *p;
	uint64_t o, pa, sz, n;

	n = archmmu();
	print("%d page sizes\n", (int)n);
	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);
	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;

		panic("not yet");
#if 0
		memmove(p, UINT2PTR(mach0root.va), PTSZ);
		machp()->MMU.root = &machp()->MMU.root;
		machp()->MMU.root->va = PTR2UINT(p);
		machp()->MMU.root->pa = PADDR(p);
		machp()->MMU.root->daddr = mach0root.daddr;	/* # of user mappings in root */

		rootput(machp()->MMU.root->pa);
		print("m %#p root %#p\n", machp(), machp()->MMU.root);
#endif
		return;
	}
	machp()->MMU.root = &sys->root;

	uintptr_t PhysicalRoot = read_csr(sptbr)<<12;
	PTE *root = KADDR(PhysicalRoot);
	print("Physical root is %#llx and root %p\n", PhysicalRoot, root);
	PTE *KzeroPTE;
	/* As it happens, at this point, we don't know the number of page table levels.
	 * But a walk to "level 4" will work even if it's only 3, and we can use that
	 * information to know what to do. Further, KSEG0 is the last 2M so this will
	 * get us the last PTE on either an L3 or L2 pte page */
	int l;
	if((l = mmuwalk(root, KSEG0, 2, &KzeroPTE, nil)) < 0) {
		panic("Can't walk to PtePML2");
	}
	print("KzeroPTE is %p\n", KzeroPTE);
	int PTLevels = (*KzeroPTE>>9)&3;
	switch(PTLevels) {
	default:
		panic("unsupported number of page table levels: %d", PTLevels);
		break;
	case 0:
		machp()->MMU.root->pa = PhysicalRoot;
		print("root is %#llx\n", machp()->MMU.root->pa);
		machp()->MMU.root->va = (uintptr_t) KADDR(machp()->MMU.root->pa);
		break;
	}

	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);
	/*
	 * Set up the various kernel memory allocator limits:
	 * pmstart/pmend bound the unused physical memory;
	 * vmstart/vmend bound the total possible virtual memory
	 * used by the kernel;
	 * vmunused is the highest virtual address currently mapped
	 * and used by the kernel;
	 * vmunmapped is the highest virtual address currently
	 * mapped by the kernel.
	 * Vmunused can be bumped up to vmunmapped before more
	 * physical memory needs to be allocated and mapped.
	 *
	 * This is set up here so meminit can map appropriately.
	 */
	o = sys->pmstart;
	sz = ROUNDUP(o, 4*MiB) - o;
	pa = asmalloc(0, sz, 1, 0);
	if(pa != o)
		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
	sys->pmstart += sz;

	sys->vmstart = KSEG0;
	/* more issues with arithmetic since physmem is at 80000000 */
	o &= 0x7fffffff;
	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
	sys->vmend = sys->vmstart + TMFM;

	// on amd64, this was set to just the end of the kernel, because
	// only that much was mapped, and also vmap required a lot of
	// free *address space* (not memory, *address space*) for the
	// vmap functions. vmap was a hack we intended to remove.
	// It's still there. But we can get rid of it on riscv.
	// There's lots more to do but at least vmap is gone,
	// as is the PDMAP hack, which was also supposed to
	// be temporary.
	// TODO: We get much further now but still
	// die in meminit(). When that's fixed remove
	// this TODO.
	sys->vmunmapped = sys->vmend;

	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	dumpmmuwalk(KZERO);
	mmuphysaddr(PTR2UINT(end));
}