mmu.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "encoding.h"
#include "mmu.h"

#undef DBGFLG
#define DBGFLG 0

/* this gets pretty messy. RV64 has *at least* two modes:
 * 4 level and 3 level page tables. And people wonder why
 * I like soft TLB so much. Anyway, for now, not sure
 * how to handle it.
 * Would be cool to work out a way to Do The Right Thing
 * without regard to page size, so that's what I'm going to
 * try to do.
 */
void msg(char *);

/*
 * To do:
 * PteNX;
 * mmukmapsync grot for >1 processor;
 * mmuptcopy (PteSHARED trick?);
 * calculate and map up to TMFM (conf crap);
 */

/* strike off 2M so it won't wrap to 0. Sleazy. */
#define TMFM (2 * GiB - 2 * MiB) /* kernel memory */

#define PPN(x) ((x) & ~(PGSZ - 1))
#define PTE2PPN(p) ((p) >> 10)
#define PTE2PA(p) (((p) >> 10) << 12)
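/* In the RISC-V Sv39/Sv48 PTE format the physical page number sits at
 * bit 10, above the flag bits, so PTE2PA() drops the low 10 flag bits
 * and shifts the PPN up by the 12-bit page offset to recover a
 * physical address. PPN(), by contrast, masks a physical address down
 * to a page boundary. */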
#if 0
/* Print the page table structures to the console */
void
print_page_table(void)
{
	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
}
#endif
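/* sfence.vm is the TLB-flush instruction from the pre-1.10 RISC-V
 * privileged spec; privileged spec 1.10 renamed it sfence.vma (and
 * sptbr became satp), so newer assemblers may reject this mnemonic. */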
void
flush_tlb(void)
{
	asm volatile("sfence.vm");
}

usize
pte_ppn(u64 pte)
{
	return pte >> PTE_PPN_SHIFT;
}
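/* A RISC-V PTE with V set but R, W, and X all clear is a pointer to
 * the next level of the tree; one with any of R/W/X set is a leaf.
 * ptd_create() builds a page-directory (non-leaf) entry, while
 * pte_create() builds a leaf. */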
u64
ptd_create(usize ppn)
{
	return (ppn << PTE_PPN_SHIFT) | PTE_V;
}

u64
pte_create(usize ppn, int prot, int user)
{
	u64 pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;
	if(prot & PTE_W)
		pte |= PTE_W;
	if(prot & PTE_X)
		pte |= PTE_X;
	if(user)
		pte |= PTE_U;
	return pte;
}
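/* sptbr (renamed satp in privileged spec 1.10) holds the physical
 * page number of the root page table; writing it switches the address
 * space, so rootput() is what actually installs a set of tables. */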
void
rootput(usize root)
{
	Proc *up = externup();
	usize ptbr = root >> RISCV_PGSHIFT;

	if(0)
		print("rootput %p pid %d\n", root, up ? up->pid : -1);
	write_csr(sptbr, ptbr);
}

void
mmuflushtlb(void)
{
	machp()->tlbpurge++;
	if(machp()->MMU.root->daddr){
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr * sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}
	rootput((usize)machp()->MMU.root->pa);
}

void
mmuflush(void)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pl);
}
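/* Move this process's page-table pages back onto its level-0 free
 * list: for each level above 0, unhook every page from its parent
 * (zeroing the parent PTE) and, if clear is set, zero the page itself
 * so it can be reused without carrying stale mappings. */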
static void
mmuptpfree(Proc *proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	if(0)
		print("MMUPTPFREE: proc %p, pid %d\n", proc, proc->pid);
	for(l = 1; l < 3; l++){
		last = &proc->MMU.mmuptp[l];
		if(0)
			print("%s: level %d: last is %p\n", __func__, l, *last);
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next){
			// what is right here? 2 or 1?
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}
	machp()->MMU.root->daddr = 0;
}
static void
tabs(int n)
{
	int i;

	for(i = 0; i < n; i++)
		print(" ");
}

void
dumpptepg(int lvl, usize pa)
{
	PTE *pte;
	int tab, i;

	tab = 4 - lvl;
	pte = UINT2PTR(KADDR(pa));
	for(i = 0; i < PTSZ / sizeof(PTE); i++)
		if(pte[i] & PteP){
			tabs(tab);
			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);
			/* skip kernel mappings */
			if((pte[i] & PteU) == 0){
				tabs(tab + 1);
				print("...kern...\n");
				continue;
			}
			if(lvl > 2)
				dumpptepg(lvl - 1, PPN(pte[i]));
		}
}

void
dumpmmu(Proc *p)
{
	int i;
	Page *pg;

	print("proc %#p, pid %d\n", p, p->pid);
	for(i = 3; i > 0; i--){
		print("mmuptp[%d]:\n", i);
		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
			print("\tpg %#p = va %#llx pa %#llx"
			      " daddr %#lx next %#p prev %#p\n",
			      pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
	}
	print("root %#llx\n", machp()->MMU.root->pa);
}

void
dumpmmuwalk(u64 addr)
{
	int l;
	PTE *pte, *root;

	root = UINT2PTR(machp()->MMU.root->va);
	print("root is %p\n", root);
	if((l = mmuwalk(root, addr, 2, &pte, nil)) >= 0){
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 1, &pte, nil)) >= 0){
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if((l = mmuwalk(root, addr, 0, &pte, nil)) >= 0){
		print("\tcpu%d: mmu l%d pte %#p = ", machp()->machno, l, pte);
		print("%llx, PA is %llx\n", *pte, PTE2PA(*pte));
	}
	if(PTE2PA(*pte) != 0)
		hexdump(KADDR(PTE2PA(*pte)), 32);
}
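/* mmuptpfreelist is a dummy Page used only for its lock, next pointer,
 * and ref count: it heads the global free list of page-table pages
 * that mmurelease() returns and mmuptpalloc() recycles. */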
static Page mmuptpfreelist;

static Page *
mmuptpalloc(void)
{
	void *va;
	Page *page;

	/*
	 * Do not really need a whole Page structure,
	 * but it makes testing this out a lot easier.
	 * Could keep a cache and free excess.
	 * Have to maintain any fiction for pexit?
	 */
	lock(&mmuptpfreelist.l);
	if((page = mmuptpfreelist.next) != nil){
		mmuptpfreelist.next = page->next;
		mmuptpfreelist.ref--;
		unlock(&mmuptpfreelist.l);
		if(page->ref++ != 0)
			panic("mmuptpalloc ref\n");
		page->prev = page->next = nil;
		memset(UINT2PTR(page->va), 0, PTSZ);
		if(page->pa == 0)
			panic("mmuptpalloc: free page with pa == 0");
		return page;
	}
	unlock(&mmuptpfreelist.l);

	if((page = malloc(sizeof(Page))) == nil){
		if(0)
			print("mmuptpalloc Page\n");
		return nil;
	}
	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
		if(0)
			print("mmuptpalloc va\n");
		free(page);
		return nil;
	}
	page->va = PTR2UINT(va);
	page->pa = PADDR(va);
	page->ref = 1;
	if(page->pa == 0)
		panic("mmuptpalloc: no pa");
	return page;
}
void
mmuswitch(Proc *proc)
{
	PTE *pte;
	Page *page;
	Mpl pl;

	pl = splhi();
	if(proc->newtlb){
		/*
		 * NIX: We cannot clear our page tables if they are going to
		 * be used in the AC
		 */
		if(proc->ac == nil)
			mmuptpfree(proc, 1);
		proc->newtlb = 0;
	}
	/* daddr is the number of user PTEs in use in the root. */
	if(machp()->MMU.root->daddr){
		if(0)
			print("MMUSWITCH: memset(%p, 0, %llx)\n", UINT2PTR(machp()->MMU.root->va), machp()->MMU.root->daddr * sizeof(PTE));
		memset(UINT2PTR(machp()->MMU.root->va), 0, machp()->MMU.root->daddr * sizeof(PTE));
		machp()->MMU.root->daddr = 0;
	}
	pte = UINT2PTR(machp()->MMU.root->va);
	if(0)
		print("pte %p\n", pte);
	/* N.B. On RISC-V, we DO NOT SET any of the X, R, W bits at this level,
	 * since entries here point to page-table pages one level down. Also,
	 * these are explicitly user-level pages, so PteU is set. */
	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
		if(0)
			print("MMUSWITCH: mmuptp[3]? page->pa is %p\n", page->pa);
		pte[page->daddr] = PPN(page->pa) | PteU | PteP;
		if(page->daddr >= machp()->MMU.root->daddr)
			machp()->MMU.root->daddr = page->daddr + 1;
		page->prev = machp()->MMU.root;
	}
	if(0)
		print("rootput %p\n", (void *)(usize)machp()->MMU.root->pa);
	rootput((usize)machp()->MMU.root->pa);
	if(0)
		print("splx\n");
	splx(pl);
}
void
mmurelease(Proc *proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);
	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist.l);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist.l);
	}
	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
		wakeup(&pga.rend);
	proc->MMU.mmuptp[0] = nil;
	rootput(machp()->MMU.root->pa);
}
static void
checkpte(u64 ppn, void *a)
{
	int l;
	PTE *pte, *root;
	u64 addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	root = UINT2PTR(machp()->MMU.root->va);
	pte = 0;
	s = buf;
	*s = 0;
	if((l = mmuwalk(root, addr, 2, &pte, nil)) < 0 || (*pte & PteP) == 0)
		goto Panic;
	s = seprint(s, buf + sizeof buf,
		    "check2: l%d pte %#p = %llx\n",
		    l, pte, pte ? *pte : ~0);
	if(*pte & PteFinal)
		return;
	if((l = mmuwalk(root, addr, 1, &pte, nil)) < 0 || (*pte & PteP) == 0)
		goto Panic;
	seprint(s, buf + sizeof buf,
		"check1: l%d pte %#p = %llx\n",
		l, pte, pte ? *pte : ~0);
	return;

Panic:
	seprint(s, buf + sizeof buf,
		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
		l, a, ppn, KADDR(ppn), pte, pte ? *pte : ~0);
	print("%s\n", buf);
	seprint(buf, buf + sizeof buf, "start %#llx unused %#llx"
		" unmap %#llx end %#llx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}
static void
mmuptpcheck(Proc *proc)
{
	int lvl, npgs, i;
	Page *lp, *p, *pgs[16], *fp;
	u32 idx[16];

	if(proc == nil)
		return;
	lp = machp()->MMU.root;
	for(lvl = 3; lvl >= 2; lvl--){
		npgs = 0;
		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
				if(fp == p){
					dumpmmu(proc);
					panic("ptpcheck: using free page");
				}
			for(i = 0; i < npgs; i++){
				if(pgs[i] == p){
					dumpmmu(proc);
					panic("ptpcheck: dup page");
				}
				if(idx[i] == p->daddr){
					dumpmmu(proc);
					panic("ptpcheck: dup daddr");
				}
			}
			if(npgs >= nelem(pgs))
				panic("ptpcheck: pgs is too small");
			idx[npgs] = p->daddr;
			pgs[npgs++] = p;
			if(lvl == 3 && p->prev != lp){
				dumpmmu(proc);
				panic("ptpcheck: wrong prev");
			}
		}
	}
	npgs = 0;
	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
		for(i = 0; i < npgs; i++)
			if(pgs[i] == fp)
				panic("ptpcheck: dup free page");
		pgs[npgs++] = fp;
	}
}
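/* Translate the portable PTE* attribute bits used by the port code
 * into this architecture's Pte* flag bits. PTEUNCACHED has no RISC-V
 * equivalent (base RISC-V PTEs carry no PCD-style cache-disable bit),
 * hence the commented-out block below. */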
static u64
pteflags(u32 attr)
{
	u64 flags;

	flags = 0;
	if(attr & ~(PTEVALID | PTEWRITE | PTERONLY | PTEUSER | PTEUNCACHED | PTENOEXEC))
		panic("mmuput: wrong attr bits: %#x\n", attr);
	if(attr & PTEVALID)
		flags |= PteP;
	if(attr & PTEWRITE)
		flags |= PteRW;
	if(attr & PTEUSER)
		flags |= PteU;
	/* Can't do this -- what do we do?
	if(attr & PTEUNCACHED)
		flags |= PtePCD;
	*/
	if(attr & PTENOEXEC)
		flags &= ~PteX;
	return flags;
}
void
invlpg(usize _)
{
	// TODO
	if(0)
		print("invlpg is not implemented, continuing anyway (addr is %p)\n", _);
}
/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
void
mmuput(usize va, Page *pg, u32 attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	u64 pa, ppage;
	char buf[80];
	u64 pteattr = 0;

	/* set the PTE attribute bits based on attr. */
	if(attr & PTEVALID){
		pteattr = PTE_V | PTE_R | PTE_X;
		if(attr & PTENOEXEC)
			pteattr &= ~PTE_X;
		if(attr & PTEWRITE)
			pteattr |= PTE_W;
	}
	if(DBGFLG){
		print("mmuput: va %p, pa %p, attr 0x%x\n", va, pg->pa, attr);
		dumpmmuwalk(va);
		print("now try the put\n");
	}
	ppage = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");
	if(va == 0)
		panic("mmuput: zero va");
	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	if(pg->pgszi < 0){
		print("mmuput(%p, %p, 0x%x): bad pgszi %d for pa %p\n",
		      va, pg, attr, pg->pgszi, pa);
		assert(pg->pgszi >= 0);
	}
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz - 1))
		panic("mmuput: pa offset non zero: %#llx\n", pa);
	pa |= pteflags(attr);
	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 2);
	if(DBGFLG)
		print("user is %d, index for %p is 0x%x, ", user, va, x);
	pte = UINT2PTR(machp()->MMU.root->va);
	pte += x;
	prev = machp()->MMU.root;
	if(DBGFLG)
		print("starting PTE at l2 is %p\n", pte);
	for(lvl = 2; lvl >= 0; lvl--){
		if(user){
			if(pgsz == 2 * MiB && lvl == 1) /* use 2M */
				break;
			if(pgsz == 1ull * GiB && lvl == 2) /* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x){
				if(*pte == 0){
					print("mmu: jmk and nemo had fun\n");
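					/* A page-aligned PA has its offset bits (11:0)
					 * zero, so shifting it right by 2 lands the PPN
					 * at bit 10, exactly where the PTE wants it:
					 * PPN<<10 == PA>>2. */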
					*pte = (PPN(page->pa) >> 2) | PteP;
					if(DBGFLG)
						print("level %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, pa);
				}
				break;
			}
		if(page == nil){
			if(up->MMU.mmuptp[0] == nil){
				page = mmuptpalloc();
				if(DBGFLG)
					print("\tallocated page %p\n", page);
			} else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
				if(DBGFLG)
					print("\tReused page %p\n", page);
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = (PPN(page->pa) >> 2) | PteP;
			if(DBGFLG)
				print("\tlevel %d: set pte %p to 0x%llx for pa %p\n", lvl, pte, *pte, PPN(page->pa));
			if(lvl == 2 && x >= machp()->MMU.root->daddr)
				machp()->MMU.root->daddr = x + 1;
		}
		x = PTLX(va, lvl - 1);
		if(DBGFLG)
			print("\tptlx(%p,%d) is %p\n", va, lvl - 1, x);
		ppage = PTE2PA(*pte);
		if(DBGFLG)
			print("\tpa for pte %p val 0x%llx ppage %p\n", pte, *pte, ppage);
		if(ppage == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);
		pte = UINT2PTR(KADDR(ppage));
		pte += x;
		if(DBGFLG)
			print("\tpte for next iteration is %p\n", pte);
		prev = page;
	}
	if(DBGFLG)
		print("\tAFTER LOOP pte %p val 0x%llx pa %p\n", pte, *pte, pa);
	if(DBGFLG)
		checkpte(ppage, pte);
	*pte = (pa >> 2) | PteU;
	if(DBGFLG)
		print("\tAFTER SET pte %p val 0x%llx pa %p\n", pte, *pte, pa);
	if(user)
		switch(pgsz){
		case 2 * MiB:
		case 1 * GiB:
			*pte |= pteattr | PteFinal | PteP;
			if(DBGFLG)
				print("\tUSER PAGE pte %p val 0x%llx\n", pte, *pte);
			break;
		default:
			panic("\tmmuput: user pages must be 2M or 1G");
		}
	splx(pl);
	if(DBGFLG){
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
			machp()->machno, up, pte, pte ? *pte : ~0);
		print("%s", buf);
	}
	invlpg(va); /* only if old entry valid? */
	//dumpmmuwalk(va);
	//hexdump((void *)va, 16);
	if(DBGFLG)
		print("returning from mmuput\n");
}
#if 0
static Lock mmukmaplock;
#endif

#define PML4X(v) PTLX((v), 3)
#define PDPX(v) PTLX((v), 2)
#define PDX(v) PTLX((v), 1)
#define PTX(v) PTLX((v), 0)

int
mmukmapsync(u64 va)
{
	USED(va);
	return 0;
}

// findKSeg2 finds kseg2, i.e., the lowest virtual
// address mapped by firmware. We need to know this so we can
// correctly and easily compute KADDR and PADDR.
// TODO: actually do it.
// It is *possible* that we'll be able to pick this up from
// the configstring.
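// (~0ULL << 38 is the base of the canonical upper half of the Sv39
// address space, i.e. 0xffffffc000000000: Sv39 virtual addresses are
// sign-extended from bit 38.)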
void *
findKSeg2(void)
{
	// return the Sv39 address that we know coreboot
	// set up.
	return (void *)(~0ULL << 38);
}
/* mmuwalk will walk the page tables as far as we ask (level)
 * or as far as possible (you might hit a tera/giga/mega PTE).
 * If it gets a valid PTE it will return it in ret; test for
 * validity by testing PteP. To see how far it got, check
 * the return value. */
int
mmuwalk(PTE *root, usize va, int level, PTE **ret,
	u64 (*alloc)(usize))
{
	int l;
	u64 pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1){
		print("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
		print("PTLX(%p, 2) is 0x%x\n", va, PTLX(va, 2));
		print("root is %p\n", root);
	}
	pte = &root[PTLX(va, 2)];
	if(DBGFLG > 1){
		print("pte is %p\n", pte);
		print("*pte is %p\n", *pte);
	}
	for(l = 2; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0){
				splx(pl);
				return -1;
			}
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa | PteRW | PteP;
		} else if(*pte & PteFinal)
			break;
		pte = UINT2PTR(KADDR((*pte & ~0x3ff) << 2)); // PPN(*pte)
		if(DBGFLG > 1)
			print("pte is %p: ", pte);
		pte += PTLX(va, l - 1);
		if(DBGFLG > 1)
			print("and pte after index is %p\n", pte);
	}
	*ret = pte;
	splx(pl);
	return l;
}
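/* mmuphysaddr: the PPN from the leaf PTE supplies the high bits of the
 * PA and the VA supplies the low bits; how many low bits depends on
 * the level where the walk stopped. E.g. if it stops at a 2 MiB
 * (level-1) superpage, mask is 2 MiB - 1 and the low 21 bits of va
 * carry through unchanged. */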
u64
mmuphysaddr(usize va)
{
	int l;
	PTE *pte;
	u64 ppn;
	u64 mask, pa;

	msg("mmuphysaddr\n");
	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or usize?
	 */
	print("machp() %p\n", machp());
	print("machp()->MMU.root %p\n", machp()->MMU.root);
	print("... va %p\n", machp()->MMU.root->va);
	l = mmuwalk(UINT2PTR(machp()->MMU.root->va), va, 0, &pte, nil);
	print("pte is %p *pte is 0x%llx\n", pte, *pte);
	print("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;
	ppn = (*pte & ~0x3ff) << 2;
	print("PPN from PTE is %llx\n", ppn);
	mask = PGLSZ(l) - 1;
	pa = (ppn & ~mask) + (va & mask);
	print("physaddr: mask is %llx, ~mask %llx, ppn & ~mask %llx\n", mask, ~mask, ppn & ~mask);
	print("physaddr: RESULT: l %d va %#p pa %#llx\n", l, va, pa);
	return pa;
}
/* to accommodate the weirdness of the rv64 modes, we're going to leave it as a 4
 * level PT, and fake up the PML4 with one entry when it's 3 levels. Later, we want
 * to be smarter, but a lot of our code is pretty wired to assume a 4 level PT and I'm
 * not wanting to just rip it all out. */
void
mmuinit(void)
{
	u8 *p;
	u64 o, pa, sz, n;

	n = archmmu();
	print("%lld page sizes\n", n);
	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);
	if(machp()->machno != 0){
		/* NIX: KLUDGE: Has to go when each mach is using
		 * its own page table
		 */
		p = UINT2PTR(machp()->stack);
		p += MACHSTKSZ;
		panic("not yet");
#if 0
		memmove(p, UINT2PTR(mach0root.va), PTSZ);
		machp()->MMU.root = &machp()->MMU.root;
		machp()->MMU.root->va = PTR2UINT(p);
		machp()->MMU.root->pa = PADDR(p);
		machp()->MMU.root->daddr = mach0root.daddr; /* # of user mappings in root */
		rootput(machp()->MMU.root->pa);
		print("m %#p root %#p\n", machp(), machp()->MMU.root);
#endif
		return;
	}
	machp()->MMU.root = &sys->root;

	usize PhysicalRoot = read_csr(sptbr) << 12;
	PTE *root = KADDR(PhysicalRoot);
	print("Physical root is 0x%llx and root %#p\n", PhysicalRoot, root);
	PTE *KzeroPTE;
	/* As it happens, at this point, we don't know the number of page table levels.
	 * But a walk to "level 4" will work even if it's only 3, and we can use that
	 * information to know what to do. Further, KSEG0 is the last 2M so this will
	 * get us the last PTE on either an L3 or L2 pte page */
	int l;
	if((l = mmuwalk(root, KSEG0, 2, &KzeroPTE, nil)) < 0){
		panic("Can't walk to PtePML2");
	}
	print("KzeroPTE is 0x%llx, *KzeroPTE is 0x%llx\n", KzeroPTE, *KzeroPTE);
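	/* Bits 8:7 of a RISC-V PTE are the RSW field, reserved for
	 * software. This code evidently expects the bootstrap environment
	 * to have stashed the paging mode there; 0 means the root we
	 * inherited is usable as-is. */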
	int PTLevels = (*KzeroPTE >> 7) & 3;
	switch(PTLevels){
	default:
		panic("unsupported number of page table levels: %d", PTLevels);
		break;
	case 0:
		machp()->MMU.root->pa = PhysicalRoot;
		print("root is 0x%llx\n", machp()->MMU.root->pa);
		machp()->MMU.root->va = (usize)KADDR(machp()->MMU.root->pa);
		break;
	}
	print("mach%d: %#p root %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.root, sys->npgsz);

	/*
	 * Set up the various kernel memory allocator limits:
	 * pmstart/pmend bound the unused physical memory;
	 * vmstart/vmend bound the total possible virtual memory
	 * used by the kernel;
	 * vmunused is the highest virtual address currently mapped
	 * and used by the kernel;
	 * vmunmapped is the highest virtual address currently
	 * mapped by the kernel.
	 * Vmunused can be bumped up to vmunmapped before more
	 * physical memory needs to be allocated and mapped.
	 *
	 * This is set up here so meminit can map appropriately.
	 */
	o = sys->pmstart;
	sz = ROUNDUP(o, 4 * MiB) - o;
	pa = asmalloc(0, sz, 1, 0);
	if(pa != o)
		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
	sys->pmstart += sz;
	sys->vmstart = KSEG0;
	/* more issues with arithmetic since physmem is at 80000000 */
	o &= 0x7fffffff;
	sys->vmunused = sys->vmstart + ROUNDUP(o, 4 * KiB);
	sys->vmend = sys->vmstart + TMFM;
	// on amd64, this was set to just the end of the kernel, because
	// only that much was mapped, and also vmap required a lot of
	// free *address space* (not memory, *address space*) for the
	// vmap functions. vmap was a hack we intended to remove.
	// It's still there. But we can get rid of it on riscv.
	// There's lots more to do but at least vmap is gone,
	// as is the PDMAP hack, which was also supposed to
	// be temporary.
	// TODO: We get much further now but still
	// die in meminit(). When that's fixed remove
	// this TODO.
	sys->vmunmapped = sys->vmend;
	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
	      sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	dumpmmuwalk(KZERO);
	mmuphysaddr(PTR2UINT(end));
}
// This is a no-op on RISC-V.
void
mmuprocinit(Proc *p)
{
	USED(p);
}