/*
 * Memory mappings. Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load). The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0. All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts. The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree. The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap. The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap. The total amount (256M)
 * could probably be reduced somewhat if desired. The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M). Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory. It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages. The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them. For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */
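
/*
 * For illustration: with the 1-1 mapping described above, KADDR(pa)
 * is simply pa+KZERO and PADDR(va) is va-KZERO (see kaddr and paddr
 * at the end of this file), so with KZERO at 0xF0000000 only the
 * first 256MB of physical memory is directly addressable by the kernel.
 */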
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};

static int didmmuinit;

static void taskswitch(ulong, ulong);
static void memglobal(void);

#define	vpt	((ulong*)VPT)
#define	VPTX(va)	(((ulong)(va))>>12)
#define	vpd	(vpt+VPTX(VPT))
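
/*
 * A rough sketch of how the recursive pdb entry works, assuming VPT is
 * 4M-aligned: vpt is a flat 1M-entry array of ulongs in which
 * vpt[VPTX(va)], i.e. vpt[va>>12], is the second-level pte mapping va;
 * vpd = vpt+VPTX(VPT) lands on the page that maps the VPT itself,
 * which is the page directory, so vpd[PDX(va)] is the pde for va.
 */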

void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#.8lux kmap=%#.8ux\n",
		VPT, (ulong)vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	memset(m->tss, 0, sizeof(Tss));
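	/*
	 * An I/O-map base offset (the upper 16 bits of iomap) beyond the
	 * TSS limit means the processor finds no I/O permission bitmap,
	 * so user-level port access always faults; 0xDFFF is well past
	 * sizeof(Tss).
	 */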
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page. Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard). Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3). Since kernel memory mappings
 * are never removed, this is safe. (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(m->tss->cr3);
}
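
/*
 * For reference: invlpg first appeared on the 486, hence the family
 * check above; earlier processors can only flush the entire TLB by
 * reloading CR3.
 */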

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting. Clear all the pde entries for user-space
 * memory mappings and device mappings. Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	tss->cr3 = pdb;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
		if(up->nkmap)
			panic("mmurelease: nkmap %d\n", up->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	page = mmupdballoc();
	s = splhi();
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	//XXX should have this	m->tss->cr3 = up->mmupdb->pa;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault. pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0)
			page = newpage(0, 0, 0);
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %.8lux %.8lux\n", getcr3(), up->mmupdb->pa);
}
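
/*
 * For illustration (putmmu is normally called from the portable
 * fault-handling code): once a fault at va has been resolved to a
 * physical page pg, a call such as
 *	putmmu(va, pg->pa|PTEWRITE, pg);
 * installs a writable user mapping, while omitting PTEWRITE installs
 * a read-only one.
 */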

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level. This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){
	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
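
/*
 * For illustration: mmuinit above uses
 *	pte = mmuwalk(m->pdb, va, 2, 0);
 * to find (without creating) the level-2 entry for a kernel text page,
 * and pdbmap below uses mmuwalk(pdb, va, 2, 1) to create page tables
 * as it goes.
 */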

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE. The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */
static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(pa == 0){
		print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux\n",
			va, size, getcallerpc(&va));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 */
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pae, pgsz, *pte, *table;
	ulong flag;

	flag = pa&0xFFF;
	pa &= ~0xFFF;
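	/* can we use 4MB pages? cpuid function 1 edx bit 3 (0x08) is PSE support; CR4 bit 4 (0x10) enables it */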
	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	pae = pa + size;
	while(pa < pae){
		table = &pdb[PDX(va)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va, pa, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && pa%(4*MB) == 0 && va%(4*MB) == 0 && (pae >= pa+4*MB)){
			*table = pa|PTESIZE|flag|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va, pa, *pte);
			*pte = pa|flag|PTEVALID;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
	}
	return 0;
}

/*
 * Remove mappings. Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * Only copy pdb entries and they never go away,
 * so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}

/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack). The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define	kpt	(vpt+VPTX(KMAP))
#define	NKPT	(KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		s = spllo();
		up->kmaptable = newpage(0, 0, 0);
		splx(s);
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		memset(kpt, 0, BY2PG);

		/* might as well finish the job */
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}
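
/*
 * Typical use, for illustration; callers usually go through a VA()
 * macro, but casting the KMap* directly works here because kmap
 * returns the mapped virtual address itself:
 *	k = kmap(pg);
 *	memset((void*)k, 0, BY2PG);
 *	kunmap(k);
 */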

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#.8lux", va, getcallerpc(&k));
	up->nkmap--;
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%.8lux: *%.8lux=%.8lux (vpt=%.8lux index=%.8lux)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}
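
/*
 * The usage pattern, as in upallocpdb and mmuptefree above:
 *	s = splhi();
 *	pdb = tmpmap(page);
 *	... edit pdb ...
 *	tmpunmap(pdb);
 *	splx(s);
 * The splhi is what keeps the single per-processor TMPADDR slot from
 * being reused while it is in use.
 */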

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#.8lux", va, getcallerpc(&va));
	return va-KZERO;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}

void
checkfault(ulong addr, ulong pc)
{
	ulong *a;
	int i;

	print("user fault: addr=%.8lux pc=%.8lux\n", addr, pc);
	if(!(vpd[PDX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux)\n", vpd[PDX(addr)]);
	else if(!(vpt[VPTX(addr)]&PTEVALID))
		print("addr not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);
	else
		print("addr mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(addr)], vpt[VPTX(addr)]);

	if(!(vpd[PDX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux)\n", vpd[PDX(pc)]);
	else if(!(vpt[VPTX(pc)]&PTEVALID))
		print("pc not mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
	else{
		print("pc mapped (vpd=%.8lux vpt=%.8lux)\n",
			vpd[PDX(pc)], vpt[VPTX(pc)]);
		if(PPN(pc) == PPN(pc+4))	/* not crossing into an unmapped page */
			print("*pc: %.8lux\n", *(ulong*)pc);
		a = (ulong*)PPN(pc);
		for(i=0; i<WD2PG; i++)
			if(a[i] != 0)
				break;
		if(i == WD2PG)
			print("pc's page is all zeros\n");
		else{
			for(i=0; i<256/4; i+=8){
				print("%.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux %.8lux\n",
					PPN(pc)+i*4, a[i], a[i+1], a[i+2], a[i+3],
					a[i+4], a[i+5], a[i+6], a[i+7]);
			}
		}
	}
}