/*
 * i386 MMU management: page tables, GDT/TSS and kernel memory mappings.
 */
- #include "u.h"
- #include "../port/lib.h"
- #include "mem.h"
- #include "dat.h"
- #include "fns.h"
- #include "io.h"
/*
 * Segment-descriptor builders for the global descriptor table:
 * flat 4GB code and data segments at privilege level p, and a
 * TSS descriptor for the task state segment at base b.
 */
#define DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

/* prototype GDT; copied into each processor's Mach by mmuinit() */
Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
};
- static void
- taskswitch(ulong pdb, ulong stack)
- {
- Tss *tss;
- tss = m->tss;
- tss->ss0 = KDSEL;
- tss->esp0 = stack;
- tss->ss1 = KDSEL;
- tss->esp1 = stack;
- tss->ss2 = KDSEL;
- tss->esp2 = stack;
- tss->cr3 = pdb;
- putcr3(pdb);
- }
- /*
- * On processors that support it, we set the PTEGLOBAL bit in
- * page table and page directory entries that map kernel memory.
- * Doing this tells the processor not to bother flushing them
- * from the TLB when doing the TLB flush associated with a
- * context switch (write to CR3). Since kernel memory mappings
- * are never removed, this is safe. (If we ever remove kernel memory
- * mappings, we can do a full flush by turning off the PGE bit in CR4,
- * writing to CR3, and then turning the PGE bit back on.)
- *
- * See also mmukmap below.
- *
- * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
- */
- static void
- memglobal(void)
- {
- int i, j;
- ulong *pde, *pte;
- /* only need to do this once, on bootstrap processor */
- if(m->machno != 0)
- return;
- if(!m->havepge)
- return;
- pde = m->pdb;
- for(i=512; i<1024; i++){ /* 512: start at entry for virtual 0x80000000 */
- if(pde[i] & PTEVALID){
- pde[i] |= PTEGLOBAL;
- if(!(pde[i] & PTESIZE)){
- pte = KADDR(pde[i]&~(BY2PG-1));
- for(j=0; j<1024; j++)
- if(pte[j] & PTEVALID)
- pte[j] |= PTEGLOBAL;
- }
- }
- }
- }
/*
 * Per-processor MMU initialisation: allocate the Tss, install a
 * private copy of the GDT, load GDT/IDT/TR and write-protect
 * kernel text.  The ordering here matters: the GDT must be live
 * before the TSS selector is loaded with ltr().
 */
void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	memglobal();

	m->tss = malloc(sizeof(Tss));
	memset(m->tss, 0, sizeof(Tss));
	/* i/o map base beyond the TSS limit: user port access always traps */
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	/* point this processor's TSS descriptor at the freshly allocated Tss */
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	/* load GDT: 16-bit limit followed by 32-bit linear base */
	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	/* load IDT, built elsewhere at IDTADDR; 256 gate descriptors */
	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	/* run on the prototype pdb and this Mach's stack, then load the TSS */
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	ltr(TSSSEL);
}
- void
- flushmmu(void)
- {
- int s;
- s = splhi();
- up->newtlb = 1;
- mmuswitch(up);
- splx(s);
- }
/*
 * Release the page-table pages of proc's user address space:
 * clear each page directory entry recorded on the mmuused list,
 * then splice the whole mmuused list onto the front of
 * proc->mmufree.  The pdb page itself is left alone.
 */
static void
mmuptefree(Proc* proc)
{
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb && proc->mmuused){
		pdb = (ulong*)proc->mmupdb->va;
		last = &proc->mmuused;
		for(page = *last; page; page = page->next){
			/* daddr remembers which pde slot this table page served */
			pdb[page->daddr] = 0;
			last = &page->next;
		}
		/* append old free list to tail of mmuused, then swap heads */
		*last = proc->mmufree;
		proc->mmufree = proc->mmuused;
		proc->mmuused = 0;
	}
}
- void
- mmuswitch(Proc* proc)
- {
- ulong *pdb;
- if(proc->newtlb){
- mmuptefree(proc);
- proc->newtlb = 0;
- }
- if(proc->mmupdb){
- pdb = (ulong*)proc->mmupdb->va;
- pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
- taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
- }
- else
- taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
- }
void
mmurelease(Proc* proc)
{
	Page *page, *next;

	/*
	 * Release any pages allocated for a page directory base or page-tables
	 * for this process:
	 *   switch to the prototype pdb for this processor (m->pdb);
	 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
	 *   onto the process' free list (proc->mmufree). This has the side-effect of
	 *   cleaning any user entries in the pdb (proc->mmupdb);
	 *   if there's a pdb put it in the cache of pre-initialised pdb's
	 *   for this processor (m->pdbpool) or on the process' free list;
	 *   finally, place any pages freed back into the free pool (palloc).
	 * This routine is only called from sched() with palloc locked.
	 */
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	mmuptefree(proc);

	if(proc->mmupdb){
		if(m->pdbcnt > 10){
			/* pool is full; release the pdb page with the rest */
			proc->mmupdb->next = proc->mmufree;
			proc->mmufree = proc->mmupdb;
		}
		else{
			/* cache the now mostly-clean pdb for quick reuse */
			proc->mmupdb->next = m->pdbpool;
			m->pdbpool = proc->mmupdb;
			m->pdbcnt++;
		}
		proc->mmupdb = 0;
	}

	for(page = proc->mmufree; page; page = next){
		next = page->next;
		/* page-table pages are never shared: ref must drop to 0 */
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}
/*
 * Allocate a page directory for a process: preferably a cached,
 * pre-initialised one from this processor's pool, otherwise a
 * fresh page seeded with a copy of the prototype pdb (which
 * carries all the kernel mappings).
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;

	s = splhi();
	if(m->pdbpool == 0){
		/* newpage can sleep: drop back to low priority first */
		spllo();
		page = newpage(0, 0, 0);
		page->va = VA(kmap(page));
		memmove((void*)page->va, m->pdb, BY2PG);
	}
	else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}
- void
- checkmmu(ulong va, ulong pa)
- {
- ulong *pdb, *pte;
- int pdbx;
-
- if(up->mmupdb == 0)
- return;
- pdb = (ulong*)up->mmupdb->va;
- pdbx = PDX(va);
- if(PPN(pdb[pdbx]) == 0){
- /* okay to be empty - will fault and get filled */
- return;
- }
-
- pte = KADDR(PPN(pdb[pdbx]));
- if(pte[PTX(va)] == 0)
- return;
- if((pte[PTX(va)]&~4095) != pa)
- print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
- up->pid, up->text,
- va, pa, pte[PTX(va)]);
- }
/*
 * Install a user mapping of va to pa (pa carries the permission
 * bits supplied by the caller) in the current process's page
 * table.  The pdb and 2nd-level table are allocated on demand;
 * new table pages are recorded on up->mmuused so mmuptefree()
 * can reclaim them.  The unused Page* parameter matches the
 * portable putmmu interface.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int pdbx;
	Page *page;
	ulong *pdb, *pte;
	int s;

	if(up->mmupdb == 0)
		up->mmupdb = mmupdballoc();
	pdb = (ulong*)up->mmupdb->va;
	pdbx = PDX(va);

	if(PPN(pdb[pdbx]) == 0){
		/* no page table for this 4MB region yet: get one */
		if(up->mmufree == 0){
			page = newpage(1, 0, 0);
			page->va = VA(kmap(page));
		}
		else {
			/* recycle a freed table page; clear stale entries */
			page = up->mmufree;
			up->mmufree = page->next;
			memset((void*)page->va, 0, BY2PG);
		}
		pdb[pdbx] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* remember the pde slot so mmuptefree can clear it */
		page->daddr = pdbx;
		page->next = up->mmuused;
		up->mmuused = page;
	}

	pte = KADDR(PPN(pdb[pdbx]));
	pte[PTX(va)] = pa|PTEUSER;

	/* refresh the Mach mapping and flush the TLB to expose the entry */
	s = splhi();
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	mmuflushtlb(up->mmupdb->pa);
	splx(s);
}
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong pa, *table;

	/*
	 * Walk the page-table pointed to by pdb and return a pointer
	 * to the entry for virtual address va at the requested level.
	 * If the entry is invalid and create isn't requested then bail
	 * out early. Otherwise, for the 2nd level walk, allocate a new
	 * page-table page and register it in the 1st level.
	 */
	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		/* level 1: the page directory entry itself */
		return table;

	case 2:
		/* cannot descend through a 4MB (PTESIZE) mapping */
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
		if(!(*table & PTEVALID)){
			/* xspanalloc gives page-aligned, zeroed kernel memory */
			pa = PADDR(xspanalloc(BY2PG, BY2PG, 0));
			*table = pa|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
/* serialises updates to the kernel mappings in MACHP(0)->pdb */
static Lock mmukmaplock;

/*
 * Propagate the kernel mapping for va from the bootstrap
 * processor's page table into this processor's pdb and, if the
 * current process has one, its pdb too.  Returns 0 if va isn't
 * mapped in MACHP(0)->pdb, 1 on success.
 */
int
mmukmapsync(ulong va)
{
	Mach *mach0;
	ulong entry, *pte;

	mach0 = MACHP(0);

	ilock(&mmukmaplock);

	if((pte = mmuwalk(mach0->pdb, va, 1, 0)) == nil){
		iunlock(&mmukmaplock);
		return 0;
	}
	/* a small-page pde must also have a valid 2nd-level entry */
	if(!(*pte & PTESIZE) && mmuwalk(mach0->pdb, va, 2, 0) == nil){
		iunlock(&mmukmaplock);
		return 0;
	}
	entry = *pte;

	/* copy the pde only; the 2nd-level table itself is shared */
	if(!(m->pdb[PDX(va)] & PTEVALID))
		m->pdb[PDX(va)] = entry;
	if(up && up->mmupdb){
		((ulong*)up->mmupdb->va)[PDX(va)] = entry;
		mmuflushtlb(up->mmupdb->pa);
	}
	else
		mmuflushtlb(PADDR(m->pdb));

	iunlock(&mmukmaplock);

	return 1;
}
/*
 * Map physical address pa at virtual address va (or at KADDR(pa)
 * when va is 0) for size bytes in the bootstrap processor's page
 * table, using 4MB pages where PSE is available and alignment
 * allows.  Addresses already mapped are verified rather than
 * remapped; a mismatch panics.  Returns the physical address just
 * past the last page handled and calls mmukmapsync() if anything
 * new was installed.
 */
ulong
mmukmap(ulong pa, ulong va, int size)
{
	Mach *mach0;
	ulong ova, pae, *table, pgsz, *pte, x;
	int pse, sync;

	mach0 = MACHP(0);
	/* CPUID EDX bit 3: PSE capable; CR4 bit 4: PSE enabled */
	if((mach0->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;
	sync = 0;

	pa = PPN(pa);
	if(va == 0)
		va = (ulong)KADDR(pa);
	else
		va = PPN(va);
	ova = va;

	pae = pa + size;
	ilock(&mmukmaplock);
	while(pa < pae){
		table = &mach0->pdb[PDX(va)];
		/*
		 * Possibly already mapped.
		 */
		if(*table & PTEVALID){
			if(*table & PTESIZE){
				/*
				 * Big page. Does it fit within?
				 * If it does, adjust pgsz so the correct end can be
				 * returned and get out.
				 * If not, adjust pgsz up to the next 4MB boundary
				 * and continue.
				 */
				x = PPN(*table);
				if(x != pa)
					panic("mmukmap1: pa %luX entry %luX\n",
						pa, *table);
				x += 4*MB;
				if(pae <= x){
					pa = pae;
					break;
				}
				pgsz = x - pa;
				pa += pgsz;
				va += pgsz;
				continue;
			}
			else{
				/*
				 * Little page. Walk to the entry.
				 * If the entry is valid, set pgsz and continue.
				 * If not, make it so, set pgsz, sync and continue.
				 */
				pte = mmuwalk(mach0->pdb, va, 2, 0);
				if(pte && *pte & PTEVALID){
					x = PPN(*pte);
					if(x != pa)
						panic("mmukmap2: pa %luX entry %luX\n",
							pa, *pte);
					pgsz = BY2PG;
					pa += pgsz;
					va += pgsz;
					sync++;
					continue;
				}
			}
		}

		/*
		 * Not mapped. Check if it can be mapped using a big page -
		 * starts on a 4MB boundary, size >= 4MB and processor can do it.
		 * If not a big page, walk the walk, talk the talk.
		 * Sync is set.
		 *
		 * If we're creating a kernel mapping, we know that it will never
		 * expire and thus we can set the PTEGLOBAL bit to make the entry
		 * persist in the TLB across flushes. If we do add support later for
		 * unmapping kernel addresses, see devarch.c for instructions on
		 * how to do a full TLB flush.
		 */
		if(pse && (pa % (4*MB)) == 0 && (pae >= pa+4*MB)){
			*table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
			if((va&KZERO) && m->havepge)
				*table |= PTEGLOBAL;
			pgsz = 4*MB;
		}
		else{
			pte = mmuwalk(mach0->pdb, va, 2, 1);
			*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
			if((va&KZERO) && m->havepge)
				*pte |= PTEGLOBAL;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
		sync++;
	}
	iunlock(&mmukmaplock);

	/*
	 * If something was added
	 * then need to sync up.
	 */
	if(sync)
		mmukmapsync(ova);

	return pa;
}