/*
 * mmu.c — i386 MMU and task-state-segment setup for the Plan 9 file server.
 */
  1. #include "all.h"
  2. #include "mem.h"
  3. #include "io.h"
  4. #include "ureg.h"
/*
 * task state segment. Plan 9 ignores all the task switching goo and just
 * uses the tss for esp0 and ss0 on gate's into the kernel, interrupts,
 * and exceptions. The rest is completely ignored.
 *
 * This means that we only need one tss in the whole system.
 *
 * The field layout below mirrors the i386 hardware TSS format and
 * must not be reordered.
 */
typedef struct Tss Tss;
struct Tss
{
	ulong	backlink;	/* unused */
	ulong	sp0;		/* pl0 stack pointer */
	ulong	ss0;		/* pl0 stack selector */
	ulong	sp1;		/* pl1 stack pointer */
	ulong	ss1;		/* pl1 stack selector */
	ulong	sp2;		/* pl2 stack pointer */
	ulong	ss2;		/* pl2 stack selector */
	ulong	cr3;		/* page table descriptor */
	ulong	eip;		/* instruction pointer */
	ulong	eflags;		/* processor flags */
	ulong	eax;		/* general (hah?) registers */
	ulong	ecx;
	ulong	edx;
	ulong	ebx;
	ulong	esp;
	ulong	ebp;
	ulong	esi;
	ulong	edi;
	ulong	es;		/* segment selectors */
	ulong	cs;
	ulong	ss;
	ulong	ds;
	ulong	fs;
	ulong	gs;
	ulong	ldt;		/* local descriptor table */
	ulong	iomap;		/* io map base */
};
Tss tss;	/* the single system-wide tss (see comment above) */
/*
 * segment descriptor initializers
 *
 * Each initializer expands to the two 32-bit words (d0, d1) of an i386
 * segment descriptor.  The 32-bit data/code segments cover the full 4GB
 * address space (limit 0xFFFFF with granularity set); the 16-bit
 * variants have a 64KB byte-granular limit.
 * NOTE(review): CALLGATE relies on & binding tighter than | in
 * (o)&0xFFFF0000|SEGP... — correct, but extra parens would be clearer.
 */
#define DATASEGM(p) 	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define CALLGATE(s,o,p)	{ ((o)&0xFFFF)|((s)<<16), (o)&0xFFFF0000|SEGP|SEGPL(p)|SEGCG }
#define D16SEGM(p) 	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define E16SEGM(p) 	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
/* TSS descriptor: base address b split across both words, limit = sizeof(Tss) */
#define TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
/*
 * global descriptor table describing all segments
 *
 * The tss entry is a placeholder; mmuinit() rewrites it with the real
 * address of tss, which cannot be computed at compile/link time.
 * NOTE(review): the "[INDEX] initializer" form (no '=') is the Plan 9
 * compiler's designated-initializer syntax, not standard C.
 */
Segdesc gdt[] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
};
static struct {
	ulong	va;		/* kernel virtual address of the table */
	ulong	pa;		/* its physical address (va with KZERO stripped) */
} ktoppg;			/* prototype top level page table
				 * containing kernel mappings */
static ulong	*kpt;		/* 2nd level page tables for kernel mem */
  71. #define ROUNDUP(s,v) (((s)+(v-1))&~(v-1))
/*
 * offset of virtual address into
 * top level page table
 * (the top 10 bits of the address when PGSHIFT is 12: 2*PGSHIFT-2 == 22)
 */
#define TOPOFF(v)	(((ulong)(v))>>(2*PGSHIFT-2))
/*
 * offset of virtual address into
 * bottom level page table
 * (the next 10 bits; WD2PG-1 masks to one page table's worth of entries)
 */
#define BTMOFF(v)	((((ulong)(v))>>(PGSHIFT))&(WD2PG-1))
  82. /*
  83. * Change current page table and the stack to use for exceptions
  84. * (traps & interrupts). The exception stack comes from the tss.
  85. * Since we use only one tss, (we hope) there's no need for a
  86. * puttr().
  87. */
  88. static void
  89. taskswitch(ulong pagetbl, ulong stack)
  90. {
  91. tss.ss0 = KDSEL;
  92. tss.sp0 = stack;
  93. tss.ss1 = KDSEL;
  94. tss.sp1 = stack;
  95. tss.ss2 = KDSEL;
  96. tss.sp2 = stack;
  97. tss.cr3 = pagetbl;
  98. putcr3(pagetbl);
  99. }
/*
 * Create a prototype page map that maps all of memory into
 * kernel (KZERO) space. This is the default map. It is used
 * whenever the processor is not running a process or whenever running
 * a process which does not yet have its own map.
 */
void
mmuinit(void)
{
	int i, nkpt, npage, nbytes;
	ulong x;
	ulong y;
	ulong *top;

	/*
	 * set up the global descriptor table. we make the tss entry here
	 * since it requires arithmetic on an address and hence cannot
	 * be a compile or link time constant.
	 */
	x = (ulong)&tss;
	gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);	/* base 15:0, limit = sizeof(Tss) */
	gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;	/* base 31:24 and 23:16 */
	putgdt(gdt, sizeof gdt);

	/*
	 * set up system page tables.
	 * map all of physical memory to start at KZERO.
	 * leave a map entry for a user area.
	 */

	/*
	 * allocate top level table
	 * (ialloc presumably returns zeroed, page-aligned memory — TODO confirm)
	 */
	top = ialloc(BY2PG, BY2PG);
	ktoppg.va = (ulong)top;
	ktoppg.pa = ktoppg.va & ~KZERO;	/* kernel virtual == physical | KZERO */

	/* map all memory to KZERO */
	npage = mconf.topofmem/BY2PG;	/* pages of physical memory */
	nbytes = PGROUND(npage*BY2WD);	/* words of page map */
	nkpt = nbytes/BY2PG;		/* pages of page map */
	kpt = ialloc(nbytes, BY2PG);
	/* identity sequence: pte i maps physical page i, kernel-only, writable */
	for(i = 0; i < npage; i++)
		kpt[i] = (0+i*BY2PG) | PTEVALID | PTEKERNEL | PTEWRITE;
	/* hang the kpt pages off the top-level entries starting at KZERO */
	x = TOPOFF(KZERO);
	y = ((ulong)kpt)&~KZERO;	/* physical address of kpt */
	for(i = 0; i < nkpt; i++)
		top[x+i] = (y+i*BY2PG) | PTEVALID | PTEKERNEL | PTEWRITE;

	/*
	 * set up the task segment
	 */
	memset(&tss, 0, sizeof(tss));
	/* exception stack = one page above m; assumes m points at the
	 * base of the Mach/stack page — TODO confirm layout from l.s */
	taskswitch(ktoppg.pa, BY2PG + (ulong)m);
	puttr(TSSSEL);/**/
}
/*
 * used to map a page into 16 meg - BY2PG for confinit(). tpt is the temporary
 * page table set up by l.s.
 * NOTE(review): the comment says 16 meg but the code maps at
 * 4MB - BY2PG (4*1024*1024 below) — one of the two is stale; confirm.
 */
long*
mapaddr(ulong addr)
{
	ulong base;
	ulong off;
	static ulong *pte, top;
	extern ulong tpt[];

	/* one-time setup: locate the pte slot and cr3 value inside tpt */
	if(pte == 0){
		/* round tpt up to a page boundary */
		top = (((ulong)tpt)+(BY2PG-1))&~(BY2PG-1);
		pte = (ulong*)top;
		top &= ~KZERO;		/* physical address for cr3 */
		top += BY2PG;		/* presumably the top-level table is
					 * the page after the 2nd-level one
					 * — TODO confirm against l.s */
		/* last entry of the 4MB-covering 2nd-level table */
		pte += (4*1024*1024-BY2PG)>>PGSHIFT;
	}
	base = off = addr;
	base &= ~(KZERO|(BY2PG-1));	/* physical page address */
	off &= BY2PG-1;			/* offset within the page */
	*pte = base|PTEVALID|PTEKERNEL|PTEWRITE; /**/
	putcr3((ulong)top);		/* reload cr3 to flush the old mapping */
	return (long*)(KZERO | 4*1024*1024-BY2PG | off);
}
/* standard two-level 4KB paging decomposition: PDX = page directory
 * index (bits 31:22), PTX = page table index (bits 21:12),
 * PPN = page-aligned address, KADDR = physical -> kernel virtual */
#define PDX(va)		((((ulong)(va))>>22) & 0x03FF)
#define PTX(va)		((((ulong)(va))>>12) & 0x03FF)
#define PPN(x)		((x)&~(BY2PG-1))
#define KADDR(a)	((void*)((ulong)(a)|KZERO))
  180. ulong*
  181. mmuwalk(ulong* pdb, ulong va, int level, int create)
  182. {
  183. ulong pa, *table;
  184. /*
  185. * Walk the page-table pointed to by pdb and return a pointer
  186. * to the entry for virtual address va at the requested level.
  187. * If the entry is invalid and create isn't requested then bail
  188. * out early. Otherwise, for the 2nd level walk, allocate a new
  189. * page-table page and register it in the 1st level.
  190. */
  191. table = &pdb[PDX(va)];
  192. if(!(*table & PTEVALID) && create == 0)
  193. return 0;
  194. switch(level){
  195. default:
  196. return 0;
  197. case 1:
  198. return table;
  199. case 2:
  200. if(*table & PTESIZE)
  201. panic("mmuwalk2: va 0x%ux entry 0x%ux\n", va, *table);
  202. if(!(*table & PTEVALID)){
  203. pa = PADDR(ialloc(BY2PG, BY2PG));
  204. *table = pa|PTEWRITE|PTEVALID;
  205. }
  206. table = KADDR(PPN(*table));
  207. return &table[PTX(va)];
  208. }
  209. }
static Lock mmukmaplock;	/* serialises updates to the kernel map */

/*
 * Map the physical range [pa, pa+size) at virtual address va (or at
 * KADDR(pa) when va is 0) in the kernel's prototype page table.
 * New mappings are writable and uncached; 4MB "big" pages are used
 * when the processor supports PSE and the range allows.  Panics if an
 * existing mapping disagrees with the requested one.  Returns the
 * physical address reached (pa+size on full success).
 */
ulong
mmukmap(ulong pa, ulong va, int size)
{
	ulong pae, *table, *pdb, pgsz, *pte, x;
	int pse, sync;
	extern int cpuidax, cpuiddx;

	pdb = (ulong*)ktoppg.va;
	/* PSE available: cpuid feature bit 3 set and CR4.PSE (bit 4) enabled */
	if((cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;
	sync = 0;

	pa = PPN(pa);
	if(va == 0)
		va = (ulong)KADDR(pa);
	else
		va = PPN(va);
	pae = pa + size;

	lock(&mmukmaplock);
	while(pa < pae){
		table = &pdb[PDX(va)];
		/*
		 * Possibly already mapped.
		 */
		if(*table & PTEVALID){
			if(*table & PTESIZE){
				/*
				 * Big page. Does it fit within?
				 * If it does, adjust pgsz so the correct end can be
				 * returned and get out.
				 * If not, adjust pgsz up to the next 4MB boundary
				 * and continue.
				 */
				x = PPN(*table);
				if(x != pa)
					panic("mmukmap1: pa 0x%ux entry 0x%ux\n",
						pa, *table);
				x += 4*MB;
				if(pae <= x){
					pa = pae;
					break;
				}
				pgsz = x - pa;
				pa += pgsz;
				va += pgsz;
				continue;
			}
			else{
				/*
				 * Little page. Walk to the entry.
				 * If the entry is valid, set pgsz and continue.
				 * If not, make it so, set pgsz, sync and continue.
				 */
				pte = mmuwalk(pdb, va, 2, 0);
				if(pte && *pte & PTEVALID){
					x = PPN(*pte);
					if(x != pa)
						panic("mmukmap2: pa 0x%ux entry 0x%ux\n",
							pa, *pte);
					pgsz = BY2PG;
					pa += pgsz;
					va += pgsz;
					sync++;
					continue;
				}
			}
		}

		/*
		 * Not mapped. Check if it can be mapped using a big page -
		 * starts on a 4MB boundary, size >= 4MB and processor can do it.
		 * If not a big page, walk the walk, talk the talk.
		 * Sync is set.
		 * NOTE(review): only pa's 4MB alignment is checked here;
		 * va is presumably equally aligned because it is normally
		 * KADDR(pa) — confirm for callers passing an explicit va.
		 */
		if(pse && (pa % (4*MB)) == 0 && (pae >= pa+4*MB)){
			*table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = 4*MB;
		}
		else{
			pte = mmuwalk(pdb, va, 2, 1);
			*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
		sync++;
	}
	unlock(&mmukmaplock);

	/*
	 * If something was added
	 * then need to sync up (reload cr3 to flush stale TLB entries).
	 */
	if(sync)
		putcr3(ktoppg.pa);

	return pa;
}
  306. ulong
  307. upamalloc(ulong addr, int size, int align)
  308. {
  309. ulong ae;
  310. /*
  311. * Another horrible hack because
  312. * I CAN'T BE BOTHERED WITH THIS FILESERVER BEING
  313. * COMPLETELY INCOMPATIBLE ANYMORE.
  314. */
  315. if((addr < mconf.topofmem) || align)
  316. panic("upamalloc: (0x%lux < 0x%lux) || %d\n",
  317. addr, mconf.topofmem, align);
  318. ae = mmukmap(addr, 0, size);
  319. /*
  320. * Should check here that it was all delivered
  321. * and put it back and barf if not.
  322. */
  323. USED(ae);
  324. /*
  325. * Be very careful this returns a PHYSICAL address.
  326. */
  327. return addr;
  328. }