/*
 * vm.c - physical page allocator, page-table installation helpers, and a
 * simple vmalloc/vmap layer for bare-metal x86 test code.
 */
  1. #include "fwcfg.h"
  2. #include "vm.h"
  3. #include "libcflat.h"
  4. static void *free = 0;
  5. static void *vfree_top = 0;
  6. static void free_memory(void *mem, unsigned long size)
  7. {
  8. void *end;
  9. assert_msg((unsigned long) mem % PAGE_SIZE == 0,
  10. "mem not page aligned: %p", mem);
  11. assert_msg(size % PAGE_SIZE == 0, "size not page aligned: %#lx", size);
  12. assert_msg(size == 0 || mem + size > mem,
  13. "mem + size overflow: %p + %#lx", mem, size);
  14. if (size == 0) {
  15. free = NULL;
  16. return;
  17. }
  18. free = mem;
  19. end = mem + size;
  20. while (mem + PAGE_SIZE != end) {
  21. *(void **)mem = (mem + PAGE_SIZE);
  22. mem += PAGE_SIZE;
  23. }
  24. *(void **)mem = NULL;
  25. }
  26. void *alloc_page()
  27. {
  28. void *p;
  29. if (!free)
  30. return 0;
  31. p = free;
  32. free = *(void **)free;
  33. return p;
  34. }
  35. /*
  36. * Allocates (1 << order) physically contiguous and naturally aligned pages.
  37. * Returns NULL if there's no memory left.
  38. */
  39. void *alloc_pages(unsigned long order)
  40. {
  41. /* Generic list traversal. */
  42. void *prev;
  43. void *curr = NULL;
  44. void *next = free;
  45. /* Looking for a run of length (1 << order). */
  46. unsigned long run = 0;
  47. const unsigned long n = 1ul << order;
  48. const unsigned long align_mask = (n << PAGE_SHIFT) - 1;
  49. void *run_start = NULL;
  50. void *run_prev = NULL;
  51. unsigned long run_next_pa = 0;
  52. unsigned long pa;
  53. assert(order < sizeof(unsigned long) * 8);
  54. for (;;) {
  55. prev = curr;
  56. curr = next;
  57. next = curr ? *((void **) curr) : NULL;
  58. if (!curr)
  59. return 0;
  60. pa = virt_to_phys(curr);
  61. if (run == 0) {
  62. if (!(pa & align_mask)) {
  63. run_start = curr;
  64. run_prev = prev;
  65. run_next_pa = pa + PAGE_SIZE;
  66. run = 1;
  67. }
  68. } else if (pa == run_next_pa) {
  69. run_next_pa += PAGE_SIZE;
  70. run += 1;
  71. } else {
  72. run = 0;
  73. }
  74. if (run == n) {
  75. if (run_prev)
  76. *((void **) run_prev) = next;
  77. else
  78. free = next;
  79. return run_start;
  80. }
  81. }
  82. }
/* Return a single page to the allocator by pushing it onto the free list. */
void free_page(void *page)
{
	/* The page's first word becomes the list link. */
	*(void **)page = free;
	free = page;
}
  88. extern char edata;
  89. static unsigned long end_of_memory;
  90. unsigned long *install_pte(unsigned long *cr3,
  91. int pte_level,
  92. void *virt,
  93. unsigned long pte,
  94. unsigned long *pt_page)
  95. {
  96. int level;
  97. unsigned long *pt = cr3;
  98. unsigned offset;
  99. for (level = PAGE_LEVEL; level > pte_level; --level) {
  100. offset = PGDIR_OFFSET((unsigned long)virt, level);
  101. if (!(pt[offset] & PT_PRESENT_MASK)) {
  102. unsigned long *new_pt = pt_page;
  103. if (!new_pt)
  104. new_pt = alloc_page();
  105. else
  106. pt_page = 0;
  107. memset(new_pt, 0, PAGE_SIZE);
  108. pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
  109. }
  110. pt = phys_to_virt(pt[offset] & PT_ADDR_MASK);
  111. }
  112. offset = PGDIR_OFFSET((unsigned long)virt, level);
  113. pt[offset] = pte;
  114. return &pt[offset];
  115. }
  116. /*
  117. * Finds last PTE in the mapping of @virt that's at or above @lowest_level. The
  118. * returned PTE isn't necessarily present, but its parent is.
  119. */
  120. struct pte_search find_pte_level(unsigned long *cr3, void *virt,
  121. int lowest_level)
  122. {
  123. unsigned long *pt = cr3, pte;
  124. unsigned offset;
  125. unsigned long shift;
  126. struct pte_search r;
  127. assert(lowest_level >= 1 && lowest_level <= PAGE_LEVEL);
  128. for (r.level = PAGE_LEVEL;; --r.level) {
  129. shift = (r.level - 1) * PGDIR_WIDTH + 12;
  130. offset = ((unsigned long)virt >> shift) & PGDIR_MASK;
  131. r.pte = &pt[offset];
  132. pte = *r.pte;
  133. if (!(pte & PT_PRESENT_MASK))
  134. return r;
  135. if ((r.level == 2 || r.level == 3) && (pte & PT_PAGE_SIZE_MASK))
  136. return r;
  137. if (r.level == lowest_level)
  138. return r;
  139. pt = phys_to_virt(pte & 0xffffffffff000ull);
  140. }
  141. }
  142. /*
  143. * Returns the leaf PTE in the mapping of @virt (i.e., 4K PTE or a present huge
  144. * PTE). Returns NULL if no leaf PTE exists.
  145. */
  146. unsigned long *get_pte(unsigned long *cr3, void *virt)
  147. {
  148. struct pte_search search;
  149. search = find_pte_level(cr3, virt, 1);
  150. return found_leaf_pte(search) ? search.pte : NULL;
  151. }
  152. /*
  153. * Returns the PTE in the mapping of @virt at the given level @pte_level.
  154. * Returns NULL if the PT at @pte_level isn't present (i.e., the mapping at
  155. * @pte_level - 1 isn't present).
  156. */
  157. unsigned long *get_pte_level(unsigned long *cr3, void *virt, int pte_level)
  158. {
  159. struct pte_search search;
  160. search = find_pte_level(cr3, virt, pte_level);
  161. return search.level == pte_level ? search.pte : NULL;
  162. }
  163. unsigned long *install_large_page(unsigned long *cr3,
  164. unsigned long phys,
  165. void *virt)
  166. {
  167. return install_pte(cr3, 2, virt,
  168. phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK, 0);
  169. }
  170. unsigned long *install_page(unsigned long *cr3,
  171. unsigned long phys,
  172. void *virt)
  173. {
  174. return install_pte(cr3, 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0);
  175. }
  176. void install_pages(unsigned long *cr3, unsigned long phys, unsigned long len,
  177. void *virt)
  178. {
  179. unsigned long max = (u64)len + (u64)phys;
  180. assert(phys % PAGE_SIZE == 0);
  181. assert((unsigned long) virt % PAGE_SIZE == 0);
  182. assert(len % PAGE_SIZE == 0);
  183. while (phys + PAGE_SIZE <= max) {
  184. install_page(cr3, phys, virt);
  185. phys += PAGE_SIZE;
  186. virt = (char *) virt + PAGE_SIZE;
  187. }
  188. }
  189. bool any_present_pages(unsigned long *cr3, void *virt, unsigned long len)
  190. {
  191. unsigned long max = (unsigned long) virt + len;
  192. unsigned long curr;
  193. for (curr = (unsigned long) virt; curr < max; curr += PAGE_SIZE) {
  194. unsigned long *ptep = get_pte(cr3, (void *) curr);
  195. if (ptep && (*ptep & PT_PRESENT_MASK))
  196. return true;
  197. }
  198. return false;
  199. }
  200. static void setup_mmu_range(unsigned long *cr3, unsigned long start,
  201. unsigned long len)
  202. {
  203. u64 max = (u64)len + (u64)start;
  204. u64 phys = start;
  205. while (phys + LARGE_PAGE_SIZE <= max) {
  206. install_large_page(cr3, phys, (void *)(ulong)phys);
  207. phys += LARGE_PAGE_SIZE;
  208. }
  209. install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
  210. }
/*
 * Build an identity-mapped page table covering @len bytes of RAM (plus MMIO),
 * load it into CR3, and enable paging.
 *
 * NOTE(review): on x86_64 vfree_top is left at 0, so the vmalloc/vmap area
 * grows down from address 0 by unsigned wraparound — presumably relying on
 * the identity map covering the resulting addresses; confirm against callers.
 */
static void setup_mmu(unsigned long len)
{
	unsigned long *cr3 = alloc_page();

	memset(cr3, 0, PAGE_SIZE);
#ifdef __x86_64__
	/* Always cover at least the first 4G so MMIO is mapped 1:1. */
	if (len < (1ul << 32))
		len = (1ul << 32); /* map mmio 1:1 */
	setup_mmu_range(cr3, 0, len);
#else
	/* 32-bit: cap RAM mapping at 2G. */
	if (len > (1ul << 31))
		len = (1ul << 31);
	/* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */
	setup_mmu_range(cr3, 0, len);
	setup_mmu_range(cr3, 3ul << 30, (1ul << 30));
	vfree_top = (void*)(3ul << 30);
#endif
	write_cr3(virt_to_phys(cr3));
#ifndef __x86_64__
	/* Enable 4M pages (PSE); 64-bit mode implies large-page support. */
	write_cr4(X86_CR4_PSE);
#endif
	/* Turn on paging with write-protect enforcement in ring 0. */
	write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP);
	printf("paging enabled\n");
	printf("cr0 = %lx\n", read_cr0());
	printf("cr3 = %lx\n", read_cr3());
	printf("cr4 = %lx\n", read_cr4());
}
  237. void setup_vm()
  238. {
  239. assert(!end_of_memory);
  240. end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE);
  241. free_memory(&edata, end_of_memory - (unsigned long)&edata);
  242. setup_mmu(end_of_memory);
  243. }
/*
 * Allocate @size bytes of virtual memory, backed by freshly allocated
 * physical pages.  The page-rounded total size is stored in the word
 * immediately before the returned pointer so vfree() can find it.
 *
 * NOTE(review): alloc_page() can return NULL on exhaustion, which would be
 * passed to virt_to_phys() below — presumably callers never exhaust memory;
 * verify.
 */
void *vmalloc(unsigned long size)
{
	void *mem, *p;
	unsigned pages;

	/* Reserve a header word for the size, then round up to pages. */
	size += sizeof(unsigned long);
	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	/* Virtual space is carved off the top, growing downward. */
	vfree_top -= size;
	mem = p = vfree_top;
	pages = size / PAGE_SIZE;
	while (pages--) {
		install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p);
		p += PAGE_SIZE;
	}
	/* Record the rounded size, then return the address past the header. */
	*(unsigned long *)mem = size;
	mem += sizeof(unsigned long);
	return mem;
}
  261. uint64_t virt_to_phys_cr3(void *mem)
  262. {
  263. return (*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1));
  264. }
/*
 * Free a region previously returned by vmalloc().  The page-rounded size
 * was stored by vmalloc() in the word just below @mem; every physical page
 * backing the region is returned to the page allocator.
 *
 * NOTE(review): the virtual mapping itself is not torn down here — the PTEs
 * remain installed; presumably the vfree area is never re-mapped. Verify.
 */
void vfree(void *mem)
{
	unsigned long size = ((unsigned long *)mem)[-1];

	while (size) {
		/* Look up the backing physical page and release it. */
		free_page(phys_to_virt(*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK));
		mem += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}
  274. void *vmap(unsigned long long phys, unsigned long size)
  275. {
  276. void *mem, *p;
  277. unsigned pages;
  278. size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
  279. vfree_top -= size;
  280. phys &= ~(unsigned long long)(PAGE_SIZE - 1);
  281. mem = p = vfree_top;
  282. pages = size / PAGE_SIZE;
  283. while (pages--) {
  284. install_page(phys_to_virt(read_cr3()), phys, p);
  285. phys += PAGE_SIZE;
  286. p += PAGE_SIZE;
  287. }
  288. return mem;
  289. }
  290. void *alloc_vpages(ulong nr)
  291. {
  292. vfree_top -= PAGE_SIZE * nr;
  293. return vfree_top;
  294. }
/* Convenience wrapper: reserve a single page of virtual address space. */
void *alloc_vpage(void)
{
	return alloc_vpages(1);
}