segment.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. /* Segment type from portdat.h */
  16. char *segtypes[SG_TYPE]={
  17. [SG_BAD0] = "Bad0",
  18. [SG_TEXT] = "Text",
  19. [SG_DATA] = "Data",
  20. [SG_BSS] = "Bss",
  21. [SG_STACK] = "Stack",
  22. [SG_SHARED] = "Shared",
  23. [SG_PHYSICAL] = "Phys",
  24. [SG_LOAD] = "Load"
  25. };
  26. uintmem
  27. segppn(Segment *s, uintmem pa)
  28. {
  29. uintmem pgsz;
  30. pgsz = sys->pgsz[s->pgszi];
  31. pa &= ~(pgsz-1);
  32. return pa;
  33. }
  /*
   * Sizes are given in multiples of BIGPGSZ.
   * The actual page size used is either BIGPGSZ or 1*GiB;
   * 1*GiB is used if base is aligned to 1G and size is >= 1G
   * and we support 1G pages.
   * 1G is disabled for now. RGM.
   */
  40. Segment *
  41. newseg(int type, uintptr_t base, uint64_t size)
  42. {
  43. Segment *s;
  44. int mapsize;
  45. uint pgsz;
  46. if(size > SEGMAPSIZE*(PTEMAPMEM/BIGPGSZ))
  47. error(Enovmem);
  48. pgsz = BIGPGSZ;
  49. if (0) // TODO: re enable this on a per-process basis via a write to /proc/pid/ctl.
  50. if(size*BIGPGSZ >= 1*GiB && getpgszi(1*GiB) >= 0 &&
  51. (base&(1ULL*GiB-1)) == 0 && ((size*BIGPGSZ)&(1ULL*GiB-1)) == 0){
  52. DBG("newseg: using 1G pages\n");
  53. pgsz = 1*GiB;
  54. }
  55. s = smalloc(sizeof(Segment));
  56. s->r.ref = 1;
  57. s->type = type;
  58. s->base = base;
  59. s->ptepertab = PTEMAPMEM/pgsz;
  60. s->top = base+(size*BIGPGSZ);
  61. s->size = size;
  62. s->pgszi = getpgszi(pgsz);
  63. if(s->pgszi < 0)
  64. panic("newseg: getpgszi %d", pgsz);
  65. s->sema.prev = &s->sema;
  66. s->sema.next = &s->sema;
  67. s->color = NOCOLOR;
  68. mapsize = HOWMANY(size*BIGPGSZ/pgsz, s->ptepertab);
  69. if(mapsize > nelem(s->ssegmap)){
  70. mapsize *= 2;
  71. if(mapsize > (SEGMAPSIZE*s->ptepertab))
  72. mapsize = (SEGMAPSIZE*s->ptepertab);
  73. s->map = smalloc(mapsize*sizeof(Pte*));
  74. s->mapsize = mapsize;
  75. }
  76. else{
  77. s->map = s->ssegmap;
  78. s->mapsize = nelem(s->ssegmap);
  79. }
  80. return s;
  81. }
  82. void
  83. putseg(Segment *s)
  84. {
  85. Pte **pp, **emap;
  86. Image *i;
  87. extern void freezseg(Segment*);
  88. if(s == 0)
  89. return;
  90. i = s->image;
  91. if(i != 0) {
  92. lock(&i->r.l);
  93. lock(&s->r.l);
  94. if(i->s == s && s->r.ref == 1)
  95. i->s = 0;
  96. unlock(&i->r.l);
  97. }
  98. else
  99. lock(&s->r.l);
  100. s->r.ref--;
  101. if(s->r.ref != 0) {
  102. unlock(&s->r.l);
  103. return;
  104. }
  105. unlock(&s->r.l);
  106. qlock(&s->lk);
  107. if(i)
  108. putimage(i);
  109. emap = &s->map[s->mapsize];
  110. for(pp = s->map; pp < emap; pp++)
  111. if(*pp)
  112. freepte(s, *pp);
  113. qunlock(&s->lk);
  114. if(s->map != s->ssegmap)
  115. free(s->map);
  116. if(s->profile != 0)
  117. free(s->profile);
  118. if(s->type&SG_ZIO)
  119. freezseg(s);
  120. free(s);
  121. }
  122. void
  123. relocateseg(Segment *s, uintptr_t offset)
  124. {
  125. Page **pg, *x;
  126. Pte *pte, **p, **endpte;
  127. endpte = &s->map[s->mapsize];
  128. for(p = s->map; p < endpte; p++) {
  129. if(*p == 0)
  130. continue;
  131. pte = *p;
  132. for(pg = pte->first; pg <= pte->last; pg++) {
  133. if(x = *pg)
  134. x->va += offset;
  135. }
  136. }
  137. }
  138. Segment*
  139. dupseg(Segment **seg, int segno, int share)
  140. {
  141. Proc *up = externup();
  142. int i, size;
  143. Pte *pte;
  144. Segment *n, *s;
  145. SET(n);
  146. s = seg[segno];
  147. qlock(&s->lk);
  148. if(waserror()){
  149. qunlock(&s->lk);
  150. nexterror();
  151. }
  152. switch(s->type&SG_TYPE) {
  153. case SG_TEXT: /* New segment shares pte set */
  154. case SG_SHARED:
  155. case SG_PHYSICAL:
  156. case SG_MMAP:
  157. goto sameseg;
  158. case SG_STACK:
  159. n = newseg(s->type, s->base, s->size);
  160. break;
  161. case SG_BSS: /* Just copy on write */
  162. if(share)
  163. goto sameseg;
  164. n = newseg(s->type, s->base, s->size);
  165. break;
  166. case SG_LOAD:
  167. if((s->type & SG_EXEC) != 0 && (s->type & SG_WRITE) == 0)
  168. goto sameseg;
  169. case SG_DATA: /* Copy on write plus demand load info */
  170. if((s->type & SG_EXEC) != 0){
  171. poperror();
  172. qunlock(&s->lk);
  173. return data2txt(s);
  174. }
  175. if(share)
  176. goto sameseg;
  177. n = newseg(s->type, s->base, s->size);
  178. incref(&s->image->r);
  179. n->image = s->image;
  180. n->ldseg = s->ldseg;
  181. n->pgszi = s->pgszi;
  182. n->color = s->color;
  183. n->ptepertab = s->ptepertab;
  184. break;
  185. }
  186. size = s->mapsize;
  187. for(i = 0; i < size; i++)
  188. if(pte = s->map[i])
  189. n->map[i] = ptecpy(n, pte);
  190. n->flushme = s->flushme;
  191. if(s->r.ref > 1)
  192. procflushseg(s);
  193. poperror();
  194. qunlock(&s->lk);
  195. return n;
  196. sameseg:
  197. incref(&s->r);
  198. poperror();
  199. qunlock(&s->lk);
  200. return s;
  201. }
  202. void
  203. segpage(Segment *s, Page *p)
  204. {
  205. Pte **pte;
  206. uintptr_t soff;
  207. uintmem pgsz;
  208. Page **pg;
  209. if(s->pgszi < 0)
  210. s->pgszi = p->pgszi;
  211. if(s->color == NOCOLOR)
  212. s->color = p->color;
  213. if(s->pgszi != p->pgszi) {
  214. iprint("s %p s->pgszi %d p %p p->pgszi %p\n", s, s->pgszi, p, p->pgszi);
  215. panic("segpage: s->pgszi != p->pgszi");
  216. }
  217. if(p->va < s->base || p->va >= s->top)
  218. panic("segpage: p->va < s->base || p->va >= s->top");
  219. soff = p->va - s->base;
  220. pte = &s->map[soff/PTEMAPMEM];
  221. if(*pte == 0)
  222. *pte = ptealloc(s);
  223. pgsz = sys->pgsz[s->pgszi];
  224. pg = &(*pte)->pages[(soff&(PTEMAPMEM-1))/pgsz];
  225. *pg = p;
  226. if(pg < (*pte)->first)
  227. (*pte)->first = pg;
  228. if(pg > (*pte)->last)
  229. (*pte)->last = pg;
  230. }
  231. /*
  232. * called with s->lk locked
  233. */
  234. void
  235. mfreeseg(Segment *s, uintptr_t start, int pages)
  236. {
  237. int i, j, size;
  238. uintptr_t soff;
  239. uintmem pgsz;
  240. Page *pg;
  241. Page *list;
  242. pgsz = sys->pgsz[s->pgszi];
  243. soff = start-s->base;
  244. j = (soff&(PTEMAPMEM-1))/pgsz;
  245. size = s->mapsize;
  246. list = nil;
  247. for(i = soff/PTEMAPMEM; i < size; i++) {
  248. if(pages <= 0)
  249. break;
  250. if(s->map[i] == 0) {
  251. pages -= s->ptepertab-j;
  252. j = 0;
  253. continue;
  254. }
  255. while(j < s->ptepertab) {
  256. pg = s->map[i]->pages[j];
  257. /*
  258. * We want to zero s->map[i]->page[j] and putpage(pg),
  259. * but we have to make sure other processors flush the
  260. * entry from their TLBs before the page is freed.
  261. * We construct a list of the pages to be freed, zero
  262. * the entries, then (below) call procflushseg, and call
  263. * putpage on the whole list.
  264. *
  265. * Swapped-out pages don't appear in TLBs, so it's okay
  266. * to putswap those pages before procflushseg.
  267. */
  268. if(pg){
  269. if(onswap(pg))
  270. putswap(pg);
  271. else{
  272. pg->next = list;
  273. list = pg;
  274. }
  275. s->map[i]->pages[j] = 0;
  276. }
  277. if(--pages == 0)
  278. goto out;
  279. j++;
  280. }
  281. j = 0;
  282. }
  283. out:
  284. /* flush this seg in all other processes */
  285. if(s->r.ref > 1)
  286. procflushseg(s);
  287. /* free the pages */
  288. for(pg = list; pg != nil; pg = list){
  289. list = list->next;
  290. putpage(pg);
  291. }
  292. }
  293. Segment*
  294. isoverlap(Proc* p, uintptr_t va, usize len)
  295. {
  296. int i;
  297. Segment *ns;
  298. uintptr_t newtop;
  299. newtop = va+len;
  300. for(i = 0; i < NSEG; i++) {
  301. ns = p->seg[i];
  302. if(ns == 0)
  303. continue;
  304. if((newtop > ns->base && newtop <= ns->top) ||
  305. (va >= ns->base && va < ns->top))
  306. return ns;
  307. }
  308. return nil;
  309. }
  310. void
  311. segclock(uintptr_t pc)
  312. {
  313. Proc *up = externup();
  314. Segment *s;
  315. int sno;
  316. for(sno = 0; sno < NSEG; sno++){
  317. s = up->seg[sno];
  318. if(s == nil)
  319. continue;
  320. if((s->type & SG_EXEC) == 0 || s->profile == 0)
  321. continue;
  322. s->profile[0] += TK2MS(1);
  323. if(pc >= s->base && pc < s->top) {
  324. pc -= s->base;
  325. s->profile[pc>>LRESPROF] += TK2MS(1);
  326. }
  327. }
  328. }
  329. static void
  330. prepageseg(int i)
  331. {
  332. Proc *up = externup();
  333. Segment *s;
  334. uintptr_t addr, pgsz;
  335. s = up->seg[i];
  336. if(s == nil)
  337. return;
  338. DBG("prepage: base %#p top %#p\n", s->base, s->top);
  339. pgsz = sys->pgsz[s->pgszi];
  340. for(addr = s->base; addr < s->top; addr += pgsz)
  341. fault(addr, -1, (s->type & SG_WRITE) ? FT_WRITE : FT_READ);
  342. }
  /*
   * BUG: should depend only on segment attributes, not on
   * the slot used in up->seg.
   */
  347. void
  348. nixprepage(int i)
  349. {
  350. if(i >= 0)
  351. prepageseg(i);
  352. else
  353. for(i = 0; i < NSEG; i++)
  354. prepageseg(i);
  355. }