segment.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. /* Segment type from portdat.h */
  16. char *segtypes[SG_TYPE] = {
  17. [SG_BAD0] = "Bad0",
  18. [SG_TEXT] = "Text",
  19. [SG_DATA] = "Data",
  20. [SG_BSS] = "Bss",
  21. [SG_STACK] = "Stack",
  22. [SG_SHARED] = "Shared",
  23. [SG_PHYSICAL] = "Phys",
  24. [SG_LOAD] = "Load"};
  25. u64
  26. segppn(Segment *s, u64 pa)
  27. {
  28. u64 pgsz;
  29. pgsz = sys->pgsz[s->pgszi];
  30. pa &= ~(pgsz - 1);
  31. return pa;
  32. }
  33. /*
  34. * Sizes are given in multiples of BIGPGSZ.
  35. * The actual page size used is either BIGPGSZ or 1*GiB
  36. * 1G is disabled for now. RGM.
  37. * if base is aligned to 1G and size is >= 1G and we support 1G pages.
  38. */
  39. Segment *
  40. newseg(int type, usize base, u64 size)
  41. {
  42. Segment *s;
  43. int mapsize;
  44. u32 pgsz;
  45. if(size > SEGMAPSIZE * (PTEMAPMEM / BIGPGSZ))
  46. error(Enovmem);
  47. pgsz = BIGPGSZ;
  48. if(0) // TODO: re enable this on a per-process basis via a write to /proc/pid/ctl.
  49. if(size * BIGPGSZ >= 1 * GiB && getpgszi(1 * GiB) >= 0 &&
  50. (base & (1ULL * GiB - 1)) == 0 && ((size * BIGPGSZ) & (1ULL * GiB - 1)) == 0){
  51. DBG("newseg: using 1G pages\n");
  52. pgsz = 1 * GiB;
  53. }
  54. s = smalloc(sizeof(Segment));
  55. s->r.ref = 1;
  56. s->type = type;
  57. s->base = base;
  58. s->ptepertab = PTEMAPMEM / pgsz;
  59. s->top = base + (size * BIGPGSZ);
  60. s->size = size;
  61. s->pgszi = getpgszi(pgsz);
  62. if(s->pgszi < 0)
  63. panic("newseg: getpgszi %d", pgsz);
  64. s->sema.prev = &s->sema;
  65. s->sema.next = &s->sema;
  66. s->color = NOCOLOR;
  67. mapsize = HOWMANY(size * BIGPGSZ / pgsz, s->ptepertab);
  68. if(mapsize > nelem(s->ssegmap)){
  69. mapsize *= 2;
  70. if(mapsize > (SEGMAPSIZE * s->ptepertab))
  71. mapsize = (SEGMAPSIZE * s->ptepertab);
  72. s->map = smalloc(mapsize * sizeof(Pte *));
  73. s->mapsize = mapsize;
  74. } else {
  75. s->map = s->ssegmap;
  76. s->mapsize = nelem(s->ssegmap);
  77. }
  78. return s;
  79. }
  80. void
  81. putseg(Segment *s)
  82. {
  83. Pte **pp, **emap;
  84. Image *i;
  85. extern void freezseg(Segment *);
  86. if(s == 0)
  87. return;
  88. i = s->image;
  89. if(i != 0){
  90. lock(&i->r.l);
  91. lock(&s->r.l);
  92. if(i->s == s && s->r.ref == 1)
  93. i->s = 0;
  94. unlock(&i->r.l);
  95. } else
  96. lock(&s->r.l);
  97. s->r.ref--;
  98. if(s->r.ref != 0){
  99. unlock(&s->r.l);
  100. return;
  101. }
  102. unlock(&s->r.l);
  103. qlock(&s->lk);
  104. if(i)
  105. putimage(i);
  106. emap = &s->map[s->mapsize];
  107. for(pp = s->map; pp < emap; pp++)
  108. if(*pp)
  109. freepte(s, *pp);
  110. qunlock(&s->lk);
  111. if(s->map != s->ssegmap)
  112. free(s->map);
  113. if(s->profile != 0)
  114. free(s->profile);
  115. if(s->type & SG_ZIO)
  116. freezseg(s);
  117. free(s);
  118. }
  119. void
  120. relocateseg(Segment *s, usize offset)
  121. {
  122. Page **pg, *x;
  123. Pte *pte, **p, **endpte;
  124. endpte = &s->map[s->mapsize];
  125. for(p = s->map; p < endpte; p++){
  126. if(*p == 0)
  127. continue;
  128. pte = *p;
  129. for(pg = pte->first; pg <= pte->last; pg++){
  130. if((x = *pg) != nil)
  131. x->va += offset;
  132. }
  133. }
  134. }
  135. Segment *
  136. dupseg(Segment **seg, int segno, int share)
  137. {
  138. Proc *up = externup();
  139. int i, size;
  140. Pte *pte;
  141. Segment *n, *s;
  142. SET(n);
  143. s = seg[segno];
  144. qlock(&s->lk);
  145. if(waserror()){
  146. qunlock(&s->lk);
  147. nexterror();
  148. }
  149. switch(s->type & SG_TYPE){
  150. case SG_TEXT: /* New segment shares pte set */
  151. case SG_SHARED:
  152. case SG_PHYSICAL:
  153. case SG_MMAP:
  154. goto sameseg;
  155. case SG_STACK:
  156. n = newseg(s->type, s->base, s->size);
  157. break;
  158. case SG_BSS: /* Just copy on write */
  159. if(share)
  160. goto sameseg;
  161. n = newseg(s->type, s->base, s->size);
  162. break;
  163. case SG_LOAD:
  164. if((s->type & SG_EXEC) != 0 && (s->type & SG_WRITE) == 0)
  165. goto sameseg;
  166. case SG_DATA: /* Copy on write plus demand load info */
  167. if((s->type & SG_EXEC) != 0){
  168. poperror();
  169. qunlock(&s->lk);
  170. return data2txt(s);
  171. }
  172. if(share)
  173. goto sameseg;
  174. n = newseg(s->type, s->base, s->size);
  175. incref(&s->image->r);
  176. n->image = s->image;
  177. n->ldseg = s->ldseg;
  178. n->pgszi = s->pgszi;
  179. n->color = s->color;
  180. n->ptepertab = s->ptepertab;
  181. break;
  182. }
  183. size = s->mapsize;
  184. for(i = 0; i < size; i++)
  185. if((pte = s->map[i]) != nil)
  186. n->map[i] = ptecpy(n, pte);
  187. n->flushme = s->flushme;
  188. if(s->r.ref > 1)
  189. procflushseg(s);
  190. poperror();
  191. qunlock(&s->lk);
  192. return n;
  193. sameseg:
  194. incref(&s->r);
  195. poperror();
  196. qunlock(&s->lk);
  197. return s;
  198. }
  199. void
  200. segpage(Segment *s, Page *p)
  201. {
  202. Pte **pte;
  203. usize soff;
  204. u64 pgsz;
  205. Page **pg;
  206. if(s->pgszi < 0)
  207. s->pgszi = p->pgszi;
  208. if(s->color == NOCOLOR)
  209. s->color = p->color;
  210. if(s->pgszi != p->pgszi){
  211. iprint("s %p s->pgszi %d p %p p->pgszi %p\n", s, s->pgszi, p, p->pgszi);
  212. panic("segpage: s->pgszi != p->pgszi");
  213. }
  214. if(p->va < s->base || p->va >= s->top)
  215. panic("segpage: p->va < s->base || p->va >= s->top");
  216. soff = p->va - s->base;
  217. pte = &s->map[soff / PTEMAPMEM];
  218. if(*pte == 0)
  219. *pte = ptealloc(s);
  220. pgsz = sys->pgsz[s->pgszi];
  221. pg = &(*pte)->pages[(soff & (PTEMAPMEM - 1)) / pgsz];
  222. *pg = p;
  223. if(pg < (*pte)->first)
  224. (*pte)->first = pg;
  225. if(pg > (*pte)->last)
  226. (*pte)->last = pg;
  227. }
  228. /*
  229. * called with s->lk locked
  230. */
  231. void
  232. mfreeseg(Segment *s, usize start, int pages)
  233. {
  234. int i, j, size;
  235. usize soff;
  236. u64 pgsz;
  237. Page *pg;
  238. Page *list;
  239. pgsz = sys->pgsz[s->pgszi];
  240. soff = start - s->base;
  241. j = (soff & (PTEMAPMEM - 1)) / pgsz;
  242. size = s->mapsize;
  243. list = nil;
  244. for(i = soff / PTEMAPMEM; i < size; i++){
  245. if(pages <= 0)
  246. break;
  247. if(s->map[i] == 0){
  248. pages -= s->ptepertab - j;
  249. j = 0;
  250. continue;
  251. }
  252. while(j < s->ptepertab){
  253. pg = s->map[i]->pages[j];
  254. /*
  255. * We want to zero s->map[i]->page[j] and putpage(pg),
  256. * but we have to make sure other processors flush the
  257. * entry from their TLBs before the page is freed.
  258. * We construct a list of the pages to be freed, zero
  259. * the entries, then (below) call procflushseg, and call
  260. * putpage on the whole list.
  261. *
  262. * Swapped-out pages don't appear in TLBs, so it's okay
  263. * to putswap those pages before procflushseg.
  264. */
  265. if(pg){
  266. if(onswap(pg))
  267. putswap(pg);
  268. else {
  269. pg->next = list;
  270. list = pg;
  271. }
  272. s->map[i]->pages[j] = 0;
  273. }
  274. if(--pages == 0)
  275. goto out;
  276. j++;
  277. }
  278. j = 0;
  279. }
  280. out:
  281. /* flush this seg in all other processes */
  282. if(s->r.ref > 1)
  283. procflushseg(s);
  284. /* free the pages */
  285. for(pg = list; pg != nil; pg = list){
  286. list = list->next;
  287. putpage(pg);
  288. }
  289. }
  290. Segment *
  291. isoverlap(Proc *p, usize va, usize len)
  292. {
  293. int i;
  294. Segment *ns;
  295. usize newtop;
  296. newtop = va + len;
  297. for(i = 0; i < NSEG; i++){
  298. ns = p->seg[i];
  299. if(ns == 0)
  300. continue;
  301. if((newtop > ns->base && newtop <= ns->top) ||
  302. (va >= ns->base && va < ns->top))
  303. return ns;
  304. }
  305. return nil;
  306. }
  307. void
  308. segclock(usize pc)
  309. {
  310. Proc *up = externup();
  311. Segment *s;
  312. int sno;
  313. for(sno = 0; sno < NSEG; sno++){
  314. s = up->seg[sno];
  315. if(s == nil)
  316. continue;
  317. if((s->type & SG_EXEC) == 0 || s->profile == 0)
  318. continue;
  319. s->profile[0] += TK2MS(1);
  320. if(pc >= s->base && pc < s->top){
  321. pc -= s->base;
  322. s->profile[pc >> LRESPROF] += TK2MS(1);
  323. }
  324. }
  325. }
  326. static void
  327. prepageseg(int i)
  328. {
  329. Proc *up = externup();
  330. Segment *s;
  331. usize addr, pgsz;
  332. s = up->seg[i];
  333. if(s == nil)
  334. return;
  335. DBG("prepage: base %#p top %#p\n", s->base, s->top);
  336. pgsz = sys->pgsz[s->pgszi];
  337. for(addr = s->base; addr < s->top; addr += pgsz)
  338. fault(addr, -1, (s->type & SG_WRITE) ? FT_WRITE : FT_READ);
  339. }
  340. /*
  341. * BUG: should depend only in segment attributes, not in
  342. * the slot used in up->seg.
  343. */
  344. void
  345. nixprepage(int i)
  346. {
  347. if(i >= 0)
  348. prepageseg(i);
  349. else
  350. for(i = 0; i < NSEG; i++)
  351. prepageseg(i);
  352. }