1
0

segment.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. /* Segment type from portdat.h */
  16. char *segtypes[SG_TYPE]={
  17. [SG_BAD0] "Bad0",
  18. [SG_TEXT] "Text",
  19. [SG_DATA] "Data",
  20. [SG_BSS] "Bss",
  21. [SG_STACK] "Stack",
  22. [SG_SHARED] "Shared",
  23. [SG_PHYSICAL] "Phys",
  24. [SG_LOAD] "Load"
  25. };
  26. uintmem
  27. segppn(Segment *s, uintmem pa)
  28. {
  29. uintmem pgsz;
  30. pgsz = sys->pgsz[s->pgszi];
  31. pa &= ~(pgsz-1);
  32. return pa;
  33. }
  34. /*
  35. * Sizes are given in multiples of BIGPGSZ.
  36. * The actual page size used is either BIGPGSZ or 1*GiB
  37. * 1G is disabled for now. RGM.
  38. * if base is aligned to 1G and size is >= 1G and we support 1G pages.
  39. */
  40. Segment *
  41. newseg(int type, uintptr_t base, uint64_t size)
  42. {
  43. Segment *s;
  44. int mapsize;
  45. uint pgsz;
  46. if(size > SEGMAPSIZE*(PTEMAPMEM/BIGPGSZ))
  47. error(Enovmem);
  48. pgsz = BIGPGSZ;
  49. if (0) // TODO: re enable this on a per-process basis via a write to /proc/pid/ctl.
  50. if(size*BIGPGSZ >= 1*GiB && getpgszi(1*GiB) >= 0 &&
  51. (base&(1ULL*GiB-1)) == 0 && ((size*BIGPGSZ)&(1ULL*GiB-1)) == 0){
  52. DBG("newseg: using 1G pages\n");
  53. pgsz = 1*GiB;
  54. }
  55. s = smalloc(sizeof(Segment));
  56. s->ref = 1;
  57. s->type = type;
  58. s->base = base;
  59. s->ptepertab = PTEMAPMEM/pgsz;
  60. s->top = base+(size*BIGPGSZ);
  61. s->size = size;
  62. s->pgszi = getpgszi(pgsz);
  63. if(s->pgszi < 0)
  64. panic("newseg: getpgszi %d", pgsz);
  65. s->sema.prev = &s->sema;
  66. s->sema.next = &s->sema;
  67. s->color = NOCOLOR;
  68. mapsize = HOWMANY(size*BIGPGSZ/pgsz, s->ptepertab);
  69. if(mapsize > nelem(s->ssegmap)){
  70. mapsize *= 2;
  71. if(mapsize > (SEGMAPSIZE*s->ptepertab))
  72. mapsize = (SEGMAPSIZE*s->ptepertab);
  73. s->map = smalloc(mapsize*sizeof(Pte*));
  74. s->mapsize = mapsize;
  75. }
  76. else{
  77. s->map = s->ssegmap;
  78. s->mapsize = nelem(s->ssegmap);
  79. }
  80. return s;
  81. }
  82. #define NHASH 101
  83. #define SHASH(np) (PTR2UINT(np)%NHASH)
  84. Sem*
  85. segmksem(Segment *sg, int *np)
  86. {
  87. Sem *s, **l;
  88. qlock(&sg->lk);
  89. if(sg->sems.s == nil)
  90. sg->sems.s = mallocz(NHASH * sizeof(Sem*), 1);
  91. for(l = &sg->sems.s[SHASH(np)]; (s = *l) != nil; l = &s->next)
  92. if(s->np == np){
  93. qunlock(&sg->lk);
  94. return s;
  95. }
  96. s = mallocz(sizeof *s, 1);
  97. s->np = np;
  98. *l = s;
  99. qunlock(&sg->lk);
  100. return s;
  101. }
  102. void
  103. putseg(Segment *s)
  104. {
  105. Pte **pp, **emap;
  106. Image *i;
  107. extern void freezseg(Segment*);
  108. if(s == 0)
  109. return;
  110. i = s->image;
  111. if(i != 0) {
  112. lock(i);
  113. lock(s);
  114. if(i->s == s && s->ref == 1)
  115. i->s = 0;
  116. unlock(i);
  117. }
  118. else
  119. lock(s);
  120. s->ref--;
  121. if(s->ref != 0) {
  122. unlock(s);
  123. return;
  124. }
  125. unlock(s);
  126. qlock(&s->lk);
  127. if(i)
  128. putimage(i);
  129. emap = &s->map[s->mapsize];
  130. for(pp = s->map; pp < emap; pp++)
  131. if(*pp)
  132. freepte(s, *pp);
  133. qunlock(&s->lk);
  134. if(s->map != s->ssegmap)
  135. free(s->map);
  136. if(s->profile != 0)
  137. free(s->profile);
  138. if(s->sems.s != nil)
  139. free(s->sems.s);
  140. if(s->type&SG_ZIO)
  141. freezseg(s);
  142. free(s);
  143. }
  144. void
  145. relocateseg(Segment *s, uintptr_t offset)
  146. {
  147. Page **pg, *x;
  148. Pte *pte, **p, **endpte;
  149. endpte = &s->map[s->mapsize];
  150. for(p = s->map; p < endpte; p++) {
  151. if(*p == 0)
  152. continue;
  153. pte = *p;
  154. for(pg = pte->first; pg <= pte->last; pg++) {
  155. if(x = *pg)
  156. x->va += offset;
  157. }
  158. }
  159. }
  160. Segment*
  161. dupseg(Segment **seg, int segno, int share)
  162. {
  163. Proc *up = externup();
  164. int i, size;
  165. Pte *pte;
  166. Segment *n, *s;
  167. SET(n);
  168. s = seg[segno];
  169. qlock(&s->lk);
  170. if(waserror()){
  171. qunlock(&s->lk);
  172. nexterror();
  173. }
  174. switch(s->type&SG_TYPE) {
  175. case SG_TEXT: /* New segment shares pte set */
  176. case SG_SHARED:
  177. case SG_PHYSICAL:
  178. case SG_MMAP:
  179. goto sameseg;
  180. case SG_STACK:
  181. n = newseg(s->type, s->base, s->size);
  182. break;
  183. case SG_BSS: /* Just copy on write */
  184. if(share)
  185. goto sameseg;
  186. n = newseg(s->type, s->base, s->size);
  187. break;
  188. case SG_LOAD:
  189. if((s->type & SG_EXEC) != 0 && (s->type & SG_WRITE) == 0)
  190. goto sameseg;
  191. case SG_DATA: /* Copy on write plus demand load info */
  192. if((s->type & SG_EXEC) != 0){
  193. poperror();
  194. qunlock(&s->lk);
  195. return data2txt(s);
  196. }
  197. if(share)
  198. goto sameseg;
  199. n = newseg(s->type, s->base, s->size);
  200. incref(s->image);
  201. n->image = s->image;
  202. n->ldseg = s->ldseg;
  203. n->pgszi = s->pgszi;
  204. n->color = s->color;
  205. n->ptepertab = s->ptepertab;
  206. break;
  207. }
  208. size = s->mapsize;
  209. for(i = 0; i < size; i++)
  210. if(pte = s->map[i])
  211. n->map[i] = ptecpy(n, pte);
  212. n->flushme = s->flushme;
  213. if(s->ref > 1)
  214. procflushseg(s);
  215. poperror();
  216. qunlock(&s->lk);
  217. return n;
  218. sameseg:
  219. incref(s);
  220. poperror();
  221. qunlock(&s->lk);
  222. return s;
  223. }
  224. void
  225. segpage(Segment *s, Page *p)
  226. {
  227. Pte **pte;
  228. uintptr_t soff;
  229. uintmem pgsz;
  230. Page **pg;
  231. if(s->pgszi < 0)
  232. s->pgszi = p->pgszi;
  233. if(s->color == NOCOLOR)
  234. s->color = p->color;
  235. if(s->pgszi != p->pgszi) {
  236. iprint("s %p s->pgszi %d p %p p->pgszi %p\n", s, s->pgszi, p, p->pgszi);
  237. panic("segpage: s->pgszi != p->pgszi");
  238. }
  239. if(p->va < s->base || p->va >= s->top)
  240. panic("segpage: p->va < s->base || p->va >= s->top");
  241. soff = p->va - s->base;
  242. pte = &s->map[soff/PTEMAPMEM];
  243. if(*pte == 0)
  244. *pte = ptealloc(s);
  245. pgsz = sys->pgsz[s->pgszi];
  246. pg = &(*pte)->pages[(soff&(PTEMAPMEM-1))/pgsz];
  247. *pg = p;
  248. if(pg < (*pte)->first)
  249. (*pte)->first = pg;
  250. if(pg > (*pte)->last)
  251. (*pte)->last = pg;
  252. }
  253. /*
  254. * called with s->lk locked
  255. */
  256. void
  257. mfreeseg(Segment *s, uintptr_t start, int pages)
  258. {
  259. int i, j, size;
  260. uintptr_t soff;
  261. uintmem pgsz;
  262. Page *pg;
  263. Page *list;
  264. pgsz = sys->pgsz[s->pgszi];
  265. soff = start-s->base;
  266. j = (soff&(PTEMAPMEM-1))/pgsz;
  267. size = s->mapsize;
  268. list = nil;
  269. for(i = soff/PTEMAPMEM; i < size; i++) {
  270. if(pages <= 0)
  271. break;
  272. if(s->map[i] == 0) {
  273. pages -= s->ptepertab-j;
  274. j = 0;
  275. continue;
  276. }
  277. while(j < s->ptepertab) {
  278. pg = s->map[i]->pages[j];
  279. /*
  280. * We want to zero s->map[i]->page[j] and putpage(pg),
  281. * but we have to make sure other processors flush the
  282. * entry from their TLBs before the page is freed.
  283. * We construct a list of the pages to be freed, zero
  284. * the entries, then (below) call procflushseg, and call
  285. * putpage on the whole list.
  286. *
  287. * Swapped-out pages don't appear in TLBs, so it's okay
  288. * to putswap those pages before procflushseg.
  289. */
  290. if(pg){
  291. if(onswap(pg))
  292. putswap(pg);
  293. else{
  294. pg->next = list;
  295. list = pg;
  296. }
  297. s->map[i]->pages[j] = 0;
  298. }
  299. if(--pages == 0)
  300. goto out;
  301. j++;
  302. }
  303. j = 0;
  304. }
  305. out:
  306. /* flush this seg in all other processes */
  307. if(s->ref > 1)
  308. procflushseg(s);
  309. /* free the pages */
  310. for(pg = list; pg != nil; pg = list){
  311. list = list->next;
  312. putpage(pg);
  313. }
  314. }
  315. Segment*
  316. isoverlap(Proc* p, uintptr_t va, usize len)
  317. {
  318. int i;
  319. Segment *ns;
  320. uintptr_t newtop;
  321. newtop = va+len;
  322. for(i = 0; i < NSEG; i++) {
  323. ns = p->seg[i];
  324. if(ns == 0)
  325. continue;
  326. if((newtop > ns->base && newtop <= ns->top) ||
  327. (va >= ns->base && va < ns->top))
  328. return ns;
  329. }
  330. return nil;
  331. }
  332. void
  333. segclock(uintptr_t pc)
  334. {
  335. Proc *up = externup();
  336. Segment *s;
  337. int sno;
  338. for(sno = 0; sno < NSEG; sno++){
  339. s = up->seg[sno];
  340. if(s == nil)
  341. continue;
  342. if((s->type & SG_EXEC) == 0 || s->profile == 0)
  343. continue;
  344. s->profile[0] += TK2MS(1);
  345. if(pc >= s->base && pc < s->top) {
  346. pc -= s->base;
  347. s->profile[pc>>LRESPROF] += TK2MS(1);
  348. }
  349. }
  350. }
  351. static void
  352. prepageseg(int i)
  353. {
  354. Proc *up = externup();
  355. Segment *s;
  356. uintptr_t addr, pgsz;
  357. s = up->seg[i];
  358. if(s == nil)
  359. return;
  360. DBG("prepage: base %#p top %#p\n", s->base, s->top);
  361. pgsz = sys->pgsz[s->pgszi];
  362. for(addr = s->base; addr < s->top; addr += pgsz)
  363. fault(addr, -1, (s->type & SG_WRITE) ? FT_WRITE : FT_READ);
  364. }
  365. /*
  366. * BUG: should depend only in segment attributes, not in
  367. * the slot used in up->seg.
  368. */
  369. void
  370. nixprepage(int i)
  371. {
  372. if(i >= 0)
  373. prepageseg(i);
  374. else
  375. for(i = 0; i < NSEG; i++)
  376. prepageseg(i);
  377. }