/*
 * segment.c
 */
  1. #include "u.h"
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
  7. static void imagereclaim(void);
  8. static void imagechanreclaim(void);
  9. #include "io.h"
/*
 * Attachable segment types
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0, 0, 0, 0, 0, 0 },		/* terminating null entry */
};

static Lock physseglock;	/* guards additions to physseg[] */

#define NFREECHAN	64	/* initial capacity of the deferred-close channel array */
#define IHASHSIZE	64	/* image hash buckets */
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]

/* allocator state for Image structures and their deferred channel closes */
static struct Imagealloc
{
	Lock;
	Image	*free;			/* free list of Image structures */
	Image	*hash[IHASHSIZE];	/* in-use images, hashed by qid.path */
	QLock	ireclaim;	/* mutex on reclaiming free images */
	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

/* hook for attaching global segments by name; nil if not configured */
Segment* (*_globalsegattach)(Proc*, char*);
  34. void
  35. initseg(void)
  36. {
  37. Image *i, *ie;
  38. imagealloc.free = xalloc(conf.nimage*sizeof(Image));
  39. if (imagealloc.free == nil)
  40. panic("initseg: no memory");
  41. ie = &imagealloc.free[conf.nimage-1];
  42. for(i = imagealloc.free; i < ie; i++)
  43. i->next = i+1;
  44. i->next = 0;
  45. imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
  46. imagealloc.szfreechan = NFREECHAN;
  47. }
  48. Segment *
  49. newseg(int type, ulong base, ulong size)
  50. {
  51. Segment *s;
  52. int mapsize;
  53. if(size > (SEGMAPSIZE*PTEPERTAB))
  54. error(Enovmem);
  55. s = smalloc(sizeof(Segment));
  56. s->ref = 1;
  57. s->type = type;
  58. s->base = base;
  59. s->top = base+(size*BY2PG);
  60. s->size = size;
  61. s->sema.prev = &s->sema;
  62. s->sema.next = &s->sema;
  63. mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
  64. if(mapsize > nelem(s->ssegmap)){
  65. mapsize *= 2;
  66. if(mapsize > (SEGMAPSIZE*PTEPERTAB))
  67. mapsize = (SEGMAPSIZE*PTEPERTAB);
  68. s->map = smalloc(mapsize*sizeof(Pte*));
  69. s->mapsize = mapsize;
  70. }
  71. else{
  72. s->map = s->ssegmap;
  73. s->mapsize = nelem(s->ssegmap);
  74. }
  75. return s;
  76. }
/*
 * Drop a reference to segment s; on the last reference, release its
 * image, free its page tables and free the segment.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		/* lock ordering: image before segment */
		lock(i);
		lock(s);
		/* detach the image's cached segment if this is the last ref */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* last reference gone: tear the segment down */
	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
  114. void
  115. relocateseg(Segment *s, ulong offset)
  116. {
  117. Page **pg, *x;
  118. Pte *pte, **p, **endpte;
  119. endpte = &s->map[s->mapsize];
  120. for(p = s->map; p < endpte; p++) {
  121. if(*p == 0)
  122. continue;
  123. pte = *p;
  124. for(pg = pte->first; pg <= pte->last; pg++) {
  125. if(x = *pg)
  126. x->va += offset;
  127. }
  128. }
  129. }
/*
 * Duplicate segment seg[segno] for a forking process.  Text, shared
 * and physical segments return the original with an extra reference;
 * stack, bss and data segments get a new segment whose Ptes are
 * duplicated via ptecpy.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);		/* every path below either sets n or returns */
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			/* exec wants the data segment as text */
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}

	/* copy the page tables */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);	/* other procs must drop stale TLB entries */
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
  187. void
  188. segpage(Segment *s, Page *p)
  189. {
  190. Pte **pte;
  191. ulong off;
  192. Page **pg;
  193. if(p->va < s->base || p->va >= s->top)
  194. panic("segpage");
  195. off = p->va - s->base;
  196. pte = &s->map[off/PTEMAPMEM];
  197. if(*pte == 0)
  198. *pte = ptealloc();
  199. pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
  200. *pg = p;
  201. if(pg < (*pte)->first)
  202. (*pte)->first = pg;
  203. if(pg > (*pte)->last)
  204. (*pte)->last = pg;
  205. }
/*
 * Find or create the Image cache entry for channel c and return it
 * locked, with a reference held on its segment.  May block in
 * imagereclaim/imagechanreclaim when resources run short.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			/* full identity check, not just the hash key */
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();	/* give other procs a chance to release images */
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* i is returned to the caller still locked */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
/* instrumentation for imagereclaim */
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;
/*
 * Evict up to 1000 unreferenced image-backed pages from the page
 * cache so their Image structures can be freed.  Called from
 * attachimage when the Image free list is empty.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			/* recheck ref under the page lock before evicting */
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
  310. /*
  311. * since close can block, this has to be called outside of
  312. * spin locks.
  313. */
  314. static void
  315. imagechanreclaim(void)
  316. {
  317. Chan *c;
  318. /* Somebody is already cleaning the image chans */
  319. if(!canqlock(&imagealloc.fcreclaim))
  320. return;
  321. /*
  322. * We don't have to recheck that nfreechan > 0 after we
  323. * acquire the lock, because we're the only ones who decrement
  324. * it (the other lock contender increments it), and there's only
  325. * one of us thanks to the qlock above.
  326. */
  327. while(imagealloc.nfreechan > 0){
  328. lock(&imagealloc);
  329. imagealloc.nfreechan--;
  330. c = imagealloc.freechan[imagealloc.nfreechan];
  331. unlock(&imagealloc);
  332. cclose(c);
  333. }
  334. qunlock(&imagealloc.fcreclaim);
  335. }
/*
 * Drop a reference to image i; on the last reference, unhash it,
 * return it to the free list and queue its channel for closing.
 * The close is deferred (see imagechanreclaim) because cclose can
 * block and callers may hold spin locks.
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		/* poison the qid so cache lookups can no longer match */
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			/* grow the deferred-close array */
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
/*
 * Move the top of segment `seg' of the current process to addr
 * (page-rounded), growing or shrinking it.  addr==0 queries the
 * segment base.  Returns 0 on success; raises on error.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	/* growing: the new top must not intrude on any other segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}

	/* grow the Pte map if the new size no longer fits */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
/*
 * Free `pages' pages of segment s starting at virtual address
 * `start': unhook them from the Ptes, flush other processes' TLBs,
 * then release the pages.
 * called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;	/* page index within the first Pte */

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* whole Pte absent: account for its page span and move on */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
  502. Segment*
  503. isoverlap(Proc *p, ulong va, int len)
  504. {
  505. int i;
  506. Segment *ns;
  507. ulong newtop;
  508. newtop = va+len;
  509. for(i = 0; i < NSEG; i++) {
  510. ns = p->seg[i];
  511. if(ns == 0)
  512. continue;
  513. if((newtop > ns->base && newtop <= ns->top) ||
  514. (va >= ns->base && va < ns->top))
  515. return ns;
  516. }
  517. return nil;
  518. }
  519. int
  520. addphysseg(Physseg* new)
  521. {
  522. Physseg *ps;
  523. /*
  524. * Check not already entered and there is room
  525. * for a new entry and the terminating null entry.
  526. */
  527. lock(&physseglock);
  528. for(ps = physseg; ps->name; ps++){
  529. if(strcmp(ps->name, new->name) == 0){
  530. unlock(&physseglock);
  531. return -1;
  532. }
  533. }
  534. if(ps-physseg >= nelem(physseg)-2){
  535. unlock(&physseglock);
  536. return -1;
  537. }
  538. *ps = *new;
  539. unlock(&physseglock);
  540. return 0;
  541. }
  542. int
  543. isphysseg(char *name)
  544. {
  545. Physseg *ps;
  546. int rv = 0;
  547. lock(&physseglock);
  548. for(ps = physseg; ps->name; ps++){
  549. if(strcmp(ps->name, name) == 0){
  550. rv = 1;
  551. break;
  552. }
  553. }
  554. unlock(&physseglock);
  555. return rv;
  556. }
/*
 * Attach the named physical segment of length len to process p at
 * va, or at a kernel-chosen hole below the stack when va is 0.
 * Returns the virtual address of the attached segment.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name comes from user space: validate and bound the string */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot; ESEG is reserved */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted. Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for (os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;	/* Turn off what is not allowed */
	attr |= ps->attr;	/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
  623. void
  624. pteflush(Pte *pte, int s, int e)
  625. {
  626. int i;
  627. Page *p;
  628. for(i = s; i < e; i++) {
  629. p = pte->pages[i];
  630. if(pagedout(p) == 0)
  631. memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
  632. }
  633. }
/*
 * System call: flush the range [arg[0], arg[0]+arg[1]) by setting
 * PG_TXTFLUSH on each resident page (see pteflush), then flushmmu.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		/* NOTE(review): seg(up, addr, 1) evidently returns with s->lk held — hence the qunlocks below; confirm against seg() */
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);	/* round the end up to a page */
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
  676. void
  677. segclock(ulong pc)
  678. {
  679. Segment *s;
  680. s = up->seg[TSEG];
  681. if(s == 0 || s->profile == 0)
  682. return;
  683. s->profile[0] += TK2MS(1);
  684. if(pc >= s->base && pc < s->top) {
  685. pc -= s->base;
  686. s->profile[pc>>LRESPROF] += TK2MS(1);
  687. }
  688. }