/*
 * segment.c — user segment and image-cache management (Plan 9 kernel, port/)
 */
  1. #include "u.h"
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
  7. static void imagereclaim(void);
  8. static void imagechanreclaim(void);
  9. #include "io.h"
  10. /*
  11. * Attachable segment types
  12. */
  13. static Physseg physseg[10] = {
  14. { SG_SHARED, "shared", 0, SEGMAXSIZE, 0, 0 },
  15. { SG_BSS, "memory", 0, SEGMAXSIZE, 0, 0 },
  16. { 0, 0, 0, 0, 0, 0 },
  17. };
  18. static Lock physseglock;
  19. #define NFREECHAN 64
  20. #define IHASHSIZE 64
  21. #define ihash(s) imagealloc.hash[s%IHASHSIZE]
  22. static struct Imagealloc
  23. {
  24. Lock;
  25. Image *free;
  26. Image *hash[IHASHSIZE];
  27. QLock ireclaim; /* mutex on reclaiming free images */
  28. Chan **freechan; /* free image channels */
  29. int nfreechan; /* number of free channels */
  30. int szfreechan; /* size of freechan array */
  31. QLock fcreclaim; /* mutex on reclaiming free channels */
  32. }imagealloc;
  33. Segment* (*_globalsegattach)(Proc*, char*);
  34. void
  35. initseg(void)
  36. {
  37. Image *i, *ie;
  38. imagealloc.free = xalloc(conf.nimage*sizeof(Image));
  39. ie = &imagealloc.free[conf.nimage-1];
  40. for(i = imagealloc.free; i < ie; i++)
  41. i->next = i+1;
  42. i->next = 0;
  43. imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
  44. imagealloc.szfreechan = NFREECHAN;
  45. }
  46. Segment *
  47. newseg(int type, ulong base, ulong size)
  48. {
  49. Segment *s;
  50. int mapsize;
  51. if(size > (SEGMAPSIZE*PTEPERTAB))
  52. error(Enovmem);
  53. if(swapfull())
  54. error(Enoswap);
  55. s = smalloc(sizeof(Segment));
  56. s->ref = 1;
  57. s->type = type;
  58. s->base = base;
  59. s->top = base+(size*BY2PG);
  60. s->size = size;
  61. s->sema.prev = &s->sema;
  62. s->sema.next = &s->sema;
  63. mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
  64. if(mapsize > nelem(s->ssegmap)){
  65. mapsize *= 2;
  66. if(mapsize > (SEGMAPSIZE*PTEPERTAB))
  67. mapsize = (SEGMAPSIZE*PTEPERTAB);
  68. s->map = smalloc(mapsize*sizeof(Pte*));
  69. s->mapsize = mapsize;
  70. }
  71. else{
  72. s->map = s->ssegmap;
  73. s->mapsize = nelem(s->ssegmap);
  74. }
  75. return s;
  76. }
  77. void
  78. putseg(Segment *s)
  79. {
  80. Pte **pp, **emap;
  81. Image *i;
  82. if(s == 0)
  83. return;
  84. i = s->image;
  85. if(i != 0) {
  86. lock(i);
  87. lock(s);
  88. if(i->s == s && s->ref == 1)
  89. i->s = 0;
  90. unlock(i);
  91. }
  92. else
  93. lock(s);
  94. s->ref--;
  95. if(s->ref != 0) {
  96. unlock(s);
  97. return;
  98. }
  99. unlock(s);
  100. qlock(&s->lk);
  101. if(i)
  102. putimage(i);
  103. emap = &s->map[s->mapsize];
  104. for(pp = s->map; pp < emap; pp++)
  105. if(*pp)
  106. freepte(s, *pp);
  107. qunlock(&s->lk);
  108. if(s->map != s->ssegmap)
  109. free(s->map);
  110. if(s->profile != 0)
  111. free(s->profile);
  112. free(s);
  113. }
  114. void
  115. relocateseg(Segment *s, ulong offset)
  116. {
  117. Page **pg, *x;
  118. Pte *pte, **p, **endpte;
  119. endpte = &s->map[s->mapsize];
  120. for(p = s->map; p < endpte; p++) {
  121. if(*p == 0)
  122. continue;
  123. pte = *p;
  124. for(pg = pte->first; pg <= pte->last; pg++) {
  125. if(x = *pg)
  126. x->va += offset;
  127. }
  128. }
  129. }
  130. Segment*
  131. dupseg(Segment **seg, int segno, int share)
  132. {
  133. int i, size;
  134. Pte *pte;
  135. Segment *n, *s;
  136. SET(n);
  137. s = seg[segno];
  138. qlock(&s->lk);
  139. if(waserror()){
  140. qunlock(&s->lk);
  141. nexterror();
  142. }
  143. switch(s->type&SG_TYPE) {
  144. case SG_TEXT: /* New segment shares pte set */
  145. case SG_SHARED:
  146. case SG_PHYSICAL:
  147. goto sameseg;
  148. case SG_STACK:
  149. n = newseg(s->type, s->base, s->size);
  150. break;
  151. case SG_BSS: /* Just copy on write */
  152. if(share)
  153. goto sameseg;
  154. n = newseg(s->type, s->base, s->size);
  155. break;
  156. case SG_DATA: /* Copy on write plus demand load info */
  157. if(segno == TSEG){
  158. poperror();
  159. qunlock(&s->lk);
  160. return data2txt(s);
  161. }
  162. if(share)
  163. goto sameseg;
  164. n = newseg(s->type, s->base, s->size);
  165. incref(s->image);
  166. n->image = s->image;
  167. n->fstart = s->fstart;
  168. n->flen = s->flen;
  169. break;
  170. }
  171. size = s->mapsize;
  172. for(i = 0; i < size; i++)
  173. if(pte = s->map[i])
  174. n->map[i] = ptecpy(pte);
  175. n->flushme = s->flushme;
  176. if(s->ref > 1)
  177. procflushseg(s);
  178. poperror();
  179. qunlock(&s->lk);
  180. return n;
  181. sameseg:
  182. incref(s);
  183. poperror();
  184. qunlock(&s->lk);
  185. return s;
  186. }
  187. void
  188. segpage(Segment *s, Page *p)
  189. {
  190. Pte **pte;
  191. ulong off;
  192. Page **pg;
  193. if(p->va < s->base || p->va >= s->top)
  194. panic("segpage");
  195. off = p->va - s->base;
  196. pte = &s->map[off/PTEMAPMEM];
  197. if(*pte == 0)
  198. *pte = ptealloc();
  199. pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
  200. *pg = p;
  201. if(pg < (*pte)->first)
  202. (*pte)->first = pg;
  203. if(pg > (*pte)->last)
  204. (*pte)->last = pg;
  205. }
  206. Image*
  207. attachimage(int type, Chan *c, ulong base, ulong len)
  208. {
  209. Image *i, **l;
  210. /* reclaim any free channels from reclaimed segments */
  211. if(imagealloc.nfreechan)
  212. imagechanreclaim();
  213. lock(&imagealloc);
  214. /*
  215. * Search the image cache for remains of the text from a previous
  216. * or currently running incarnation
  217. */
  218. for(i = ihash(c->qid.path); i; i = i->hash) {
  219. if(c->qid.path == i->qid.path) {
  220. lock(i);
  221. if(eqqid(c->qid, i->qid) &&
  222. eqqid(c->mqid, i->mqid) &&
  223. c->mchan == i->mchan &&
  224. c->type == i->type) {
  225. goto found;
  226. }
  227. unlock(i);
  228. }
  229. }
  230. /*
  231. * imagereclaim dumps pages from the free list which are cached by image
  232. * structures. This should free some image structures.
  233. */
  234. while(!(i = imagealloc.free)) {
  235. unlock(&imagealloc);
  236. imagereclaim();
  237. sched();
  238. lock(&imagealloc);
  239. }
  240. imagealloc.free = i->next;
  241. lock(i);
  242. incref(c);
  243. i->c = c;
  244. i->type = c->type;
  245. i->qid = c->qid;
  246. i->mqid = c->mqid;
  247. i->mchan = c->mchan;
  248. l = &ihash(c->qid.path);
  249. i->hash = *l;
  250. *l = i;
  251. found:
  252. unlock(&imagealloc);
  253. if(i->s == 0) {
  254. /* Disaster after commit in exec */
  255. if(waserror()) {
  256. unlock(i);
  257. pexit(Enovmem, 1);
  258. }
  259. i->s = newseg(type, base, len);
  260. i->s->image = i;
  261. i->ref++;
  262. poperror();
  263. }
  264. else
  265. incref(i->s);
  266. return i;
  267. }
  268. static struct {
  269. int calls; /* times imagereclaim was called */
  270. int loops; /* times the main loop was run */
  271. uvlong ticks; /* total time in the main loop */
  272. uvlong maxt; /* longest time in main loop */
  273. } irstats;
  274. static void
  275. imagereclaim(void)
  276. {
  277. int n;
  278. Page *p;
  279. uvlong ticks;
  280. irstats.calls++;
  281. /* Somebody is already cleaning the page cache */
  282. if(!canqlock(&imagealloc.ireclaim))
  283. return;
  284. lock(&palloc);
  285. ticks = fastticks(nil);
  286. n = 0;
  287. /*
  288. * All the pages with images backing them are at the
  289. * end of the list (see putpage) so start there and work
  290. * backward.
  291. */
  292. for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
  293. if(p->ref == 0 && canlock(p)) {
  294. if(p->ref == 0) {
  295. n++;
  296. uncachepage(p);
  297. }
  298. unlock(p);
  299. }
  300. }
  301. ticks = fastticks(nil) - ticks;
  302. unlock(&palloc);
  303. irstats.loops++;
  304. irstats.ticks += ticks;
  305. if(ticks > irstats.maxt)
  306. irstats.maxt = ticks;
  307. //print("T%llud+", ticks);
  308. qunlock(&imagealloc.ireclaim);
  309. }
  310. /*
  311. * since close can block, this has to be called outside of
  312. * spin locks.
  313. */
  314. static void
  315. imagechanreclaim(void)
  316. {
  317. Chan *c;
  318. /* Somebody is already cleaning the image chans */
  319. if(!canqlock(&imagealloc.fcreclaim))
  320. return;
  321. /*
  322. * We don't have to recheck that nfreechan > 0 after we
  323. * acquire the lock, because we're the only ones who decrement
  324. * it (the other lock contender increments it), and there's only
  325. * one of us thanks to the qlock above.
  326. */
  327. while(imagealloc.nfreechan > 0){
  328. lock(&imagealloc);
  329. imagealloc.nfreechan--;
  330. c = imagealloc.freechan[imagealloc.nfreechan];
  331. unlock(&imagealloc);
  332. cclose(c);
  333. }
  334. qunlock(&imagealloc.fcreclaim);
  335. }
  336. void
  337. putimage(Image *i)
  338. {
  339. Chan *c, **cp;
  340. Image *f, **l;
  341. if(i->notext)
  342. return;
  343. lock(i);
  344. if(--i->ref == 0) {
  345. l = &ihash(i->qid.path);
  346. mkqid(&i->qid, ~0, ~0, QTFILE);
  347. unlock(i);
  348. c = i->c;
  349. lock(&imagealloc);
  350. for(f = *l; f; f = f->hash) {
  351. if(f == i) {
  352. *l = i->hash;
  353. break;
  354. }
  355. l = &f->hash;
  356. }
  357. i->next = imagealloc.free;
  358. imagealloc.free = i;
  359. /* defer freeing channel till we're out of spin lock's */
  360. if(imagealloc.nfreechan == imagealloc.szfreechan){
  361. imagealloc.szfreechan += NFREECHAN;
  362. cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
  363. if(cp == nil)
  364. panic("putimage");
  365. memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
  366. free(imagealloc.freechan);
  367. imagealloc.freechan = cp;
  368. }
  369. imagealloc.freechan[imagealloc.nfreechan++] = c;
  370. unlock(&imagealloc);
  371. return;
  372. }
  373. unlock(i);
  374. }
  375. long
  376. ibrk(ulong addr, int seg)
  377. {
  378. Segment *s, *ns;
  379. ulong newtop, newsize;
  380. int i, mapsize;
  381. Pte **map;
  382. s = up->seg[seg];
  383. if(s == 0)
  384. error(Ebadarg);
  385. if(addr == 0)
  386. return s->base;
  387. qlock(&s->lk);
  388. /* We may start with the bss overlapping the data */
  389. if(addr < s->base) {
  390. if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
  391. qunlock(&s->lk);
  392. error(Enovmem);
  393. }
  394. addr = s->base;
  395. }
  396. newtop = PGROUND(addr);
  397. newsize = (newtop-s->base)/BY2PG;
  398. if(newtop < s->top) {
  399. mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
  400. s->top = newtop;
  401. s->size = newsize;
  402. qunlock(&s->lk);
  403. flushmmu();
  404. return 0;
  405. }
  406. if(swapfull()){
  407. qunlock(&s->lk);
  408. error(Enoswap);
  409. }
  410. for(i = 0; i < NSEG; i++) {
  411. ns = up->seg[i];
  412. if(ns == 0 || ns == s)
  413. continue;
  414. if(newtop >= ns->base && newtop < ns->top) {
  415. qunlock(&s->lk);
  416. error(Esoverlap);
  417. }
  418. }
  419. if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
  420. qunlock(&s->lk);
  421. error(Enovmem);
  422. }
  423. mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
  424. if(mapsize > s->mapsize){
  425. map = smalloc(mapsize*sizeof(Pte*));
  426. memmove(map, s->map, s->mapsize*sizeof(Pte*));
  427. if(s->map != s->ssegmap)
  428. free(s->map);
  429. s->map = map;
  430. s->mapsize = mapsize;
  431. }
  432. s->top = newtop;
  433. s->size = newsize;
  434. qunlock(&s->lk);
  435. return 0;
  436. }
  437. /*
  438. * called with s->lk locked
  439. */
  440. void
  441. mfreeseg(Segment *s, ulong start, int pages)
  442. {
  443. int i, j, size;
  444. ulong soff;
  445. Page *pg;
  446. Page *list;
  447. soff = start-s->base;
  448. j = (soff&(PTEMAPMEM-1))/BY2PG;
  449. size = s->mapsize;
  450. list = nil;
  451. for(i = soff/PTEMAPMEM; i < size; i++) {
  452. if(pages <= 0)
  453. break;
  454. if(s->map[i] == 0) {
  455. pages -= PTEPERTAB-j;
  456. j = 0;
  457. continue;
  458. }
  459. while(j < PTEPERTAB) {
  460. pg = s->map[i]->pages[j];
  461. /*
  462. * We want to zero s->map[i]->page[j] and putpage(pg),
  463. * but we have to make sure other processors flush the
  464. * entry from their TLBs before the page is freed.
  465. * We construct a list of the pages to be freed, zero
  466. * the entries, then (below) call procflushseg, and call
  467. * putpage on the whole list.
  468. *
  469. * Swapped-out pages don't appear in TLBs, so it's okay
  470. * to putswap those pages before procflushseg.
  471. */
  472. if(pg){
  473. if(onswap(pg))
  474. putswap(pg);
  475. else{
  476. pg->next = list;
  477. list = pg;
  478. }
  479. s->map[i]->pages[j] = 0;
  480. }
  481. if(--pages == 0)
  482. goto out;
  483. j++;
  484. }
  485. j = 0;
  486. }
  487. out:
  488. /* flush this seg in all other processes */
  489. if(s->ref > 1)
  490. procflushseg(s);
  491. /* free the pages */
  492. for(pg = list; pg != nil; pg = list){
  493. list = list->next;
  494. putpage(pg);
  495. }
  496. }
  497. Segment*
  498. isoverlap(Proc *p, ulong va, int len)
  499. {
  500. int i;
  501. Segment *ns;
  502. ulong newtop;
  503. newtop = va+len;
  504. for(i = 0; i < NSEG; i++) {
  505. ns = p->seg[i];
  506. if(ns == 0)
  507. continue;
  508. if((newtop > ns->base && newtop <= ns->top) ||
  509. (va >= ns->base && va < ns->top))
  510. return ns;
  511. }
  512. return nil;
  513. }
  514. int
  515. addphysseg(Physseg* new)
  516. {
  517. Physseg *ps;
  518. /*
  519. * Check not already entered and there is room
  520. * for a new entry and the terminating null entry.
  521. */
  522. lock(&physseglock);
  523. for(ps = physseg; ps->name; ps++){
  524. if(strcmp(ps->name, new->name) == 0){
  525. unlock(&physseglock);
  526. return -1;
  527. }
  528. }
  529. if(ps-physseg >= nelem(physseg)-2){
  530. unlock(&physseglock);
  531. return -1;
  532. }
  533. *ps = *new;
  534. unlock(&physseglock);
  535. return 0;
  536. }
  537. int
  538. isphysseg(char *name)
  539. {
  540. Physseg *ps;
  541. int rv = 0;
  542. lock(&physseglock);
  543. for(ps = physseg; ps->name; ps++){
  544. if(strcmp(ps->name, name) == 0){
  545. rv = 1;
  546. break;
  547. }
  548. }
  549. unlock(&physseglock);
  550. return rv;
  551. }
  552. ulong
  553. segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
  554. {
  555. int sno;
  556. Segment *s, *os;
  557. Physseg *ps;
  558. if(va != 0 && va >= USTKTOP)
  559. error(Ebadarg);
  560. validaddr((ulong)name, 1, 0);
  561. vmemchr(name, 0, ~0);
  562. for(sno = 0; sno < NSEG; sno++)
  563. if(p->seg[sno] == nil && sno != ESEG)
  564. break;
  565. if(sno == NSEG)
  566. error(Enovmem);
  567. /*
  568. * first look for a global segment with the
  569. * same name
  570. */
  571. if(_globalsegattach != nil){
  572. s = (*_globalsegattach)(p, name);
  573. if(s != nil){
  574. p->seg[sno] = s;
  575. return s->base;
  576. }
  577. }
  578. len = PGROUND(len);
  579. if(len == 0)
  580. error(Ebadarg);
  581. /*
  582. * Find a hole in the address space.
  583. * Starting at the lowest possible stack address - len,
  584. * check for an overlapping segment, and repeat at the
  585. * base of that segment - len until either a hole is found
  586. * or the address space is exhausted.
  587. */
  588. if(va == 0) {
  589. va = p->seg[SSEG]->base - len;
  590. for(;;) {
  591. os = isoverlap(p, va, len);
  592. if(os == nil)
  593. break;
  594. va = os->base;
  595. if(len > va)
  596. error(Enovmem);
  597. va -= len;
  598. }
  599. }
  600. va = va&~(BY2PG-1);
  601. if(isoverlap(p, va, len) != nil)
  602. error(Esoverlap);
  603. for(ps = physseg; ps->name; ps++)
  604. if(strcmp(name, ps->name) == 0)
  605. goto found;
  606. error(Ebadarg);
  607. found:
  608. if(len > ps->size)
  609. error(Enovmem);
  610. attr &= ~SG_TYPE; /* Turn off what is not allowed */
  611. attr |= ps->attr; /* Copy in defaults */
  612. s = newseg(attr, va, len/BY2PG);
  613. s->pseg = ps;
  614. p->seg[sno] = s;
  615. return va;
  616. }
  617. void
  618. pteflush(Pte *pte, int s, int e)
  619. {
  620. int i;
  621. Page *p;
  622. for(i = s; i < e; i++) {
  623. p = pte->pages[i];
  624. if(pagedout(p) == 0)
  625. memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
  626. }
  627. }
  628. long
  629. syssegflush(ulong *arg)
  630. {
  631. Segment *s;
  632. ulong addr, l;
  633. Pte *pte;
  634. int chunk, ps, pe, len;
  635. addr = arg[0];
  636. len = arg[1];
  637. while(len > 0) {
  638. s = seg(up, addr, 1);
  639. if(s == 0)
  640. error(Ebadarg);
  641. s->flushme = 1;
  642. more:
  643. l = len;
  644. if(addr+l > s->top)
  645. l = s->top - addr;
  646. ps = addr-s->base;
  647. pte = s->map[ps/PTEMAPMEM];
  648. ps &= PTEMAPMEM-1;
  649. pe = PTEMAPMEM;
  650. if(pe-ps > l){
  651. pe = ps + l;
  652. pe = (pe+BY2PG-1)&~(BY2PG-1);
  653. }
  654. if(pe == ps) {
  655. qunlock(&s->lk);
  656. error(Ebadarg);
  657. }
  658. if(pte)
  659. pteflush(pte, ps/BY2PG, pe/BY2PG);
  660. chunk = pe-ps;
  661. len -= chunk;
  662. addr += chunk;
  663. if(len > 0 && addr < s->top)
  664. goto more;
  665. qunlock(&s->lk);
  666. }
  667. flushmmu();
  668. return 0;
  669. }
  670. void
  671. segclock(ulong pc)
  672. {
  673. Segment *s;
  674. s = up->seg[TSEG];
  675. if(s == 0 || s->profile == 0)
  676. return;
  677. s->profile[0] += TK2MS(1);
  678. if(pc >= s->base && pc < s->top) {
  679. pc -= s->base;
  680. s->profile[pc>>LRESPROF] += TK2MS(1);
  681. }
  682. }