/*
 * segment.c — process segment and text-image management
 */
  1. #include "u.h"
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
  7. static void imagereclaim(void);
  8. static void imagechanreclaim(void);
  9. #include "io.h"
/*
 * Attachable segment types
 *
 * Table of named segment classes available to segattach();
 * addphysseg() fills in later slots at run time.  A zero name
 * terminates the list.
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

static Lock physseglock;		/* serialises updates to physseg[] */

#define NFREECHAN	64		/* initial capacity of the freechan array */
#define IHASHSIZE	64		/* number of image hash buckets */
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]

/* allocator and cache of Image structures, keyed by channel qid */
static struct Imagealloc
{
	Lock;
	Image	*free;			/* linked free list of Images */
	Image	*hash[IHASHSIZE];	/* in-use Images hashed by qid.path */
	QLock	ireclaim;		/* mutex on reclaiming free images */

	Chan	**freechan;		/* free image channels */
	int	nfreechan;		/* number of free channels */
	int	szfreechan;		/* size of freechan array */
	QLock	fcreclaim;		/* mutex on reclaiming free channels */
}imagealloc;

/* hook for attaching global segments by name; set outside this file */
Segment* (*_globalsegattach)(Proc*, char*);
  34. void
  35. initseg(void)
  36. {
  37. Image *i, *ie;
  38. imagealloc.free = xalloc(conf.nimage*sizeof(Image));
  39. if (imagealloc.free == nil)
  40. panic("initseg: no memory");
  41. ie = &imagealloc.free[conf.nimage-1];
  42. for(i = imagealloc.free; i < ie; i++)
  43. i->next = i+1;
  44. i->next = 0;
  45. imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
  46. imagealloc.szfreechan = NFREECHAN;
  47. }
  48. Segment *
  49. newseg(int type, ulong base, ulong size)
  50. {
  51. Segment *s;
  52. int mapsize;
  53. if(size > (SEGMAPSIZE*PTEPERTAB))
  54. error(Enovmem);
  55. if(swapfull())
  56. error(Enoswap);
  57. s = smalloc(sizeof(Segment));
  58. s->ref = 1;
  59. s->type = type;
  60. s->base = base;
  61. s->top = base+(size*BY2PG);
  62. s->size = size;
  63. s->sema.prev = &s->sema;
  64. s->sema.next = &s->sema;
  65. mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
  66. if(mapsize > nelem(s->ssegmap)){
  67. mapsize *= 2;
  68. if(mapsize > (SEGMAPSIZE*PTEPERTAB))
  69. mapsize = (SEGMAPSIZE*PTEPERTAB);
  70. s->map = smalloc(mapsize*sizeof(Pte*));
  71. s->mapsize = mapsize;
  72. }
  73. else{
  74. s->map = s->ssegmap;
  75. s->mapsize = nelem(s->ssegmap);
  76. }
  77. return s;
  78. }
/*
 * Release a reference to segment s; on the last reference free its
 * pte maps, profile buffer and the Segment itself.  The backing
 * Image's cached segment pointer is cleared first so no new sharer
 * can find s while it is being torn down.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		/* image lock first, then segment: keeps i->s consistent */
		lock(i);
		lock(s);
		/* last reference: break the image's cache of this segment */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* ref is 0: no one else can reach s; take lk for the pte walk */
	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
  116. void
  117. relocateseg(Segment *s, ulong offset)
  118. {
  119. Page **pg, *x;
  120. Pte *pte, **p, **endpte;
  121. endpte = &s->map[s->mapsize];
  122. for(p = s->map; p < endpte; p++) {
  123. if(*p == 0)
  124. continue;
  125. pte = *p;
  126. for(pg = pte->first; pg <= pte->last; pg++) {
  127. if(x = *pg)
  128. x->va += offset;
  129. }
  130. }
  131. }
/*
 * Duplicate segment seg[segno] for fork.  Text, shared and physical
 * segments are shared by reference; stacks are always copied; bss and
 * data are copied (copy-on-write via ptecpy) unless `share' is set.
 * A data segment duplicated into the TSEG slot is converted with
 * data2txt() instead.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);		/* silence used-before-set; every copying path assigns n */
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		incref(s->image);	/* new segment shares the backing image */
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	/* copy the pte maps copy-on-write */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);	/* sharers must drop stale TLB entries */
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
  189. void
  190. segpage(Segment *s, Page *p)
  191. {
  192. Pte **pte;
  193. ulong off;
  194. Page **pg;
  195. if(p->va < s->base || p->va >= s->top)
  196. panic("segpage");
  197. off = p->va - s->base;
  198. pte = &s->map[off/PTEMAPMEM];
  199. if(*pte == 0)
  200. *pte = ptealloc();
  201. pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
  202. *pg = p;
  203. if(pg < (*pte)->first)
  204. (*pte)->first = pg;
  205. if(pg > (*pte)->last)
  206. (*pte)->last = pg;
  207. }
/*
 * Find or create the Image cache entry for channel c (a program's
 * text file).  On return the Image is locked and holds a segment of
 * the given type, base and len (recreated here if the cached one was
 * reclaimed).
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();	/* let freed resources settle before retrying */
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	lock(i);
	incref(c);	/* the image holds a reference on its channel */
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;	/* the segment's back-pointer counts as a reference */
		poperror();
	}
	else
		incref(i->s);

	return i;
}
/* instrumentation counters for imagereclaim() */
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;
/*
 * Uncache image-backed pages from the tail of the free page list so
 * that Image structures (and their channels) can be recycled.
 * Best effort: returns immediately if another process is already
 * reclaiming; frees at most 1000 pages per call.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			if(p->ref == 0) {	/* recheck now that the page is locked */
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;

	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
/*
 * Close the channels queued by putimage.
 * since close can block, this has to be called outside of
 * spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);	/* may block; done outside the spin lock */
	}

	qunlock(&imagealloc.fcreclaim);
}
/*
 * Drop a reference to image i.  On the last reference the image is
 * unhashed, returned to the free list, and its channel queued for a
 * deferred cclose() — closing can block, so it cannot happen under
 * the spin locks held here (see imagechanreclaim).
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		/* poison the qid so no concurrent lookup matches this image */
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
/*
 * Grow or shrink segment `seg' of the current process so that it ends
 * at PGROUND(addr).  addr==0 is a query returning the segment base.
 * Shrinking frees the vacated pages; growing checks for overlap with
 * the process's other segments and enlarges the pte map as needed.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/* shrinking: release pages above the new top */
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	if(swapfull()){
		qunlock(&s->lk);
		error(Enoswap);
	}

	/* growing: the new top must not land inside another segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}

	/* enlarge the pte map if the new size no longer fits */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);

	return 0;
}
/*
 * Free `pages' pages of s starting at va `start'.
 * called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;		/* page index within the first pte */

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* no pte here: the whole span counts as freed */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
  499. Segment*
  500. isoverlap(Proc *p, ulong va, int len)
  501. {
  502. int i;
  503. Segment *ns;
  504. ulong newtop;
  505. newtop = va+len;
  506. for(i = 0; i < NSEG; i++) {
  507. ns = p->seg[i];
  508. if(ns == 0)
  509. continue;
  510. if((newtop > ns->base && newtop <= ns->top) ||
  511. (va >= ns->base && va < ns->top))
  512. return ns;
  513. }
  514. return nil;
  515. }
  516. int
  517. addphysseg(Physseg* new)
  518. {
  519. Physseg *ps;
  520. /*
  521. * Check not already entered and there is room
  522. * for a new entry and the terminating null entry.
  523. */
  524. lock(&physseglock);
  525. for(ps = physseg; ps->name; ps++){
  526. if(strcmp(ps->name, new->name) == 0){
  527. unlock(&physseglock);
  528. return -1;
  529. }
  530. }
  531. if(ps-physseg >= nelem(physseg)-2){
  532. unlock(&physseglock);
  533. return -1;
  534. }
  535. *ps = *new;
  536. unlock(&physseglock);
  537. return 0;
  538. }
  539. int
  540. isphysseg(char *name)
  541. {
  542. Physseg *ps;
  543. int rv = 0;
  544. lock(&physseglock);
  545. for(ps = physseg; ps->name; ps++){
  546. if(strcmp(ps->name, name) == 0){
  547. rv = 1;
  548. break;
  549. }
  550. }
  551. unlock(&physseglock);
  552. return rv;
  553. }
/*
 * Attach a named physical or global segment of length len to process
 * p at va (or at an address found below the stack when va==0).
 * Returns the base address of the new segment.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);	/* name must be NUL-terminated in mapped memory */

	/* find a free segment slot; ESEG is reserved */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.
	 */
	if(va == 0) {
		va = p->seg[SSEG]->base - len;
		for(;;) {
			os = isoverlap(p, va, len);
			if(os == nil)
				break;
			va = os->base;
			if(len > va)	/* would wrap below address 0 */
				error(Enovmem);
			va -= len;
		}
	}

	va = va&~(BY2PG-1);
	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;	/* Turn off what is not allowed */
	attr |= ps->attr;	/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
  619. void
  620. pteflush(Pte *pte, int s, int e)
  621. {
  622. int i;
  623. Page *p;
  624. for(i = s; i < e; i++) {
  625. p = pte->pages[i];
  626. if(pagedout(p) == 0)
  627. memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
  628. }
  629. }
/*
 * System call: flush the instruction cache for [arg[0], arg[0]+arg[1]).
 * Walks the covered segments pte by pte, marking present pages
 * PG_TXTFLUSH, then flushes the mmu.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		s = seg(up, addr, 1);	/* 1: returned segment is qlocked */
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clip the span to the current segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);	/* round end up to a page */
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		if(len > 0 && addr < s->top)
			goto more;	/* next pte of the same segment */

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
  672. void
  673. segclock(ulong pc)
  674. {
  675. Segment *s;
  676. s = up->seg[TSEG];
  677. if(s == 0 || s->profile == 0)
  678. return;
  679. s->profile[0] += TK2MS(1);
  680. if(pc >= s->base && pc < s->top) {
  681. pc -= s->base;
  682. s->profile[pc>>LRESPROF] += TK2MS(1);
  683. }
  684. }