/*
 * segment.c — Plan 9 kernel: user segment and text-image cache management.
 */
  1. #include "u.h"
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
/* forward declarations: reclaim helpers defined later in this file */
static void imagereclaim(void);
static void imagechanreclaim(void);

#include "io.h"

/*
 * Attachable segment types
 * (table has spare slots so addphysseg can add entries at runtime;
 * the all-zero entry terminates the list)
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

static Lock physseglock;	/* guards reads/writes of physseg[] */

#define NFREECHAN	64
#define IHASHSIZE	64
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]

/* allocator and cache of Image structures, guarded by the embedded Lock */
static struct Imagealloc
{
	Lock;
	Image	*free;			/* singly linked free list of Images */
	Image	*hash[IHASHSIZE];	/* cache, hashed by qid.path */
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

/* hook consulted by segattach; presumably set by a global-segment driver — nil if absent */
Segment* (*_globalsegattach)(Proc*, char*);
  34. void
  35. initseg(void)
  36. {
  37. Image *i, *ie;
  38. imagealloc.free = xalloc(conf.nimage*sizeof(Image));
  39. ie = &imagealloc.free[conf.nimage-1];
  40. for(i = imagealloc.free; i < ie; i++)
  41. i->next = i+1;
  42. i->next = 0;
  43. imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
  44. imagealloc.szfreechan = NFREECHAN;
  45. }
/*
 * Allocate a Segment of the given type covering size pages at base.
 * Small segments use the embedded ssegmap array; larger ones get a
 * separately allocated Pte* map, doubled for growth headroom and
 * capped at the global maximum.  Raises Enovmem/Enoswap on failure.
 */
Segment *
newseg(int type, ulong base, ulong size)
{
	Segment *s;
	int mapsize;

	/* size is in pages; refuse anything beyond the addressable maximum */
	if(size > (SEGMAPSIZE*PTEPERTAB))
		error(Enovmem);
	if(swapfull())
		error(Enoswap);
	s = smalloc(sizeof(Segment));
	s->ref = 1;
	s->type = type;
	s->base = base;
	s->top = base+(size*BY2PG);
	s->size = size;

	/* one Pte covers PTEPERTAB pages */
	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
	if(mapsize > nelem(s->ssegmap)){
		mapsize *= 2;	/* headroom so ibrk can grow without realloc */
		if(mapsize > (SEGMAPSIZE*PTEPERTAB))
			mapsize = (SEGMAPSIZE*PTEPERTAB);
		s->map = smalloc(mapsize*sizeof(Pte*));
		s->mapsize = mapsize;
	}
	else{
		s->map = s->ssegmap;
		s->mapsize = nelem(s->ssegmap);
	}
	return s;
}
/*
 * Drop a reference to s; when the count reaches zero, free the
 * segment, its Ptes, its map and profile buffers, and release
 * its image reference.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		/* image lock is taken before the segment lock */
		lock(i);
		lock(s);
		/* detach the image's cached segment if we hold the last ref */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* last reference gone: no concurrent users remain */
	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
  112. void
  113. relocateseg(Segment *s, ulong offset)
  114. {
  115. Page **pg, *x;
  116. Pte *pte, **p, **endpte;
  117. endpte = &s->map[s->mapsize];
  118. for(p = s->map; p < endpte; p++) {
  119. if(*p == 0)
  120. continue;
  121. pte = *p;
  122. for(pg = pte->first; pg <= pte->last; pg++) {
  123. if(x = *pg)
  124. x->va += offset;
  125. }
  126. }
  127. }
/*
 * Duplicate segment segno of seg[] (for fork).  Text, shared and
 * physical segments are shared by reference; stack is always copied;
 * bss and data are shared or duplicated copy-on-write according to
 * the share flag.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);		/* compiler hint: every path below sets n or returns */
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			/* text slot asked to hold data: convert to a text segment */
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		/* new segment demand-loads from the same image file */
		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}

	/* copy-on-write duplicate of the page tables */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);	/* other procs must drop stale TLB entries */
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
  185. void
  186. segpage(Segment *s, Page *p)
  187. {
  188. Pte **pte;
  189. ulong off;
  190. Page **pg;
  191. if(p->va < s->base || p->va >= s->top)
  192. panic("segpage");
  193. off = p->va - s->base;
  194. pte = &s->map[off/PTEMAPMEM];
  195. if(*pte == 0)
  196. *pte = ptealloc();
  197. pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
  198. *pg = p;
  199. if(pg < (*pte)->first)
  200. (*pte)->first = pg;
  201. if(pg > (*pte)->last)
  202. (*pte)->last = pg;
  203. }
/*
 * Find or create the cached Image for the text file c, attaching a
 * segment of len pages at base if the image has none.
 * Returns with the Image locked; the caller must unlock it.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			/* full match required: path alone can collide */
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* initialise the fresh Image and hash it; i stays locked */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	/* both paths arrive here with i locked */
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
/* instrumentation counters for imagereclaim (diagnostic only) */
static struct {
	int	calls;		/* times imagereclaim was called */
	int	loops;		/* times the main loop was run */
	uvlong	ticks;		/* total time in the main loop */
	uvlong	maxt;		/* longest time in main loop */
} irstats;
/*
 * Evict image-cached pages from the tail of the free page list so
 * their Image structures can be recycled.  Bounded to 1000 pages
 * per call; at most one process runs this at a time (ireclaim).
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			/* re-check ref now that the page is locked */
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
/*
 * since close can block, this has to be called outside of
 * spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		/* pop one channel under the spin lock, close it outside */
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);
	}

	qunlock(&imagealloc.fcreclaim);
}
/*
 * Drop a reference to image i; on the last reference, unhash it,
 * return it to the free list and queue its channel for deferred
 * closing (cclose can block, so it can't happen under spin locks).
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		/* compute the hash chain head before poisoning the qid */
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
/*
 * Grow or shrink segment seg of the current process so it ends at
 * PGROUND(addr); addr == 0 is a query returning the base.
 * Shrinking frees pages; growing checks swap space, overlap with
 * other segments, and enlarges the Pte map if needed.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/* shrinking: free the pages above the new top */
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	if(swapfull()){
		qunlock(&s->lk);
		error(Enoswap);
	}

	/* the new top must not land inside another segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}

	/* grow the Pte map if the new size no longer fits */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
/*
 * Free `pages' pages of s starting at virtual address `start'.
 * called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	/* i indexes Ptes in the map; j indexes pages within one Pte */
	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* no Pte here: whole span already unmapped, just count it */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
  495. Segment*
  496. isoverlap(Proc *p, ulong va, int len)
  497. {
  498. int i;
  499. Segment *ns;
  500. ulong newtop;
  501. newtop = va+len;
  502. for(i = 0; i < NSEG; i++) {
  503. ns = p->seg[i];
  504. if(ns == 0)
  505. continue;
  506. if((newtop > ns->base && newtop <= ns->top) ||
  507. (va >= ns->base && va < ns->top))
  508. return ns;
  509. }
  510. return nil;
  511. }
  512. int
  513. addphysseg(Physseg* new)
  514. {
  515. Physseg *ps;
  516. /*
  517. * Check not already entered and there is room
  518. * for a new entry and the terminating null entry.
  519. */
  520. lock(&physseglock);
  521. for(ps = physseg; ps->name; ps++){
  522. if(strcmp(ps->name, new->name) == 0){
  523. unlock(&physseglock);
  524. return -1;
  525. }
  526. }
  527. if(ps-physseg >= nelem(physseg)-2){
  528. unlock(&physseglock);
  529. return -1;
  530. }
  531. *ps = *new;
  532. unlock(&physseglock);
  533. return 0;
  534. }
  535. int
  536. isphysseg(char *name)
  537. {
  538. Physseg *ps;
  539. int rv = 0;
  540. lock(&physseglock);
  541. for(ps = physseg; ps->name; ps++){
  542. if(strcmp(ps->name, name) == 0){
  543. rv = 1;
  544. break;
  545. }
  546. }
  547. unlock(&physseglock);
  548. return rv;
  549. }
/*
 * Attach a named segment of len bytes to process p at va (for
 * va == 0, at a hole found below the stack).  Returns the chosen
 * base address; raises an error on overlap or bad arguments.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name comes from user space: validate the pointer and bound the string */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot; ESEG is reserved */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.
	 */
	if(va == 0) {
		va = p->seg[SSEG]->base - len;
		for(;;) {
			os = isoverlap(p, va, len);
			if(os == nil)
				break;
			va = os->base;
			if(len > va)
				error(Enovmem);
			va -= len;
		}
	}

	va = va&~(BY2PG-1);
	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	/* the name must match a registered physical segment */
	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;	/* Turn off what is not allowed */
	attr |= ps->attr;	/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
  615. void
  616. pteflush(Pte *pte, int s, int e)
  617. {
  618. int i;
  619. Page *p;
  620. for(i = s; i < e; i++) {
  621. p = pte->pages[i];
  622. if(pagedout(p) == 0)
  623. memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
  624. }
  625. }
/*
 * System call: mark the pages covering [addr, addr+len) for a
 * cache/TLB flush, one Pte-sized chunk at a time.
 * NOTE(review): seg(up, addr, 1) appears to return with s->lk
 * held — every exit path below qunlocks it; confirm against seg().
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clip this pass to the current segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		/* ps/pe are byte offsets within one Pte's span */
		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		/* more of this segment left: loop without re-looking it up */
		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
  668. void
  669. segclock(ulong pc)
  670. {
  671. Segment *s;
  672. s = up->seg[TSEG];
  673. if(s == 0 || s->profile == 0)
  674. return;
  675. s->profile[0] += TK2MS(1);
  676. if(pc >= s->base && pc < s->top) {
  677. pc -= s->base;
  678. s->profile[pc>>LRESPROF] += TK2MS(1);
  679. }
  680. }