page.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. enum
  15. {
  16. Nstartpgs = 32,
  17. Nminfree = 3,
  18. Nfreepgs = 512,
  19. };
  20. typedef struct Pgnd Pgnd;
  21. enum
  22. {
  23. Punused = 0,
  24. Pused,
  25. Pfreed,
  26. };
  27. struct Pgnd
  28. {
  29. uintmem pa;
  30. int sts;
  31. };
  32. #define pghash(daddr) pga.hash[(daddr>>PGSHFT)&(PGHSIZE-1)]
  33. Pgalloc pga; /* new allocator */
  34. char*
  35. seprintpagestats(char *s, char *e)
  36. {
  37. int i;
  38. lock(&pga);
  39. for(i = 0; i < m->npgsz; i++)
  40. if(m->pgsz[i] != 0)
  41. s = seprint(s, e, "%uld/%d %dK user pages avail\n",
  42. pga.pgsza[i].freecount,
  43. pga.pgsza[i].npages.ref, m->pgsz[i]/KiB);
  44. unlock(&pga);
  45. return s;
  46. }
  47. /*
  48. * Preallocate some pages:
  49. * some 2M ones will be used by the first process.
  50. * some 1G ones will be allocated for each domain so processes may use them.
  51. */
  52. void
  53. pageinit(void)
  54. {
  55. int si, i, color;
  56. Page *pg;
  57. pga.userinit = 1;
  58. DBG("pageinit: npgsz = %d\n", m->npgsz);
  59. /*
  60. * Don't pre-allocate 4K pages, we are not using them anymore.
  61. */
  62. for(si = 1; si < m->npgsz; si++){
  63. for(i = 0; i < Nstartpgs; i++){
  64. if(si < 2)
  65. color = -1;
  66. else
  67. color = i;
  68. pg = pgalloc(m->pgsz[si], color);
  69. if(pg == nil){
  70. DBG("pageinit: pgalloc failed. breaking.\n");
  71. break; /* don't consume more memory */
  72. }
  73. DBG("pageinit: alloced pa %#P sz %#ux color %d\n",
  74. pg->pa, m->pgsz[si], pg->color);
  75. lock(&pga);
  76. pg->ref = 0;
  77. pagechainhead(pg);
  78. unlock(&pga);
  79. }
  80. }
  81. pga.userinit = 0;
  82. }
  83. int
  84. getpgszi(usize size)
  85. {
  86. int si;
  87. for(si = 0; si < m->npgsz; si++)
  88. if(size == m->pgsz[si])
  89. return si;
  90. print("getpgszi: size %#ulx not found\n", size);
  91. return -1;
  92. }
  93. Page*
  94. pgalloc(usize size, int color)
  95. {
  96. Page *pg;
  97. int si;
  98. si = getpgszi(size);
  99. if((pg = malloc(sizeof(Page))) == nil){
  100. DBG("pgalloc: malloc failed\n");
  101. return nil;
  102. }
  103. memset(pg, 0, sizeof *pg);
  104. if((pg->pa = physalloc(size, &color, pg)) == 0){
  105. DBG("pgalloc: physalloc failed: size %#ulx color %d\n", size, color);
  106. free(pg);
  107. return nil;
  108. }
  109. pg->pgszi = si; /* size index */
  110. incref(&pga.pgsza[si].npages);
  111. pg->color = color;
  112. return pg;
  113. }
  114. void
  115. pgfree(Page* pg)
  116. {
  117. decref(&pga.pgsza[pg->pgszi].npages);
  118. physfree(pg->pa, m->pgsz[pg->pgszi]);
  119. free(pg);
  120. }
  121. void
  122. pageunchain(Page *p)
  123. {
  124. Pgsza *pa;
  125. if(canlock(&pga))
  126. panic("pageunchain");
  127. pa = &pga.pgsza[p->pgszi];
  128. if(p->prev)
  129. p->prev->next = p->next;
  130. else
  131. pa->head = p->next;
  132. if(p->next)
  133. p->next->prev = p->prev;
  134. else
  135. pa->tail = p->prev;
  136. p->prev = p->next = nil;
  137. pa->freecount--;
  138. }
  139. void
  140. pagechaintail(Page *p)
  141. {
  142. Pgsza *pa;
  143. if(canlock(&pga))
  144. panic("pagechaintail");
  145. pa = &pga.pgsza[p->pgszi];
  146. if(pa->tail) {
  147. p->prev = pa->tail;
  148. pa->tail->next = p;
  149. }
  150. else {
  151. pa->head = p;
  152. p->prev = 0;
  153. }
  154. pa->tail = p;
  155. p->next = 0;
  156. pa->freecount++;
  157. }
  158. void
  159. pagechainhead(Page *p)
  160. {
  161. Pgsza *pa;
  162. if(canlock(&pga))
  163. panic("pagechainhead");
  164. pa = &pga.pgsza[p->pgszi];
  165. if(pa->head) {
  166. p->next = pa->head;
  167. pa->head->prev = p;
  168. }
  169. else {
  170. pa->tail = p;
  171. p->next = 0;
  172. }
  173. pa->head = p;
  174. p->prev = 0;
  175. pa->freecount++;
  176. }
  177. static Page*
  178. findpg(Page *pl, int color)
  179. {
  180. Page *p;
  181. for(p = pl; p != nil; p = p->next)
  182. if(color == NOCOLOR || p->color == color)
  183. return p;
  184. return nil;
  185. }
  186. /*
  187. * can be called with up == nil during boot.
  188. */
  189. Page*
  190. newpage(int clear, Segment **s, uintptr va, usize size, int color)
  191. {
  192. Page *p;
  193. KMap *k;
  194. uint8_t ct;
  195. Pgsza *pa;
  196. int i, dontalloc, si;
  197. static int once;
  198. si = getpgszi(size);
  199. pa = &pga.pgsza[si];
  200. lock(&pga);
  201. /*
  202. * Beware, new page may enter a loop even if this loop does not
  203. * loop more than once, if the segment is lost and fault calls us
  204. * again. Either way, we accept any color if we failed a couple of times.
  205. */
  206. for(i = 0;; i++){
  207. if(i > 3)
  208. color = NOCOLOR;
  209. /*
  210. * 1. try to reuse a free one.
  211. */
  212. p = findpg(pa->head, color);
  213. if(p != nil)
  214. break;
  215. /*
  216. * 2. try to allocate a new one from physical memory
  217. */
  218. p = pgalloc(size, color);
  219. if(p != nil){
  220. pagechainhead(p);
  221. break;
  222. }
  223. /*
  224. * 3. out of memory, try with the pager.
  225. * but release the segment (if any) while in the pager.
  226. */
  227. unlock(&pga);
  228. dontalloc = 0;
  229. if(s && *s) {
  230. qunlock(&((*s)->lk));
  231. *s = 0;
  232. dontalloc = 1;
  233. }
  234. /*
  235. * Try to get any page of the desired color
  236. * or any color for NOCOLOR.
  237. */
  238. kickpager(si, color);
  239. /*
  240. * If called from fault and we lost the segment from
  241. * underneath don't waste time allocating and freeing
  242. * a page. Fault will call newpage again when it has
  243. * reacquired the segment locks
  244. */
  245. if(dontalloc)
  246. return 0;
  247. lock(&pga);
  248. }
  249. assert(p != nil);
  250. ct = PG_NEWCOL;
  251. pageunchain(p);
  252. lock(p);
  253. if(p->ref != 0)
  254. panic("newpage pa %#ullx", p->pa);
  255. uncachepage(p);
  256. p->ref++;
  257. p->va = va;
  258. p->modref = 0;
  259. for(i = 0; i < nelem(p->cachectl); i++)
  260. p->cachectl[i] = ct;
  261. unlock(p);
  262. unlock(&pga);
  263. if(clear) {
  264. k = kmap(p);
  265. memset((void*)VA(k), 0, m->pgsz[p->pgszi]);
  266. kunmap(k);
  267. }
  268. DBG("newpage: va %#p pa %#ullx pgsz %#ux color %d\n",
  269. p->va, p->pa, m->pgsz[p->pgszi], p->color);
  270. return p;
  271. }
  272. void
  273. putpage(Page *p)
  274. {
  275. Pgsza *pa;
  276. int rlse;
  277. lock(&pga);
  278. lock(p);
  279. if(p->ref == 0)
  280. panic("putpage");
  281. if(--p->ref > 0) {
  282. unlock(p);
  283. unlock(&pga);
  284. return;
  285. }
  286. rlse = 0;
  287. if(p->image != nil)
  288. pagechaintail(p);
  289. else{
  290. /*
  291. * Free pages if we have plenty in the free list.
  292. */
  293. pa = &pga.pgsza[p->pgszi];
  294. if(pa->freecount > Nfreepgs)
  295. rlse = 1;
  296. else
  297. pagechainhead(p);
  298. }
  299. if(pga.r.p != nil)
  300. wakeup(&pga.r);
  301. unlock(p);
  302. if(rlse)
  303. pgfree(p);
  304. unlock(&pga);
  305. }
  306. /*
  307. * Get an auxiliary page.
  308. * Don't do so if less than Nminfree pages.
  309. * Only used by cache.
  310. * The interface must specify page size.
  311. */
  312. Page*
  313. auxpage(usize size)
  314. {
  315. Page *p;
  316. Pgsza *pa;
  317. int si;
  318. si = getpgszi(size);
  319. lock(&pga);
  320. pa = &pga.pgsza[si];
  321. p = pa->head;
  322. if(pa->freecount < Nminfree){
  323. unlock(&pga);
  324. return nil;
  325. }
  326. pageunchain(p);
  327. lock(p);
  328. if(p->ref != 0)
  329. panic("auxpage");
  330. p->ref++;
  331. uncachepage(p);
  332. unlock(p);
  333. unlock(&pga);
  334. return p;
  335. }
  336. static int dupretries = 15000;
  337. int
  338. duppage(Page *p) /* Always call with p locked */
  339. {
  340. Pgsza *pa;
  341. Page *np;
  342. int color;
  343. int retries;
  344. retries = 0;
  345. retry:
  346. if(retries++ > dupretries){
  347. print("duppage %d, up %#p\n", retries, up);
  348. dupretries += 100;
  349. if(dupretries > 100000)
  350. panic("duppage\n");
  351. uncachepage(p);
  352. return 1;
  353. }
  354. /* don't dup pages with no image */
  355. if(p->ref == 0 || p->image == nil || p->image->notext)
  356. return 0;
  357. /*
  358. * normal lock ordering is to call
  359. * lock(&pga) before lock(p).
  360. * To avoid deadlock, we have to drop
  361. * our locks and try again.
  362. */
  363. if(!canlock(&pga)){
  364. unlock(p);
  365. if(up)
  366. sched();
  367. lock(p);
  368. goto retry;
  369. }
  370. pa = &pga.pgsza[p->pgszi];
  371. /* No freelist cache when memory is very low */
  372. if(pa->freecount < Nminfree){
  373. unlock(&pga);
  374. uncachepage(p);
  375. return 1;
  376. }
  377. color = p->color;
  378. for(np = pa->head; np; np = np->next)
  379. if(np->color == color)
  380. break;
  381. /* No page of the correct color */
  382. if(np == 0){
  383. unlock(&pga);
  384. uncachepage(p);
  385. return 1;
  386. }
  387. pageunchain(np);
  388. pagechaintail(np);
  389. /*
  390. * XXX - here's a bug? - np is on the freelist but it's not really free.
  391. * when we unlock palloc someone else can come in, decide to
  392. * use np, and then try to lock it. they succeed after we've
  393. * run copypage and cachepage and unlock(np). then what?
  394. * they call pageunchain before locking(np), so it's removed
  395. * from the freelist, but still in the cache because of
  396. * cachepage below. if someone else looks in the cache
  397. * before they remove it, the page will have a nonzero ref
  398. * once they finally lock(np).
  399. *
  400. * What I know is that not doing the pagechaintail, but
  401. * doing it at the end, to prevent the race, leads to a
  402. * deadlock, even following the pga, pg lock ordering. -nemo
  403. */
  404. lock(np);
  405. unlock(&pga);
  406. /* Cache the new version */
  407. uncachepage(np);
  408. np->va = p->va;
  409. np->daddr = p->daddr;
  410. copypage(p, np);
  411. cachepage(np, p->image);
  412. unlock(np);
  413. uncachepage(p);
  414. return 0;
  415. }
  416. void
  417. copypage(Page *f, Page *t)
  418. {
  419. KMap *ks, *kd;
  420. if(f->pgszi != t->pgszi || t->pgszi < 0)
  421. panic("copypage");
  422. ks = kmap(f);
  423. kd = kmap(t);
  424. memmove((void*)VA(kd), (void*)VA(ks), m->pgsz[t->pgszi]);
  425. kunmap(ks);
  426. kunmap(kd);
  427. }
  428. void
  429. uncachepage(Page *p) /* Always called with a locked page */
  430. {
  431. Page **l, *f;
  432. if(p->image == 0)
  433. return;
  434. lock(&pga.hashlock);
  435. l = &pghash(p->daddr);
  436. for(f = *l; f; f = f->hash){
  437. if(f == p){
  438. *l = p->hash;
  439. break;
  440. }
  441. l = &f->hash;
  442. }
  443. unlock(&pga.hashlock);
  444. putimage(p->image);
  445. p->image = 0;
  446. p->daddr = 0;
  447. }
  448. void
  449. cachepage(Page *p, Image *i)
  450. {
  451. Page **l;
  452. /* If this ever happens it should be fixed by calling
  453. * uncachepage instead of panic. I think there is a race
  454. * with pio in which this can happen. Calling uncachepage is
  455. * correct - I just wanted to see if we got here.
  456. */
  457. if(p->image)
  458. panic("cachepage");
  459. incref(i);
  460. lock(&pga.hashlock);
  461. p->image = i;
  462. l = &pghash(p->daddr);
  463. p->hash = *l;
  464. *l = p;
  465. unlock(&pga.hashlock);
  466. }
  467. void
  468. cachedel(Image *i, uint32_t daddr)
  469. {
  470. Page *f, **l;
  471. lock(&pga.hashlock);
  472. l = &pghash(daddr);
  473. for(f = *l; f; f = f->hash){
  474. if(f->image == i && f->daddr == daddr){
  475. lock(f);
  476. if(f->image == i && f->daddr == daddr){
  477. *l = f->hash;
  478. putimage(f->image);
  479. f->image = nil;
  480. f->daddr = 0;
  481. }
  482. unlock(f);
  483. break;
  484. }
  485. l = &f->hash;
  486. }
  487. unlock(&pga.hashlock);
  488. }
  489. Page *
  490. lookpage(Image *i, uint32_t daddr)
  491. {
  492. Page *f;
  493. lock(&pga.hashlock);
  494. for(f = pghash(daddr); f; f = f->hash){
  495. if(f->image == i && f->daddr == daddr){
  496. unlock(&pga.hashlock);
  497. lock(&pga);
  498. lock(f);
  499. if(f->image != i || f->daddr != daddr){
  500. unlock(f);
  501. unlock(&pga);
  502. return 0;
  503. }
  504. if(++f->ref == 1)
  505. pageunchain(f);
  506. unlock(&pga);
  507. unlock(f);
  508. return f;
  509. }
  510. }
  511. unlock(&pga.hashlock);
  512. return nil;
  513. }
  514. /*
  515. * Called from imagereclaim, to try to release Images.
  516. * The argument shows the preferred image to release pages from.
  517. * All images will be tried, from lru to mru.
  518. */
  519. uint64_t
  520. pagereclaim(Image *i)
  521. {
  522. Page *p;
  523. uint64_t ticks;
  524. lock(&pga);
  525. ticks = fastticks(nil);
  526. /*
  527. * All the pages with images backing them are at the
  528. * end of the list (see putpage) so start there and work
  529. * backward.
  530. */
  531. for(p = pga.pgsza[0].tail; p && p->image == i; p = p->prev){
  532. if(p->ref == 0 && canlock(p)){
  533. if(p->ref == 0) {
  534. uncachepage(p);
  535. }
  536. unlock(p);
  537. }
  538. }
  539. ticks = fastticks(nil) - ticks;
  540. unlock(&pga);
  541. return ticks;
  542. }
  543. Pte*
  544. ptecpy(Segment *s, Pte *old)
  545. {
  546. Pte *new;
  547. Page **src, **dst;
  548. new = ptealloc(s);
  549. dst = &new->pages[old->first-old->pages];
  550. new->first = dst;
  551. for(src = old->first; src <= old->last; src++, dst++)
  552. if(*src){
  553. if(onswap(*src))
  554. panic("ptecpy: no swap");
  555. else{
  556. lock(*src);
  557. (*src)->ref++;
  558. unlock(*src);
  559. }
  560. new->last = dst;
  561. *dst = *src;
  562. }
  563. return new;
  564. }
  565. Pte*
  566. ptealloc(Segment *s)
  567. {
  568. Pte *new;
  569. new = smalloc(sizeof(Pte) + sizeof(Page*)*s->ptepertab);
  570. new->first = &new->pages[s->ptepertab];
  571. new->last = new->pages;
  572. return new;
  573. }
  574. void
  575. freepte(Segment *s, Pte *p)
  576. {
  577. int ref;
  578. void (*fn)(Page*);
  579. Page *pt, **pg, **ptop;
  580. switch(s->type&SG_TYPE) {
  581. case SG_PHYSICAL:
  582. fn = s->pseg->pgfree;
  583. ptop = &p->pages[s->ptepertab];
  584. if(fn) {
  585. for(pg = p->pages; pg < ptop; pg++) {
  586. if(*pg == 0)
  587. continue;
  588. (*fn)(*pg);
  589. *pg = 0;
  590. }
  591. break;
  592. }
  593. for(pg = p->pages; pg < ptop; pg++) {
  594. pt = *pg;
  595. if(pt == 0)
  596. continue;
  597. lock(pt);
  598. ref = --pt->ref;
  599. unlock(pt);
  600. if(ref == 0)
  601. free(pt);
  602. }
  603. break;
  604. default:
  605. for(pg = p->first; pg <= p->last; pg++)
  606. if(*pg) {
  607. putpage(*pg);
  608. *pg = 0;
  609. }
  610. }
  611. free(p);
  612. }