page.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"

enum
{
	Nstartpgs = 32,
	Nminfree = 3,
	Nfreepgs = 512,
};

typedef struct Pgnd Pgnd;

enum
{
	Punused = 0,
	Pused,
	Pfreed,
};

struct Pgnd
{
	uintmem pa;
	int sts;
};

#define pghash(daddr)	pga.hash[(daddr>>PGSHFT)&(PGHSIZE-1)]

Pgalloc pga;		/* new allocator */
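
/*
 * Print the free/total user page counts for each supported
 * page size into the buffer [s, e); returns the updated
 * end-of-string pointer.
 */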
char*
seprintpagestats(char *s, char *e)
{
	Mach *m = machp();
	int i;

	lock(&pga);
	for(i = 0; i < m->npgsz; i++)
		if(m->pgsz[i] != 0)
			s = seprint(s, e, "%uld/%d %dK user pages avail\n",
				pga.pgsza[i].freecount,
				pga.pgsza[i].npages.ref, m->pgsz[i]/KiB);
	unlock(&pga);
	return s;
}

/*
 * Preallocate some pages:
 * some 2M ones will be used by the first process.
 * some 1G ones will be allocated for each domain so processes may use them.
 */
void
pageinit(void)
{
	Mach *m = machp();
	int si, i, color;
	Page *pg;

	pga.userinit = 1;
	DBG("pageinit: npgsz = %d\n", m->npgsz);
	/*
	 * Don't pre-allocate 4K pages, we are not using them anymore.
	 */
	for(si = 1; si < m->npgsz; si++){
		for(i = 0; i < Nstartpgs; i++){
			if(si < 2)
				color = -1;
			else
				color = i;
			pg = pgalloc(m->pgsz[si], color);
			if(pg == nil){
				DBG("pageinit: pgalloc failed. breaking.\n");
				break;	/* don't consume more memory */
			}
			DBG("pageinit: alloced pa %#P sz %#ux color %d\n",
				pg->pa, m->pgsz[si], pg->color);
			lock(&pga);
			pg->ref = 0;
			pagechainhead(pg);
			unlock(&pga);
		}
	}
	pga.userinit = 0;
}
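
/*
 * Map a page size in bytes to its index in m->pgsz[];
 * returns -1 if the size is not supported.
 */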
int
getpgszi(usize size)
{
	Mach *m = machp();
	int si;

	for(si = 0; si < m->npgsz; si++)
		if(size == m->pgsz[si])
			return si;
	print("getpgszi: size %#ulx not found\n", size);
	return -1;
}
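
/*
 * Allocate a Page structure and back it with physical memory of the
 * given size and color; assumes size is one of the supported page
 * sizes. Returns nil if either allocation fails.
 */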
Page*
pgalloc(usize size, int color)
{
	Page *pg;
	int si;

	si = getpgszi(size);
	if((pg = malloc(sizeof(Page))) == nil){
		DBG("pgalloc: malloc failed\n");
		return nil;
	}
	memset(pg, 0, sizeof *pg);
	if((pg->pa = physalloc(size, &color, pg)) == 0){
		DBG("pgalloc: physalloc failed: size %#ulx color %d\n", size, color);
		free(pg);
		return nil;
	}
	pg->pgszi = si;	/* size index */
	incref(&pga.pgsza[si].npages);
	pg->color = color;
	return pg;
}
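
/*
 * Release a page's physical memory and the Page structure itself.
 */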
void
pgfree(Page* pg)
{
	Mach *m = machp();

	decref(&pga.pgsza[pg->pgszi].npages);
	physfree(pg->pa, m->pgsz[pg->pgszi]);
	free(pg);
}
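
/*
 * Remove a page from its size class's free list.
 * Must be called with pga locked.
 */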
void
pageunchain(Page *p)
{
	Pgsza *pa;

	if(canlock(&pga))
		panic("pageunchain");
	pa = &pga.pgsza[p->pgszi];
	if(p->prev)
		p->prev->next = p->next;
	else
		pa->head = p->next;
	if(p->next)
		p->next->prev = p->prev;
	else
		pa->tail = p->prev;
	p->prev = p->next = nil;
	pa->freecount--;
}
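
/*
 * Append a page to the tail of its size class's free list.
 * Must be called with pga locked.
 */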
void
pagechaintail(Page *p)
{
	Pgsza *pa;

	if(canlock(&pga))
		panic("pagechaintail");
	pa = &pga.pgsza[p->pgszi];
	if(pa->tail) {
		p->prev = pa->tail;
		pa->tail->next = p;
	}
	else {
		pa->head = p;
		p->prev = 0;
	}
	pa->tail = p;
	p->next = 0;
	pa->freecount++;
}
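
/*
 * Add a page to the head of its size class's free list.
 * Must be called with pga locked.
 */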
void
pagechainhead(Page *p)
{
	Pgsza *pa;

	if(canlock(&pga))
		panic("pagechainhead");
	pa = &pga.pgsza[p->pgszi];
	if(pa->head) {
		p->next = pa->head;
		pa->head->prev = p;
	}
	else {
		pa->tail = p;
		p->next = 0;
	}
	pa->head = p;
	p->prev = 0;
	pa->freecount++;
}
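
/*
 * Return the first page in the list pl with the requested color,
 * or any page when color is NOCOLOR; nil if none is found.
 */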
static Page*
findpg(Page *pl, int color)
{
	Page *p;

	for(p = pl; p != nil; p = p->next)
		if(color == NOCOLOR || p->color == color)
			return p;
	return nil;
}

int trip;
/*
 * can be called with up == nil during boot.
 */
Page*
newpage(int clear, Segment **s, uintptr_t va, usize size, int color)
{
	Mach *m = machp();
	Page *p;
	KMap *k;
	uint8_t ct;
	Pgsza *pa;
	int i, dontalloc, si;
	// static int once;

	si = getpgszi(size);
	//iprint("(remove this print and die)newpage, size %x, si %d\n", size, si);
	pa = &pga.pgsza[si];
	lock(&pga);
	/*
	 * Beware: newpage can effectively loop even if this loop runs
	 * only once, because if the segment is lost, fault calls us
	 * again. Either way, accept any color once we have failed a
	 * couple of times.
	 */
	for(i = 0;; i++){
		if(i > 3)
			color = NOCOLOR;
		/*
		 * 1. try to reuse a free one.
		 */
		p = findpg(pa->head, color);
		if(p != nil)
			break;
		/*
		 * 2. try to allocate a new one from physical memory
		 */
		p = pgalloc(size, color);
		if(p != nil){
			pagechainhead(p);
			break;
		}
		/*
		 * 3. out of memory, try with the pager.
		 * but release the segment (if any) while in the pager.
		 */
		unlock(&pga);
		dontalloc = 0;
		if(s && *s) {
			qunlock(&((*s)->lk));
			*s = 0;
			dontalloc = 1;
		}
		/*
		 * Try to get any page of the desired color
		 * or any color for NOCOLOR.
		 */
		kickpager(si, color);
		/*
		 * If called from fault and we lost the segment from
		 * underneath, don't waste time allocating and freeing
		 * a page. Fault will call newpage again when it has
		 * reacquired the segment locks.
		 */
		if(dontalloc)
			return 0;
		lock(&pga);
	}
	assert(p != nil);
	ct = PG_NEWCOL;

	pageunchain(p);

	lock(p);
	if(p->ref != 0)
		panic("newpage pa %#ullx", p->pa);

	uncachepage(p);
	p->ref++;
	p->va = va;
	p->modref = 0;
	for(i = 0; i < nelem(p->cachectl); i++)
		p->cachectl[i] = ct;
	unlock(p);
	unlock(&pga);

	if(clear) {
		k = kmap(p);
		if (VA(k) == 0xfffffe007d800000ULL) trip++;
		// if (trip) die("trip before memset");
		// This will frequently die if we use 3K-1 (3071 -- 0xbff)
		// it will not if we use 3070.
		// The fault is a null pointer deref.
		//memset((void*)VA(k), 0, m->pgsz[p->pgszi]);
		// thinking about it, using memset is stupid.
		// Don't get upset about this loop;
		// we make it readable, compilers optimize it.
		int i;
		uint64_t *v = (void *)VA(k);
		if (1)
			for(i = 0; i < m->pgsz[p->pgszi]/sizeof(*v); i++)
				v[i] = 0;
		//if (trip) die("trip");
		kunmap(k);
	}
	DBG("newpage: va %#p pa %#ullx pgsz %#ux color %d\n",
		p->va, p->pa, m->pgsz[p->pgszi], p->color);

	return p;
}
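
/*
 * Drop a reference to a page. On the last reference, pages backing an
 * image are kept at the tail of the free list for reuse; other pages
 * go to the head, or are freed outright when the free list already
 * holds more than Nfreepgs pages. Wakes up anyone waiting for memory.
 */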
void
putpage(Page *p)
{
	Pgsza *pa;
	int rlse;

	lock(&pga);
	lock(p);
	if(p->ref == 0)
		panic("putpage");
	if(--p->ref > 0) {
		unlock(p);
		unlock(&pga);
		return;
	}
	rlse = 0;
	if(p->image != nil)
		pagechaintail(p);
	else{
		/*
		 * Free pages if we have plenty in the free list.
		 */
		pa = &pga.pgsza[p->pgszi];
		if(pa->freecount > Nfreepgs)
			rlse = 1;
		else
			pagechainhead(p);
	}
	if(pga.r.p != nil)
		wakeup(&pga.r);
	unlock(p);
	if(rlse)
		pgfree(p);
	unlock(&pga);
}
/*
 * Get an auxiliary page.
 * Don't do so if fewer than Nminfree pages remain free.
 * Only used by cache.
 * The interface must specify page size.
 */
Page*
auxpage(usize size)
{
	Page *p;
	Pgsza *pa;
	int si;

	si = getpgszi(size);
	lock(&pga);
	pa = &pga.pgsza[si];
	p = pa->head;
	if(pa->freecount < Nminfree){
		unlock(&pga);
		return nil;
	}
	pageunchain(p);

	lock(p);
	if(p->ref != 0)
		panic("auxpage");
	p->ref++;
	uncachepage(p);
	unlock(p);
	unlock(&pga);

	return p;
}

static int dupretries = 15000;
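
/*
 * Copy the cached contents of p into a free page of the same color and
 * cache that copy under p's image, then uncache p so the caller can
 * reuse it. Returns 0 if p was duplicated (or needs no duplication);
 * returns 1 if p was simply uncached because memory is low or no page
 * of the right color is free.
 */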
int
duppage(Page *p)				/* Always call with p locked */
{
	Mach *m = machp();
	Pgsza *pa;
	Page *np;
	int color;
	int retries;

	retries = 0;
retry:
	if(retries++ > dupretries){
		print("duppage %d, up %#p\n", retries, m->externup);
		dupretries += 100;
		if(dupretries > 100000)
			panic("duppage\n");
		uncachepage(p);
		return 1;
	}

	/* don't dup pages with no image */
	if(p->ref == 0 || p->image == nil || p->image->notext)
		return 0;

	/*
	 * normal lock ordering is to call
	 * lock(&pga) before lock(p).
	 * To avoid deadlock, we have to drop
	 * our locks and try again.
	 */
	if(!canlock(&pga)){
		unlock(p);
		if(m->externup)
			sched();
		lock(p);
		goto retry;
	}

	pa = &pga.pgsza[p->pgszi];
	/* No freelist cache when memory is very low */
	if(pa->freecount < Nminfree){
		unlock(&pga);
		uncachepage(p);
		return 1;
	}

	color = p->color;
	for(np = pa->head; np; np = np->next)
		if(np->color == color)
			break;

	/* No page of the correct color */
	if(np == 0){
		unlock(&pga);
		uncachepage(p);
		return 1;
	}

	pageunchain(np);
	pagechaintail(np);

	/*
	 * XXX - here's a bug? - np is on the freelist but it's not really free.
	 * when we unlock palloc someone else can come in, decide to
	 * use np, and then try to lock it. they succeed after we've
	 * run copypage and cachepage and unlock(np). then what?
	 * they call pageunchain before locking(np), so it's removed
	 * from the freelist, but still in the cache because of
	 * cachepage below. if someone else looks in the cache
	 * before they remove it, the page will have a nonzero ref
	 * once they finally lock(np).
	 *
	 * What I know is that not doing the pagechaintail, but
	 * doing it at the end, to prevent the race, leads to a
	 * deadlock, even following the pga, pg lock ordering. -nemo
	 */
	lock(np);
	unlock(&pga);

	/* Cache the new version */
	uncachepage(np);
	np->va = p->va;
	np->daddr = p->daddr;
	copypage(p, np);
	cachepage(np, p->image);
	unlock(np);
	uncachepage(p);

	return 0;
}
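
/*
 * Copy the contents of page f into page t.
 * Both pages must have the same size index.
 */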
void
copypage(Page *f, Page *t)
{
	Mach *m = machp();
	KMap *ks, *kd;

	if(f->pgszi != t->pgszi || t->pgszi < 0)
		panic("copypage");
	ks = kmap(f);
	kd = kmap(t);
	memmove((void*)VA(kd), (void*)VA(ks), m->pgsz[t->pgszi]);
	kunmap(ks);
	kunmap(kd);
}
void
uncachepage(Page *p)			/* Always called with a locked page */
{
	Page **l, *f;

	if(p->image == 0)
		return;

	lock(&pga.hashlock);
	l = &pghash(p->daddr);
	for(f = *l; f; f = f->hash){
		if(f == p){
			*l = p->hash;
			break;
		}
		l = &f->hash;
	}
	unlock(&pga.hashlock);
	putimage(p->image);
	p->image = 0;
	p->daddr = 0;
}
void
cachepage(Page *p, Image *i)
{
	Page **l;

	/* If this ever happens it should be fixed by calling
	 * uncachepage instead of panic. I think there is a race
	 * with pio in which this can happen. Calling uncachepage is
	 * correct - I just wanted to see if we got here.
	 */
	if(p->image)
		panic("cachepage");
	incref(i);
	lock(&pga.hashlock);
	p->image = i;
	l = &pghash(p->daddr);
	p->hash = *l;
	*l = p;
	unlock(&pga.hashlock);
}
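
/*
 * Remove the page cached for (i, daddr), if any, from the page hash
 * and drop its reference to the image.
 */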
void
cachedel(Image *i, uint32_t daddr)
{
	Page *f, **l;

	lock(&pga.hashlock);
	l = &pghash(daddr);
	for(f = *l; f; f = f->hash){
		if(f->image == i && f->daddr == daddr){
			lock(f);
			if(f->image == i && f->daddr == daddr){
				*l = f->hash;
				putimage(f->image);
				f->image = nil;
				f->daddr = 0;
			}
			unlock(f);
			break;
		}
		l = &f->hash;
	}
	unlock(&pga.hashlock);
}
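
/*
 * Look up the page cached for (i, daddr). On success the page gains a
 * reference, is removed from the free list if it was there, and is
 * returned; nil if no matching page is cached.
 */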
Page *
lookpage(Image *i, uint32_t daddr)
{
	Page *f;

	lock(&pga.hashlock);
	for(f = pghash(daddr); f; f = f->hash){
		if(f->image == i && f->daddr == daddr){
			unlock(&pga.hashlock);

			lock(&pga);
			lock(f);
			if(f->image != i || f->daddr != daddr){
				unlock(f);
				unlock(&pga);
				return 0;
			}
			if(++f->ref == 1)
				pageunchain(f);
			unlock(&pga);
			unlock(f);

			return f;
		}
	}
	unlock(&pga.hashlock);

	return nil;
}
/*
 * Called from imagereclaim, to try to release Images.
 * The argument shows the preferred image to release pages from.
 * All images will be tried, from lru to mru.
 */
uint64_t
pagereclaim(Image *i)
{
	Page *p;
	uint64_t ticks;

	lock(&pga);
	ticks = fastticks(nil);

	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = pga.pgsza[0].tail; p && p->image == i; p = p->prev){
		if(p->ref == 0 && canlock(p)){
			if(p->ref == 0) {
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&pga);

	return ticks;
}
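
/*
 * Duplicate a segment's page table, sharing the resident pages by
 * taking an extra reference on each of them.
 */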
Pte*
ptecpy(Segment *s, Pte *old)
{
	Pte *new;
	Page **src, **dst;

	new = ptealloc(s);
	dst = &new->pages[old->first-old->pages];
	new->first = dst;
	for(src = old->first; src <= old->last; src++, dst++)
		if(*src){
			if(onswap(*src))
				panic("ptecpy: no swap");
			else{
				lock(*src);
				(*src)->ref++;
				unlock(*src);
			}
			new->last = dst;
			*dst = *src;
		}
	return new;
}
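
/*
 * Allocate an empty Pte with room for s->ptepertab page pointers;
 * first and last are set so that the table starts out empty.
 */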
Pte*
ptealloc(Segment *s)
{
	Pte *new;

	new = smalloc(sizeof(Pte) + sizeof(Page*)*s->ptepertab);
	new->first = &new->pages[s->ptepertab];
	new->last = new->pages;
	return new;
}
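
/*
 * Release every page referenced by a Pte and free the Pte itself.
 * Physical segments use the segment's own pgfree routine when one is
 * set; otherwise their pages are freed when their reference count
 * drops to zero. All other segment types go through putpage.
 */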
void
freepte(Segment *s, Pte *p)
{
	int ref;
	void (*fn)(Page*);
	Page *pt, **pg, **ptop;

	switch(s->type&SG_TYPE) {
	case SG_PHYSICAL:
		fn = s->pseg->pgfree;
		ptop = &p->pages[s->ptepertab];
		if(fn) {
			for(pg = p->pages; pg < ptop; pg++) {
				if(*pg == 0)
					continue;
				(*fn)(*pg);
				*pg = 0;
			}
			break;
		}
		for(pg = p->pages; pg < ptop; pg++) {
			pt = *pg;
			if(pt == 0)
				continue;
			lock(pt);
			ref = --pt->ref;
			unlock(pt);
			if(ref == 0)
				free(pt);
		}
		break;
	default:
		for(pg = p->first; pg <= p->last; pg++)
			if(*pg) {
				putpage(*pg);
				*pg = 0;
			}
	}
	free(p);
}