/* page.c */
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
  14. enum
  15. {
  16. Nstartpgs = 32,
  17. Nminfree = 3,
  18. Nfreepgs = 512,
  19. };
  20. typedef struct Pgnd Pgnd;
  21. enum
  22. {
  23. Punused = 0,
  24. Pused,
  25. Pfreed,
  26. };
  27. struct Pgnd
  28. {
  29. uintmem pa;
  30. int sts;
  31. };
  32. #define pghash(daddr) pga.hash[(daddr>>PGSHFT)&(PGHSIZE-1)]
  33. Pgalloc pga; /* new allocator */
  34. char*
  35. seprintpagestats(char *s, char *e)
  36. {
  37. int i;
  38. lock(&pga.l);
  39. for(i = 0; i < sys->npgsz; i++)
  40. if(sys->pgsz[i] != 0)
  41. s = seprint(s, e, "%lu/%d %dK user pages avail\n",
  42. pga.pgsza[i].freecount,
  43. pga.pgsza[i].npages.ref, sys->pgsz[i]/KiB);
  44. unlock(&pga.l);
  45. return s;
  46. }
  47. /*
  48. * Preallocate some pages:
  49. * some 2M ones will be used by the first process.
  50. * some 1G ones will be allocated for each domain so processes may use them.
  51. */
  52. void
  53. pageinit(void)
  54. {
  55. int si, i, color;
  56. Page *pg;
  57. pga.userinit = 1;
  58. DBG("pageinit: npgsz = %d\n", sys->npgsz);
  59. /*
  60. * Don't pre-allocate 4K pages, we are not using them anymore.
  61. */
  62. for(si = 1; si < sys->npgsz; si++){
  63. for(i = 0; i < Nstartpgs; i++){
  64. if(si < 2)
  65. color = -1;
  66. else
  67. color = i;
  68. pg = pgalloc(sys->pgsz[si], color);
  69. if(pg == nil){
  70. DBG("pageinit: pgalloc failed. breaking.\n");
  71. break; /* don't consume more memory */
  72. }
  73. DBG("pageinit: alloced pa %#P sz %#x color %d\n",
  74. pg->pa, sys->pgsz[si], pg->color);
  75. lock(&pga.l);
  76. pg->ref = 0;
  77. pagechainhead(pg);
  78. unlock(&pga.l);
  79. }
  80. }
  81. pga.userinit = 0;
  82. }
  83. int
  84. getpgszi(usize size)
  85. {
  86. int si;
  87. for(si = 0; si < sys->npgsz; si++)
  88. if(size == sys->pgsz[si])
  89. return si;
  90. print("getpgszi: size %#lx not found\n", size);
  91. return -1;
  92. }
  93. Page*
  94. pgalloc(usize size, int color)
  95. {
  96. Page *pg;
  97. int si;
  98. si = getpgszi(size);
  99. if((pg = malloc(sizeof(Page))) == nil){
  100. DBG("pgalloc: malloc failed\n");
  101. return nil;
  102. }
  103. memset(pg, 0, sizeof *pg);
  104. if((pg->pa = physalloc(size, &color, pg)) == 0){
  105. DBG("pgalloc: physalloc failed: size %#lx color %d\n", size, color);
  106. free(pg);
  107. return nil;
  108. }
  109. pg->pgszi = si; /* size index */
  110. incref(&pga.pgsza[si].npages);
  111. pg->color = color;
  112. return pg;
  113. }
  114. void
  115. pgfree(Page* pg)
  116. {
  117. decref(&pga.pgsza[pg->pgszi].npages);
  118. physfree(pg->pa, sys->pgsz[pg->pgszi]);
  119. free(pg);
  120. }
  121. void
  122. pageunchain(Page *p)
  123. {
  124. Pgsza *pa;
  125. if(canlock(&pga.l))
  126. panic("pageunchain");
  127. pa = &pga.pgsza[p->pgszi];
  128. if(p->prev)
  129. p->prev->next = p->next;
  130. else
  131. pa->head = p->next;
  132. if(p->next)
  133. p->next->prev = p->prev;
  134. else
  135. pa->tail = p->prev;
  136. p->prev = p->next = nil;
  137. pa->freecount--;
  138. }
  139. void
  140. pagechaintail(Page *p)
  141. {
  142. Pgsza *pa;
  143. if(canlock(&pga.l))
  144. panic("pagechaintail");
  145. pa = &pga.pgsza[p->pgszi];
  146. if(pa->tail) {
  147. p->prev = pa->tail;
  148. pa->tail->next = p;
  149. }
  150. else {
  151. pa->head = p;
  152. p->prev = 0;
  153. }
  154. pa->tail = p;
  155. p->next = 0;
  156. pa->freecount++;
  157. }
  158. void
  159. pagechainhead(Page *p)
  160. {
  161. Pgsza *pa;
  162. if(canlock(&pga.l))
  163. panic("pagechainhead");
  164. pa = &pga.pgsza[p->pgszi];
  165. if(pa->head) {
  166. p->next = pa->head;
  167. pa->head->prev = p;
  168. }
  169. else {
  170. pa->tail = p;
  171. p->next = 0;
  172. }
  173. pa->head = p;
  174. p->prev = 0;
  175. pa->freecount++;
  176. }
  177. static Page*
  178. findpg(Page *pl, int color)
  179. {
  180. Page *p;
  181. for(p = pl; p != nil; p = p->next)
  182. if(color == NOCOLOR || p->color == color)
  183. return p;
  184. return nil;
  185. }
  186. int trip;
  187. /*
  188. * can be called with up == nil during boot.
  189. */
  190. Page*
  191. newpage(int clear, Segment **s, uintptr_t va, usize size, int color)
  192. {
  193. Page *p;
  194. KMap *k;
  195. uint8_t ct;
  196. Pgsza *pa;
  197. int i, dontalloc, si;
  198. // static int once;
  199. si = getpgszi(size);
  200. //iprint("(remove this print and diea)newpage, size %x, si %d\n", size, si);
  201. pa = &pga.pgsza[si];
  202. lock(&pga.l);
  203. /*
  204. * Beware, new page may enter a loop even if this loop does not
  205. * loop more than once, if the segment is lost and fault calls us
  206. * again. Either way, we accept any color if we failed a couple of times.
  207. */
  208. for(i = 0;; i++){
  209. if(i > 3)
  210. color = NOCOLOR;
  211. /*
  212. * 1. try to reuse a free one.
  213. */
  214. p = findpg(pa->head, color);
  215. if(p != nil)
  216. break;
  217. /*
  218. * 2. try to allocate a new one from physical memory
  219. */
  220. p = pgalloc(size, color);
  221. if(p != nil){
  222. pagechainhead(p);
  223. break;
  224. }
  225. /*
  226. * 3. out of memory, try with the pager.
  227. * but release the segment (if any) while in the pager.
  228. */
  229. unlock(&pga.l);
  230. dontalloc = 0;
  231. if(s && *s) {
  232. qunlock(&((*s)->lk));
  233. *s = 0;
  234. dontalloc = 1;
  235. }
  236. /*
  237. * Try to get any page of the desired color
  238. * or any color for NOCOLOR.
  239. */
  240. kickpager(si, color);
  241. /*
  242. * If called from fault and we lost the segment from
  243. * underneath don't waste time allocating and freeing
  244. * a page. Fault will call newpage again when it has
  245. * reacquired the segment locks
  246. */
  247. if(dontalloc)
  248. return 0;
  249. lock(&pga.l);
  250. }
  251. assert(p != nil);
  252. ct = PG_NEWCOL;
  253. pageunchain(p);
  254. lock(&p->l);
  255. if(p->ref != 0)
  256. panic("newpage pa %#llx", p->pa);
  257. uncachepage(p);
  258. p->ref++;
  259. p->va = va;
  260. p->modref = 0;
  261. for(i = 0; i < nelem(p->cachectl); i++)
  262. p->cachectl[i] = ct;
  263. unlock(&p->l);
  264. unlock(&pga.l);
  265. if(clear) {
  266. k = kmap(p);
  267. if (VA(k) == 0xfffffe007d800000ULL) trip++;
  268. // if (trip) die("trip before memset");
  269. // This will frequently die if we use 3K-1 (3071 -- 0xbff)
  270. // it will not if we use 3070.
  271. // The fault is a null pointer deref.
  272. //memset((void*)VA(k), 0, machp()->pgsz[p->pgszi]);
  273. // thinking about it, using memset is stupid.
  274. // Don't get upset about this loop;
  275. // we make it readable, compilers optimize it.
  276. int i;
  277. uint64_t *v = (void *)VA(k);
  278. if (1)
  279. for(i = 0; i < sys->pgsz[p->pgszi]/sizeof(*v); i++)
  280. v[i] = 0;
  281. //if (trip) die("trip");
  282. kunmap(k);
  283. }
  284. DBG("newpage: va %#p pa %#llx pgsz %#x color %d\n",
  285. p->va, p->pa, sys->pgsz[p->pgszi], p->color);
  286. return p;
  287. }
  288. void
  289. putpage(Page *p)
  290. {
  291. Pgsza *pa;
  292. int rlse;
  293. lock(&pga.l);
  294. lock(&p->l);
  295. if(p->ref == 0)
  296. panic("putpage");
  297. if(--p->ref > 0) {
  298. unlock(&p->l);
  299. unlock(&pga.l);
  300. return;
  301. }
  302. rlse = 0;
  303. if(p->image != nil)
  304. pagechaintail(p);
  305. else{
  306. /*
  307. * Free pages if we have plenty in the free list.
  308. */
  309. pa = &pga.pgsza[p->pgszi];
  310. if(pa->freecount > Nfreepgs)
  311. rlse = 1;
  312. else
  313. pagechainhead(p);
  314. }
  315. if(pga.rend.l.p != nil)
  316. wakeup(&pga.rend);
  317. unlock(&p->l);
  318. if(rlse)
  319. pgfree(p);
  320. unlock(&pga.l);
  321. }
  322. /*
  323. * Get an auxiliary page.
  324. * Don't do so if less than Nminfree pages.
  325. * Only used by cache.
  326. * The interface must specify page size.
  327. */
  328. Page*
  329. auxpage(usize size)
  330. {
  331. Page *p;
  332. Pgsza *pa;
  333. int si;
  334. si = getpgszi(size);
  335. lock(&pga.l);
  336. pa = &pga.pgsza[si];
  337. p = pa->head;
  338. if(pa->freecount < Nminfree){
  339. unlock(&pga.l);
  340. return nil;
  341. }
  342. pageunchain(p);
  343. lock(&p->l);
  344. if(p->ref != 0)
  345. panic("auxpage");
  346. p->ref++;
  347. uncachepage(p);
  348. unlock(&p->l);
  349. unlock(&pga.l);
  350. return p;
  351. }
  352. static int dupretries = 15000;
  353. int
  354. duppage(Page *p) /* Always call with p locked */
  355. {
  356. Proc *up = externup();
  357. Pgsza *pa;
  358. Page *np;
  359. int color;
  360. int retries;
  361. retries = 0;
  362. retry:
  363. if(retries++ > dupretries){
  364. print("duppage %d, up %#p\n", retries, up);
  365. dupretries += 100;
  366. if(dupretries > 100000)
  367. panic("duppage\n");
  368. uncachepage(p);
  369. return 1;
  370. }
  371. /* don't dup pages with no image */
  372. if(p->ref == 0 || p->image == nil || p->image->notext)
  373. return 0;
  374. /*
  375. * normal lock ordering is to call
  376. * lock(&pga.l) before lock(&p->l).
  377. * To avoid deadlock, we have to drop
  378. * our locks and try again.
  379. */
  380. if(!canlock(&pga.l)){
  381. unlock(&p->l);
  382. if(up)
  383. sched();
  384. lock(&p->l);
  385. goto retry;
  386. }
  387. pa = &pga.pgsza[p->pgszi];
  388. /* No freelist cache when memory is very low */
  389. if(pa->freecount < Nminfree){
  390. unlock(&pga.l);
  391. uncachepage(p);
  392. return 1;
  393. }
  394. color = p->color;
  395. for(np = pa->head; np; np = np->next)
  396. if(np->color == color)
  397. break;
  398. /* No page of the correct color */
  399. if(np == 0){
  400. unlock(&pga.l);
  401. uncachepage(p);
  402. return 1;
  403. }
  404. pageunchain(np);
  405. pagechaintail(np);
  406. /*
  407. * XXX - here's a bug? - np is on the freelist but it's not really free.
  408. * when we unlock palloc someone else can come in, decide to
  409. * use np, and then try to lock it. they succeed after we've
  410. * run copypage and cachepage and unlock(np). then what?
  411. * they call pageunchain before locking(np), so it's removed
  412. * from the freelist, but still in the cache because of
  413. * cachepage below. if someone else looks in the cache
  414. * before they remove it, the page will have a nonzero ref
  415. * once they finally lock(np).
  416. *
  417. * What I know is that not doing the pagechaintail, but
  418. * doing it at the end, to prevent the race, leads to a
  419. * deadlock, even following the pga, pg lock ordering. -nemo
  420. */
  421. lock(&np->l);
  422. unlock(&pga.l);
  423. /* Cache the new version */
  424. uncachepage(np);
  425. np->va = p->va;
  426. np->daddr = p->daddr;
  427. copypage(p, np);
  428. cachepage(np, p->image);
  429. unlock(&np->l);
  430. uncachepage(p);
  431. return 0;
  432. }
  433. void
  434. copypage(Page *f, Page *t)
  435. {
  436. KMap *ks, *kd;
  437. if(f->pgszi != t->pgszi || t->pgszi < 0)
  438. panic("copypage");
  439. ks = kmap(f);
  440. kd = kmap(t);
  441. memmove((void*)VA(kd), (void*)VA(ks), sys->pgsz[t->pgszi]);
  442. kunmap(ks);
  443. kunmap(kd);
  444. }
  445. void
  446. uncachepage(Page *p) /* Always called with a locked page */
  447. {
  448. Page **l, *f;
  449. if(p->image == 0)
  450. return;
  451. lock(&pga.hashlock);
  452. l = &pghash(p->daddr);
  453. for(f = *l; f; f = f->hash){
  454. if(f == p){
  455. *l = p->hash;
  456. break;
  457. }
  458. l = &f->hash;
  459. }
  460. unlock(&pga.hashlock);
  461. putimage(p->image);
  462. p->image = 0;
  463. p->daddr = 0;
  464. }
  465. void
  466. cachepage(Page *p, Image *i)
  467. {
  468. Page **l;
  469. /* If this ever happens it should be fixed by calling
  470. * uncachepage instead of panic. I think there is a race
  471. * with pio in which this can happen. Calling uncachepage is
  472. * correct - I just wanted to see if we got here.
  473. */
  474. if(p->image)
  475. panic("cachepage");
  476. incref(&i->r);
  477. lock(&pga.hashlock);
  478. p->image = i;
  479. l = &pghash(p->daddr);
  480. p->hash = *l;
  481. *l = p;
  482. unlock(&pga.hashlock);
  483. }
  484. void
  485. cachedel(Image *i, uint32_t daddr)
  486. {
  487. Page *f, **l;
  488. lock(&pga.hashlock);
  489. l = &pghash(daddr);
  490. for(f = *l; f; f = f->hash){
  491. if(f->image == i && f->daddr == daddr){
  492. lock(&f->l);
  493. if(f->image == i && f->daddr == daddr){
  494. *l = f->hash;
  495. putimage(f->image);
  496. f->image = nil;
  497. f->daddr = 0;
  498. }
  499. unlock(&f->l);
  500. break;
  501. }
  502. l = &f->hash;
  503. }
  504. unlock(&pga.hashlock);
  505. }
  506. Page *
  507. lookpage(Image *i, uint32_t daddr)
  508. {
  509. Page *f;
  510. lock(&pga.hashlock);
  511. for(f = pghash(daddr); f; f = f->hash){
  512. if(f->image == i && f->daddr == daddr){
  513. unlock(&pga.hashlock);
  514. lock(&pga.l);
  515. lock(&f->l);
  516. if(f->image != i || f->daddr != daddr){
  517. unlock(&f->l);
  518. unlock(&pga.l);
  519. return 0;
  520. }
  521. if(++f->ref == 1)
  522. pageunchain(f);
  523. unlock(&pga.l);
  524. unlock(&f->l);
  525. return f;
  526. }
  527. }
  528. unlock(&pga.hashlock);
  529. return nil;
  530. }
  531. /*
  532. * Called from imagereclaim, to try to release Images.
  533. * The argument shows the preferred image to release pages from.
  534. * All images will be tried, from lru to mru.
  535. */
  536. uint64_t
  537. pagereclaim(Image *i)
  538. {
  539. Page *p;
  540. uint64_t ticks;
  541. lock(&pga.l);
  542. ticks = fastticks(nil);
  543. /*
  544. * All the pages with images backing them are at the
  545. * end of the list (see putpage) so start there and work
  546. * backward.
  547. */
  548. for(p = pga.pgsza[0].tail; p && p->image == i; p = p->prev){
  549. if(p->ref == 0 && canlock(&p->l)){
  550. if(p->ref == 0) {
  551. uncachepage(p);
  552. }
  553. unlock(&p->l);
  554. }
  555. }
  556. ticks = fastticks(nil) - ticks;
  557. unlock(&pga.l);
  558. return ticks;
  559. }
  560. Pte*
  561. ptecpy(Segment *s, Pte *old)
  562. {
  563. Pte *new;
  564. Page **src, **dst;
  565. new = ptealloc(s);
  566. dst = &new->pages[old->first-old->pages];
  567. new->first = dst;
  568. for(src = old->first; src <= old->last; src++, dst++)
  569. if(*src){
  570. if(onswap(*src))
  571. panic("ptecpy: no swap");
  572. else{
  573. lock(&(*src)->l);
  574. (*src)->ref++;
  575. unlock(&(*src)->l);
  576. }
  577. new->last = dst;
  578. *dst = *src;
  579. }
  580. return new;
  581. }
  582. Pte*
  583. ptealloc(Segment *s)
  584. {
  585. Pte *new;
  586. new = smalloc(sizeof(Pte) + sizeof(Page*)*s->ptepertab);
  587. new->first = &new->pages[s->ptepertab];
  588. new->last = new->pages;
  589. return new;
  590. }
  591. void
  592. freepte(Segment *s, Pte *p)
  593. {
  594. int ref;
  595. void (*fn)(Page*);
  596. Page *pt, **pg, **ptop;
  597. switch(s->type&SG_TYPE) {
  598. case SG_PHYSICAL:
  599. fn = s->pseg->pgfree;
  600. ptop = &p->pages[s->ptepertab];
  601. if(fn) {
  602. for(pg = p->pages; pg < ptop; pg++) {
  603. if(*pg == 0)
  604. continue;
  605. (*fn)(*pg);
  606. *pg = 0;
  607. }
  608. break;
  609. }
  610. for(pg = p->pages; pg < ptop; pg++) {
  611. pt = *pg;
  612. if(pt == 0)
  613. continue;
  614. lock(&pt->l);
  615. ref = --pt->ref;
  616. unlock(&pt->l);
  617. if(ref == 0)
  618. free(pt);
  619. }
  620. break;
  621. default:
  622. for(pg = p->first; pg <= p->last; pg++)
  623. if(*pg) {
  624. putpage(*pg);
  625. *pg = 0;
  626. }
  627. }
  628. free(p);
  629. }