physalloc.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * Buddy allocator for physical memory allocation.
  11. * One per ACPI affinity domain, to color pages depending on their
  12. * NUMA location.
  13. *
  14. */
  15. #include "u.h"
  16. #include "../port/lib.h"
  17. #include "mem.h"
  18. #include "dat.h"
  19. #include "fns.h"
  20. #define ISPOWEROF2(x) (((x) != 0) && !((x) & ((x)-1)))
  21. #define UNO ((uintmem)1)
  22. #undef DBG
  23. #define DBG print
  24. enum {
  25. BKmin = 21, /* Minimum lg2 */
  26. BKmax = 30, /* Maximum lg2 */
  27. Ndoms = 16, /* Max # of domains */
  28. Used = 0,
  29. Avail = 1,
  30. };
  31. #define INDEX(b, v) ((uint)(((v))/(b)->bminsz))
  32. #define BLOCK(b, i) ((i)-INDEX((b),(b)->memory))
  33. typedef struct Buddy Buddy;
  34. struct Buddy {
  35. int16_t tag; /* Used or Avail */
  36. int16_t kval;
  37. uint next;
  38. uint prev;
  39. void *p;
  40. };
  41. /*
  42. * Bals should allocate using its base address as 0.
  43. * For now, all of them refer to the entire memory and we record
  44. * the base and size for each one.
  45. */
  46. typedef struct Bal Bal;
  47. struct Bal {
  48. uintmem base;
  49. uint64_t size;
  50. usize nfree;
  51. usize nblocks;
  52. int kmin; /* Minimum lg2 */
  53. int kmax; /* Maximum lg2 */
  54. uintmem bminsz; /* minimum block sz */
  55. uintmem memory;
  56. uint kspan;
  57. Buddy* blocks;
  58. Buddy* avail;
  59. };
  60. static Bal bal[Ndoms];
  61. static int ndoms;
  62. static Lock budlock;
  63. char*
  64. seprintphysstats(char *s, char *e)
  65. {
  66. Bal *b;
  67. int i;
  68. lock(&budlock);
  69. for(i = 0; i < Ndoms; i++){
  70. b = &bal[i];
  71. if(b->size > 0)
  72. s = seprint(s, e, "%lu/%lu %lluK color %d blocks avail\n",
  73. b->nfree, b->nblocks, b->bminsz/KiB, i);
  74. }
  75. unlock(&budlock);
  76. return s;
  77. }
  78. static void
  79. xphysfree(Bal *b, uintmem data, uint64_t size)
  80. {
  81. uint i;
  82. Buddy *l, *p;
  83. Buddy *blocks, *avail;
  84. DBG("physfree\n");
  85. /*
  86. * Knuth's Algorithm S (Buddy System Liberation).
  87. */
  88. blocks = b->blocks;
  89. avail = b->avail;
  90. if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
  91. return;
  92. i = INDEX(b,data);
  93. lock(&budlock);
  94. S1:
  95. /*
  96. * Find buddy.
  97. */
  98. l = &blocks[BLOCK(b,i)];
  99. l->p = nil;
  100. DBG("\tbsl: BLOCK(b,i) %d index %llu kval %d\n",
  101. BLOCK(b,i), BLOCK(b,i)/((1<<l->kval)/b->bminsz), l->kval);
  102. if((BLOCK(b,i)/((1<<l->kval)/b->bminsz)) & 1) /* simpler test? */
  103. p = l - (1<<l->kval)/b->bminsz;
  104. else
  105. p = l + (1<<l->kval)/(b->bminsz);
  106. DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks);
  107. /*
  108. * Is buddy available?
  109. * Can't merge if:
  110. * this is the largest block;
  111. * buddy isn't free;
  112. * buddy has been subsequently split again.
  113. */
  114. if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){
  115. /*
  116. * Put on list.
  117. */
  118. l->tag = Avail;
  119. l->next = avail[l->kval].next;
  120. l->prev = 0;
  121. if(l->next != 0)
  122. blocks[BLOCK(b,l->next)].prev = i;
  123. avail[l->kval].next = i;
  124. b->nfree += size/b->bminsz;
  125. unlock(&budlock);
  126. DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
  127. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  128. return;
  129. }
  130. /*
  131. * Combine with buddy.
  132. * This removes block P from the avail list.
  133. */
  134. if(p->prev != 0){
  135. blocks[BLOCK(b,p->prev)].next = p->next;
  136. p->prev = 0;
  137. }
  138. else
  139. avail[p->kval].next = 0;
  140. if(p->next != 0){
  141. blocks[BLOCK(b,p->next)].prev = p->prev;
  142. p->next = 0;
  143. }
  144. p->tag = Used;
  145. /*
  146. * Now can try to merge this larger block.
  147. k++;
  148. */
  149. DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks);
  150. if(p < l)
  151. l = p;
  152. i = l - blocks + INDEX(b,b->memory);
  153. l->kval++;
  154. DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n",
  155. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  156. goto S1;
  157. }
  158. void
  159. physfree(uintmem data, uint64_t size)
  160. {
  161. Bal *b;
  162. int i;
  163. for(i = 0; i < Ndoms; i++){
  164. b = &bal[i];
  165. if(b->base <= data && data < b->base + b->size){
  166. xphysfree(b, data, size);
  167. return;
  168. }
  169. }
  170. panic("physfree: no bal");
  171. }
  172. static void*
  173. xphystag(Bal *b, uintmem data)
  174. {
  175. uint i;
  176. Buddy *blocks;
  177. DBG("phystag\n");
  178. blocks = b->blocks;
  179. if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
  180. return nil;
  181. i = INDEX(b,data);
  182. return blocks[BLOCK(b,i)].p;
  183. }
  184. void*
  185. phystag(uintmem data)
  186. {
  187. Bal *b;
  188. int i;
  189. for(i = 0; i < Ndoms; i++){
  190. b = &bal[i];
  191. if(b->base <= data && data < b->base + b->size)
  192. return xphystag(b, data);
  193. }
  194. return nil;
  195. }
  196. static uint8_t lg2table[256] = {
  197. 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  198. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  199. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  200. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  201. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  202. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  203. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  204. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  205. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  206. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  207. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  208. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  209. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  210. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  211. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  212. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  213. };
  214. static int
  215. lg2floor(uint64_t w)
  216. {
  217. uint64_t hi, lo;
  218. if((lo = (w>>48)) != 0){
  219. if((hi = (lo>>8)) != 0)
  220. return 56+lg2table[hi];
  221. return 48+lg2table[lo];
  222. }
  223. if((lo = (w>>32)) != 0){
  224. if((hi = (lo>>8)) != 0)
  225. return 40+lg2table[hi];
  226. return 32+lg2table[lo];
  227. }
  228. if((lo = (w>>16)) != 0){
  229. if((hi = (lo>>8)) != 0)
  230. return 24+lg2table[hi];
  231. return 16+lg2table[lo];
  232. }
  233. if((hi = (w>>8)) != 0)
  234. return 8+lg2table[hi];
  235. return lg2table[w];
  236. }
  237. static uintmem
  238. xphysalloc(Bal *b, uint64_t size, void *tag)
  239. {
  240. uint i, j, k;
  241. Buddy *l, *p;
  242. Buddy *avail, *blocks;
  243. uintmem m;
  244. DBG("physalloc\n");
  245. assert(b->size > 0);
  246. avail = b->avail;
  247. blocks = b->blocks;
  248. /*
  249. * Knuth's Algorithm R (Buddy System Reservation).
  250. */
  251. if(size < b->bminsz)
  252. size = b->bminsz;
  253. /*
  254. * Find block.
  255. */
  256. if(!ISPOWEROF2(size))
  257. return 0;
  258. k = lg2floor(size);
  259. lock(&budlock);
  260. for(j = k; j <= b->kmax; j++){
  261. if(avail[j].next != 0)
  262. break;
  263. }
  264. DBG("bsr: size %#llud k %d j %d\n", size, k, j);
  265. if(j > b->kmax){
  266. unlock(&budlock);
  267. return 0;
  268. }
  269. /*
  270. * Remove from list.
  271. */
  272. i = avail[j].next;
  273. l = &blocks[BLOCK(b,i)];
  274. DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
  275. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  276. avail[j].next = l->next;
  277. blocks[avail[j].next].prev = 0;
  278. l->prev = l->next = 0;
  279. l->tag = Used;
  280. l->kval = k;
  281. /*
  282. * Split required?
  283. */
  284. while(j > k){
  285. /*
  286. * Split.
  287. */
  288. j--;
  289. p = &blocks[BLOCK(b,i) + (UNO<<j)/(b->bminsz)];
  290. p->tag = Avail;
  291. p->kval = j;
  292. p->next = avail[j].next;
  293. p->prev = 0;
  294. if(p->next != 0)
  295. blocks[BLOCK(b,p->next)].prev = i + (UNO<<j)/(b->bminsz);
  296. avail[j].next = i + (UNO<<j)/(b->bminsz);
  297. DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n",
  298. i, p - blocks, j, p->next, BLOCK(b,p->next),
  299. p->tag?"avail":"used");
  300. }
  301. b->nfree -= size/b->bminsz;
  302. unlock(&budlock);
  303. m = b->memory + b->bminsz*BLOCK(b,i);
  304. assert(m >= b->base && m < b->base + b->size);
  305. blocks[BLOCK(b,i)].p = tag;
  306. return m;
  307. }
  308. uintmem
  309. physalloc(uint64_t size, int *colorp, void *tag)
  310. {
  311. int i, color;
  312. uintmem m;
  313. m = 0;
  314. color = *colorp;
  315. if(color >= 0){
  316. color %= ndoms;
  317. if(bal[color].kmin > 0){
  318. *colorp = color;
  319. m = xphysalloc(&bal[color], size, tag);
  320. }
  321. }
  322. if(m == 0)
  323. for(i = 0; i < ndoms; i++)
  324. if(bal[i].kmin > 0)
  325. if((m = xphysalloc(&bal[i], size, tag)) != 0){
  326. *colorp = i;
  327. return m;
  328. }
  329. print("physalloc: return %p\n", m);
  330. return m;
  331. }
  332. #if 0
  333. static void
  334. dump(Bal *b)
  335. {
  336. uint bi, i, k;
  337. Buddy *blocks;
  338. blocks = b->blocks;
  339. for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){
  340. if(blocks[i].tag == Used)
  341. continue;
  342. print("blocks[%d]: size %d prev %d next %d\n",
  343. i, 1<<b->blocks[i].kval, blocks[i].prev, blocks[i].next);
  344. //i += (1<<blocks[i].kval)/b->bminsz-1;
  345. }
  346. for(k = 0; k <= b->kmax; k++){
  347. print("a[%d]:", k);
  348. for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){
  349. print(" %d", bi);
  350. }
  351. print("\n");
  352. }
  353. }
  354. #endif
  355. void
  356. physallocdump(void)
  357. {
  358. int n;
  359. for(n = 0; n < Ndoms; n++)
  360. if(bal[n].size > 0)
  361. print("physalloc color=%d base=%#llx size=%#llx\n",
  362. n, bal[n].base, bal[n].size);
  363. }
  364. static int
  365. plop(Bal *b, uintmem a, int k, int type)
  366. {
  367. uint i;
  368. Buddy *l;
  369. DBG("plop(a %#p k %d type %d)\n", a, k, type);
  370. i = INDEX(b,a);
  371. l = &b->blocks[BLOCK(b,i)];
  372. l->kval = k;
  373. xphysfree(b, a, 1<<k);
  374. return 1;
  375. }
  376. static int
  377. iimbchunk(Bal *b, uintmem a, uintmem e, int type)
  378. {
  379. int k;
  380. uint s;
  381. a = ROUNDUP(a, b->bminsz);
  382. e = ROUNDDN(e, b->bminsz);
  383. DBG("iimbchunk: start a %#P e %#P\n", a, e);
  384. b->nblocks += (e-a)/b->bminsz;
  385. for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){
  386. if(a & s){
  387. plop(b, a, k, type);
  388. a += s;
  389. }
  390. }
  391. DBG("done1 a %#P e %#P s %#x %d\n", a, e, s, k);
  392. while(a+s <= e){
  393. plop(b, a, k, type);
  394. a += s;
  395. }
  396. DBG("done2 a %#P e %#P s %#x %d\n", a, e, s, k);
  397. for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){
  398. if(a+s <= e){
  399. plop(b, a, k, type);
  400. a += s;
  401. }
  402. }
  403. DBG("done3 a %#P e %#P s %#x %d\n", a, e, s, k);
  404. return 0;
  405. }
  406. /*
  407. * Called from umeminit to initialize user memory allocators.
  408. */
  409. void
  410. physinit(uintmem a, uint64_t size)
  411. {
  412. uintmem dtsz;
  413. Bal *b;
  414. int i, dom;
  415. uintmem addr, len;
  416. DBG("physinit %#llx %#llx\n", a, size);
  417. for(addr = a; addr < a+size; addr += len){
  418. dom = 0;
  419. len = 0; // acpimblocksize(addr, &dom);
  420. /* len can be zero if there's no acpi information about addr */
  421. if(len == 0 || addr + len > a + size)
  422. len = a + size - addr;
  423. /*
  424. * Each block belongs to a different domain (ie. cpu/mem socket)
  425. * We must create a buddy allocator for each block, so we could
  426. * allocate memory from different domains.
  427. *
  428. * This code assumes that a domain may be extended later and
  429. * that there is no interleaving of domains. Ok by now.
  430. */
  431. DBG("physmem block dom %d addr %#llx size %#llx\n",
  432. dom, addr, len);
  433. if(dom < 0 || dom >= Ndoms){
  434. print("physinit: invalid dom %d\n", dom);
  435. dom = 0;
  436. }
  437. b = &bal[dom];
  438. if(dom >= ndoms)
  439. ndoms = dom+1;
  440. if(b->kmin == 0){
  441. b->base = addr;
  442. b->size = len;
  443. b->kmin = BKmin;
  444. b->kmax = BKmax;
  445. b->bminsz = (UNO<<b->kmin);
  446. b->memory = sys->pmstart;
  447. b->kspan = lg2floor(sys->pmend);
  448. if(!ISPOWEROF2(sys->pmend))
  449. b->kspan++;
  450. dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1));
  451. DBG("kspan %u (arrysz = %llu)\n", b->kspan, dtsz);
  452. b->blocks = malloc(dtsz);
  453. if(b->blocks == nil)
  454. panic("physinit: no blocks");
  455. memset(b->blocks, 0, dtsz);
  456. b->avail = malloc(sizeof(Buddy)*(b->kmax+1));
  457. if(b->avail == nil)
  458. panic("physinit: no avail");
  459. memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1));
  460. }else{
  461. if(addr < b->base)
  462. panic("physinit: decreasing base");
  463. if(b->base+b->size < addr + len)
  464. b->size = (addr-b->base) + len;
  465. for(i = 0; i < Ndoms; i++)
  466. if(bal[i].kmin && &bal[i] != b)
  467. if(bal[i].base < b->base + b->size &&
  468. bal[i].base + bal[i].size > b->base + b->size)
  469. panic("physinit: doms overlap");
  470. }
  471. assert(addr >= b->base && addr+len <= b->base + b->size);
  472. iimbchunk(b, addr, addr+len, 0);
  473. }
  474. }