physalloc.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * Buddy allocator for physical memory allocation.
  11. * One per ACPI affinity domain, to color pages depending on their
  12. * NUMA location.
  13. *
  14. */
  15. #include "u.h"
  16. #include "../port/lib.h"
  17. #include "mem.h"
  18. #include "dat.h"
  19. #include "fns.h"
  20. #include "acpi.h"
  21. #define ISPOWEROF2(x) (((x) != 0) && !((x) & ((x)-1)))
  22. #define UNO ((uintmem)1)
  23. enum {
  24. BKmin = 21, /* Minimum lg2 */
  25. BKmax = 30, /* Maximum lg2 */
  26. Ndoms = 16, /* Max # of domains */
  27. Used = 0,
  28. Avail = 1,
  29. };
  30. #define INDEX(b, v) ((uint)(((v))/(b)->bminsz))
  31. #define BLOCK(b, i) ((i)-INDEX((b),(b)->memory))
  32. typedef struct Buddy Buddy;
  33. struct Buddy {
  34. int16_t tag; /* Used or Avail */
  35. int16_t kval;
  36. uint next;
  37. uint prev;
  38. void *p;
  39. };
  40. /*
  41. * Bals should allocate using its base address as 0.
  42. * For now, all of them refer to the entire memory and we record
  43. * the base and size for each one.
  44. */
  45. typedef struct Bal Bal;
  46. struct Bal {
  47. uintmem base;
  48. uint64_t size;
  49. usize nfree;
  50. usize nblocks;
  51. int kmin; /* Minimum lg2 */
  52. int kmax; /* Maximum lg2 */
  53. uintmem bminsz; /* minimum block sz */
  54. uintmem memory;
  55. uint kspan;
  56. Buddy* blocks;
  57. Buddy* avail;
  58. };
  59. static Bal bal[Ndoms];
  60. static int ndoms;
  61. static Lock budlock;
  62. char*
  63. seprintphysstats(char *s, char *e)
  64. {
  65. Bal *b;
  66. int i;
  67. lock(&budlock);
  68. for(i = 0; i < Ndoms; i++){
  69. b = &bal[i];
  70. if(b->size > 0)
  71. s = seprint(s, e, "%lu/%lu %lluK color %d blocks avail\n",
  72. b->nfree, b->nblocks, b->bminsz/KiB, i);
  73. }
  74. unlock(&budlock);
  75. return s;
  76. }
  77. static void
  78. xphysfree(Bal *b, uintmem data, uint64_t size)
  79. {
  80. uint i;
  81. Buddy *l, *p;
  82. Buddy *blocks, *avail;
  83. DBG("physfree\n");
  84. /*
  85. * Knuth's Algorithm S (Buddy System Liberation).
  86. */
  87. blocks = b->blocks;
  88. avail = b->avail;
  89. if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
  90. return;
  91. i = INDEX(b,data);
  92. lock(&budlock);
  93. S1:
  94. /*
  95. * Find buddy.
  96. */
  97. l = &blocks[BLOCK(b,i)];
  98. l->p = nil;
  99. DBG("\tbsl: BLOCK(b,i) %d index %llu kval %d\n",
  100. BLOCK(b,i), BLOCK(b,i)/((1<<l->kval)/b->bminsz), l->kval);
  101. if((BLOCK(b,i)/((1<<l->kval)/b->bminsz)) & 1) /* simpler test? */
  102. p = l - (1<<l->kval)/b->bminsz;
  103. else
  104. p = l + (1<<l->kval)/(b->bminsz);
  105. DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks);
  106. /*
  107. * Is buddy available?
  108. * Can't merge if:
  109. * this is the largest block;
  110. * buddy isn't free;
  111. * buddy has been subsequently split again.
  112. */
  113. if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){
  114. /*
  115. * Put on list.
  116. */
  117. l->tag = Avail;
  118. l->next = avail[l->kval].next;
  119. l->prev = 0;
  120. if(l->next != 0)
  121. blocks[BLOCK(b,l->next)].prev = i;
  122. avail[l->kval].next = i;
  123. b->nfree += size/b->bminsz;
  124. unlock(&budlock);
  125. DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
  126. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  127. return;
  128. }
  129. /*
  130. * Combine with buddy.
  131. * This removes block P from the avail list.
  132. */
  133. if(p->prev != 0){
  134. blocks[BLOCK(b,p->prev)].next = p->next;
  135. p->prev = 0;
  136. }
  137. else
  138. avail[p->kval].next = 0;
  139. if(p->next != 0){
  140. blocks[BLOCK(b,p->next)].prev = p->prev;
  141. p->next = 0;
  142. }
  143. p->tag = Used;
  144. /*
  145. * Now can try to merge this larger block.
  146. k++;
  147. */
  148. DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks);
  149. if(p < l)
  150. l = p;
  151. i = l - blocks + INDEX(b,b->memory);
  152. l->kval++;
  153. DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n",
  154. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  155. goto S1;
  156. }
  157. void
  158. physfree(uintmem data, uint64_t size)
  159. {
  160. Bal *b;
  161. int i;
  162. for(i = 0; i < Ndoms; i++){
  163. b = &bal[i];
  164. if(b->base <= data && data < b->base + b->size){
  165. xphysfree(b, data, size);
  166. return;
  167. }
  168. }
  169. panic("physfree: no bal");
  170. }
  171. static void*
  172. xphystag(Bal *b, uintmem data)
  173. {
  174. uint i;
  175. Buddy *blocks;
  176. DBG("phystag\n");
  177. blocks = b->blocks;
  178. if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
  179. return nil;
  180. i = INDEX(b,data);
  181. return blocks[BLOCK(b,i)].p;
  182. }
  183. void*
  184. phystag(uintmem data)
  185. {
  186. Bal *b;
  187. int i;
  188. for(i = 0; i < Ndoms; i++){
  189. b = &bal[i];
  190. if(b->base <= data && data < b->base + b->size)
  191. return xphystag(b, data);
  192. }
  193. return nil;
  194. }
  195. static uint8_t lg2table[256] = {
  196. 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  197. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  198. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  199. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  200. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  201. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  202. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  203. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  204. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  205. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  206. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  207. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  208. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  209. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  210. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  211. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  212. };
  213. static int
  214. lg2floor(uint64_t w)
  215. {
  216. uint64_t hi, lo;
  217. if((lo = (w>>48)) != 0){
  218. if((hi = (lo>>8)) != 0)
  219. return 56+lg2table[hi];
  220. return 48+lg2table[lo];
  221. }
  222. if((lo = (w>>32)) != 0){
  223. if((hi = (lo>>8)) != 0)
  224. return 40+lg2table[hi];
  225. return 32+lg2table[lo];
  226. }
  227. if((lo = (w>>16)) != 0){
  228. if((hi = (lo>>8)) != 0)
  229. return 24+lg2table[hi];
  230. return 16+lg2table[lo];
  231. }
  232. if((hi = (w>>8)) != 0)
  233. return 8+lg2table[hi];
  234. return lg2table[w];
  235. }
  236. static uintmem
  237. xphysalloc(Bal *b, uint64_t size, void *tag)
  238. {
  239. uint i, j, k;
  240. Buddy *l, *p;
  241. Buddy *avail, *blocks;
  242. uintmem m;
  243. DBG("physalloc\n");
  244. assert(b->size > 0);
  245. avail = b->avail;
  246. blocks = b->blocks;
  247. /*
  248. * Knuth's Algorithm R (Buddy System Reservation).
  249. */
  250. if(size < b->bminsz)
  251. size = b->bminsz;
  252. /*
  253. * Find block.
  254. */
  255. if(!ISPOWEROF2(size))
  256. return 0;
  257. k = lg2floor(size);
  258. lock(&budlock);
  259. for(j = k; j <= b->kmax; j++){
  260. if(avail[j].next != 0)
  261. break;
  262. }
  263. DBG("bsr: size %#llud k %d j %d\n", size, k, j);
  264. if(j > b->kmax){
  265. unlock(&budlock);
  266. return 0;
  267. }
  268. /*
  269. * Remove from list.
  270. */
  271. i = avail[j].next;
  272. l = &blocks[BLOCK(b,i)];
  273. DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
  274. i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
  275. avail[j].next = l->next;
  276. blocks[avail[j].next].prev = 0;
  277. l->prev = l->next = 0;
  278. l->tag = Used;
  279. l->kval = k;
  280. /*
  281. * Split required?
  282. */
  283. while(j > k){
  284. /*
  285. * Split.
  286. */
  287. j--;
  288. p = &blocks[BLOCK(b,i) + (UNO<<j)/(b->bminsz)];
  289. p->tag = Avail;
  290. p->kval = j;
  291. p->next = avail[j].next;
  292. p->prev = 0;
  293. if(p->next != 0)
  294. blocks[BLOCK(b,p->next)].prev = i + (UNO<<j)/(b->bminsz);
  295. avail[j].next = i + (UNO<<j)/(b->bminsz);
  296. DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n",
  297. i, p - blocks, j, p->next, BLOCK(b,p->next),
  298. p->tag?"avail":"used");
  299. }
  300. b->nfree -= size/b->bminsz;
  301. unlock(&budlock);
  302. m = b->memory + b->bminsz*BLOCK(b,i);
  303. assert(m >= b->base && m < b->base + b->size);
  304. blocks[BLOCK(b,i)].p = tag;
  305. return m;
  306. }
  307. uintmem
  308. physalloc(uint64_t size, int *colorp, void *tag)
  309. {
  310. int i, color;
  311. uintmem m;
  312. m = 0;
  313. color = *colorp;
  314. if(color >= 0){
  315. color %= ndoms;
  316. if(bal[color].kmin > 0){
  317. *colorp = color;
  318. m = xphysalloc(&bal[color], size, tag);
  319. }
  320. }
  321. if(m == 0)
  322. for(i = 0; i < ndoms; i++)
  323. if(bal[i].kmin > 0)
  324. if((m = xphysalloc(&bal[i], size, tag)) != 0){
  325. *colorp = i;
  326. return m;
  327. }
  328. return m;
  329. }
  330. #if 0
  331. static void
  332. dump(Bal *b)
  333. {
  334. uint bi, i, k;
  335. Buddy *blocks;
  336. blocks = b->blocks;
  337. for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){
  338. if(blocks[i].tag == Used)
  339. continue;
  340. print("blocks[%d]: size %d prev %d next %d\n",
  341. i, 1<<b->blocks[i].kval, blocks[i].prev, blocks[i].next);
  342. //i += (1<<blocks[i].kval)/b->bminsz-1;
  343. }
  344. for(k = 0; k <= b->kmax; k++){
  345. print("a[%d]:", k);
  346. for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){
  347. print(" %d", bi);
  348. }
  349. print("\n");
  350. }
  351. }
  352. #endif
  353. void
  354. physallocdump(void)
  355. {
  356. int n;
  357. for(n = 0; n < Ndoms; n++)
  358. if(bal[n].size > 0)
  359. print("physalloc color=%d base=%#ullx size=%#ullx\n",
  360. n, bal[n].base, bal[n].size);
  361. }
  362. static int
  363. plop(Bal *b, uintmem a, int k, int type)
  364. {
  365. uint i;
  366. Buddy *l;
  367. DBG("plop(a %#p k %d type %d)\n", a, k, type);
  368. i = INDEX(b,a);
  369. l = &b->blocks[BLOCK(b,i)];
  370. l->kval = k;
  371. xphysfree(b, a, 1<<k);
  372. return 1;
  373. }
  374. static int
  375. iimbchunk(Bal *b, uintmem a, uintmem e, int type)
  376. {
  377. int k;
  378. uint s;
  379. a = ROUNDUP(a, b->bminsz);
  380. e = ROUNDDN(e, b->bminsz);
  381. DBG("iimbchunk: start a %#P e %#P\n", a, e);
  382. b->nblocks += (e-a)/b->bminsz;
  383. for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){
  384. if(a & s){
  385. plop(b, a, k, type);
  386. a += s;
  387. }
  388. }
  389. DBG("done1 a %#P e %#P s %#ux %d\n", a, e, s, k);
  390. while(a+s <= e){
  391. plop(b, a, k, type);
  392. a += s;
  393. }
  394. DBG("done2 a %#P e %#P s %#ux %d\n", a, e, s, k);
  395. for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){
  396. if(a+s <= e){
  397. plop(b, a, k, type);
  398. a += s;
  399. }
  400. }
  401. DBG("done3 a %#P e %#P s %#ux %d\n", a, e, s, k);
  402. return 0;
  403. }
  404. /*
  405. * Called from umeminit to initialize user memory allocators.
  406. */
  407. void
  408. physinit(uintmem a, uint64_t size)
  409. {
  410. uintmem dtsz;
  411. Bal *b;
  412. int i, dom;
  413. uintmem addr, len;
  414. DBG("physinit %#ullx %#ullx\n", a, size);
  415. for(addr = a; addr < a+size; addr += len){
  416. dom = 0;
  417. len = 0; // acpimblocksize(addr, &dom);
  418. /* len can be zero if there's no acpi information about addr */
  419. if(len == 0 || addr + len > a + size)
  420. len = a + size - addr;
  421. /*
  422. * Each block belongs to a different domain (ie. cpu/mem socket)
  423. * We must create a buddy allocator for each block, so we could
  424. * allocate memory from different domains.
  425. *
  426. * This code assumes that a domain may be extended later and
  427. * that there is no interleaving of domains. Ok by now.
  428. */
  429. DBG("physmem block dom %d addr %#ullx size %#ullx\n",
  430. dom, addr, len);
  431. if(dom < 0 || dom >= Ndoms){
  432. print("physinit: invalid dom %d\n", dom);
  433. dom = 0;
  434. }
  435. b = &bal[dom];
  436. if(dom >= ndoms)
  437. ndoms = dom+1;
  438. if(b->kmin == 0){
  439. b->base = addr;
  440. b->size = len;
  441. b->kmin = BKmin;
  442. b->kmax = BKmax;
  443. b->bminsz = (UNO<<b->kmin);
  444. b->memory = sys->pmstart;
  445. b->kspan = lg2floor(sys->pmend);
  446. if(!ISPOWEROF2(sys->pmend))
  447. b->kspan++;
  448. dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1));
  449. DBG("kspan %u (arrysz = %llu)\n", b->kspan, dtsz);
  450. b->blocks = malloc(dtsz);
  451. if(b->blocks == nil)
  452. panic("physinit: no blocks");
  453. memset(b->blocks, 0, dtsz);
  454. b->avail = malloc(sizeof(Buddy)*(b->kmax+1));
  455. if(b->avail == nil)
  456. panic("physinit: no avail");
  457. memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1));
  458. }else{
  459. if(addr < b->base)
  460. panic("physinit: decreasing base");
  461. if(b->base+b->size < addr + len)
  462. b->size = (addr-b->base) + len;
  463. for(i = 0; i < Ndoms; i++)
  464. if(bal[i].kmin && &bal[i] != b)
  465. if(bal[i].base < b->base + b->size &&
  466. bal[i].base + bal[i].size > b->base + b->size)
  467. panic("physinit: doms overlap");
  468. }
  469. assert(addr >= b->base && addr+len <= b->base + b->size);
  470. iimbchunk(b, addr, addr+len, 0);
  471. }
  472. }