reg.c 18 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(void *a1, void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = a1;
  20. p2 = a2;
  21. c1 = p2->cost;
  22. c2 = p1->cost;
  23. if(c1 -= c2)
  24. return c1;
  25. return p2->varno - p1->varno;
  26. }
  27. void
  28. regopt(Prog *p)
  29. {
  30. Reg *r, *r1, *r2;
  31. Prog *p1;
  32. int i, z;
  33. long initpc, val, npc;
  34. ulong vreg;
  35. Bits bit;
  36. struct
  37. {
  38. long m;
  39. long c;
  40. Reg* p;
  41. } log5[6], *lp;
  42. firstr = R;
  43. lastr = R;
  44. nvar = 0;
  45. regbits = RtoB(REGSP) |
  46. RtoB(REGSB) |
  47. RtoB(REGZERO) |
  48. RtoB(REGRET) |
  49. RtoB(REGTMP) |
  50. RtoB(REGLINK) |
  51. RtoB(REGBAD1) |
  52. RtoB(REGBAD2) |
  53. RtoB(REGBAD3) |
  54. RtoB(REGBAD4) |
  55. 0;
  56. for(z=0; z<BITS; z++) {
  57. externs.b[z] = 0;
  58. params.b[z] = 0;
  59. consts.b[z] = 0;
  60. addrs.b[z] = 0;
  61. }
  62. /*
  63. * pass 1
  64. * build aux data structure
  65. * allocate pcs
  66. * find use and set of variables
  67. */
  68. val = 5L * 5L * 5L * 5L * 5L;
  69. lp = log5;
  70. for(i=0; i<5; i++) {
  71. lp->m = val;
  72. lp->c = 0;
  73. lp->p = R;
  74. val /= 5L;
  75. lp++;
  76. }
  77. val = 0;
  78. for(; p != P; p = p->link) {
  79. switch(p->as) {
  80. case ADATA:
  81. case AGLOBL:
  82. case ANAME:
  83. continue;
  84. }
  85. r = rega();
  86. if(firstr == R) {
  87. firstr = r;
  88. lastr = r;
  89. } else {
  90. lastr->link = r;
  91. r->p1 = lastr;
  92. lastr->s1 = r;
  93. lastr = r;
  94. }
  95. r->prog = p;
  96. r->pc = val;
  97. val++;
  98. lp = log5;
  99. for(i=0; i<5; i++) {
  100. lp->c--;
  101. if(lp->c <= 0) {
  102. lp->c = lp->m;
  103. if(lp->p != R)
  104. lp->p->log5 = r;
  105. lp->p = r;
  106. (lp+1)->c = 0;
  107. break;
  108. }
  109. lp++;
  110. }
  111. r1 = r->p1;
  112. if(r1 != R)
  113. switch(r1->prog->as) {
  114. case ARTS:
  115. case AB:
  116. r->p1 = R;
  117. r1->s1 = R;
  118. }
  119. bit = mkvar(r, &p->from);
  120. if(bany(&bit))
  121. switch(p->as) {
  122. /*
  123. * left side read
  124. */
  125. default:
  126. print("default -- lhs read %P\n", p);
  127. case AMOV:
  128. case AMOVIB:
  129. case AMOVOB:
  130. case AMOVIS:
  131. case AMOVOS:
  132. case ATEXT:
  133. for(z=0; z<BITS; z++)
  134. r->use1.b[z] |= bit.b[z];
  135. break;
  136. /*
  137. * funny
  138. */
  139. case AMOVA:
  140. for(z=0; z<BITS; z++)
  141. addrs.b[z] |= bit.b[z];
  142. break;
  143. }
  144. bit = mkvar(r, &p->to);
  145. if(bany(&bit))
  146. switch(p->as) {
  147. default:
  148. diag(Z, "reg: unknown op: %P", p);
  149. break;
  150. /*
  151. * right side read
  152. */
  153. case ACMPI:
  154. case ACMPO:
  155. for(z=0; z<BITS; z++)
  156. r->use2.b[z] |= bit.b[z];
  157. break;
  158. /*
  159. * right side write
  160. */
  161. case AMOV:
  162. case AMOVIB:
  163. case AMOVOB:
  164. case AMOVIS:
  165. case AMOVOS:
  166. for(z=0; z<BITS; z++)
  167. r->set.b[z] |= bit.b[z];
  168. break;
  169. /*
  170. * right side read+write
  171. */
  172. case AADDI:
  173. case AADDO:
  174. case AAND:
  175. case ASUBI:
  176. case ASUBO:
  177. case AOR:
  178. case AXOR:
  179. for(z=0; z<BITS; z++) {
  180. r->set.b[z] |= bit.b[z];
  181. r->use2.b[z] |= bit.b[z];
  182. }
  183. break;
  184. /*
  185. * funny
  186. */
  187. case ABAL:
  188. for(z=0; z<BITS; z++)
  189. addrs.b[z] |= bit.b[z];
  190. break;
  191. }
  192. }
  193. if(firstr == R)
  194. return;
  195. initpc = pc - val;
  196. npc = val;
  197. /*
  198. * pass 2
  199. * turn branch references to pointers
  200. * build back pointers
  201. */
  202. for(r = firstr; r != R; r = r->link) {
  203. p = r->prog;
  204. if(p->to.type == D_BRANCH) {
  205. val = p->to.offset - initpc;
  206. r1 = firstr;
  207. while(r1 != R) {
  208. r2 = r1->log5;
  209. if(r2 != R && val >= r2->pc) {
  210. r1 = r2;
  211. continue;
  212. }
  213. if(r1->pc == val)
  214. break;
  215. r1 = r1->link;
  216. }
  217. if(r1 == R) {
  218. nearln = p->lineno;
  219. diag(Z, "ref not found\n%P", p);
  220. continue;
  221. }
  222. if(r1 == r) {
  223. nearln = p->lineno;
  224. diag(Z, "ref to self\n%P", p);
  225. continue;
  226. }
  227. r->s2 = r1;
  228. r->p2link = r1->p2;
  229. r1->p2 = r;
  230. }
  231. }
  232. if(debug['R']) {
  233. p = firstr->prog;
  234. print("\n%L %D\n", p->lineno, &p->from);
  235. }
  236. /*
  237. * pass 2.5
  238. * find looping structure
  239. */
  240. for(r = firstr; r != R; r = r->link)
  241. r->active = 0;
  242. change = 0;
  243. loopit(firstr, npc);
  244. if(debug['R'] && debug['v']) {
  245. print("\nlooping structure:\n");
  246. for(r = firstr; r != R; r = r->link) {
  247. print("%ld:%P", r->loop, r->prog);
  248. for(z=0; z<BITS; z++)
  249. bit.b[z] = r->use1.b[z] |
  250. r->use2.b[z] |
  251. r->set.b[z];
  252. if(bany(&bit)) {
  253. print("\t");
  254. if(bany(&r->use1))
  255. print(" u1=%B", r->use1);
  256. if(bany(&r->use2))
  257. print(" u2=%B", r->use2);
  258. if(bany(&r->set))
  259. print(" st=%B", r->set);
  260. }
  261. print("\n");
  262. }
  263. }
  264. /*
  265. * pass 3
  266. * iterate propagating usage
  267. * back until flow graph is complete
  268. */
  269. loop1:
  270. change = 0;
  271. for(r = firstr; r != R; r = r->link)
  272. r->active = 0;
  273. for(r = firstr; r != R; r = r->link)
  274. if(r->prog->as == ARTS)
  275. prop(r, zbits, zbits);
  276. loop11:
  277. /* pick up unreachable code */
  278. i = 0;
  279. for(r = firstr; r != R; r = r1) {
  280. r1 = r->link;
  281. if(r1 && r1->active && !r->active) {
  282. prop(r, zbits, zbits);
  283. i = 1;
  284. }
  285. }
  286. if(i)
  287. goto loop11;
  288. if(change)
  289. goto loop1;
  290. /*
  291. * pass 4
  292. * iterate propagating register/variable synchrony
  293. * forward until graph is complete
  294. */
  295. loop2:
  296. change = 0;
  297. for(r = firstr; r != R; r = r->link)
  298. r->active = 0;
  299. synch(firstr, zbits);
  300. if(change)
  301. goto loop2;
  302. /*
  303. * pass 5
  304. * isolate regions
  305. * calculate costs (paint1)
  306. */
  307. r = firstr;
  308. if(r) {
  309. for(z=0; z<BITS; z++)
  310. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  311. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  312. if(bany(&bit)) {
  313. nearln = r->prog->lineno;
  314. warn(Z, "used and not set: %B", bit);
  315. if(debug['R'] && !debug['w'])
  316. print("used and not set: %B\n", bit);
  317. }
  318. }
  319. if(debug['R'] && debug['v'])
  320. print("\nprop structure:\n");
  321. for(r = firstr; r != R; r = r->link)
  322. r->act = zbits;
  323. rgp = region;
  324. nregion = 0;
  325. for(r = firstr; r != R; r = r->link) {
  326. if(debug['R'] && debug['v']) {
  327. print("%P\t", r->prog);
  328. if(bany(&r->set))
  329. print("s:%B ", r->set);
  330. if(bany(&r->refahead))
  331. print("ra:%B ", r->refahead);
  332. if(bany(&r->calahead))
  333. print("ca:%B ", r->calahead);
  334. print("\n");
  335. }
  336. for(z=0; z<BITS; z++)
  337. bit.b[z] = r->set.b[z] &
  338. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  339. if(bany(&bit)) {
  340. nearln = r->prog->lineno;
  341. warn(Z, "set and not used: %B", bit);
  342. if(debug['R'])
  343. print("set an not used: %B\n", bit);
  344. excise(r);
  345. }
  346. for(z=0; z<BITS; z++)
  347. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  348. while(bany(&bit)) {
  349. i = bnum(bit);
  350. rgp->enter = r;
  351. rgp->varno = i;
  352. change = 0;
  353. if(debug['R'] && debug['v'])
  354. print("\n");
  355. paint1(r, i);
  356. bit.b[i/32] &= ~(1L<<(i%32));
  357. if(change <= 0) {
  358. if(debug['R'])
  359. print("%L$%d: %B\n",
  360. r->prog->lineno, change, blsh(i));
  361. continue;
  362. }
  363. rgp->cost = change;
  364. nregion++;
  365. if(nregion >= NRGN) {
  366. warn(Z, "too many regions");
  367. goto brk;
  368. }
  369. rgp++;
  370. }
  371. }
  372. brk:
  373. qsort(region, nregion, sizeof(region[0]), rcmp);
  374. /*
  375. * pass 6
  376. * determine used registers (paint2)
  377. * replace code (paint3)
  378. */
  379. rgp = region;
  380. for(i=0; i<nregion; i++) {
  381. bit = blsh(rgp->varno);
  382. vreg = paint2(rgp->enter, rgp->varno);
  383. vreg = allreg(vreg, rgp);
  384. if(debug['R']) {
  385. print("%L$%d %R: %B\n",
  386. rgp->enter->prog->lineno,
  387. rgp->cost,
  388. rgp->regno,
  389. bit);
  390. }
  391. if(rgp->regno != 0)
  392. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  393. rgp++;
  394. }
  395. /*
  396. * pass 7
  397. * peep-hole on basic block
  398. */
  399. if(!debug['R'] || debug['P'])
  400. peep();
  401. /*
  402. * pass 8
  403. * recalculate pc
  404. */
  405. val = initpc;
  406. for(r = firstr; r != R; r = r1) {
  407. r->pc = val;
  408. p = r->prog;
  409. p1 = P;
  410. r1 = r->link;
  411. if(r1 != R)
  412. p1 = r1->prog;
  413. for(; p != p1; p = p->link) {
  414. switch(p->as) {
  415. default:
  416. val++;
  417. break;
  418. case ANOP:
  419. case ADATA:
  420. case AGLOBL:
  421. case ANAME:
  422. break;
  423. }
  424. }
  425. }
  426. pc = val;
  427. /*
  428. * fix up branches
  429. */
  430. if(debug['R'])
  431. if(bany(&addrs))
  432. print("addrs: %B\n", addrs);
  433. r1 = 0; /* set */
  434. for(r = firstr; r != R; r = r->link) {
  435. p = r->prog;
  436. if(p->to.type == D_BRANCH)
  437. p->to.offset = r->s2->pc;
  438. r1 = r;
  439. }
  440. /*
  441. * last pass
  442. * eliminate nops
  443. * free aux structures
  444. */
  445. for(p = firstr->prog; p != P; p = p->link){
  446. while(p->link && p->link->as == ANOP)
  447. p->link = p->link->link;
  448. }
  449. if(r1 != R) {
  450. r1->link = freer;
  451. freer = firstr;
  452. }
  453. }
  454. /*
  455. * add mov b,rn
  456. * just after r
  457. */
  458. void
  459. addmove(Reg *r, int bn, int rn, int f)
  460. {
  461. Prog *p, *p1;
  462. Adr *a;
  463. Var *v;
  464. p1 = alloc(sizeof(*p1));
  465. *p1 = zprog;
  466. p = r->prog;
  467. p1->link = p->link;
  468. p->link = p1;
  469. p1->lineno = p->lineno;
  470. v = var + bn;
  471. a = &p1->to;
  472. a->sym = v->sym;
  473. a->offset = v->offset;
  474. a->etype = v->etype;
  475. a->type = v->name;
  476. switch(v->etype) {
  477. default:
  478. p1->as = AMOV;
  479. break;
  480. case TCHAR:
  481. p1->as = AMOVIB;
  482. break;
  483. case TUCHAR:
  484. p1->as = AMOVOB;
  485. break;
  486. case TSHORT:
  487. p1->as = AMOVIS;
  488. break;
  489. case TUSHORT:
  490. p1->as = AMOVOS;
  491. break;
  492. }
  493. p1->from.type = rn;
  494. if(!f) {
  495. p1->from = *a;
  496. *a = zprog.from;
  497. a->type = rn;
  498. }
  499. if(debug['R'])
  500. print("%P\t.a%P\n", p, p1);
  501. }
  502. ulong
  503. doregbits(int r)
  504. {
  505. ulong b;
  506. b = 0;
  507. if(r >= D_INDIR)
  508. r -= D_INDIR;
  509. if(r >= D_R0 && r <= D_R0+31)
  510. b |= RtoB(r);
  511. return b;
  512. }
  513. Bits
  514. mkvar(Reg *r, Adr *a)
  515. {
  516. Var *v;
  517. int i, t, n, et, z;
  518. long o;
  519. Bits bit;
  520. Sym *s;
  521. /*
  522. * mark registers used
  523. */
  524. t = a->type;
  525. r->regu |= doregbits(t);
  526. r->regu |= doregbits(a->index);
  527. switch(t) {
  528. default:
  529. goto none;
  530. case D_ADDR:
  531. a->type = a->index;
  532. bit = mkvar(r, a);
  533. for(z=0; z<BITS; z++)
  534. addrs.b[z] |= bit.b[z];
  535. a->type = t;
  536. goto none;
  537. case D_EXTERN:
  538. case D_STATIC:
  539. case D_PARAM:
  540. case D_AUTO:
  541. n = t;
  542. break;
  543. }
  544. s = a->sym;
  545. if(s == S)
  546. goto none;
  547. if(s->name[0] == '.')
  548. goto none;
  549. et = a->etype;
  550. o = a->offset;
  551. v = var;
  552. for(i=0; i<nvar; i++) {
  553. if(s == v->sym)
  554. if(n == v->name)
  555. if(o == v->offset)
  556. goto out;
  557. v++;
  558. }
  559. if(nvar >= NVAR) {
  560. if(debug['w'] > 1 && s)
  561. warn(Z, "variable not optimized: %s", s->name);
  562. goto none;
  563. }
  564. i = nvar;
  565. nvar++;
  566. v = &var[i];
  567. v->sym = s;
  568. v->offset = o;
  569. v->name = n;
  570. v->etype = et;
  571. if(debug['R'])
  572. print("bit=%2d et=%2d %D\n", i, et, a);
  573. out:
  574. bit = blsh(i);
  575. if(n == D_EXTERN || n == D_STATIC)
  576. for(z=0; z<BITS; z++)
  577. externs.b[z] |= bit.b[z];
  578. if(n == D_PARAM)
  579. for(z=0; z<BITS; z++)
  580. params.b[z] |= bit.b[z];
  581. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  582. for(z=0; z<BITS; z++)
  583. addrs.b[z] |= bit.b[z];
  584. return bit;
  585. none:
  586. return zbits;
  587. }
  588. void
  589. prop(Reg *r, Bits ref, Bits cal)
  590. {
  591. Reg *r1, *r2;
  592. int z;
  593. for(r1 = r; r1 != R; r1 = r1->p1) {
  594. for(z=0; z<BITS; z++) {
  595. ref.b[z] |= r1->refahead.b[z];
  596. if(ref.b[z] != r1->refahead.b[z]) {
  597. r1->refahead.b[z] = ref.b[z];
  598. change++;
  599. }
  600. cal.b[z] |= r1->calahead.b[z];
  601. if(cal.b[z] != r1->calahead.b[z]) {
  602. r1->calahead.b[z] = cal.b[z];
  603. change++;
  604. }
  605. }
  606. switch(r1->prog->as) {
  607. case ABAL:
  608. for(z=0; z<BITS; z++) {
  609. cal.b[z] |= ref.b[z] | externs.b[z];
  610. ref.b[z] = 0;
  611. }
  612. break;
  613. case ATEXT:
  614. for(z=0; z<BITS; z++) {
  615. cal.b[z] = 0;
  616. ref.b[z] = 0;
  617. }
  618. break;
  619. case ARTS:
  620. for(z=0; z<BITS; z++) {
  621. cal.b[z] = externs.b[z];
  622. ref.b[z] = 0;
  623. }
  624. }
  625. for(z=0; z<BITS; z++) {
  626. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  627. r1->use1.b[z] | r1->use2.b[z];
  628. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  629. r1->refbehind.b[z] = ref.b[z];
  630. r1->calbehind.b[z] = cal.b[z];
  631. }
  632. if(r1->active)
  633. break;
  634. r1->active = 1;
  635. }
  636. for(; r != r1; r = r->p1)
  637. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  638. prop(r2, r->refbehind, r->calbehind);
  639. }
  640. /*
  641. * find looping structure
  642. *
  643. * 1) find reverse postordering
  644. * 2) find approximate dominators,
  645. * the actual dominators if the flow graph is reducible
  646. * otherwise, dominators plus some other non-dominators.
  647. * See Matthew S. Hecht and Jeffrey D. Ullman,
  648. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  649. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  650. * Oct. 1-3, 1973, pp. 207-217.
  651. * 3) find all nodes with a predecessor dominated by the current node.
  652. * such a node is a loop head.
  653. * recursively, all preds with a greater rpo number are in the loop
  654. */
  655. long
  656. postorder(Reg *r, Reg **rpo2r, long n)
  657. {
  658. Reg *r1;
  659. r->rpo = 1;
  660. r1 = r->s1;
  661. if(r1 && !r1->rpo)
  662. n = postorder(r1, rpo2r, n);
  663. r1 = r->s2;
  664. if(r1 && !r1->rpo)
  665. n = postorder(r1, rpo2r, n);
  666. rpo2r[n] = r;
  667. n++;
  668. return n;
  669. }
  670. long
  671. rpolca(long *idom, long rpo1, long rpo2)
  672. {
  673. long t;
  674. if(rpo1 == -1)
  675. return rpo2;
  676. while(rpo1 != rpo2){
  677. if(rpo1 > rpo2){
  678. t = rpo2;
  679. rpo2 = rpo1;
  680. rpo1 = t;
  681. }
  682. while(rpo1 < rpo2){
  683. t = idom[rpo2];
  684. if(t >= rpo2)
  685. sysfatal("bad idom");
  686. rpo2 = t;
  687. }
  688. }
  689. return rpo1;
  690. }
  691. int
  692. doms(long *idom, long r, long s)
  693. {
  694. while(s > r)
  695. s = idom[s];
  696. return s == r;
  697. }
  698. int
  699. loophead(long *idom, Reg *r)
  700. {
  701. long src;
  702. src = r->rpo;
  703. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  704. return 1;
  705. for(r = r->p2; r != R; r = r->p2link)
  706. if(doms(idom, src, r->rpo))
  707. return 1;
  708. return 0;
  709. }
  710. void
  711. loopmark(Reg **rpo2r, long head, Reg *r)
  712. {
  713. if(r->rpo < head || r->active == head)
  714. return;
  715. r->active = head;
  716. r->loop += LOOP;
  717. if(r->p1 != R)
  718. loopmark(rpo2r, head, r->p1);
  719. for(r = r->p2; r != R; r = r->p2link)
  720. loopmark(rpo2r, head, r);
  721. }
  722. void
  723. loopit(Reg *r, long nr)
  724. {
  725. Reg *r1;
  726. long i, d, me;
  727. if(nr > maxnr) {
  728. rpo2r = alloc(nr * sizeof(Reg*));
  729. idom = alloc(nr * sizeof(long));
  730. maxnr = nr;
  731. }
  732. d = postorder(r, rpo2r, 0);
  733. if(d > nr)
  734. sysfatal("too many reg nodes");
  735. nr = d;
  736. for(i = 0; i < nr / 2; i++){
  737. r1 = rpo2r[i];
  738. rpo2r[i] = rpo2r[nr - 1 - i];
  739. rpo2r[nr - 1 - i] = r1;
  740. }
  741. for(i = 0; i < nr; i++)
  742. rpo2r[i]->rpo = i;
  743. idom[0] = 0;
  744. for(i = 0; i < nr; i++){
  745. r1 = rpo2r[i];
  746. me = r1->rpo;
  747. d = -1;
  748. if(r1->p1 != R && r1->p1->rpo < me)
  749. d = r1->p1->rpo;
  750. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  751. if(r1->rpo < me)
  752. d = rpolca(idom, d, r1->rpo);
  753. idom[i] = d;
  754. }
  755. for(i = 0; i < nr; i++){
  756. r1 = rpo2r[i];
  757. r1->loop++;
  758. if(r1->p2 != R && loophead(idom, r1))
  759. loopmark(rpo2r, i, r1);
  760. }
  761. }
  762. void
  763. synch(Reg *r, Bits dif)
  764. {
  765. Reg *r1;
  766. int z;
  767. for(r1 = r; r1 != R; r1 = r1->s1) {
  768. for(z=0; z<BITS; z++) {
  769. dif.b[z] = (dif.b[z] &
  770. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  771. r1->set.b[z] | r1->regdiff.b[z];
  772. if(dif.b[z] != r1->regdiff.b[z]) {
  773. r1->regdiff.b[z] = dif.b[z];
  774. change++;
  775. }
  776. }
  777. if(r1->active)
  778. break;
  779. r1->active = 1;
  780. for(z=0; z<BITS; z++)
  781. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  782. if(r1->s2 != R)
  783. synch(r1->s2, dif);
  784. }
  785. }
  786. ulong
  787. allreg(ulong b, Rgn *r)
  788. {
  789. Var *v;
  790. int i;
  791. v = var + r->varno;
  792. r->regno = 0;
  793. switch(v->etype) {
  794. default:
  795. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  796. break;
  797. case TCHAR:
  798. case TUCHAR:
  799. case TSHORT:
  800. case TUSHORT:
  801. case TINT:
  802. case TUINT:
  803. case TLONG:
  804. case TULONG:
  805. case TIND:
  806. case TARRAY:
  807. i = BtoR(~b);
  808. if(i && r->cost > 0) {
  809. r->regno = i;
  810. return RtoB(i);
  811. }
  812. break;
  813. case TVLONG:
  814. case TDOUBLE:
  815. case TFLOAT:
  816. break;
  817. }
  818. return 0;
  819. }
  820. void
  821. paint1(Reg *r, int bn)
  822. {
  823. Reg *r1;
  824. Prog *p;
  825. int z;
  826. ulong bb;
  827. z = bn/32;
  828. bb = 1L<<(bn%32);
  829. if(r->act.b[z] & bb)
  830. return;
  831. for(;;) {
  832. if(!(r->refbehind.b[z] & bb))
  833. break;
  834. r1 = r->p1;
  835. if(r1 == R)
  836. break;
  837. if(!(r1->refahead.b[z] & bb))
  838. break;
  839. if(r1->act.b[z] & bb)
  840. break;
  841. r = r1;
  842. }
  843. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  844. change -= CLOAD * r->loop;
  845. if(debug['R'] && debug['v'])
  846. print("%ld%P\tld %B $%d\n", r->loop,
  847. r->prog, blsh(bn), change);
  848. }
  849. for(;;) {
  850. r->act.b[z] |= bb;
  851. p = r->prog;
  852. if(r->use1.b[z] & bb) {
  853. change += CREF * r->loop;
  854. if(debug['R'] && debug['v'])
  855. print("%ld%P\tu1 %B $%d\n", r->loop,
  856. p, blsh(bn), change);
  857. }
  858. if((r->use2.b[z]|r->set.b[z]) & bb) {
  859. change += CREF * r->loop;
  860. if(debug['R'] && debug['v'])
  861. print("%ld%P\tu2 %B $%d\n", r->loop,
  862. p, blsh(bn), change);
  863. }
  864. if(STORE(r) & r->regdiff.b[z] & bb) {
  865. change -= CLOAD * r->loop;
  866. if(debug['R'] && debug['v'])
  867. print("%ld%P\tst %B $%d\n", r->loop,
  868. p, blsh(bn), change);
  869. }
  870. if(r->refbehind.b[z] & bb)
  871. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  872. if(r1->refahead.b[z] & bb)
  873. paint1(r1, bn);
  874. if(!(r->refahead.b[z] & bb))
  875. break;
  876. r1 = r->s2;
  877. if(r1 != R)
  878. if(r1->refbehind.b[z] & bb)
  879. paint1(r1, bn);
  880. r = r->s1;
  881. if(r == R)
  882. break;
  883. if(r->act.b[z] & bb)
  884. break;
  885. if(!(r->refbehind.b[z] & bb))
  886. break;
  887. }
  888. }
  889. ulong
  890. paint2(Reg *r, int bn)
  891. {
  892. Reg *r1;
  893. int z;
  894. ulong bb, vreg;
  895. z = bn/32;
  896. bb = 1L << (bn%32);
  897. vreg = regbits;
  898. if(!(r->act.b[z] & bb))
  899. return vreg;
  900. for(;;) {
  901. if(!(r->refbehind.b[z] & bb))
  902. break;
  903. r1 = r->p1;
  904. if(r1 == R)
  905. break;
  906. if(!(r1->refahead.b[z] & bb))
  907. break;
  908. if(!(r1->act.b[z] & bb))
  909. break;
  910. r = r1;
  911. }
  912. for(;;) {
  913. r->act.b[z] &= ~bb;
  914. vreg |= r->regu;
  915. if(r->refbehind.b[z] & bb)
  916. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  917. if(r1->refahead.b[z] & bb)
  918. vreg |= paint2(r1, bn);
  919. if(!(r->refahead.b[z] & bb))
  920. break;
  921. r1 = r->s2;
  922. if(r1 != R)
  923. if(r1->refbehind.b[z] & bb)
  924. vreg |= paint2(r1, bn);
  925. r = r->s1;
  926. if(r == R)
  927. break;
  928. if(!(r->act.b[z] & bb))
  929. break;
  930. if(!(r->refbehind.b[z] & bb))
  931. break;
  932. }
  933. return vreg;
  934. }
  935. void
  936. paint3(Reg *r, int bn, long rb, int rn)
  937. {
  938. Reg *r1;
  939. Prog *p;
  940. int z;
  941. ulong bb;
  942. z = bn/32;
  943. bb = 1L << (bn%32);
  944. if(r->act.b[z] & bb)
  945. return;
  946. for(;;) {
  947. if(!(r->refbehind.b[z] & bb))
  948. break;
  949. r1 = r->p1;
  950. if(r1 == R)
  951. break;
  952. if(!(r1->refahead.b[z] & bb))
  953. break;
  954. if(r1->act.b[z] & bb)
  955. break;
  956. r = r1;
  957. }
  958. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  959. addmove(r, bn, rn, 0);
  960. for(;;) {
  961. r->act.b[z] |= bb;
  962. p = r->prog;
  963. if(r->use1.b[z] & bb) {
  964. if(debug['R'])
  965. print("%P", p);
  966. addreg(&p->from, rn);
  967. if(debug['R'])
  968. print("\t.c%P\n", p);
  969. }
  970. if((r->use2.b[z]|r->set.b[z]) & bb) {
  971. if(debug['R'])
  972. print("%P", p);
  973. addreg(&p->to, rn);
  974. if(debug['R'])
  975. print("\t.c%P\n", p);
  976. }
  977. if(STORE(r) & r->regdiff.b[z] & bb)
  978. addmove(r, bn, rn, 1);
  979. r->regu |= rb;
  980. if(r->refbehind.b[z] & bb)
  981. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  982. if(r1->refahead.b[z] & bb)
  983. paint3(r1, bn, rb, rn);
  984. if(!(r->refahead.b[z] & bb))
  985. break;
  986. r1 = r->s2;
  987. if(r1 != R)
  988. if(r1->refbehind.b[z] & bb)
  989. paint3(r1, bn, rb, rn);
  990. r = r->s1;
  991. if(r == R)
  992. break;
  993. if(r->act.b[z] & bb)
  994. break;
  995. if(!(r->refbehind.b[z] & bb))
  996. break;
  997. }
  998. }
  999. void
  1000. addreg(Adr *a, int rn)
  1001. {
  1002. a->sym = 0;
  1003. a->offset = 0;
  1004. a->type = rn;
  1005. }
  1006. long
  1007. RtoB(int r)
  1008. {
  1009. if(r >= D_R0 && r <= D_R0+31)
  1010. return 1L << r-D_R0;
  1011. return 0;
  1012. }
  1013. BtoR(long b)
  1014. {
  1015. if(b == 0)
  1016. return 0;
  1017. return bitno(b) + D_R0;
  1018. }