reg.c 22 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->costr;
  22. if(p2->costa > c1)
  23. c1 = p2->costa;
  24. c2 = p1->costr;
  25. if(p1->costa > c2)
  26. c2 = p1->costa;
  27. if(c1 -= c2)
  28. return c1;
  29. return p2->varno - p1->varno;
  30. }
  31. void
  32. regopt(Prog *p)
  33. {
  34. Reg *r, *r1, *r2;
  35. Prog *p1;
  36. int i, z;
  37. long val, initpc, npc;
  38. ulong vreg;
  39. Bits bit;
  40. Var *v;
  41. struct {
  42. long m;
  43. long c;
  44. Reg* p;
  45. } log5[6], *lp;
  46. firstr = R;
  47. lastr = R;
  48. nvar = 0;
  49. for(z=0; z<BITS; z++) {
  50. externs.b[z] = 0;
  51. params.b[z] = 0;
  52. addrs.b[z] = 0;
  53. }
  54. regbits = RtoB(0) | /* return reg */
  55. AtoB(6) | AtoB(7) | /* sp and sb */
  56. FtoB(0) | FtoB(1); /* floating return reg */
  57. for(i=0; i<NREG; i++) {
  58. if(regused[i])
  59. regbits |= RtoB(i);
  60. if(fregused[i])
  61. regbits |= FtoB(i);
  62. if(aregused[i])
  63. regbits |= AtoB(i);
  64. }
  65. /*
  66. * pass 1
  67. * build aux data structure
  68. * allocate pcs
  69. * find use and set of variables
  70. */
  71. val = 5L * 5L * 5L * 5L * 5L;
  72. lp = log5;
  73. for(i=0; i<5; i++) {
  74. lp->m = val;
  75. lp->c = 0;
  76. lp->p = R;
  77. val /= 5L;
  78. lp++;
  79. }
  80. val = 0;
  81. for(; p != P; p = p->link) {
  82. switch(p->as) {
  83. case ADATA:
  84. case AGLOBL:
  85. case ANAME:
  86. continue;
  87. }
  88. r = rega();
  89. if(firstr == R) {
  90. firstr = r;
  91. lastr = r;
  92. } else {
  93. lastr->link = r;
  94. r->p1 = lastr;
  95. lastr->s1 = r;
  96. lastr = r;
  97. }
  98. r->prog = p;
  99. r->pc = val;
  100. val++;
  101. lp = log5;
  102. for(i=0; i<5; i++) {
  103. lp->c--;
  104. if(lp->c <= 0) {
  105. lp->c = lp->m;
  106. if(lp->p != R)
  107. lp->p->log5 = r;
  108. lp->p = r;
  109. (lp+1)->c = 0;
  110. break;
  111. }
  112. lp++;
  113. }
  114. r1 = r->p1;
  115. if(r1 != R)
  116. switch(r1->prog->as) {
  117. case ABRA:
  118. case ARTS:
  119. case ARTE:
  120. r->p1 = R;
  121. r1->s1 = R;
  122. }
  123. bit = mkvar(&p->from, AGOK);
  124. if(bany(&bit))
  125. switch(p->as) {
  126. case ALEA:
  127. if(!(mvbits & B_INDIR))
  128. for(z=0; z<BITS; z++)
  129. addrs.b[z] |= bit.b[z];
  130. default:
  131. if(mvbits & B_ADDR)
  132. for(z=0; z<BITS; z++)
  133. addrs.b[z] |= bit.b[z];
  134. for(z=0; z<BITS; z++)
  135. r->use1.b[z] |= bit.b[z];
  136. }
  137. bit = mkvar(&p->to, p->as);
  138. if(bany(&bit))
  139. switch(p->as) {
  140. case ABSR: /* funny */
  141. for(z=0; z<BITS; z++)
  142. addrs.b[z] |= bit.b[z];
  143. goto def;
  144. case APEA:
  145. if(!(mvbits & B_INDIR))
  146. for(z=0; z<BITS; z++)
  147. addrs.b[z] |= bit.b[z];
  148. def:
  149. case ACMPB: case ACMPW: case ACMPL:
  150. case AFCMPF: case AFCMPD:
  151. case ATSTB: case ATSTW: case ATSTL:
  152. case AFTSTF: case AFTSTD:
  153. case ABFEXTU: case ABFEXTS:
  154. if(mvbits & B_ADDR)
  155. for(z=0; z<BITS; z++)
  156. addrs.b[z] |= bit.b[z];
  157. for(z=0; z<BITS; z++)
  158. r->use2.b[z] |= bit.b[z];
  159. break;
  160. default:
  161. diag(Z, "reg: unknown asop: %A", p->as);
  162. case AADDB: case AADDW: case AADDL:
  163. case ASUBB: case ASUBW: case ASUBL:
  164. case AANDB: case AANDW: case AANDL:
  165. case AORB: case AORW: case AORL:
  166. case AEORB: case AEORW: case AEORL:
  167. case ABFINS:
  168. for(z=0; z<BITS; z++)
  169. r->use2.b[z] |= bit.b[z];
  170. case ANOP:
  171. case AMOVB: case AMOVW: case AMOVL:
  172. case AFMOVEB: case AFMOVEW: case AFMOVEL:
  173. case ACLRB: case ACLRW: case ACLRL:
  174. case AFMOVEF: case AFMOVED:
  175. if(mvbits & B_INDIR)
  176. for(z=0; z<BITS; z++)
  177. r->use2.b[z] |= bit.b[z];
  178. else
  179. for(z=0; z<BITS; z++)
  180. r->set.b[z] |= bit.b[z];
  181. break;
  182. }
  183. }
  184. if(firstr == R)
  185. return;
  186. initpc = pc - val;
  187. npc = val;
  188. /*
  189. * pass 2
  190. * turn branch references to pointers
  191. * build back pointers
  192. */
  193. for(r = firstr; r != R; r = r->link) {
  194. p = r->prog;
  195. if(p->to.type == D_BRANCH) {
  196. val = p->to.offset - initpc;
  197. r1 = firstr;
  198. while(r1 != R) {
  199. r2 = r1->log5;
  200. if(r2 != R && val >= r2->pc) {
  201. r1 = r2;
  202. continue;
  203. }
  204. if(r1->pc == val)
  205. break;
  206. r1 = r1->link;
  207. }
  208. if(r1 == R) {
  209. diag(Z, "ref not found\n%L:%P", p->lineno, p);
  210. continue;
  211. }
  212. if(r1 == r) {
  213. diag(Z, "ref to self");
  214. continue;
  215. }
  216. r->s2 = r1;
  217. r->p2link = r1->p2;
  218. r1->p2 = r;
  219. }
  220. }
  221. if(debug['R'])
  222. print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
  223. /*
  224. * pass 2.5
  225. * find looping structure
  226. */
  227. for(r = firstr; r != R; r = r->link)
  228. r->active = 0;
  229. changer = 0;
  230. loopit(firstr, npc);
  231. if(debug['R'] && debug['v']) {
  232. print("\nlooping structure:\n");
  233. for(r = firstr; r != R; r = r->link) {
  234. print("%ld:%P", r->loop, r->prog);
  235. for(z=0; z<BITS; z++)
  236. bit.b[z] = r->use1.b[z] |
  237. r->use2.b[z] | r->set.b[z];
  238. if(bany(&bit)) {
  239. print("\t");
  240. if(bany(&r->use1))
  241. print(" u1=%B", r->use1);
  242. if(bany(&r->use2))
  243. print(" u2=%B", r->use2);
  244. if(bany(&r->set))
  245. print(" st=%B", r->set);
  246. }
  247. print("\n");
  248. }
  249. }
  250. /*
  251. * pass 3
  252. * iterate propagating usage
  253. * back until flow graph is complete
  254. */
  255. loop1:
  256. changer = 0;
  257. for(r = firstr; r != R; r = r->link)
  258. r->active = 0;
  259. for(r = firstr; r != R; r = r->link)
  260. if(r->prog->as == ARTS)
  261. prop(r, zbits, zbits);
  262. loop11:
  263. /* pick up unreachable code */
  264. i = 0;
  265. for(r = firstr; r != R; r = r1) {
  266. r1 = r->link;
  267. if(r1 && r1->active && !r->active) {
  268. prop(r, zbits, zbits);
  269. i = 1;
  270. }
  271. }
  272. if(i)
  273. goto loop11;
  274. if(changer)
  275. goto loop1;
  276. /*
  277. * pass 4
  278. * iterate propagating register/variable synchrony
  279. * forward until graph is complete
  280. */
  281. loop2:
  282. changer = 0;
  283. for(r = firstr; r != R; r = r->link)
  284. r->active = 0;
  285. synch(firstr, zbits);
  286. if(changer)
  287. goto loop2;
  288. /*
  289. * pass 5
  290. * isolate regions
  291. * calculate costs (paint1)
  292. */
  293. r = firstr;
  294. if(r) {
  295. for(z=0; z<BITS; z++)
  296. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  297. ~(externs.b[z] | params.b[z] | addrs.b[z]);
  298. if(bany(&bit)) {
  299. nearln = r->prog->lineno;
  300. warn(Z, "used and not set: %B", bit);
  301. if(debug['R'] && !debug['w'])
  302. print("used and not set: %B\n", bit);
  303. /*
  304. * 68040 'feature':
  305. * load of a denormalized fp will trap
  306. */
  307. while(bany(&bit)) {
  308. i = bnum(bit);
  309. bit.b[i/32] &= ~(1L << (i%32));
  310. v = var + i;
  311. if(v->type == D_AUTO) {
  312. r->set.b[i/32] |= (1L << (i%32));
  313. if(typefd[v->etype])
  314. addmove(r, i, NREG+NREG, 1);
  315. }
  316. }
  317. }
  318. }
  319. if(debug['R'] && debug['v'])
  320. print("\nprop structure:\n");
  321. for(r = firstr; r != R; r = r->link) {
  322. if(debug['R'] && debug['v'])
  323. print("%P\n set = %B; rah = %B; cal = %B\n",
  324. r->prog, r->set, r->refahead, r->calahead);
  325. r->act = zbits;
  326. }
  327. rgp = region;
  328. nregion = 0;
  329. for(r = firstr; r != R; r = r->link) {
  330. for(z=0; z<BITS; z++)
  331. bit.b[z] = r->set.b[z] &
  332. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  333. if(bany(&bit)) {
  334. nearln = r->prog->lineno;
  335. warn(Z, "set and not used: %B", bit);
  336. if(debug['R'])
  337. print("set an not used: %B\n", bit);
  338. excise(r);
  339. }
  340. for(z=0; z<BITS; z++)
  341. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  342. while(bany(&bit)) {
  343. i = bnum(bit);
  344. rgp->enter = r;
  345. rgp->varno = i;
  346. changer = 0;
  347. changea = 0;
  348. if(debug['R'] && debug['v'])
  349. print("\n");
  350. paint1(r, i);
  351. bit.b[i/32] &= ~(1L<<(i%32));
  352. if(changer <= 0 && changea <= 0) {
  353. if(debug['R'])
  354. print("%L$%d.%d: %B\n",
  355. r->prog->lineno,
  356. changer, changea, blsh(i));
  357. continue;
  358. }
  359. rgp->costr = changer;
  360. rgp->costa = changea;
  361. nregion++;
  362. if(nregion >= NRGN) {
  363. warn(Z, "too many regions");
  364. goto brk;
  365. }
  366. rgp++;
  367. }
  368. }
  369. brk:
  370. qsort(region, nregion, sizeof(region[0]), rcmp);
  371. /*
  372. * pass 6
  373. * determine used registers (paint2)
  374. * replace code (paint3)
  375. */
  376. rgp = region;
  377. for(i=0; i<nregion; i++) {
  378. bit = blsh(rgp->varno);
  379. vreg = paint2(rgp->enter, rgp->varno);
  380. vreg = allreg(vreg, rgp);
  381. if(debug['R'])
  382. print("%L$%d.%d %R: %B\n",
  383. rgp->enter->prog->lineno,
  384. rgp->costr, rgp->costa,
  385. rgp->regno,
  386. bit);
  387. if(rgp->regno != D_NONE)
  388. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  389. rgp++;
  390. }
  391. /*
  392. * pass 7
  393. * peep-hole on basic block
  394. */
  395. if(!debug['R'] || debug['P'])
  396. peep();
  397. /*
  398. * pass 8
  399. * recalculate pc
  400. */
  401. val = initpc;
  402. for(r = firstr; r != R; r = r1) {
  403. r->pc = val;
  404. p = r->prog;
  405. p1 = P;
  406. r1 = r->link;
  407. if(r1 != R)
  408. p1 = r1->prog;
  409. for(; p != p1; p = p->link) {
  410. switch(p->as) {
  411. default:
  412. val++;
  413. break;
  414. case ANOP:
  415. case ADATA:
  416. case AGLOBL:
  417. case ANAME:
  418. break;
  419. }
  420. }
  421. }
  422. pc = val;
  423. /*
  424. * fix up branches
  425. */
  426. if(debug['R'])
  427. if(bany(&addrs))
  428. print("addrs: %B\n", addrs);
  429. r1 = 0; /* set */
  430. for(r = firstr; r != R; r = r->link) {
  431. p = r->prog;
  432. if(p->to.type == D_BRANCH)
  433. p->to.offset = r->s2->pc;
  434. r1 = r;
  435. }
  436. /*
  437. * last pass
  438. * eliminate nops
  439. * free aux structures
  440. */
  441. for(p = firstr->prog; p != P; p = p->link){
  442. while(p->link && p->link->as == ANOP)
  443. p->link = p->link->link;
  444. }
  445. if(r1 != R) {
  446. r1->link = freer;
  447. freer = firstr;
  448. }
  449. }
  450. /*
  451. * add mov b,rn
  452. * just after r
  453. */
  454. void
  455. addmove(Reg *r, int bn, int rn, int f)
  456. {
  457. Prog *p, *p1;
  458. Var *v;
  459. int badccr;
  460. badccr = 0;
  461. p = r->prog;
  462. p1 = p->link;
  463. if(p1)
  464. switch(p1->as) {
  465. case AMOVW:
  466. if(p1->from.type == D_CCR)
  467. p = p1;
  468. break;
  469. case ABEQ:
  470. case ABNE:
  471. case ABLE:
  472. case ABLS:
  473. case ABLT:
  474. case ABMI:
  475. case ABGE:
  476. case ABPL:
  477. case ABGT:
  478. case ABHI:
  479. case ABCC:
  480. case ABCS:
  481. p1 = prg();
  482. p1->link = p->link;
  483. p->link = p1;
  484. p1->lineno = p->lineno;
  485. p1->from.type = D_CCR;
  486. p1->to.type = D_TOS;
  487. p1->as = AMOVW;
  488. p = p1;
  489. badccr = 1;
  490. }
  491. p1 = prg();
  492. p1->link = p->link;
  493. p->link = p1;
  494. p1->lineno = p->lineno;
  495. v = var + bn;
  496. p1->from.sym = v->sym;
  497. p1->from.type = v->type;
  498. p1->from.offset = v->offset;
  499. p1->from.etype = v->etype;
  500. p1->to.type = rn;
  501. if(f) {
  502. p1->to = p1->from;
  503. p1->from = zprog.from;
  504. p1->from.type = rn;
  505. }
  506. p1->as = opxt[OAS][v->etype];
  507. if(badccr) {
  508. p = p1;
  509. p1 = prg();
  510. p1->link = p->link;
  511. p->link = p1;
  512. p1->lineno = p->lineno;
  513. p1->from.type = D_TOS;
  514. p1->to.type = D_CCR;
  515. p1->as = AMOVW;
  516. }
  517. if(debug['R'])
  518. print("%P\t.a%P\n", p, p1);
  519. }
  520. Bits
  521. mkvar(Adr *a, int as)
  522. {
  523. Var *v;
  524. int i, t, z;
  525. long o;
  526. Bits bit;
  527. Sym *s;
  528. mvbits = 0;
  529. t = a->type & D_MASK;
  530. switch(t) {
  531. default:
  532. if(t >= D_R0 && t < D_R0+NREG) {
  533. regbits |= RtoB(t-D_R0);
  534. if(as == ADIVUL || as == ADIVSL)
  535. regbits |= RtoB(t-D_R0+1);
  536. }
  537. if(t >= D_A0 && t < D_A0+NREG)
  538. regbits |= AtoB(t-D_A0);
  539. if(t >= D_F0 && t < D_F0+NREG)
  540. regbits |= FtoB(t-D_F0);
  541. goto none;
  542. case D_EXTERN:
  543. case D_STATIC:
  544. case D_AUTO:
  545. case D_PARAM:
  546. break;
  547. }
  548. s = a->sym;
  549. if(s == S)
  550. goto none;
  551. if((a->type & I_MASK) == I_ADDR)
  552. mvbits |= B_ADDR;
  553. switch(a->index & I_MASK) {
  554. case I_INDEX1:
  555. mvbits |= B_ADDR;
  556. break;
  557. case I_INDEX2:
  558. case I_INDEX3:
  559. mvbits |= B_INDIR;
  560. break;
  561. }
  562. o = a->offset;
  563. v = var;
  564. for(i=0; i<nvar; i++) {
  565. if(s == v->sym)
  566. if(t == v->type)
  567. if(o == v->offset)
  568. goto out;
  569. v++;
  570. }
  571. if(s)
  572. if(s->name[0] == '.')
  573. goto none;
  574. if(nvar >= NVAR) {
  575. if(debug['w'] > 1 && s)
  576. warn(Z, "variable not optimized: %s", s->name);
  577. goto none;
  578. }
  579. i = nvar;
  580. nvar++;
  581. v = &var[i];
  582. v->sym = s;
  583. v->offset = o;
  584. v->etype = a->etype;
  585. v->type = t;
  586. if(debug['R'])
  587. print("bit=%2d et=%2d %s (%d,%d,%ld)\n",
  588. i, a->etype, s->name,
  589. (int)v->sym, v->type, v->offset);
  590. out:
  591. bit = blsh(i);
  592. if(t == D_EXTERN || t == D_STATIC)
  593. for(z=0; z<BITS; z++)
  594. externs.b[z] |= bit.b[z];
  595. if(t == D_PARAM)
  596. for(z=0; z<BITS; z++)
  597. params.b[z] |= bit.b[z];
  598. if(a->etype != v->etype || !typechlpfd[a->etype])
  599. for(z=0; z<BITS; z++)
  600. addrs.b[z] |= bit.b[z]; /* funny punning */
  601. return bit;
  602. none:
  603. return zbits;
  604. }
  605. void
  606. prop(Reg *r, Bits ref, Bits cal)
  607. {
  608. Reg *r1, *r2;
  609. int z;
  610. for(r1 = r; r1 != R; r1 = r1->p1) {
  611. for(z=0; z<BITS; z++) {
  612. ref.b[z] |= r1->refahead.b[z];
  613. if(ref.b[z] != r1->refahead.b[z]) {
  614. r1->refahead.b[z] = ref.b[z];
  615. changer++;
  616. }
  617. cal.b[z] |= r1->calahead.b[z];
  618. if(cal.b[z] != r1->calahead.b[z]) {
  619. r1->calahead.b[z] = cal.b[z];
  620. changer++;
  621. }
  622. }
  623. switch(r1->prog->as) {
  624. case ABSR:
  625. for(z=0; z<BITS; z++) {
  626. cal.b[z] |= ref.b[z] | externs.b[z];
  627. ref.b[z] = 0;
  628. }
  629. break;
  630. case ATEXT:
  631. for(z=0; z<BITS; z++) {
  632. cal.b[z] = 0;
  633. ref.b[z] = 0;
  634. }
  635. break;
  636. case ARTS:
  637. for(z=0; z<BITS; z++) {
  638. cal.b[z] = externs.b[z];
  639. ref.b[z] = 0;
  640. }
  641. }
  642. for(z=0; z<BITS; z++) {
  643. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  644. r1->use1.b[z] | r1->use2.b[z];
  645. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  646. r1->refbehind.b[z] = ref.b[z];
  647. r1->calbehind.b[z] = cal.b[z];
  648. }
  649. if(r1->active)
  650. break;
  651. r1->active = 1;
  652. }
  653. for(; r != r1; r = r->p1)
  654. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  655. prop(r2, r->refbehind, r->calbehind);
  656. }
  657. /*
  658. * find looping structure
  659. *
  660. * 1) find reverse postordering
  661. * 2) find approximate dominators,
  662. * the actual dominators if the flow graph is reducible
  663. * otherwise, dominators plus some other non-dominators.
  664. * See Matthew S. Hecht and Jeffrey D. Ullman,
  665. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  666. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  667. * Oct. 1-3, 1973, pp. 207-217.
  668. * 3) find all nodes with a predecessor dominated by the current node.
  669. * such a node is a loop head.
  670. * recursively, all preds with a greater rpo number are in the loop
  671. */
  672. long
  673. postorder(Reg *r, Reg **rpo2r, long n)
  674. {
  675. Reg *r1;
  676. r->rpo = 1;
  677. r1 = r->s1;
  678. if(r1 && !r1->rpo)
  679. n = postorder(r1, rpo2r, n);
  680. r1 = r->s2;
  681. if(r1 && !r1->rpo)
  682. n = postorder(r1, rpo2r, n);
  683. rpo2r[n] = r;
  684. n++;
  685. return n;
  686. }
  687. long
  688. rpolca(long *idom, long rpo1, long rpo2)
  689. {
  690. long t;
  691. if(rpo1 == -1)
  692. return rpo2;
  693. while(rpo1 != rpo2){
  694. if(rpo1 > rpo2){
  695. t = rpo2;
  696. rpo2 = rpo1;
  697. rpo1 = t;
  698. }
  699. while(rpo1 < rpo2){
  700. t = idom[rpo2];
  701. if(t >= rpo2)
  702. sysfatal("bad idom");
  703. rpo2 = t;
  704. }
  705. }
  706. return rpo1;
  707. }
  708. int
  709. doms(long *idom, long r, long s)
  710. {
  711. while(s > r)
  712. s = idom[s];
  713. return s == r;
  714. }
  715. int
  716. loophead(long *idom, Reg *r)
  717. {
  718. long src;
  719. src = r->rpo;
  720. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  721. return 1;
  722. for(r = r->p2; r != R; r = r->p2link)
  723. if(doms(idom, src, r->rpo))
  724. return 1;
  725. return 0;
  726. }
  727. void
  728. loopmark(Reg **rpo2r, long head, Reg *r)
  729. {
  730. if(r->rpo < head || r->active == head)
  731. return;
  732. r->active = head;
  733. r->loop += LOOP;
  734. if(r->p1 != R)
  735. loopmark(rpo2r, head, r->p1);
  736. for(r = r->p2; r != R; r = r->p2link)
  737. loopmark(rpo2r, head, r);
  738. }
  739. void
  740. loopit(Reg *r, long nr)
  741. {
  742. Reg *r1;
  743. long i, d, me;
  744. if(nr > maxnr) {
  745. rpo2r = alloc(nr * sizeof(Reg*));
  746. idom = alloc(nr * sizeof(long));
  747. maxnr = nr;
  748. }
  749. d = postorder(r, rpo2r, 0);
  750. if(d > nr)
  751. sysfatal("too many reg nodes");
  752. nr = d;
  753. for(i = 0; i < nr / 2; i++){
  754. r1 = rpo2r[i];
  755. rpo2r[i] = rpo2r[nr - 1 - i];
  756. rpo2r[nr - 1 - i] = r1;
  757. }
  758. for(i = 0; i < nr; i++)
  759. rpo2r[i]->rpo = i;
  760. idom[0] = 0;
  761. for(i = 0; i < nr; i++){
  762. r1 = rpo2r[i];
  763. me = r1->rpo;
  764. d = -1;
  765. if(r1->p1 != R && r1->p1->rpo < me)
  766. d = r1->p1->rpo;
  767. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  768. if(r1->rpo < me)
  769. d = rpolca(idom, d, r1->rpo);
  770. idom[i] = d;
  771. }
  772. for(i = 0; i < nr; i++){
  773. r1 = rpo2r[i];
  774. r1->loop++;
  775. if(r1->p2 != R && loophead(idom, r1))
  776. loopmark(rpo2r, i, r1);
  777. }
  778. }
  779. void
  780. synch(Reg *r, Bits dif)
  781. {
  782. Reg *r1;
  783. int z;
  784. for(r1 = r; r1 != R; r1 = r1->s1) {
  785. for(z=0; z<BITS; z++) {
  786. dif.b[z] = (dif.b[z] &
  787. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  788. r1->set.b[z] | r1->regdiff.b[z];
  789. if(dif.b[z] != r1->regdiff.b[z]) {
  790. r1->regdiff.b[z] = dif.b[z];
  791. changer++;
  792. }
  793. }
  794. if(r1->active)
  795. break;
  796. r1->active = 1;
  797. for(z=0; z<BITS; z++)
  798. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  799. if(r1->s2 != R)
  800. synch(r1->s2, dif);
  801. }
  802. }
  803. ulong
  804. allreg(ulong b, Rgn *r)
  805. {
  806. Var *v;
  807. int i, j;
  808. v = var + r->varno;
  809. r->regno = D_NONE;
  810. switch(v->etype) {
  811. default:
  812. diag(Z, "unknown etype");
  813. break;
  814. case TCHAR:
  815. case TUCHAR:
  816. case TSHORT:
  817. case TUSHORT:
  818. case TINT:
  819. case TUINT:
  820. case TLONG:
  821. case TULONG:
  822. case TIND:
  823. i = BtoR(~b);
  824. j = BtoA(~b);
  825. if(r->costa == r->costr)
  826. if(i > j)
  827. i = NREG;
  828. if(j < NREG && r->costa > 0)
  829. if(r->costa > r->costr || i >= NREG) {
  830. r->regno = D_A0 + j;
  831. return AtoB(j);
  832. }
  833. if(i < NREG && r->costr > 0) {
  834. r->regno = D_R0 + i;
  835. return RtoB(i);
  836. }
  837. break;
  838. case TDOUBLE:
  839. case TFLOAT:
  840. i = BtoF(~b);
  841. if(i < NREG) {
  842. r->regno = D_F0 + i;
  843. return FtoB(i);
  844. }
  845. break;
  846. }
  847. return 0;
  848. }
  849. void
  850. paint1(Reg *r, int bn)
  851. {
  852. Reg *r1;
  853. Prog *p;
  854. int z;
  855. ulong bb;
  856. int x;
  857. z = bn/32;
  858. bb = 1L<<(bn%32);
  859. if(r->act.b[z] & bb)
  860. return;
  861. for(;;) {
  862. if(!(r->refbehind.b[z] & bb))
  863. break;
  864. r1 = r->p1;
  865. if(r1 == R)
  866. break;
  867. if(!(r1->refahead.b[z] & bb))
  868. break;
  869. if(r1->act.b[z] & bb)
  870. break;
  871. r = r1;
  872. }
  873. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  874. changer -= CLOAD * r->loop;
  875. changea -= CLOAD * r->loop;
  876. if(debug['R'] && debug['v'])
  877. print("%ld%P\tld %B $%d.%d\n", r->loop,
  878. r->prog, blsh(bn), changer, changea);
  879. }
  880. for(;;) {
  881. r->act.b[z] |= bb;
  882. p = r->prog;
  883. if(r->use1.b[z] & bb) {
  884. changer += CREF * r->loop;
  885. changea += CREF * r->loop;
  886. x = p->from.index;
  887. if(x == D_NONE) {
  888. switch(p->as) {
  889. default:
  890. changea = -CINF;
  891. case AADDL:
  892. case ASUBL:
  893. case AMOVL:
  894. case ACMPL:
  895. break;
  896. }
  897. } else {
  898. changer += (CXREF-CREF) * r->loop;
  899. if(x != (I_INDEX3|D_NONE))
  900. changer = -CINF;
  901. if((x&I_MASK) == I_INDEX1)
  902. changea = -CINF;
  903. }
  904. if(p->as == AMOVL) {
  905. x = p->to.type;
  906. if(x >= D_R0 && x < D_R0+NREG)
  907. changer += r->loop;
  908. if(x >= D_A0 && x < D_A0+NREG)
  909. changea += r->loop;
  910. }
  911. if(debug['R'] && debug['v'])
  912. print("%ld%P\tu1 %B $%d.%d\n", r->loop,
  913. p, blsh(bn), changer, changea);
  914. }
  915. if((r->use2.b[z]|r->set.b[z]) & bb) {
  916. changer += CREF * r->loop;
  917. changea += CREF * r->loop;
  918. x = p->to.index;
  919. if(x == D_NONE)
  920. switch(p->as) {
  921. default:
  922. changea = -CINF;
  923. break;
  924. case AMOVL:
  925. case AADDL:
  926. case ACMPL:
  927. case ASUBL:
  928. case ACLRL: /* can be faked */
  929. case ATSTL: /* can be faked */
  930. break;
  931. }
  932. else {
  933. changer += (CXREF-CREF) * r->loop;
  934. if(x != (I_INDEX3|D_NONE))
  935. changer = -CINF;
  936. if((x&I_MASK) == I_INDEX1)
  937. changea = -CINF;
  938. }
  939. if(p->as == AMOVL) {
  940. x = p->from.type;
  941. if(x >= D_R0 && x < D_R0+NREG)
  942. changer += r->loop;
  943. if(x >= D_A0 && x < D_A0+NREG)
  944. changea += r->loop;
  945. }
  946. if(debug['R'] && debug['v'])
  947. print("%ld%P\tu2 %B $%d.%d\n", r->loop,
  948. p, blsh(bn), changer, changea);
  949. }
  950. if(STORE(r) & r->regdiff.b[z] & bb) {
  951. changer -= CLOAD * r->loop;
  952. changea -= CLOAD * r->loop;
  953. if(debug['R'] && debug['v'])
  954. print("%ld%P\tst %B $%d.%d\n", r->loop,
  955. p, blsh(bn), changer, changea);
  956. }
  957. if(r->refbehind.b[z] & bb)
  958. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  959. if(r1->refahead.b[z] & bb)
  960. paint1(r1, bn);
  961. if(!(r->refahead.b[z] & bb))
  962. break;
  963. r1 = r->s2;
  964. if(r1 != R)
  965. if(r1->refbehind.b[z] & bb)
  966. paint1(r1, bn);
  967. r = r->s1;
  968. if(r == R)
  969. break;
  970. if(r->act.b[z] & bb)
  971. break;
  972. if(!(r->refbehind.b[z] & bb))
  973. break;
  974. }
  975. }
  976. ulong
  977. paint2(Reg *r, int bn)
  978. {
  979. Reg *r1;
  980. int z;
  981. ulong bb, vreg;
  982. z = bn/32;
  983. bb = 1L << (bn%32);
  984. vreg = regbits;
  985. if(!(r->act.b[z] & bb))
  986. return vreg;
  987. for(;;) {
  988. if(!(r->refbehind.b[z] & bb))
  989. break;
  990. r1 = r->p1;
  991. if(r1 == R)
  992. break;
  993. if(!(r1->refahead.b[z] & bb))
  994. break;
  995. if(!(r1->act.b[z] & bb))
  996. break;
  997. r = r1;
  998. }
  999. for(;;) {
  1000. r->act.b[z] &= ~bb;
  1001. vreg |= r->regu;
  1002. if(r->refbehind.b[z] & bb)
  1003. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1004. if(r1->refahead.b[z] & bb)
  1005. vreg |= paint2(r1, bn);
  1006. if(!(r->refahead.b[z] & bb))
  1007. break;
  1008. r1 = r->s2;
  1009. if(r1 != R)
  1010. if(r1->refbehind.b[z] & bb)
  1011. vreg |= paint2(r1, bn);
  1012. r = r->s1;
  1013. if(r == R)
  1014. break;
  1015. if(!(r->act.b[z] & bb))
  1016. break;
  1017. if(!(r->refbehind.b[z] & bb))
  1018. break;
  1019. }
  1020. return vreg;
  1021. }
  1022. void
  1023. paint3(Reg *r, int bn, ulong rb, int rn)
  1024. {
  1025. Reg *r1;
  1026. Prog *p;
  1027. int z;
  1028. ulong bb;
  1029. z = bn/32;
  1030. bb = 1L << (bn%32);
  1031. if(r->act.b[z] & bb)
  1032. return;
  1033. for(;;) {
  1034. if(!(r->refbehind.b[z] & bb))
  1035. break;
  1036. r1 = r->p1;
  1037. if(r1 == R)
  1038. break;
  1039. if(!(r1->refahead.b[z] & bb))
  1040. break;
  1041. if(r1->act.b[z] & bb)
  1042. break;
  1043. r = r1;
  1044. }
  1045. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1046. addmove(r, bn, rn, 0);
  1047. for(;;) {
  1048. r->act.b[z] |= bb;
  1049. p = r->prog;
  1050. if(r->use1.b[z] & bb) {
  1051. if(debug['R'])
  1052. print("%P", p);
  1053. addreg(&p->from, rn);
  1054. if(debug['R'])
  1055. print("\t.c%P\n", p);
  1056. }
  1057. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1058. if(debug['R'])
  1059. print("%P", p);
  1060. addreg(&p->to, rn);
  1061. if(debug['R'])
  1062. print("\t.c%P\n", p);
  1063. }
  1064. if(STORE(r) & r->regdiff.b[z] & bb)
  1065. addmove(r, bn, rn, 1);
  1066. r->regu |= rb;
  1067. if(r->refbehind.b[z] & bb)
  1068. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1069. if(r1->refahead.b[z] & bb)
  1070. paint3(r1, bn, rb, rn);
  1071. if(!(r->refahead.b[z] & bb))
  1072. break;
  1073. r1 = r->s2;
  1074. if(r1 != R)
  1075. if(r1->refbehind.b[z] & bb)
  1076. paint3(r1, bn, rb, rn);
  1077. r = r->s1;
  1078. if(r == R)
  1079. break;
  1080. if(r->act.b[z] & bb)
  1081. break;
  1082. if(!(r->refbehind.b[z] & bb))
  1083. break;
  1084. }
  1085. }
  1086. void
  1087. addreg(Adr *a, int rn)
  1088. {
  1089. int x;
  1090. a->sym = 0;
  1091. x = a->index;
  1092. if(rn >= D_R0 && rn < D_R0+NREG)
  1093. goto addr;
  1094. if(x == (I_INDEX3|D_NONE)) {
  1095. a->type = rn | I_INDIR;
  1096. a->index = D_NONE;
  1097. a->offset = a->displace;
  1098. a->displace = 0;
  1099. return;
  1100. }
  1101. if(x != D_NONE) {
  1102. a->type = rn | I_INDIR;
  1103. a->index += I_INDEX1 - I_INDEX2;
  1104. a->offset = a->displace;
  1105. a->displace = 0;
  1106. return;
  1107. }
  1108. a->type = rn | (a->type & I_INDIR);
  1109. return;
  1110. addr:
  1111. if(x == (I_INDEX3|D_NONE)) {
  1112. a->type = D_NONE|I_INDIR;
  1113. a->index += I_INDEX1 + rn - D_NONE - I_INDEX3;
  1114. a->scale = 4; /* .L*1 */
  1115. a->offset = a->displace;
  1116. a->displace = 0;
  1117. return;
  1118. }
  1119. a->type = rn | (a->type & I_INDIR);
  1120. }
  1121. /*
  1122. * bit reg
  1123. * 0-7 R0-R7
  1124. * 8-15 A0-A7
  1125. * 16-23 F0-F7
  1126. */
  1127. ulong
  1128. RtoB(int r)
  1129. {
  1130. if(r < 0 || r >= NREG)
  1131. return 0;
  1132. return 1L << (r + 0);
  1133. }
  1134. int
  1135. BtoR(ulong b)
  1136. {
  1137. b &= 0x0000ffL;
  1138. if(b == 0)
  1139. return NREG;
  1140. return bitno(b) - 0;
  1141. }
  1142. ulong
  1143. AtoB(int a)
  1144. {
  1145. if(a < 0 || a >= NREG)
  1146. return 0;
  1147. return 1L << (a + NREG);
  1148. }
  1149. int
  1150. BtoA(ulong b)
  1151. {
  1152. b &= 0x00ff00L;
  1153. if(b == 0)
  1154. return NREG;
  1155. return bitno(b) - NREG;
  1156. }
  1157. ulong
  1158. FtoB(int f)
  1159. {
  1160. if(f < 0 || f >= NREG)
  1161. return 0;
  1162. return 1L << (f + NREG+NREG);
  1163. }
  1164. int
  1165. BtoF(ulong b)
  1166. {
  1167. b &= 0xff0000L;
  1168. if(b == 0)
  1169. return NREG;
  1170. return bitno(b) - NREG-NREG;
  1171. }