reg.c 21 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->costr;
  22. if(p2->costa > c1)
  23. c1 = p2->costa;
  24. c2 = p1->costr;
  25. if(p1->costa > c2)
  26. c2 = p1->costa;
  27. if(c1 -= c2)
  28. return c1;
  29. return p2->varno - p1->varno;
  30. }
  31. void
  32. regopt(Prog *p)
  33. {
  34. Reg *r, *r1, *r2;
  35. Prog *p1;
  36. int i, z;
  37. long val, initpc, npc;
  38. ulong vreg;
  39. Bits bit;
  40. Var *v;
  41. struct {
  42. long m;
  43. long c;
  44. Reg* p;
  45. } log5[6], *lp;
  46. firstr = R;
  47. lastr = R;
  48. nvar = 0;
  49. for(z=0; z<BITS; z++) {
  50. externs.b[z] = 0;
  51. params.b[z] = 0;
  52. addrs.b[z] = 0;
  53. }
  54. regbits = RtoB(0) | /* return reg */
  55. AtoB(6) | AtoB(7) | /* sp and sb */
  56. FtoB(0) | FtoB(1); /* floating return reg */
  57. for(i=0; i<NREG; i++) {
  58. if(regused[i])
  59. regbits |= RtoB(i);
  60. if(fregused[i])
  61. regbits |= FtoB(i);
  62. if(aregused[i])
  63. regbits |= AtoB(i);
  64. }
  65. /*
  66. * pass 1
  67. * build aux data structure
  68. * allocate pcs
  69. * find use and set of variables
  70. */
  71. val = 5L * 5L * 5L * 5L * 5L;
  72. lp = log5;
  73. for(i=0; i<5; i++) {
  74. lp->m = val;
  75. lp->c = 0;
  76. lp->p = R;
  77. val /= 5L;
  78. lp++;
  79. }
  80. val = 0;
  81. for(; p != P; p = p->link) {
  82. switch(p->as) {
  83. case ADATA:
  84. case AGLOBL:
  85. case ANAME:
  86. continue;
  87. }
  88. r = rega();
  89. if(firstr == R) {
  90. firstr = r;
  91. lastr = r;
  92. } else {
  93. lastr->link = r;
  94. r->p1 = lastr;
  95. lastr->s1 = r;
  96. lastr = r;
  97. }
  98. r->prog = p;
  99. r->pc = val;
  100. val++;
  101. lp = log5;
  102. for(i=0; i<5; i++) {
  103. lp->c--;
  104. if(lp->c <= 0) {
  105. lp->c = lp->m;
  106. if(lp->p != R)
  107. lp->p->log5 = r;
  108. lp->p = r;
  109. (lp+1)->c = 0;
  110. break;
  111. }
  112. lp++;
  113. }
  114. r1 = r->p1;
  115. if(r1 != R)
  116. switch(r1->prog->as) {
  117. case ABRA:
  118. case ARTS:
  119. case ARTE:
  120. r->p1 = R;
  121. r1->s1 = R;
  122. }
  123. bit = mkvar(&p->from, AGOK);
  124. if(bany(&bit))
  125. switch(p->as) {
  126. case ALEA:
  127. if(!(mvbits & B_INDIR))
  128. for(z=0; z<BITS; z++)
  129. addrs.b[z] |= bit.b[z];
  130. default:
  131. if(mvbits & B_ADDR)
  132. for(z=0; z<BITS; z++)
  133. addrs.b[z] |= bit.b[z];
  134. for(z=0; z<BITS; z++)
  135. r->use1.b[z] |= bit.b[z];
  136. }
  137. bit = mkvar(&p->to, p->as);
  138. if(bany(&bit))
  139. switch(p->as) {
  140. case ABSR: /* funny */
  141. for(z=0; z<BITS; z++)
  142. addrs.b[z] |= bit.b[z];
  143. goto def;
  144. case APEA:
  145. if(!(mvbits & B_INDIR))
  146. for(z=0; z<BITS; z++)
  147. addrs.b[z] |= bit.b[z];
  148. def:
  149. case ACMPB: case ACMPW: case ACMPL:
  150. case AFCMPF: case AFCMPD:
  151. case ATSTB: case ATSTW: case ATSTL:
  152. case AFTSTF: case AFTSTD:
  153. case ABFEXTU: case ABFEXTS:
  154. if(mvbits & B_ADDR)
  155. for(z=0; z<BITS; z++)
  156. addrs.b[z] |= bit.b[z];
  157. for(z=0; z<BITS; z++)
  158. r->use2.b[z] |= bit.b[z];
  159. break;
  160. default:
  161. diag(Z, "reg: unknown asop: %A", p->as);
  162. case AADDB: case AADDW: case AADDL:
  163. case ASUBB: case ASUBW: case ASUBL:
  164. case AANDB: case AANDW: case AANDL:
  165. case AORB: case AORW: case AORL:
  166. case AEORB: case AEORW: case AEORL:
  167. case ABFINS:
  168. for(z=0; z<BITS; z++)
  169. r->use2.b[z] |= bit.b[z];
  170. case ANOP:
  171. case AMOVB: case AMOVW: case AMOVL:
  172. case AFMOVEB: case AFMOVEW: case AFMOVEL:
  173. case ACLRB: case ACLRW: case ACLRL:
  174. case AFMOVEF: case AFMOVED:
  175. if(mvbits & B_INDIR)
  176. for(z=0; z<BITS; z++)
  177. r->use2.b[z] |= bit.b[z];
  178. else
  179. for(z=0; z<BITS; z++)
  180. r->set.b[z] |= bit.b[z];
  181. break;
  182. }
  183. }
  184. if(firstr == R)
  185. return;
  186. initpc = pc - val;
  187. npc = val;
  188. /*
  189. * pass 2
  190. * turn branch references to pointers
  191. * build back pointers
  192. */
  193. for(r = firstr; r != R; r = r->link) {
  194. p = r->prog;
  195. if(p->to.type == D_BRANCH) {
  196. val = p->to.offset - initpc;
  197. r1 = firstr;
  198. while(r1 != R) {
  199. r2 = r1->log5;
  200. if(r2 != R && val >= r2->pc) {
  201. r1 = r2;
  202. continue;
  203. }
  204. if(r1->pc == val)
  205. break;
  206. r1 = r1->link;
  207. }
  208. if(r1 == R) {
  209. diag(Z, "ref not found\n%L:%P", p->lineno, p);
  210. continue;
  211. }
  212. if(r1 == r) {
  213. diag(Z, "ref to self");
  214. continue;
  215. }
  216. r->s2 = r1;
  217. r->p2link = r1->p2;
  218. r1->p2 = r;
  219. }
  220. }
  221. if(debug['R'])
  222. print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
  223. /*
  224. * pass 2.5
  225. * find looping structure
  226. */
  227. for(r = firstr; r != R; r = r->link)
  228. r->active = 0;
  229. changer = 0;
  230. loopit(firstr, npc);
  231. if(debug['R'] && debug['v']) {
  232. print("\nlooping structure:\n");
  233. for(r = firstr; r != R; r = r->link) {
  234. print("%ld:%P", r->loop, r->prog);
  235. for(z=0; z<BITS; z++)
  236. bit.b[z] = r->use1.b[z] |
  237. r->use2.b[z] | r->set.b[z];
  238. if(bany(&bit)) {
  239. print("\t");
  240. if(bany(&r->use1))
  241. print(" u1=%B", r->use1);
  242. if(bany(&r->use2))
  243. print(" u2=%B", r->use2);
  244. if(bany(&r->set))
  245. print(" st=%B", r->set);
  246. }
  247. print("\n");
  248. }
  249. }
  250. /*
  251. * pass 3
  252. * iterate propagating usage
  253. * back until flow graph is complete
  254. */
  255. loop1:
  256. changer = 0;
  257. for(r = firstr; r != R; r = r->link)
  258. r->active = 0;
  259. for(r = firstr; r != R; r = r->link)
  260. if(r->prog->as == ARTS)
  261. prop(r, zbits, zbits);
  262. loop11:
  263. /* pick up unreachable code */
  264. i = 0;
  265. for(r = firstr; r != R; r = r1) {
  266. r1 = r->link;
  267. if(r1 && r1->active && !r->active) {
  268. prop(r, zbits, zbits);
  269. i = 1;
  270. }
  271. }
  272. if(i)
  273. goto loop11;
  274. if(changer)
  275. goto loop1;
  276. /*
  277. * pass 4
  278. * iterate propagating register/variable synchrony
  279. * forward until graph is complete
  280. */
  281. loop2:
  282. changer = 0;
  283. for(r = firstr; r != R; r = r->link)
  284. r->active = 0;
  285. synch(firstr, zbits);
  286. if(changer)
  287. goto loop2;
  288. /*
  289. * pass 5
  290. * isolate regions
  291. * calculate costs (paint1)
  292. */
  293. r = firstr;
  294. if(r) {
  295. for(z=0; z<BITS; z++)
  296. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  297. ~(externs.b[z] | params.b[z] | addrs.b[z]);
  298. if(bany(&bit)) {
  299. nearln = r->prog->lineno;
  300. warn(Z, "used and not set: %B", bit);
  301. if(debug['R'] && !debug['w'])
  302. print("used and not set: %B\n", bit);
  303. /*
  304. * 68040 'feature':
  305. * load of a denormalized fp will trap
  306. */
  307. while(bany(&bit)) {
  308. i = bnum(bit);
  309. bit.b[i/32] &= ~(1L << (i%32));
  310. v = var + i;
  311. if(v->type == D_AUTO) {
  312. r->set.b[i/32] |= (1L << (i%32));
  313. if(typefd[v->etype])
  314. addmove(r, i, NREG+NREG, 1);
  315. }
  316. }
  317. }
  318. }
  319. if(debug['R'] && debug['v'])
  320. print("\nprop structure:\n");
  321. for(r = firstr; r != R; r = r->link) {
  322. if(debug['R'] && debug['v'])
  323. print("%P\n set = %B; rah = %B; cal = %B\n",
  324. r->prog, r->set, r->refahead, r->calahead);
  325. r->act = zbits;
  326. }
  327. rgp = region;
  328. nregion = 0;
  329. for(r = firstr; r != R; r = r->link) {
  330. for(z=0; z<BITS; z++)
  331. bit.b[z] = r->set.b[z] &
  332. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  333. if(bany(&bit)) {
  334. nearln = r->prog->lineno;
  335. warn(Z, "set and not used: %B", bit);
  336. if(debug['R'])
  337. print("set an not used: %B\n", bit);
  338. excise(r);
  339. }
  340. for(z=0; z<BITS; z++)
  341. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  342. while(bany(&bit)) {
  343. i = bnum(bit);
  344. rgp->enter = r;
  345. rgp->varno = i;
  346. changer = 0;
  347. changea = 0;
  348. if(debug['R'] && debug['v'])
  349. print("\n");
  350. paint1(r, i);
  351. bit.b[i/32] &= ~(1L<<(i%32));
  352. if(changer <= 0 && changea <= 0) {
  353. if(debug['R'])
  354. print("%L$%d.%d: %B\n",
  355. r->prog->lineno,
  356. changer, changea, blsh(i));
  357. continue;
  358. }
  359. rgp->costr = changer;
  360. rgp->costa = changea;
  361. nregion++;
  362. if(nregion >= NRGN) {
  363. warn(Z, "too many regions");
  364. goto brk;
  365. }
  366. rgp++;
  367. }
  368. }
  369. brk:
  370. qsort(region, nregion, sizeof(region[0]), rcmp);
  371. /*
  372. * pass 6
  373. * determine used registers (paint2)
  374. * replace code (paint3)
  375. */
  376. rgp = region;
  377. for(i=0; i<nregion; i++) {
  378. bit = blsh(rgp->varno);
  379. vreg = paint2(rgp->enter, rgp->varno);
  380. vreg = allreg(vreg, rgp);
  381. if(debug['R'])
  382. print("%L$%d.%d %R: %B\n",
  383. rgp->enter->prog->lineno,
  384. rgp->costr, rgp->costa,
  385. rgp->regno,
  386. bit);
  387. if(rgp->regno != D_NONE)
  388. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  389. rgp++;
  390. }
  391. /*
  392. * pass 7
  393. * peep-hole on basic block
  394. */
  395. if(!debug['R'] || debug['P'])
  396. peep();
  397. /*
  398. * pass 8
  399. * recalculate pc
  400. */
  401. val = initpc;
  402. for(r = firstr; r != R; r = r1) {
  403. r->pc = val;
  404. p = r->prog;
  405. p1 = P;
  406. r1 = r->link;
  407. if(r1 != R)
  408. p1 = r1->prog;
  409. for(; p != p1; p = p->link) {
  410. switch(p->as) {
  411. default:
  412. val++;
  413. break;
  414. case ANOP:
  415. case ADATA:
  416. case AGLOBL:
  417. case ANAME:
  418. break;
  419. }
  420. }
  421. }
  422. pc = val;
  423. /*
  424. * fix up branches
  425. */
  426. if(debug['R'])
  427. if(bany(&addrs))
  428. print("addrs: %B\n", addrs);
  429. r1 = 0; /* set */
  430. for(r = firstr; r != R; r = r->link) {
  431. p = r->prog;
  432. if(p->to.type == D_BRANCH)
  433. p->to.offset = r->s2->pc;
  434. r1 = r;
  435. }
  436. /*
  437. * last pass
  438. * eliminate nops
  439. * free aux structures
  440. */
  441. for(p = firstr->prog; p != P; p = p->link){
  442. while(p->link && p->link->as == ANOP)
  443. p->link = p->link->link;
  444. }
  445. if(r1 != R) {
  446. r1->link = freer;
  447. freer = firstr;
  448. }
  449. }
  450. /*
  451. * add mov b,rn
  452. * just after r
  453. */
  454. void
  455. addmove(Reg *r, int bn, int rn, int f)
  456. {
  457. Prog *p, *p1;
  458. Var *v;
  459. int badccr;
  460. badccr = 0;
  461. p = r->prog;
  462. p1 = p->link;
  463. if(p1)
  464. switch(p1->as) {
  465. case AMOVW:
  466. if(p1->from.type == D_CCR)
  467. p = p1;
  468. break;
  469. case ABEQ:
  470. case ABNE:
  471. case ABLE:
  472. case ABLS:
  473. case ABLT:
  474. case ABMI:
  475. case ABGE:
  476. case ABPL:
  477. case ABGT:
  478. case ABHI:
  479. case ABCC:
  480. case ABCS:
  481. p1 = prg();
  482. p1->link = p->link;
  483. p->link = p1;
  484. p1->lineno = p->lineno;
  485. p1->from.type = D_CCR;
  486. p1->to.type = D_TOS;
  487. p1->as = AMOVW;
  488. p = p1;
  489. badccr = 1;
  490. }
  491. p1 = prg();
  492. p1->link = p->link;
  493. p->link = p1;
  494. p1->lineno = p->lineno;
  495. v = var + bn;
  496. p1->from.sym = v->sym;
  497. p1->from.type = v->type;
  498. p1->from.offset = v->offset;
  499. p1->from.etype = v->etype;
  500. p1->to.type = rn;
  501. if(f) {
  502. p1->to = p1->from;
  503. p1->from = zprog.from;
  504. p1->from.type = rn;
  505. }
  506. p1->as = opxt[OAS][v->etype];
  507. if(badccr) {
  508. p = p1;
  509. p1 = prg();
  510. p1->link = p->link;
  511. p->link = p1;
  512. p1->lineno = p->lineno;
  513. p1->from.type = D_TOS;
  514. p1->to.type = D_CCR;
  515. p1->as = AMOVW;
  516. }
  517. if(debug['R'])
  518. print("%P\t.a%P\n", p, p1);
  519. }
  520. Bits
  521. mkvar(Adr *a, int as)
  522. {
  523. Var *v;
  524. int i, t, z;
  525. long o;
  526. Bits bit;
  527. Sym *s;
  528. mvbits = 0;
  529. t = a->type & D_MASK;
  530. switch(t) {
  531. default:
  532. if(t >= D_R0 && t < D_R0+NREG) {
  533. regbits |= RtoB(t-D_R0);
  534. if(as == ADIVUL || as == ADIVSL)
  535. regbits |= RtoB(t-D_R0+1);
  536. }
  537. if(t >= D_A0 && t < D_A0+NREG)
  538. regbits |= AtoB(t-D_A0);
  539. if(t >= D_F0 && t < D_F0+NREG)
  540. regbits |= FtoB(t-D_F0);
  541. goto none;
  542. case D_EXTERN:
  543. case D_STATIC:
  544. case D_AUTO:
  545. case D_PARAM:
  546. break;
  547. }
  548. s = a->sym;
  549. if(s == S)
  550. goto none;
  551. if((a->type & I_MASK) == I_ADDR)
  552. mvbits |= B_ADDR;
  553. o = a->offset;
  554. v = var;
  555. for(i=0; i<nvar; i++) {
  556. if(s == v->sym)
  557. if(t == v->type)
  558. if(o == v->offset)
  559. goto out;
  560. v++;
  561. }
  562. if(s)
  563. if(s->name[0] == '.')
  564. goto none;
  565. if(nvar >= NVAR) {
  566. if(debug['w'] > 1 && s)
  567. warn(Z, "variable not optimized: %s", s->name);
  568. goto none;
  569. }
  570. i = nvar;
  571. nvar++;
  572. v = &var[i];
  573. v->sym = s;
  574. v->offset = o;
  575. v->etype = a->etype;
  576. v->type = t;
  577. if(debug['R'])
  578. print("bit=%2d et=%2d %s (%d,%d,%ld)\n",
  579. i, a->etype, s->name,
  580. (int)v->sym, v->type, v->offset);
  581. out:
  582. bit = blsh(i);
  583. if(t == D_EXTERN || t == D_STATIC)
  584. for(z=0; z<BITS; z++)
  585. externs.b[z] |= bit.b[z];
  586. if(t == D_PARAM)
  587. for(z=0; z<BITS; z++)
  588. params.b[z] |= bit.b[z];
  589. if(a->etype != v->etype || !typechlpfd[a->etype])
  590. for(z=0; z<BITS; z++)
  591. addrs.b[z] |= bit.b[z]; /* funny punning */
  592. return bit;
  593. none:
  594. return zbits;
  595. }
  596. void
  597. prop(Reg *r, Bits ref, Bits cal)
  598. {
  599. Reg *r1, *r2;
  600. int z;
  601. for(r1 = r; r1 != R; r1 = r1->p1) {
  602. for(z=0; z<BITS; z++) {
  603. ref.b[z] |= r1->refahead.b[z];
  604. if(ref.b[z] != r1->refahead.b[z]) {
  605. r1->refahead.b[z] = ref.b[z];
  606. changer++;
  607. }
  608. cal.b[z] |= r1->calahead.b[z];
  609. if(cal.b[z] != r1->calahead.b[z]) {
  610. r1->calahead.b[z] = cal.b[z];
  611. changer++;
  612. }
  613. }
  614. switch(r1->prog->as) {
  615. case ABSR:
  616. for(z=0; z<BITS; z++) {
  617. cal.b[z] |= ref.b[z] | externs.b[z];
  618. ref.b[z] = 0;
  619. }
  620. break;
  621. case ATEXT:
  622. for(z=0; z<BITS; z++) {
  623. cal.b[z] = 0;
  624. ref.b[z] = 0;
  625. }
  626. break;
  627. case ARTS:
  628. for(z=0; z<BITS; z++) {
  629. cal.b[z] = externs.b[z];
  630. ref.b[z] = 0;
  631. }
  632. }
  633. for(z=0; z<BITS; z++) {
  634. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  635. r1->use1.b[z] | r1->use2.b[z];
  636. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  637. r1->refbehind.b[z] = ref.b[z];
  638. r1->calbehind.b[z] = cal.b[z];
  639. }
  640. if(r1->active)
  641. break;
  642. r1->active = 1;
  643. }
  644. for(; r != r1; r = r->p1)
  645. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  646. prop(r2, r->refbehind, r->calbehind);
  647. }
  648. /*
  649. * find looping structure
  650. *
  651. * 1) find reverse postordering
  652. * 2) find approximate dominators,
  653. * the actual dominators if the flow graph is reducible
  654. * otherwise, dominators plus some other non-dominators.
  655. * See Matthew S. Hecht and Jeffrey D. Ullman,
  656. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  657. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  658. * Oct. 1-3, 1973, pp. 207-217.
  659. * 3) find all nodes with a predecessor dominated by the current node.
  660. * such a node is a loop head.
  661. * recursively, all preds with a greater rpo number are in the loop
  662. */
  663. long
  664. postorder(Reg *r, Reg **rpo2r, long n)
  665. {
  666. Reg *r1;
  667. r->rpo = 1;
  668. r1 = r->s1;
  669. if(r1 && !r1->rpo)
  670. n = postorder(r1, rpo2r, n);
  671. r1 = r->s2;
  672. if(r1 && !r1->rpo)
  673. n = postorder(r1, rpo2r, n);
  674. rpo2r[n] = r;
  675. n++;
  676. return n;
  677. }
  678. long
  679. rpolca(long *idom, long rpo1, long rpo2)
  680. {
  681. long t;
  682. if(rpo1 == -1)
  683. return rpo2;
  684. while(rpo1 != rpo2){
  685. if(rpo1 > rpo2){
  686. t = rpo2;
  687. rpo2 = rpo1;
  688. rpo1 = t;
  689. }
  690. while(rpo1 < rpo2){
  691. t = idom[rpo2];
  692. if(t >= rpo2)
  693. sysfatal("bad idom");
  694. rpo2 = t;
  695. }
  696. }
  697. return rpo1;
  698. }
  699. int
  700. doms(long *idom, long r, long s)
  701. {
  702. while(s > r)
  703. s = idom[s];
  704. return s == r;
  705. }
  706. int
  707. loophead(long *idom, Reg *r)
  708. {
  709. long src;
  710. src = r->rpo;
  711. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  712. return 1;
  713. for(r = r->p2; r != R; r = r->p2link)
  714. if(doms(idom, src, r->rpo))
  715. return 1;
  716. return 0;
  717. }
  718. void
  719. loopmark(Reg **rpo2r, long head, Reg *r)
  720. {
  721. if(r->rpo < head || r->active == head)
  722. return;
  723. r->active = head;
  724. r->loop += LOOP;
  725. if(r->p1 != R)
  726. loopmark(rpo2r, head, r->p1);
  727. for(r = r->p2; r != R; r = r->p2link)
  728. loopmark(rpo2r, head, r);
  729. }
  730. void
  731. loopit(Reg *r, long nr)
  732. {
  733. Reg *r1;
  734. long i, d, me;
  735. if(nr > maxnr) {
  736. rpo2r = alloc(nr * sizeof(Reg*));
  737. idom = alloc(nr * sizeof(long));
  738. maxnr = nr;
  739. }
  740. d = postorder(r, rpo2r, 0);
  741. if(d > nr)
  742. sysfatal("too many reg nodes");
  743. nr = d;
  744. for(i = 0; i < nr / 2; i++){
  745. r1 = rpo2r[i];
  746. rpo2r[i] = rpo2r[nr - 1 - i];
  747. rpo2r[nr - 1 - i] = r1;
  748. }
  749. for(i = 0; i < nr; i++)
  750. rpo2r[i]->rpo = i;
  751. idom[0] = 0;
  752. for(i = 0; i < nr; i++){
  753. r1 = rpo2r[i];
  754. me = r1->rpo;
  755. d = -1;
  756. if(r1->p1 != R && r1->p1->rpo < me)
  757. d = r1->p1->rpo;
  758. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  759. if(r1->rpo < me)
  760. d = rpolca(idom, d, r1->rpo);
  761. idom[i] = d;
  762. }
  763. for(i = 0; i < nr; i++){
  764. r1 = rpo2r[i];
  765. r1->loop++;
  766. if(r1->p2 != R && loophead(idom, r1))
  767. loopmark(rpo2r, i, r1);
  768. }
  769. }
  770. void
  771. synch(Reg *r, Bits dif)
  772. {
  773. Reg *r1;
  774. int z;
  775. for(r1 = r; r1 != R; r1 = r1->s1) {
  776. for(z=0; z<BITS; z++) {
  777. dif.b[z] = (dif.b[z] &
  778. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  779. r1->set.b[z] | r1->regdiff.b[z];
  780. if(dif.b[z] != r1->regdiff.b[z]) {
  781. r1->regdiff.b[z] = dif.b[z];
  782. changer++;
  783. }
  784. }
  785. if(r1->active)
  786. break;
  787. r1->active = 1;
  788. for(z=0; z<BITS; z++)
  789. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  790. if(r1->s2 != R)
  791. synch(r1->s2, dif);
  792. }
  793. }
  794. ulong
  795. allreg(ulong b, Rgn *r)
  796. {
  797. Var *v;
  798. int i, j;
  799. v = var + r->varno;
  800. r->regno = D_NONE;
  801. switch(v->etype) {
  802. default:
  803. diag(Z, "unknown etype");
  804. break;
  805. case TCHAR:
  806. case TUCHAR:
  807. case TSHORT:
  808. case TUSHORT:
  809. case TINT:
  810. case TUINT:
  811. case TLONG:
  812. case TULONG:
  813. case TIND:
  814. i = BtoR(~b);
  815. j = BtoA(~b);
  816. if(r->costa == r->costr)
  817. if(i > j)
  818. i = NREG;
  819. if(j < NREG && r->costa > 0)
  820. if(r->costa > r->costr || i >= NREG) {
  821. r->regno = D_A0 + j;
  822. return AtoB(j);
  823. }
  824. if(i < NREG && r->costr > 0) {
  825. r->regno = D_R0 + i;
  826. return RtoB(i);
  827. }
  828. break;
  829. case TDOUBLE:
  830. case TFLOAT:
  831. i = BtoF(~b);
  832. if(i < NREG) {
  833. r->regno = D_F0 + i;
  834. return FtoB(i);
  835. }
  836. break;
  837. }
  838. return 0;
  839. }
  840. void
  841. paint1(Reg *r, int bn)
  842. {
  843. Reg *r1;
  844. Prog *p;
  845. int z;
  846. ulong bb;
  847. int x;
  848. z = bn/32;
  849. bb = 1L<<(bn%32);
  850. if(r->act.b[z] & bb)
  851. return;
  852. for(;;) {
  853. if(!(r->refbehind.b[z] & bb))
  854. break;
  855. r1 = r->p1;
  856. if(r1 == R)
  857. break;
  858. if(!(r1->refahead.b[z] & bb))
  859. break;
  860. if(r1->act.b[z] & bb)
  861. break;
  862. r = r1;
  863. }
  864. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  865. changer -= CLOAD * r->loop;
  866. changea -= CLOAD * r->loop;
  867. if(debug['R'] && debug['v'])
  868. print("%ld%P\tld %B $%d.%d\n", r->loop,
  869. r->prog, blsh(bn), changer, changea);
  870. }
  871. for(;;) {
  872. r->act.b[z] |= bb;
  873. p = r->prog;
  874. if(r->use1.b[z] & bb) {
  875. changer += CREF * r->loop;
  876. changea += CREF * r->loop;
  877. switch(p->as) {
  878. default:
  879. changea = -CINF;
  880. case AADDL:
  881. case ASUBL:
  882. case AMOVL:
  883. case ACMPL:
  884. break;
  885. }
  886. if(p->as == AMOVL) {
  887. x = p->to.type;
  888. if(x >= D_R0 && x < D_R0+NREG)
  889. changer += r->loop;
  890. if(x >= D_A0 && x < D_A0+NREG)
  891. changea += r->loop;
  892. }
  893. if(debug['R'] && debug['v'])
  894. print("%ld%P\tu1 %B $%d.%d\n", r->loop,
  895. p, blsh(bn), changer, changea);
  896. }
  897. if((r->use2.b[z]|r->set.b[z]) & bb) {
  898. changer += CREF * r->loop;
  899. changea += CREF * r->loop;
  900. switch(p->as) {
  901. default:
  902. changea = -CINF;
  903. break;
  904. case AMOVL:
  905. case AADDL:
  906. case ACMPL:
  907. case ASUBL:
  908. case ACLRL: /* can be faked */
  909. case ATSTL: /* can be faked */
  910. break;
  911. }
  912. if(p->as == AMOVL) {
  913. x = p->from.type;
  914. if(x >= D_R0 && x < D_R0+NREG)
  915. changer += r->loop;
  916. if(x >= D_A0 && x < D_A0+NREG)
  917. changea += r->loop;
  918. }
  919. if(debug['R'] && debug['v'])
  920. print("%ld%P\tu2 %B $%d.%d\n", r->loop,
  921. p, blsh(bn), changer, changea);
  922. }
  923. if(STORE(r) & r->regdiff.b[z] & bb) {
  924. changer -= CLOAD * r->loop;
  925. changea -= CLOAD * r->loop;
  926. if(debug['R'] && debug['v'])
  927. print("%ld%P\tst %B $%d.%d\n", r->loop,
  928. p, blsh(bn), changer, changea);
  929. }
  930. if(r->refbehind.b[z] & bb)
  931. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  932. if(r1->refahead.b[z] & bb)
  933. paint1(r1, bn);
  934. if(!(r->refahead.b[z] & bb))
  935. break;
  936. r1 = r->s2;
  937. if(r1 != R)
  938. if(r1->refbehind.b[z] & bb)
  939. paint1(r1, bn);
  940. r = r->s1;
  941. if(r == R)
  942. break;
  943. if(r->act.b[z] & bb)
  944. break;
  945. if(!(r->refbehind.b[z] & bb))
  946. break;
  947. }
  948. }
  949. ulong
  950. paint2(Reg *r, int bn)
  951. {
  952. Reg *r1;
  953. int z;
  954. ulong bb, vreg;
  955. z = bn/32;
  956. bb = 1L << (bn%32);
  957. vreg = regbits;
  958. if(!(r->act.b[z] & bb))
  959. return vreg;
  960. for(;;) {
  961. if(!(r->refbehind.b[z] & bb))
  962. break;
  963. r1 = r->p1;
  964. if(r1 == R)
  965. break;
  966. if(!(r1->refahead.b[z] & bb))
  967. break;
  968. if(!(r1->act.b[z] & bb))
  969. break;
  970. r = r1;
  971. }
  972. for(;;) {
  973. r->act.b[z] &= ~bb;
  974. vreg |= r->regu;
  975. if(r->refbehind.b[z] & bb)
  976. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  977. if(r1->refahead.b[z] & bb)
  978. vreg |= paint2(r1, bn);
  979. if(!(r->refahead.b[z] & bb))
  980. break;
  981. r1 = r->s2;
  982. if(r1 != R)
  983. if(r1->refbehind.b[z] & bb)
  984. vreg |= paint2(r1, bn);
  985. r = r->s1;
  986. if(r == R)
  987. break;
  988. if(!(r->act.b[z] & bb))
  989. break;
  990. if(!(r->refbehind.b[z] & bb))
  991. break;
  992. }
  993. return vreg;
  994. }
  995. void
  996. paint3(Reg *r, int bn, ulong rb, int rn)
  997. {
  998. Reg *r1;
  999. Prog *p;
  1000. int z;
  1001. ulong bb;
  1002. z = bn/32;
  1003. bb = 1L << (bn%32);
  1004. if(r->act.b[z] & bb)
  1005. return;
  1006. for(;;) {
  1007. if(!(r->refbehind.b[z] & bb))
  1008. break;
  1009. r1 = r->p1;
  1010. if(r1 == R)
  1011. break;
  1012. if(!(r1->refahead.b[z] & bb))
  1013. break;
  1014. if(r1->act.b[z] & bb)
  1015. break;
  1016. r = r1;
  1017. }
  1018. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1019. addmove(r, bn, rn, 0);
  1020. for(;;) {
  1021. r->act.b[z] |= bb;
  1022. p = r->prog;
  1023. if(r->use1.b[z] & bb) {
  1024. if(debug['R'])
  1025. print("%P", p);
  1026. addreg(&p->from, rn);
  1027. if(debug['R'])
  1028. print("\t.c%P\n", p);
  1029. }
  1030. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1031. if(debug['R'])
  1032. print("%P", p);
  1033. addreg(&p->to, rn);
  1034. if(debug['R'])
  1035. print("\t.c%P\n", p);
  1036. }
  1037. if(STORE(r) & r->regdiff.b[z] & bb)
  1038. addmove(r, bn, rn, 1);
  1039. r->regu |= rb;
  1040. if(r->refbehind.b[z] & bb)
  1041. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1042. if(r1->refahead.b[z] & bb)
  1043. paint3(r1, bn, rb, rn);
  1044. if(!(r->refahead.b[z] & bb))
  1045. break;
  1046. r1 = r->s2;
  1047. if(r1 != R)
  1048. if(r1->refbehind.b[z] & bb)
  1049. paint3(r1, bn, rb, rn);
  1050. r = r->s1;
  1051. if(r == R)
  1052. break;
  1053. if(r->act.b[z] & bb)
  1054. break;
  1055. if(!(r->refbehind.b[z] & bb))
  1056. break;
  1057. }
  1058. }
  1059. void
  1060. addreg(Adr *a, int rn)
  1061. {
  1062. a->sym = 0;
  1063. if(rn >= D_R0 && rn < D_R0+NREG)
  1064. goto addr;
  1065. a->type = rn | (a->type & I_INDIR);
  1066. return;
  1067. addr:
  1068. a->type = rn | (a->type & I_INDIR);
  1069. }
  1070. /*
  1071. * bit reg
  1072. * 0-7 R0-R7
  1073. * 8-15 A0-A7
  1074. * 16-23 F0-F7
  1075. */
  1076. ulong
  1077. RtoB(int r)
  1078. {
  1079. if(r < 0 || r >= NREG)
  1080. return 0;
  1081. return 1L << (r + 0);
  1082. }
  1083. int
  1084. BtoR(ulong b)
  1085. {
  1086. b &= 0x0000ffL;
  1087. if(b == 0)
  1088. return NREG;
  1089. return bitno(b) - 0;
  1090. }
  1091. ulong
  1092. AtoB(int a)
  1093. {
  1094. if(a < 0 || a >= NREG)
  1095. return 0;
  1096. return 1L << (a + NREG);
  1097. }
  1098. int
  1099. BtoA(ulong b)
  1100. {
  1101. b &= 0x00ff00L;
  1102. if(b == 0)
  1103. return NREG;
  1104. return bitno(b) - NREG;
  1105. }
  1106. ulong
  1107. FtoB(int f)
  1108. {
  1109. if(f < 0 || f >= NREG)
  1110. return 0;
  1111. return 1L << (f + NREG+NREG);
  1112. }
  1113. int
  1114. BtoF(ulong b)
  1115. {
  1116. b &= 0xff0000L;
  1117. if(b == 0)
  1118. return NREG;
  1119. return bitno(b) - NREG-NREG;
  1120. }