reg.c 20 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->cost;
  22. c2 = p1->cost;
  23. if(c1 -= c2)
  24. return c1;
  25. return p2->varno - p1->varno;
  26. }
  27. void
  28. regopt(Prog *p)
  29. {
  30. Reg *r, *r1, *r2;
  31. Prog *p1;
  32. int i, z;
  33. long initpc, val, npc;
  34. ulong vreg;
  35. Bits bit;
  36. struct
  37. {
  38. long m;
  39. long c;
  40. Reg* p;
  41. } log5[6], *lp;
  42. firstr = R;
  43. lastr = R;
  44. nvar = 0;
  45. regbits = RtoB(D_SP) | RtoB(D_AX);
  46. for(z=0; z<BITS; z++) {
  47. externs.b[z] = 0;
  48. params.b[z] = 0;
  49. consts.b[z] = 0;
  50. addrs.b[z] = 0;
  51. }
  52. /*
  53. * pass 1
  54. * build aux data structure
  55. * allocate pcs
  56. * find use and set of variables
  57. */
  58. val = 5L * 5L * 5L * 5L * 5L;
  59. lp = log5;
  60. for(i=0; i<5; i++) {
  61. lp->m = val;
  62. lp->c = 0;
  63. lp->p = R;
  64. val /= 5L;
  65. lp++;
  66. }
  67. val = 0;
  68. for(; p != P; p = p->link) {
  69. switch(p->as) {
  70. case ADATA:
  71. case AGLOBL:
  72. case ANAME:
  73. case ASIGNAME:
  74. continue;
  75. }
  76. r = rega();
  77. if(firstr == R) {
  78. firstr = r;
  79. lastr = r;
  80. } else {
  81. lastr->link = r;
  82. r->p1 = lastr;
  83. lastr->s1 = r;
  84. lastr = r;
  85. }
  86. r->prog = p;
  87. r->pc = val;
  88. val++;
  89. lp = log5;
  90. for(i=0; i<5; i++) {
  91. lp->c--;
  92. if(lp->c <= 0) {
  93. lp->c = lp->m;
  94. if(lp->p != R)
  95. lp->p->log5 = r;
  96. lp->p = r;
  97. (lp+1)->c = 0;
  98. break;
  99. }
  100. lp++;
  101. }
  102. r1 = r->p1;
  103. if(r1 != R)
  104. switch(r1->prog->as) {
  105. case ARET:
  106. case AJMP:
  107. case AIRETL:
  108. r->p1 = R;
  109. r1->s1 = R;
  110. }
  111. bit = mkvar(r, &p->from);
  112. if(bany(&bit))
  113. switch(p->as) {
  114. /*
  115. * funny
  116. */
  117. case ALEAL:
  118. for(z=0; z<BITS; z++)
  119. addrs.b[z] |= bit.b[z];
  120. break;
  121. /*
  122. * left side read
  123. */
  124. default:
  125. for(z=0; z<BITS; z++)
  126. r->use1.b[z] |= bit.b[z];
  127. break;
  128. }
  129. bit = mkvar(r, &p->to);
  130. if(bany(&bit))
  131. switch(p->as) {
  132. default:
  133. diag(Z, "reg: unknown op: %A", p->as);
  134. break;
  135. /*
  136. * right side read
  137. */
  138. case ACMPB:
  139. case ACMPL:
  140. case ACMPW:
  141. for(z=0; z<BITS; z++)
  142. r->use2.b[z] |= bit.b[z];
  143. break;
  144. /*
  145. * right side write
  146. */
  147. case ANOP:
  148. case AMOVL:
  149. case AMOVB:
  150. case AMOVW:
  151. case AMOVBLSX:
  152. case AMOVBLZX:
  153. case AMOVWLSX:
  154. case AMOVWLZX:
  155. for(z=0; z<BITS; z++)
  156. r->set.b[z] |= bit.b[z];
  157. break;
  158. /*
  159. * right side read+write
  160. */
  161. case AADDB:
  162. case AADDL:
  163. case AADDW:
  164. case AANDB:
  165. case AANDL:
  166. case AANDW:
  167. case ASUBB:
  168. case ASUBL:
  169. case ASUBW:
  170. case AORB:
  171. case AORL:
  172. case AORW:
  173. case AXORB:
  174. case AXORL:
  175. case AXORW:
  176. case ASALB:
  177. case ASALL:
  178. case ASALW:
  179. case ASARB:
  180. case ASARL:
  181. case ASARW:
  182. case AROLB:
  183. case AROLL:
  184. case AROLW:
  185. case ARORB:
  186. case ARORL:
  187. case ARORW:
  188. case ASHLB:
  189. case ASHLL:
  190. case ASHLW:
  191. case ASHRB:
  192. case ASHRL:
  193. case ASHRW:
  194. case AIMULL:
  195. case AIMULW:
  196. case ANEGL:
  197. case ANOTL:
  198. case AADCL:
  199. case ASBBL:
  200. for(z=0; z<BITS; z++) {
  201. r->set.b[z] |= bit.b[z];
  202. r->use2.b[z] |= bit.b[z];
  203. }
  204. break;
  205. /*
  206. * funny
  207. */
  208. case AFMOVDP:
  209. case AFMOVFP:
  210. case AFMOVVP:
  211. case ACALL:
  212. for(z=0; z<BITS; z++)
  213. addrs.b[z] |= bit.b[z];
  214. break;
  215. }
  216. switch(p->as) {
  217. case AIMULL:
  218. case AIMULW:
  219. if(p->to.type != D_NONE)
  220. break;
  221. case AIDIVB:
  222. case AIDIVL:
  223. case AIDIVW:
  224. case AIMULB:
  225. case ADIVB:
  226. case ADIVL:
  227. case ADIVW:
  228. case AMULB:
  229. case AMULL:
  230. case AMULW:
  231. case ACWD:
  232. case ACDQ:
  233. r->regu |= RtoB(D_AX) | RtoB(D_DX);
  234. break;
  235. case AREP:
  236. case AREPN:
  237. case ALOOP:
  238. case ALOOPEQ:
  239. case ALOOPNE:
  240. r->regu |= RtoB(D_CX);
  241. break;
  242. case AMOVSB:
  243. case AMOVSL:
  244. case AMOVSW:
  245. case ACMPSB:
  246. case ACMPSL:
  247. case ACMPSW:
  248. r->regu |= RtoB(D_SI) | RtoB(D_DI);
  249. break;
  250. case ASTOSB:
  251. case ASTOSL:
  252. case ASTOSW:
  253. case ASCASB:
  254. case ASCASL:
  255. case ASCASW:
  256. r->regu |= RtoB(D_AX) | RtoB(D_DI);
  257. break;
  258. case AINSB:
  259. case AINSL:
  260. case AINSW:
  261. case AOUTSB:
  262. case AOUTSL:
  263. case AOUTSW:
  264. r->regu |= RtoB(D_DI) | RtoB(D_DX);
  265. break;
  266. case AFSTSW:
  267. case ASAHF:
  268. r->regu |= RtoB(D_AX);
  269. break;
  270. }
  271. }
  272. if(firstr == R)
  273. return;
  274. initpc = pc - val;
  275. npc = val;
  276. /*
  277. * pass 2
  278. * turn branch references to pointers
  279. * build back pointers
  280. */
  281. for(r = firstr; r != R; r = r->link) {
  282. p = r->prog;
  283. if(p->to.type == D_BRANCH) {
  284. val = p->to.offset - initpc;
  285. r1 = firstr;
  286. while(r1 != R) {
  287. r2 = r1->log5;
  288. if(r2 != R && val >= r2->pc) {
  289. r1 = r2;
  290. continue;
  291. }
  292. if(r1->pc == val)
  293. break;
  294. r1 = r1->link;
  295. }
  296. if(r1 == R) {
  297. nearln = p->lineno;
  298. diag(Z, "ref not found\n%P", p);
  299. continue;
  300. }
  301. if(r1 == r) {
  302. nearln = p->lineno;
  303. diag(Z, "ref to self\n%P", p);
  304. continue;
  305. }
  306. r->s2 = r1;
  307. r->p2link = r1->p2;
  308. r1->p2 = r;
  309. }
  310. }
  311. if(debug['R']) {
  312. p = firstr->prog;
  313. print("\n%L %D\n", p->lineno, &p->from);
  314. }
  315. /*
  316. * pass 2.5
  317. * find looping structure
  318. */
  319. for(r = firstr; r != R; r = r->link)
  320. r->active = 0;
  321. change = 0;
  322. loopit(firstr, npc);
  323. if(debug['R'] && debug['v']) {
  324. print("\nlooping structure:\n");
  325. for(r = firstr; r != R; r = r->link) {
  326. print("%ld:%P", r->loop, r->prog);
  327. for(z=0; z<BITS; z++)
  328. bit.b[z] = r->use1.b[z] |
  329. r->use2.b[z] |
  330. r->set.b[z];
  331. if(bany(&bit)) {
  332. print("\t");
  333. if(bany(&r->use1))
  334. print(" u1=%B", r->use1);
  335. if(bany(&r->use2))
  336. print(" u2=%B", r->use2);
  337. if(bany(&r->set))
  338. print(" st=%B", r->set);
  339. }
  340. print("\n");
  341. }
  342. }
  343. /*
  344. * pass 3
  345. * iterate propagating usage
  346. * back until flow graph is complete
  347. */
  348. loop1:
  349. change = 0;
  350. for(r = firstr; r != R; r = r->link)
  351. r->active = 0;
  352. for(r = firstr; r != R; r = r->link)
  353. if(r->prog->as == ARET)
  354. prop(r, zbits, zbits);
  355. loop11:
  356. /* pick up unreachable code */
  357. i = 0;
  358. for(r = firstr; r != R; r = r1) {
  359. r1 = r->link;
  360. if(r1 && r1->active && !r->active) {
  361. prop(r, zbits, zbits);
  362. i = 1;
  363. }
  364. }
  365. if(i)
  366. goto loop11;
  367. if(change)
  368. goto loop1;
  369. /*
  370. * pass 4
  371. * iterate propagating register/variable synchrony
  372. * forward until graph is complete
  373. */
  374. loop2:
  375. change = 0;
  376. for(r = firstr; r != R; r = r->link)
  377. r->active = 0;
  378. synch(firstr, zbits);
  379. if(change)
  380. goto loop2;
  381. /*
  382. * pass 5
  383. * isolate regions
  384. * calculate costs (paint1)
  385. */
  386. r = firstr;
  387. if(r) {
  388. for(z=0; z<BITS; z++)
  389. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  390. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  391. if(bany(&bit)) {
  392. nearln = r->prog->lineno;
  393. warn(Z, "used and not set: %B", bit);
  394. if(debug['R'] && !debug['w'])
  395. print("used and not set: %B\n", bit);
  396. }
  397. }
  398. if(debug['R'] && debug['v'])
  399. print("\nprop structure:\n");
  400. for(r = firstr; r != R; r = r->link)
  401. r->act = zbits;
  402. rgp = region;
  403. nregion = 0;
  404. for(r = firstr; r != R; r = r->link) {
  405. if(debug['R'] && debug['v']) {
  406. print("%P\t", r->prog);
  407. if(bany(&r->set))
  408. print("s:%B ", r->set);
  409. if(bany(&r->refahead))
  410. print("ra:%B ", r->refahead);
  411. if(bany(&r->calahead))
  412. print("ca:%B ", r->calahead);
  413. print("\n");
  414. }
  415. for(z=0; z<BITS; z++)
  416. bit.b[z] = r->set.b[z] &
  417. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  418. if(bany(&bit)) {
  419. nearln = r->prog->lineno;
  420. warn(Z, "set and not used: %B", bit);
  421. if(debug['R'])
  422. print("set and not used: %B\n", bit);
  423. excise(r);
  424. }
  425. for(z=0; z<BITS; z++)
  426. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  427. while(bany(&bit)) {
  428. i = bnum(bit);
  429. rgp->enter = r;
  430. rgp->varno = i;
  431. change = 0;
  432. if(debug['R'] && debug['v'])
  433. print("\n");
  434. paint1(r, i);
  435. bit.b[i/32] &= ~(1L<<(i%32));
  436. if(change <= 0) {
  437. if(debug['R'])
  438. print("%L$%d: %B\n",
  439. r->prog->lineno, change, blsh(i));
  440. continue;
  441. }
  442. rgp->cost = change;
  443. nregion++;
  444. if(nregion >= NRGN) {
  445. warn(Z, "too many regions");
  446. goto brk;
  447. }
  448. rgp++;
  449. }
  450. }
  451. brk:
  452. qsort(region, nregion, sizeof(region[0]), rcmp);
  453. /*
  454. * pass 6
  455. * determine used registers (paint2)
  456. * replace code (paint3)
  457. */
  458. rgp = region;
  459. for(i=0; i<nregion; i++) {
  460. bit = blsh(rgp->varno);
  461. vreg = paint2(rgp->enter, rgp->varno);
  462. vreg = allreg(vreg, rgp);
  463. if(debug['R']) {
  464. print("%L$%d %R: %B\n",
  465. rgp->enter->prog->lineno,
  466. rgp->cost,
  467. rgp->regno,
  468. bit);
  469. }
  470. if(rgp->regno != 0)
  471. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  472. rgp++;
  473. }
  474. /*
  475. * pass 7
  476. * peep-hole on basic block
  477. */
  478. if(!debug['R'] || debug['P'])
  479. peep();
  480. /*
  481. * pass 8
  482. * recalculate pc
  483. */
  484. val = initpc;
  485. for(r = firstr; r != R; r = r1) {
  486. r->pc = val;
  487. p = r->prog;
  488. p1 = P;
  489. r1 = r->link;
  490. if(r1 != R)
  491. p1 = r1->prog;
  492. for(; p != p1; p = p->link) {
  493. switch(p->as) {
  494. default:
  495. val++;
  496. break;
  497. case ANOP:
  498. case ADATA:
  499. case AGLOBL:
  500. case ANAME:
  501. case ASIGNAME:
  502. break;
  503. }
  504. }
  505. }
  506. pc = val;
  507. /*
  508. * fix up branches
  509. */
  510. if(debug['R'])
  511. if(bany(&addrs))
  512. print("addrs: %B\n", addrs);
  513. r1 = 0; /* set */
  514. for(r = firstr; r != R; r = r->link) {
  515. p = r->prog;
  516. if(p->to.type == D_BRANCH)
  517. p->to.offset = r->s2->pc;
  518. r1 = r;
  519. }
  520. /*
  521. * last pass
  522. * eliminate nops
  523. * free aux structures
  524. */
  525. for(p = firstr->prog; p != P; p = p->link){
  526. while(p->link && p->link->as == ANOP)
  527. p->link = p->link->link;
  528. }
  529. if(r1 != R) {
  530. r1->link = freer;
  531. freer = firstr;
  532. }
  533. }
  534. /*
  535. * add mov b,rn
  536. * just after r
  537. */
  538. void
  539. addmove(Reg *r, int bn, int rn, int f)
  540. {
  541. Prog *p, *p1;
  542. Adr *a;
  543. Var *v;
  544. p1 = alloc(sizeof(*p1));
  545. *p1 = zprog;
  546. p = r->prog;
  547. p1->link = p->link;
  548. p->link = p1;
  549. p1->lineno = p->lineno;
  550. v = var + bn;
  551. a = &p1->to;
  552. a->sym = v->sym;
  553. a->offset = v->offset;
  554. a->etype = v->etype;
  555. a->type = v->name;
  556. p1->as = AMOVL;
  557. if(v->etype == TCHAR || v->etype == TUCHAR)
  558. p1->as = AMOVB;
  559. if(v->etype == TSHORT || v->etype == TUSHORT)
  560. p1->as = AMOVW;
  561. p1->from.type = rn;
  562. if(!f) {
  563. p1->from = *a;
  564. *a = zprog.from;
  565. a->type = rn;
  566. if(v->etype == TUCHAR)
  567. p1->as = AMOVB;
  568. if(v->etype == TUSHORT)
  569. p1->as = AMOVW;
  570. }
  571. if(debug['R'])
  572. print("%P\t.a%P\n", p, p1);
  573. }
  574. ulong
  575. doregbits(int r)
  576. {
  577. ulong b;
  578. b = 0;
  579. if(r >= D_INDIR)
  580. r -= D_INDIR;
  581. if(r >= D_AX && r <= D_DI)
  582. b |= RtoB(r);
  583. else
  584. if(r >= D_AL && r <= D_BL)
  585. b |= RtoB(r-D_AL+D_AX);
  586. else
  587. if(r >= D_AH && r <= D_BH)
  588. b |= RtoB(r-D_AH+D_AX);
  589. return b;
  590. }
  591. Bits
  592. mkvar(Reg *r, Adr *a)
  593. {
  594. Var *v;
  595. int i, t, n, et, z;
  596. long o;
  597. Bits bit;
  598. Sym *s;
  599. /*
  600. * mark registers used
  601. */
  602. t = a->type;
  603. r->regu |= doregbits(t);
  604. r->regu |= doregbits(a->index);
  605. switch(t) {
  606. default:
  607. goto none;
  608. case D_ADDR:
  609. a->type = a->index;
  610. bit = mkvar(r, a);
  611. for(z=0; z<BITS; z++)
  612. addrs.b[z] |= bit.b[z];
  613. a->type = t;
  614. goto none;
  615. case D_EXTERN:
  616. case D_STATIC:
  617. case D_PARAM:
  618. case D_AUTO:
  619. n = t;
  620. break;
  621. }
  622. s = a->sym;
  623. if(s == S)
  624. goto none;
  625. if(s->name[0] == '.')
  626. goto none;
  627. et = a->etype;
  628. o = a->offset;
  629. v = var;
  630. for(i=0; i<nvar; i++) {
  631. if(s == v->sym)
  632. if(n == v->name)
  633. if(o == v->offset)
  634. goto out;
  635. v++;
  636. }
  637. if(nvar >= NVAR) {
  638. if(debug['w'] > 1 && s)
  639. warn(Z, "variable not optimized: %s", s->name);
  640. goto none;
  641. }
  642. i = nvar;
  643. nvar++;
  644. v = &var[i];
  645. v->sym = s;
  646. v->offset = o;
  647. v->name = n;
  648. v->etype = et;
  649. if(debug['R'])
  650. print("bit=%2d et=%2d %D\n", i, et, a);
  651. out:
  652. bit = blsh(i);
  653. if(n == D_EXTERN || n == D_STATIC)
  654. for(z=0; z<BITS; z++)
  655. externs.b[z] |= bit.b[z];
  656. if(n == D_PARAM)
  657. for(z=0; z<BITS; z++)
  658. params.b[z] |= bit.b[z];
  659. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  660. for(z=0; z<BITS; z++)
  661. addrs.b[z] |= bit.b[z];
  662. return bit;
  663. none:
  664. return zbits;
  665. }
  666. void
  667. prop(Reg *r, Bits ref, Bits cal)
  668. {
  669. Reg *r1, *r2;
  670. int z;
  671. for(r1 = r; r1 != R; r1 = r1->p1) {
  672. for(z=0; z<BITS; z++) {
  673. ref.b[z] |= r1->refahead.b[z];
  674. if(ref.b[z] != r1->refahead.b[z]) {
  675. r1->refahead.b[z] = ref.b[z];
  676. change++;
  677. }
  678. cal.b[z] |= r1->calahead.b[z];
  679. if(cal.b[z] != r1->calahead.b[z]) {
  680. r1->calahead.b[z] = cal.b[z];
  681. change++;
  682. }
  683. }
  684. switch(r1->prog->as) {
  685. case ACALL:
  686. for(z=0; z<BITS; z++) {
  687. cal.b[z] |= ref.b[z] | externs.b[z];
  688. ref.b[z] = 0;
  689. }
  690. break;
  691. case ATEXT:
  692. for(z=0; z<BITS; z++) {
  693. cal.b[z] = 0;
  694. ref.b[z] = 0;
  695. }
  696. break;
  697. case ARET:
  698. for(z=0; z<BITS; z++) {
  699. cal.b[z] = externs.b[z];
  700. ref.b[z] = 0;
  701. }
  702. }
  703. for(z=0; z<BITS; z++) {
  704. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  705. r1->use1.b[z] | r1->use2.b[z];
  706. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  707. r1->refbehind.b[z] = ref.b[z];
  708. r1->calbehind.b[z] = cal.b[z];
  709. }
  710. if(r1->active)
  711. break;
  712. r1->active = 1;
  713. }
  714. for(; r != r1; r = r->p1)
  715. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  716. prop(r2, r->refbehind, r->calbehind);
  717. }
  718. /*
  719. * find looping structure
  720. *
  721. * 1) find reverse postordering
  722. * 2) find approximate dominators,
  723. * the actual dominators if the flow graph is reducible
  724. * otherwise, dominators plus some other non-dominators.
  725. * See Matthew S. Hecht and Jeffrey D. Ullman,
  726. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  727. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  728. * Oct. 1-3, 1973, pp. 207-217.
  729. * 3) find all nodes with a predecessor dominated by the current node.
  730. * such a node is a loop head.
  731. * recursively, all preds with a greater rpo number are in the loop
  732. */
  733. long
  734. postorder(Reg *r, Reg **rpo2r, long n)
  735. {
  736. Reg *r1;
  737. r->rpo = 1;
  738. r1 = r->s1;
  739. if(r1 && !r1->rpo)
  740. n = postorder(r1, rpo2r, n);
  741. r1 = r->s2;
  742. if(r1 && !r1->rpo)
  743. n = postorder(r1, rpo2r, n);
  744. rpo2r[n] = r;
  745. n++;
  746. return n;
  747. }
  748. long
  749. rpolca(long *idom, long rpo1, long rpo2)
  750. {
  751. long t;
  752. if(rpo1 == -1)
  753. return rpo2;
  754. while(rpo1 != rpo2){
  755. if(rpo1 > rpo2){
  756. t = rpo2;
  757. rpo2 = rpo1;
  758. rpo1 = t;
  759. }
  760. while(rpo1 < rpo2){
  761. t = idom[rpo2];
  762. if(t >= rpo2)
  763. fatal(Z, "bad idom");
  764. rpo2 = t;
  765. }
  766. }
  767. return rpo1;
  768. }
  769. int
  770. doms(long *idom, long r, long s)
  771. {
  772. while(s > r)
  773. s = idom[s];
  774. return s == r;
  775. }
  776. int
  777. loophead(long *idom, Reg *r)
  778. {
  779. long src;
  780. src = r->rpo;
  781. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  782. return 1;
  783. for(r = r->p2; r != R; r = r->p2link)
  784. if(doms(idom, src, r->rpo))
  785. return 1;
  786. return 0;
  787. }
  788. void
  789. loopmark(Reg **rpo2r, long head, Reg *r)
  790. {
  791. if(r->rpo < head || r->active == head)
  792. return;
  793. r->active = head;
  794. r->loop += LOOP;
  795. if(r->p1 != R)
  796. loopmark(rpo2r, head, r->p1);
  797. for(r = r->p2; r != R; r = r->p2link)
  798. loopmark(rpo2r, head, r);
  799. }
  800. void
  801. loopit(Reg *r, long nr)
  802. {
  803. Reg *r1;
  804. long i, d, me;
  805. if(nr > maxnr) {
  806. rpo2r = alloc(nr * sizeof(Reg*));
  807. idom = alloc(nr * sizeof(long));
  808. maxnr = nr;
  809. }
  810. d = postorder(r, rpo2r, 0);
  811. if(d > nr)
  812. fatal(Z, "too many reg nodes");
  813. nr = d;
  814. for(i = 0; i < nr / 2; i++){
  815. r1 = rpo2r[i];
  816. rpo2r[i] = rpo2r[nr - 1 - i];
  817. rpo2r[nr - 1 - i] = r1;
  818. }
  819. for(i = 0; i < nr; i++)
  820. rpo2r[i]->rpo = i;
  821. idom[0] = 0;
  822. for(i = 0; i < nr; i++){
  823. r1 = rpo2r[i];
  824. me = r1->rpo;
  825. d = -1;
  826. if(r1->p1 != R && r1->p1->rpo < me)
  827. d = r1->p1->rpo;
  828. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  829. if(r1->rpo < me)
  830. d = rpolca(idom, d, r1->rpo);
  831. idom[i] = d;
  832. }
  833. for(i = 0; i < nr; i++){
  834. r1 = rpo2r[i];
  835. r1->loop++;
  836. if(r1->p2 != R && loophead(idom, r1))
  837. loopmark(rpo2r, i, r1);
  838. }
  839. }
  840. void
  841. synch(Reg *r, Bits dif)
  842. {
  843. Reg *r1;
  844. int z;
  845. for(r1 = r; r1 != R; r1 = r1->s1) {
  846. for(z=0; z<BITS; z++) {
  847. dif.b[z] = (dif.b[z] &
  848. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  849. r1->set.b[z] | r1->regdiff.b[z];
  850. if(dif.b[z] != r1->regdiff.b[z]) {
  851. r1->regdiff.b[z] = dif.b[z];
  852. change++;
  853. }
  854. }
  855. if(r1->active)
  856. break;
  857. r1->active = 1;
  858. for(z=0; z<BITS; z++)
  859. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  860. if(r1->s2 != R)
  861. synch(r1->s2, dif);
  862. }
  863. }
  864. ulong
  865. allreg(ulong b, Rgn *r)
  866. {
  867. Var *v;
  868. int i;
  869. v = var + r->varno;
  870. r->regno = 0;
  871. switch(v->etype) {
  872. default:
  873. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  874. break;
  875. case TCHAR:
  876. case TUCHAR:
  877. case TSHORT:
  878. case TUSHORT:
  879. case TINT:
  880. case TUINT:
  881. case TLONG:
  882. case TULONG:
  883. case TIND:
  884. case TARRAY:
  885. i = BtoR(~b);
  886. if(i && r->cost > 0) {
  887. r->regno = i;
  888. return RtoB(i);
  889. }
  890. break;
  891. case TDOUBLE:
  892. case TFLOAT:
  893. break;
  894. }
  895. return 0;
  896. }
  897. void
  898. paint1(Reg *r, int bn)
  899. {
  900. Reg *r1;
  901. Prog *p;
  902. int z;
  903. ulong bb;
  904. z = bn/32;
  905. bb = 1L<<(bn%32);
  906. if(r->act.b[z] & bb)
  907. return;
  908. for(;;) {
  909. if(!(r->refbehind.b[z] & bb))
  910. break;
  911. r1 = r->p1;
  912. if(r1 == R)
  913. break;
  914. if(!(r1->refahead.b[z] & bb))
  915. break;
  916. if(r1->act.b[z] & bb)
  917. break;
  918. r = r1;
  919. }
  920. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  921. change -= CLOAD * r->loop;
  922. if(debug['R'] && debug['v'])
  923. print("%ld%P\tld %B $%d\n", r->loop,
  924. r->prog, blsh(bn), change);
  925. }
  926. for(;;) {
  927. r->act.b[z] |= bb;
  928. p = r->prog;
  929. if(r->use1.b[z] & bb) {
  930. change += CREF * r->loop;
  931. if(p->as == AFMOVL)
  932. if(BtoR(bb) != D_F0)
  933. change = -CINF;
  934. if(debug['R'] && debug['v'])
  935. print("%ld%P\tu1 %B $%d\n", r->loop,
  936. p, blsh(bn), change);
  937. }
  938. if((r->use2.b[z]|r->set.b[z]) & bb) {
  939. change += CREF * r->loop;
  940. if(p->as == AFMOVL)
  941. if(BtoR(bb) != D_F0)
  942. change = -CINF;
  943. if(debug['R'] && debug['v'])
  944. print("%ld%P\tu2 %B $%d\n", r->loop,
  945. p, blsh(bn), change);
  946. }
  947. if(STORE(r) & r->regdiff.b[z] & bb) {
  948. change -= CLOAD * r->loop;
  949. if(p->as == AFMOVL)
  950. if(BtoR(bb) != D_F0)
  951. change = -CINF;
  952. if(debug['R'] && debug['v'])
  953. print("%ld%P\tst %B $%d\n", r->loop,
  954. p, blsh(bn), change);
  955. }
  956. if(r->refbehind.b[z] & bb)
  957. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  958. if(r1->refahead.b[z] & bb)
  959. paint1(r1, bn);
  960. if(!(r->refahead.b[z] & bb))
  961. break;
  962. r1 = r->s2;
  963. if(r1 != R)
  964. if(r1->refbehind.b[z] & bb)
  965. paint1(r1, bn);
  966. r = r->s1;
  967. if(r == R)
  968. break;
  969. if(r->act.b[z] & bb)
  970. break;
  971. if(!(r->refbehind.b[z] & bb))
  972. break;
  973. }
  974. }
  975. ulong
  976. regset(Reg *r, ulong bb)
  977. {
  978. ulong b, set;
  979. Adr v;
  980. int c;
  981. set = 0;
  982. v = zprog.from;
  983. while(b = bb & ~(bb-1)) {
  984. v.type = BtoR(b);
  985. c = copyu(r->prog, &v, A);
  986. if(c == 3)
  987. set |= b;
  988. bb &= ~b;
  989. }
  990. return set;
  991. }
  992. ulong
  993. reguse(Reg *r, ulong bb)
  994. {
  995. ulong b, set;
  996. Adr v;
  997. int c;
  998. set = 0;
  999. v = zprog.from;
  1000. while(b = bb & ~(bb-1)) {
  1001. v.type = BtoR(b);
  1002. c = copyu(r->prog, &v, A);
  1003. if(c == 1 || c == 2 || c == 4)
  1004. set |= b;
  1005. bb &= ~b;
  1006. }
  1007. return set;
  1008. }
  1009. ulong
  1010. paint2(Reg *r, int bn)
  1011. {
  1012. Reg *r1;
  1013. int z;
  1014. ulong bb, vreg, x;
  1015. z = bn/32;
  1016. bb = 1L << (bn%32);
  1017. vreg = regbits;
  1018. if(!(r->act.b[z] & bb))
  1019. return vreg;
  1020. for(;;) {
  1021. if(!(r->refbehind.b[z] & bb))
  1022. break;
  1023. r1 = r->p1;
  1024. if(r1 == R)
  1025. break;
  1026. if(!(r1->refahead.b[z] & bb))
  1027. break;
  1028. if(!(r1->act.b[z] & bb))
  1029. break;
  1030. r = r1;
  1031. }
  1032. for(;;) {
  1033. r->act.b[z] &= ~bb;
  1034. vreg |= r->regu;
  1035. if(r->refbehind.b[z] & bb)
  1036. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1037. if(r1->refahead.b[z] & bb)
  1038. vreg |= paint2(r1, bn);
  1039. if(!(r->refahead.b[z] & bb))
  1040. break;
  1041. r1 = r->s2;
  1042. if(r1 != R)
  1043. if(r1->refbehind.b[z] & bb)
  1044. vreg |= paint2(r1, bn);
  1045. r = r->s1;
  1046. if(r == R)
  1047. break;
  1048. if(!(r->act.b[z] & bb))
  1049. break;
  1050. if(!(r->refbehind.b[z] & bb))
  1051. break;
  1052. }
  1053. bb = vreg;
  1054. for(; r; r=r->s1) {
  1055. x = r->regu & ~bb;
  1056. if(x) {
  1057. vreg |= reguse(r, x);
  1058. bb |= regset(r, x);
  1059. }
  1060. }
  1061. return vreg;
  1062. }
  1063. void
  1064. paint3(Reg *r, int bn, long rb, int rn)
  1065. {
  1066. Reg *r1;
  1067. Prog *p;
  1068. int z;
  1069. ulong bb;
  1070. z = bn/32;
  1071. bb = 1L << (bn%32);
  1072. if(r->act.b[z] & bb)
  1073. return;
  1074. for(;;) {
  1075. if(!(r->refbehind.b[z] & bb))
  1076. break;
  1077. r1 = r->p1;
  1078. if(r1 == R)
  1079. break;
  1080. if(!(r1->refahead.b[z] & bb))
  1081. break;
  1082. if(r1->act.b[z] & bb)
  1083. break;
  1084. r = r1;
  1085. }
  1086. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1087. addmove(r, bn, rn, 0);
  1088. for(;;) {
  1089. r->act.b[z] |= bb;
  1090. p = r->prog;
  1091. if(r->use1.b[z] & bb) {
  1092. if(debug['R'])
  1093. print("%P", p);
  1094. addreg(&p->from, rn);
  1095. if(debug['R'])
  1096. print("\t.c%P\n", p);
  1097. }
  1098. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1099. if(debug['R'])
  1100. print("%P", p);
  1101. addreg(&p->to, rn);
  1102. if(debug['R'])
  1103. print("\t.c%P\n", p);
  1104. }
  1105. if(STORE(r) & r->regdiff.b[z] & bb)
  1106. addmove(r, bn, rn, 1);
  1107. r->regu |= rb;
  1108. if(r->refbehind.b[z] & bb)
  1109. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1110. if(r1->refahead.b[z] & bb)
  1111. paint3(r1, bn, rb, rn);
  1112. if(!(r->refahead.b[z] & bb))
  1113. break;
  1114. r1 = r->s2;
  1115. if(r1 != R)
  1116. if(r1->refbehind.b[z] & bb)
  1117. paint3(r1, bn, rb, rn);
  1118. r = r->s1;
  1119. if(r == R)
  1120. break;
  1121. if(r->act.b[z] & bb)
  1122. break;
  1123. if(!(r->refbehind.b[z] & bb))
  1124. break;
  1125. }
  1126. }
  1127. void
  1128. addreg(Adr *a, int rn)
  1129. {
  1130. a->sym = 0;
  1131. a->offset = 0;
  1132. a->type = rn;
  1133. }
  1134. long
  1135. RtoB(int r)
  1136. {
  1137. if(r < D_AX || r > D_DI)
  1138. return 0;
  1139. return 1L << (r-D_AX);
  1140. }
  1141. int
  1142. BtoR(long b)
  1143. {
  1144. b &= 0xffL;
  1145. if(b == 0)
  1146. return 0;
  1147. return bitno(b) + D_AX;
  1148. }