reg.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->cost;
  22. c2 = p1->cost;
  23. if(c1 -= c2)
  24. return c1;
  25. return p2->varno - p1->varno;
  26. }
  27. void
  28. regopt(Prog *p)
  29. {
  30. Reg *r, *r1, *r2;
  31. Prog *p1;
  32. int i, z;
  33. long initpc, val, npc;
  34. ulong vreg;
  35. Bits bit;
  36. struct
  37. {
  38. long m;
  39. long c;
  40. Reg* p;
  41. } log5[6], *lp;
  42. firstr = R;
  43. lastr = R;
  44. nvar = 0;
  45. regbits = RtoB(D_SP) | RtoB(D_AX);
  46. for(z=0; z<BITS; z++) {
  47. externs.b[z] = 0;
  48. params.b[z] = 0;
  49. consts.b[z] = 0;
  50. addrs.b[z] = 0;
  51. }
  52. /*
  53. * pass 1
  54. * build aux data structure
  55. * allocate pcs
  56. * find use and set of variables
  57. */
  58. val = 5L * 5L * 5L * 5L * 5L;
  59. lp = log5;
  60. for(i=0; i<5; i++) {
  61. lp->m = val;
  62. lp->c = 0;
  63. lp->p = R;
  64. val /= 5L;
  65. lp++;
  66. }
  67. val = 0;
  68. for(; p != P; p = p->link) {
  69. switch(p->as) {
  70. case ADATA:
  71. case AGLOBL:
  72. case ANAME:
  73. continue;
  74. }
  75. r = rega();
  76. if(firstr == R) {
  77. firstr = r;
  78. lastr = r;
  79. } else {
  80. lastr->link = r;
  81. r->p1 = lastr;
  82. lastr->s1 = r;
  83. lastr = r;
  84. }
  85. r->prog = p;
  86. r->pc = val;
  87. val++;
  88. lp = log5;
  89. for(i=0; i<5; i++) {
  90. lp->c--;
  91. if(lp->c <= 0) {
  92. lp->c = lp->m;
  93. if(lp->p != R)
  94. lp->p->log5 = r;
  95. lp->p = r;
  96. (lp+1)->c = 0;
  97. break;
  98. }
  99. lp++;
  100. }
  101. r1 = r->p1;
  102. if(r1 != R)
  103. switch(r1->prog->as) {
  104. case ARET:
  105. case AJMP:
  106. case AIRETL:
  107. r->p1 = R;
  108. r1->s1 = R;
  109. }
  110. bit = mkvar(r, &p->from);
  111. if(bany(&bit))
  112. switch(p->as) {
  113. /*
  114. * funny
  115. */
  116. case ALEAL:
  117. for(z=0; z<BITS; z++)
  118. addrs.b[z] |= bit.b[z];
  119. break;
  120. /*
  121. * left side read
  122. */
  123. default:
  124. for(z=0; z<BITS; z++)
  125. r->use1.b[z] |= bit.b[z];
  126. break;
  127. }
  128. bit = mkvar(r, &p->to);
  129. if(bany(&bit))
  130. switch(p->as) {
  131. default:
  132. diag(Z, "reg: unknown op: %A", p->as);
  133. break;
  134. /*
  135. * right side read
  136. */
  137. case ACMPB:
  138. case ACMPL:
  139. case ACMPW:
  140. for(z=0; z<BITS; z++)
  141. r->use2.b[z] |= bit.b[z];
  142. break;
  143. /*
  144. * right side write
  145. */
  146. case ANOP:
  147. case AMOVL:
  148. case AMOVB:
  149. case AMOVW:
  150. case AMOVBLSX:
  151. case AMOVBLZX:
  152. case AMOVWLSX:
  153. case AMOVWLZX:
  154. for(z=0; z<BITS; z++)
  155. r->set.b[z] |= bit.b[z];
  156. break;
  157. /*
  158. * right side read+write
  159. */
  160. case AADDB:
  161. case AADDL:
  162. case AADDW:
  163. case AANDB:
  164. case AANDL:
  165. case AANDW:
  166. case ASUBB:
  167. case ASUBL:
  168. case ASUBW:
  169. case AORB:
  170. case AORL:
  171. case AORW:
  172. case AXORB:
  173. case AXORL:
  174. case AXORW:
  175. case ASALB:
  176. case ASALL:
  177. case ASALW:
  178. case ASARB:
  179. case ASARL:
  180. case ASARW:
  181. case AROLB:
  182. case AROLL:
  183. case AROLW:
  184. case ARORB:
  185. case ARORL:
  186. case ARORW:
  187. case ASHLB:
  188. case ASHLL:
  189. case ASHLW:
  190. case ASHRB:
  191. case ASHRL:
  192. case ASHRW:
  193. case AIMULL:
  194. case AIMULW:
  195. for(z=0; z<BITS; z++) {
  196. r->set.b[z] |= bit.b[z];
  197. r->use2.b[z] |= bit.b[z];
  198. }
  199. break;
  200. /*
  201. * funny
  202. */
  203. case AFMOVDP:
  204. case AFMOVFP:
  205. case ACALL:
  206. for(z=0; z<BITS; z++)
  207. addrs.b[z] |= bit.b[z];
  208. break;
  209. }
  210. switch(p->as) {
  211. case AIMULL:
  212. case AIMULW:
  213. if(p->to.type != D_NONE)
  214. break;
  215. case AIDIVB:
  216. case AIDIVL:
  217. case AIDIVW:
  218. case AIMULB:
  219. case ADIVB:
  220. case ADIVL:
  221. case ADIVW:
  222. case AMULB:
  223. case AMULL:
  224. case AMULW:
  225. case ACWD:
  226. case ACDQ:
  227. r->regu |= RtoB(D_AX) | RtoB(D_DX);
  228. break;
  229. case AREP:
  230. case AREPN:
  231. case ALOOP:
  232. case ALOOPEQ:
  233. case ALOOPNE:
  234. r->regu |= RtoB(D_CX);
  235. break;
  236. case AMOVSB:
  237. case AMOVSL:
  238. case AMOVSW:
  239. case ACMPSB:
  240. case ACMPSL:
  241. case ACMPSW:
  242. r->regu |= RtoB(D_SI) | RtoB(D_DI);
  243. break;
  244. case ASTOSB:
  245. case ASTOSL:
  246. case ASTOSW:
  247. case ASCASB:
  248. case ASCASL:
  249. case ASCASW:
  250. r->regu |= RtoB(D_AX) | RtoB(D_DI);
  251. break;
  252. case AINSB:
  253. case AINSL:
  254. case AINSW:
  255. case AOUTSB:
  256. case AOUTSL:
  257. case AOUTSW:
  258. r->regu |= RtoB(D_DI) | RtoB(D_DX);
  259. break;
  260. case AFSTSW:
  261. case ASAHF:
  262. r->regu |= RtoB(D_AX);
  263. break;
  264. }
  265. }
  266. if(firstr == R)
  267. return;
  268. initpc = pc - val;
  269. npc = val;
  270. /*
  271. * pass 2
  272. * turn branch references to pointers
  273. * build back pointers
  274. */
  275. for(r = firstr; r != R; r = r->link) {
  276. p = r->prog;
  277. if(p->to.type == D_BRANCH) {
  278. val = p->to.offset - initpc;
  279. r1 = firstr;
  280. while(r1 != R) {
  281. r2 = r1->log5;
  282. if(r2 != R && val >= r2->pc) {
  283. r1 = r2;
  284. continue;
  285. }
  286. if(r1->pc == val)
  287. break;
  288. r1 = r1->link;
  289. }
  290. if(r1 == R) {
  291. nearln = p->lineno;
  292. diag(Z, "ref not found\n%P", p);
  293. continue;
  294. }
  295. if(r1 == r) {
  296. nearln = p->lineno;
  297. diag(Z, "ref to self\n%P", p);
  298. continue;
  299. }
  300. r->s2 = r1;
  301. r->p2link = r1->p2;
  302. r1->p2 = r;
  303. }
  304. }
  305. if(debug['R']) {
  306. p = firstr->prog;
  307. print("\n%L %D\n", p->lineno, &p->from);
  308. }
  309. /*
  310. * pass 2.5
  311. * find looping structure
  312. */
  313. for(r = firstr; r != R; r = r->link)
  314. r->active = 0;
  315. change = 0;
  316. loopit(firstr, npc);
  317. if(debug['R'] && debug['v']) {
  318. print("\nlooping structure:\n");
  319. for(r = firstr; r != R; r = r->link) {
  320. print("%ld:%P", r->loop, r->prog);
  321. for(z=0; z<BITS; z++)
  322. bit.b[z] = r->use1.b[z] |
  323. r->use2.b[z] |
  324. r->set.b[z];
  325. if(bany(&bit)) {
  326. print("\t");
  327. if(bany(&r->use1))
  328. print(" u1=%B", r->use1);
  329. if(bany(&r->use2))
  330. print(" u2=%B", r->use2);
  331. if(bany(&r->set))
  332. print(" st=%B", r->set);
  333. }
  334. print("\n");
  335. }
  336. }
  337. /*
  338. * pass 3
  339. * iterate propagating usage
  340. * back until flow graph is complete
  341. */
  342. loop1:
  343. change = 0;
  344. for(r = firstr; r != R; r = r->link)
  345. r->active = 0;
  346. for(r = firstr; r != R; r = r->link)
  347. if(r->prog->as == ARET)
  348. prop(r, zbits, zbits);
  349. loop11:
  350. /* pick up unreachable code */
  351. i = 0;
  352. for(r = firstr; r != R; r = r1) {
  353. r1 = r->link;
  354. if(r1 && r1->active && !r->active) {
  355. prop(r, zbits, zbits);
  356. i = 1;
  357. }
  358. }
  359. if(i)
  360. goto loop11;
  361. if(change)
  362. goto loop1;
  363. /*
  364. * pass 4
  365. * iterate propagating register/variable synchrony
  366. * forward until graph is complete
  367. */
  368. loop2:
  369. change = 0;
  370. for(r = firstr; r != R; r = r->link)
  371. r->active = 0;
  372. synch(firstr, zbits);
  373. if(change)
  374. goto loop2;
  375. /*
  376. * pass 5
  377. * isolate regions
  378. * calculate costs (paint1)
  379. */
  380. r = firstr;
  381. if(r) {
  382. for(z=0; z<BITS; z++)
  383. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  384. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  385. if(bany(&bit)) {
  386. nearln = r->prog->lineno;
  387. warn(Z, "used and not set: %B", bit);
  388. if(debug['R'] && !debug['w'])
  389. print("used and not set: %B\n", bit);
  390. }
  391. }
  392. if(debug['R'] && debug['v'])
  393. print("\nprop structure:\n");
  394. for(r = firstr; r != R; r = r->link)
  395. r->act = zbits;
  396. rgp = region;
  397. nregion = 0;
  398. for(r = firstr; r != R; r = r->link) {
  399. if(debug['R'] && debug['v']) {
  400. print("%P\t", r->prog);
  401. if(bany(&r->set))
  402. print("s:%B ", r->set);
  403. if(bany(&r->refahead))
  404. print("ra:%B ", r->refahead);
  405. if(bany(&r->calahead))
  406. print("ca:%B ", r->calahead);
  407. print("\n");
  408. }
  409. for(z=0; z<BITS; z++)
  410. bit.b[z] = r->set.b[z] &
  411. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  412. if(bany(&bit)) {
  413. nearln = r->prog->lineno;
  414. warn(Z, "set and not used: %B", bit);
  415. if(debug['R'])
  416. print("set an not used: %B\n", bit);
  417. excise(r);
  418. }
  419. for(z=0; z<BITS; z++)
  420. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  421. while(bany(&bit)) {
  422. i = bnum(bit);
  423. rgp->enter = r;
  424. rgp->varno = i;
  425. change = 0;
  426. if(debug['R'] && debug['v'])
  427. print("\n");
  428. paint1(r, i);
  429. bit.b[i/32] &= ~(1L<<(i%32));
  430. if(change <= 0) {
  431. if(debug['R'])
  432. print("%L$%d: %B\n",
  433. r->prog->lineno, change, blsh(i));
  434. continue;
  435. }
  436. rgp->cost = change;
  437. nregion++;
  438. if(nregion >= NRGN) {
  439. warn(Z, "too many regions");
  440. goto brk;
  441. }
  442. rgp++;
  443. }
  444. }
  445. brk:
  446. qsort(region, nregion, sizeof(region[0]), rcmp);
  447. /*
  448. * pass 6
  449. * determine used registers (paint2)
  450. * replace code (paint3)
  451. */
  452. rgp = region;
  453. for(i=0; i<nregion; i++) {
  454. bit = blsh(rgp->varno);
  455. vreg = paint2(rgp->enter, rgp->varno);
  456. vreg = allreg(vreg, rgp);
  457. if(debug['R']) {
  458. print("%L$%d %R: %B\n",
  459. rgp->enter->prog->lineno,
  460. rgp->cost,
  461. rgp->regno,
  462. bit);
  463. }
  464. if(rgp->regno != 0)
  465. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  466. rgp++;
  467. }
  468. /*
  469. * pass 7
  470. * peep-hole on basic block
  471. */
  472. if(!debug['R'] || debug['P'])
  473. peep();
  474. /*
  475. * pass 8
  476. * recalculate pc
  477. */
  478. val = initpc;
  479. for(r = firstr; r != R; r = r1) {
  480. r->pc = val;
  481. p = r->prog;
  482. p1 = P;
  483. r1 = r->link;
  484. if(r1 != R)
  485. p1 = r1->prog;
  486. for(; p != p1; p = p->link) {
  487. switch(p->as) {
  488. default:
  489. val++;
  490. break;
  491. case ANOP:
  492. case ADATA:
  493. case AGLOBL:
  494. case ANAME:
  495. break;
  496. }
  497. }
  498. }
  499. pc = val;
  500. /*
  501. * fix up branches
  502. */
  503. if(debug['R'])
  504. if(bany(&addrs))
  505. print("addrs: %B\n", addrs);
  506. r1 = 0; /* set */
  507. for(r = firstr; r != R; r = r->link) {
  508. p = r->prog;
  509. if(p->to.type == D_BRANCH)
  510. p->to.offset = r->s2->pc;
  511. r1 = r;
  512. }
  513. /*
  514. * last pass
  515. * eliminate nops
  516. * free aux structures
  517. */
  518. for(p = firstr->prog; p != P; p = p->link){
  519. while(p->link && p->link->as == ANOP)
  520. p->link = p->link->link;
  521. }
  522. if(r1 != R) {
  523. r1->link = freer;
  524. freer = firstr;
  525. }
  526. }
  527. /*
  528. * add mov b,rn
  529. * just after r
  530. */
  531. void
  532. addmove(Reg *r, int bn, int rn, int f)
  533. {
  534. Prog *p, *p1;
  535. Adr *a;
  536. Var *v;
  537. p1 = alloc(sizeof(*p1));
  538. *p1 = zprog;
  539. p = r->prog;
  540. p1->link = p->link;
  541. p->link = p1;
  542. p1->lineno = p->lineno;
  543. v = var + bn;
  544. a = &p1->to;
  545. a->sym = v->sym;
  546. a->offset = v->offset;
  547. a->etype = v->etype;
  548. a->type = v->name;
  549. p1->as = AMOVL;
  550. if(v->etype == TCHAR || v->etype == TUCHAR)
  551. p1->as = AMOVB;
  552. if(v->etype == TSHORT || v->etype == TUSHORT)
  553. p1->as = AMOVW;
  554. p1->from.type = rn;
  555. if(!f) {
  556. p1->from = *a;
  557. *a = zprog.from;
  558. a->type = rn;
  559. if(v->etype == TUCHAR)
  560. p1->as = AMOVB;
  561. if(v->etype == TUSHORT)
  562. p1->as = AMOVW;
  563. }
  564. if(debug['R'])
  565. print("%P\t.a%P\n", p, p1);
  566. }
  567. ulong
  568. doregbits(int r)
  569. {
  570. ulong b;
  571. b = 0;
  572. if(r >= D_INDIR)
  573. r -= D_INDIR;
  574. if(r >= D_AX && r <= D_DI)
  575. b |= RtoB(r);
  576. else
  577. if(r >= D_AL && r <= D_BL)
  578. b |= RtoB(r-D_AL+D_AX);
  579. else
  580. if(r >= D_AH && r <= D_BH)
  581. b |= RtoB(r-D_AH+D_AX);
  582. return b;
  583. }
  584. Bits
  585. mkvar(Reg *r, Adr *a)
  586. {
  587. Var *v;
  588. int i, t, n, et, z;
  589. long o;
  590. Bits bit;
  591. Sym *s;
  592. /*
  593. * mark registers used
  594. */
  595. t = a->type;
  596. r->regu |= doregbits(t);
  597. r->regu |= doregbits(a->index);
  598. switch(t) {
  599. default:
  600. goto none;
  601. case D_ADDR:
  602. a->type = a->index;
  603. bit = mkvar(r, a);
  604. for(z=0; z<BITS; z++)
  605. addrs.b[z] |= bit.b[z];
  606. a->type = t;
  607. goto none;
  608. case D_EXTERN:
  609. case D_STATIC:
  610. case D_PARAM:
  611. case D_AUTO:
  612. n = t;
  613. break;
  614. }
  615. s = a->sym;
  616. if(s == S)
  617. goto none;
  618. if(s->name[0] == '.')
  619. goto none;
  620. et = a->etype;
  621. o = a->offset;
  622. v = var;
  623. for(i=0; i<nvar; i++) {
  624. if(s == v->sym)
  625. if(n == v->name)
  626. if(o == v->offset)
  627. goto out;
  628. v++;
  629. }
  630. if(nvar >= NVAR) {
  631. if(debug['w'] > 1 && s)
  632. warn(Z, "variable not optimized: %s", s->name);
  633. goto none;
  634. }
  635. i = nvar;
  636. nvar++;
  637. v = &var[i];
  638. v->sym = s;
  639. v->offset = o;
  640. v->name = n;
  641. v->etype = et;
  642. if(debug['R'])
  643. print("bit=%2d et=%2d %D\n", i, et, a);
  644. out:
  645. bit = blsh(i);
  646. if(n == D_EXTERN || n == D_STATIC)
  647. for(z=0; z<BITS; z++)
  648. externs.b[z] |= bit.b[z];
  649. if(n == D_PARAM)
  650. for(z=0; z<BITS; z++)
  651. params.b[z] |= bit.b[z];
  652. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  653. for(z=0; z<BITS; z++)
  654. addrs.b[z] |= bit.b[z];
  655. return bit;
  656. none:
  657. return zbits;
  658. }
  659. void
  660. prop(Reg *r, Bits ref, Bits cal)
  661. {
  662. Reg *r1, *r2;
  663. int z;
  664. for(r1 = r; r1 != R; r1 = r1->p1) {
  665. for(z=0; z<BITS; z++) {
  666. ref.b[z] |= r1->refahead.b[z];
  667. if(ref.b[z] != r1->refahead.b[z]) {
  668. r1->refahead.b[z] = ref.b[z];
  669. change++;
  670. }
  671. cal.b[z] |= r1->calahead.b[z];
  672. if(cal.b[z] != r1->calahead.b[z]) {
  673. r1->calahead.b[z] = cal.b[z];
  674. change++;
  675. }
  676. }
  677. switch(r1->prog->as) {
  678. case ACALL:
  679. for(z=0; z<BITS; z++) {
  680. cal.b[z] |= ref.b[z] | externs.b[z];
  681. ref.b[z] = 0;
  682. }
  683. break;
  684. case ATEXT:
  685. for(z=0; z<BITS; z++) {
  686. cal.b[z] = 0;
  687. ref.b[z] = 0;
  688. }
  689. break;
  690. case ARET:
  691. for(z=0; z<BITS; z++) {
  692. cal.b[z] = externs.b[z];
  693. ref.b[z] = 0;
  694. }
  695. }
  696. for(z=0; z<BITS; z++) {
  697. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  698. r1->use1.b[z] | r1->use2.b[z];
  699. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  700. r1->refbehind.b[z] = ref.b[z];
  701. r1->calbehind.b[z] = cal.b[z];
  702. }
  703. if(r1->active)
  704. break;
  705. r1->active = 1;
  706. }
  707. for(; r != r1; r = r->p1)
  708. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  709. prop(r2, r->refbehind, r->calbehind);
  710. }
  711. /*
  712. * find looping structure
  713. *
  714. * 1) find reverse postordering
  715. * 2) find approximate dominators,
  716. * the actual dominators if the flow graph is reducible
  717. * otherwise, dominators plus some other non-dominators.
  718. * See Matthew S. Hecht and Jeffrey D. Ullman,
  719. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  720. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  721. * Oct. 1-3, 1973, pp. 207-217.
  722. * 3) find all nodes with a predecessor dominated by the current node.
  723. * such a node is a loop head.
  724. * recursively, all preds with a greater rpo number are in the loop
  725. */
  726. long
  727. postorder(Reg *r, Reg **rpo2r, long n)
  728. {
  729. Reg *r1;
  730. r->rpo = 1;
  731. r1 = r->s1;
  732. if(r1 && !r1->rpo)
  733. n = postorder(r1, rpo2r, n);
  734. r1 = r->s2;
  735. if(r1 && !r1->rpo)
  736. n = postorder(r1, rpo2r, n);
  737. rpo2r[n] = r;
  738. n++;
  739. return n;
  740. }
  741. long
  742. rpolca(long *idom, long rpo1, long rpo2)
  743. {
  744. long t;
  745. if(rpo1 == -1)
  746. return rpo2;
  747. while(rpo1 != rpo2){
  748. if(rpo1 > rpo2){
  749. t = rpo2;
  750. rpo2 = rpo1;
  751. rpo1 = t;
  752. }
  753. while(rpo1 < rpo2){
  754. t = idom[rpo2];
  755. if(t >= rpo2)
  756. sysfatal("bad idom");
  757. rpo2 = t;
  758. }
  759. }
  760. return rpo1;
  761. }
  762. int
  763. doms(long *idom, long r, long s)
  764. {
  765. while(s > r)
  766. s = idom[s];
  767. return s == r;
  768. }
  769. int
  770. loophead(long *idom, Reg *r)
  771. {
  772. long src;
  773. src = r->rpo;
  774. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  775. return 1;
  776. for(r = r->p2; r != R; r = r->p2link)
  777. if(doms(idom, src, r->rpo))
  778. return 1;
  779. return 0;
  780. }
  781. void
  782. loopmark(Reg **rpo2r, long head, Reg *r)
  783. {
  784. if(r->rpo < head || r->active == head)
  785. return;
  786. r->active = head;
  787. r->loop += LOOP;
  788. if(r->p1 != R)
  789. loopmark(rpo2r, head, r->p1);
  790. for(r = r->p2; r != R; r = r->p2link)
  791. loopmark(rpo2r, head, r);
  792. }
  793. void
  794. loopit(Reg *r, long nr)
  795. {
  796. Reg *r1;
  797. long i, d, me;
  798. if(nr > maxnr) {
  799. rpo2r = alloc(nr * sizeof(Reg*));
  800. idom = alloc(nr * sizeof(long));
  801. maxnr = nr;
  802. }
  803. d = postorder(r, rpo2r, 0);
  804. if(d > nr)
  805. sysfatal("too many reg nodes");
  806. nr = d;
  807. for(i = 0; i < nr / 2; i++){
  808. r1 = rpo2r[i];
  809. rpo2r[i] = rpo2r[nr - 1 - i];
  810. rpo2r[nr - 1 - i] = r1;
  811. }
  812. for(i = 0; i < nr; i++)
  813. rpo2r[i]->rpo = i;
  814. idom[0] = 0;
  815. for(i = 0; i < nr; i++){
  816. r1 = rpo2r[i];
  817. me = r1->rpo;
  818. d = -1;
  819. if(r1->p1 != R && r1->p1->rpo < me)
  820. d = r1->p1->rpo;
  821. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  822. if(r1->rpo < me)
  823. d = rpolca(idom, d, r1->rpo);
  824. idom[i] = d;
  825. }
  826. for(i = 0; i < nr; i++){
  827. r1 = rpo2r[i];
  828. r1->loop++;
  829. if(r1->p2 != R && loophead(idom, r1))
  830. loopmark(rpo2r, i, r1);
  831. }
  832. }
  833. void
  834. synch(Reg *r, Bits dif)
  835. {
  836. Reg *r1;
  837. int z;
  838. for(r1 = r; r1 != R; r1 = r1->s1) {
  839. for(z=0; z<BITS; z++) {
  840. dif.b[z] = (dif.b[z] &
  841. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  842. r1->set.b[z] | r1->regdiff.b[z];
  843. if(dif.b[z] != r1->regdiff.b[z]) {
  844. r1->regdiff.b[z] = dif.b[z];
  845. change++;
  846. }
  847. }
  848. if(r1->active)
  849. break;
  850. r1->active = 1;
  851. for(z=0; z<BITS; z++)
  852. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  853. if(r1->s2 != R)
  854. synch(r1->s2, dif);
  855. }
  856. }
  857. ulong
  858. allreg(ulong b, Rgn *r)
  859. {
  860. Var *v;
  861. int i;
  862. v = var + r->varno;
  863. r->regno = 0;
  864. switch(v->etype) {
  865. default:
  866. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  867. break;
  868. case TCHAR:
  869. case TUCHAR:
  870. case TSHORT:
  871. case TUSHORT:
  872. case TINT:
  873. case TUINT:
  874. case TLONG:
  875. case TULONG:
  876. case TIND:
  877. case TARRAY:
  878. i = BtoR(~b);
  879. if(i && r->cost > 0) {
  880. r->regno = i;
  881. return RtoB(i);
  882. }
  883. break;
  884. case TDOUBLE:
  885. case TFLOAT:
  886. break;
  887. }
  888. return 0;
  889. }
  890. void
  891. paint1(Reg *r, int bn)
  892. {
  893. Reg *r1;
  894. Prog *p;
  895. int z;
  896. ulong bb;
  897. z = bn/32;
  898. bb = 1L<<(bn%32);
  899. if(r->act.b[z] & bb)
  900. return;
  901. for(;;) {
  902. if(!(r->refbehind.b[z] & bb))
  903. break;
  904. r1 = r->p1;
  905. if(r1 == R)
  906. break;
  907. if(!(r1->refahead.b[z] & bb))
  908. break;
  909. if(r1->act.b[z] & bb)
  910. break;
  911. r = r1;
  912. }
  913. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  914. change -= CLOAD * r->loop;
  915. if(debug['R'] && debug['v'])
  916. print("%ld%P\tld %B $%d\n", r->loop,
  917. r->prog, blsh(bn), change);
  918. }
  919. for(;;) {
  920. r->act.b[z] |= bb;
  921. p = r->prog;
  922. if(r->use1.b[z] & bb) {
  923. change += CREF * r->loop;
  924. if(p->as == AFMOVL)
  925. if(BtoR(bb) != D_F0)
  926. change = -CINF;
  927. if(debug['R'] && debug['v'])
  928. print("%ld%P\tu1 %B $%d\n", r->loop,
  929. p, blsh(bn), change);
  930. }
  931. if((r->use2.b[z]|r->set.b[z]) & bb) {
  932. change += CREF * r->loop;
  933. if(p->as == AFMOVL)
  934. if(BtoR(bb) != D_F0)
  935. change = -CINF;
  936. if(debug['R'] && debug['v'])
  937. print("%ld%P\tu2 %B $%d\n", r->loop,
  938. p, blsh(bn), change);
  939. }
  940. if(STORE(r) & r->regdiff.b[z] & bb) {
  941. change -= CLOAD * r->loop;
  942. if(p->as == AFMOVL)
  943. if(BtoR(bb) != D_F0)
  944. change = -CINF;
  945. if(debug['R'] && debug['v'])
  946. print("%ld%P\tst %B $%d\n", r->loop,
  947. p, blsh(bn), change);
  948. }
  949. if(r->refbehind.b[z] & bb)
  950. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  951. if(r1->refahead.b[z] & bb)
  952. paint1(r1, bn);
  953. if(!(r->refahead.b[z] & bb))
  954. break;
  955. r1 = r->s2;
  956. if(r1 != R)
  957. if(r1->refbehind.b[z] & bb)
  958. paint1(r1, bn);
  959. r = r->s1;
  960. if(r == R)
  961. break;
  962. if(r->act.b[z] & bb)
  963. break;
  964. if(!(r->refbehind.b[z] & bb))
  965. break;
  966. }
  967. }
  968. ulong
  969. regset(Reg *r, ulong bb)
  970. {
  971. ulong b, set;
  972. Adr v;
  973. int c;
  974. set = 0;
  975. v = zprog.from;
  976. while(b = bb & ~(bb-1)) {
  977. v.type = BtoR(b);
  978. c = copyu(r->prog, &v, A);
  979. if(c == 3)
  980. set |= b;
  981. bb &= ~b;
  982. }
  983. return set;
  984. }
  985. ulong
  986. reguse(Reg *r, ulong bb)
  987. {
  988. ulong b, set;
  989. Adr v;
  990. int c;
  991. set = 0;
  992. v = zprog.from;
  993. while(b = bb & ~(bb-1)) {
  994. v.type = BtoR(b);
  995. c = copyu(r->prog, &v, A);
  996. if(c == 1 || c == 2 || c == 4)
  997. set |= b;
  998. bb &= ~b;
  999. }
  1000. return set;
  1001. }
  1002. ulong
  1003. paint2(Reg *r, int bn)
  1004. {
  1005. Reg *r1;
  1006. int z;
  1007. ulong bb, vreg, x;
  1008. z = bn/32;
  1009. bb = 1L << (bn%32);
  1010. vreg = regbits;
  1011. if(!(r->act.b[z] & bb))
  1012. return vreg;
  1013. for(;;) {
  1014. if(!(r->refbehind.b[z] & bb))
  1015. break;
  1016. r1 = r->p1;
  1017. if(r1 == R)
  1018. break;
  1019. if(!(r1->refahead.b[z] & bb))
  1020. break;
  1021. if(!(r1->act.b[z] & bb))
  1022. break;
  1023. r = r1;
  1024. }
  1025. for(;;) {
  1026. r->act.b[z] &= ~bb;
  1027. vreg |= r->regu;
  1028. if(r->refbehind.b[z] & bb)
  1029. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1030. if(r1->refahead.b[z] & bb)
  1031. vreg |= paint2(r1, bn);
  1032. if(!(r->refahead.b[z] & bb))
  1033. break;
  1034. r1 = r->s2;
  1035. if(r1 != R)
  1036. if(r1->refbehind.b[z] & bb)
  1037. vreg |= paint2(r1, bn);
  1038. r = r->s1;
  1039. if(r == R)
  1040. break;
  1041. if(!(r->act.b[z] & bb))
  1042. break;
  1043. if(!(r->refbehind.b[z] & bb))
  1044. break;
  1045. }
  1046. bb = vreg;
  1047. for(; r; r=r->s1) {
  1048. x = r->regu & ~bb;
  1049. if(x) {
  1050. vreg |= reguse(r, x);
  1051. bb |= regset(r, x);
  1052. }
  1053. }
  1054. return vreg;
  1055. }
  1056. void
  1057. paint3(Reg *r, int bn, long rb, int rn)
  1058. {
  1059. Reg *r1;
  1060. Prog *p;
  1061. int z;
  1062. ulong bb;
  1063. z = bn/32;
  1064. bb = 1L << (bn%32);
  1065. if(r->act.b[z] & bb)
  1066. return;
  1067. for(;;) {
  1068. if(!(r->refbehind.b[z] & bb))
  1069. break;
  1070. r1 = r->p1;
  1071. if(r1 == R)
  1072. break;
  1073. if(!(r1->refahead.b[z] & bb))
  1074. break;
  1075. if(r1->act.b[z] & bb)
  1076. break;
  1077. r = r1;
  1078. }
  1079. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1080. addmove(r, bn, rn, 0);
  1081. for(;;) {
  1082. r->act.b[z] |= bb;
  1083. p = r->prog;
  1084. if(r->use1.b[z] & bb) {
  1085. if(debug['R'])
  1086. print("%P", p);
  1087. addreg(&p->from, rn);
  1088. if(debug['R'])
  1089. print("\t.c%P\n", p);
  1090. }
  1091. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1092. if(debug['R'])
  1093. print("%P", p);
  1094. addreg(&p->to, rn);
  1095. if(debug['R'])
  1096. print("\t.c%P\n", p);
  1097. }
  1098. if(STORE(r) & r->regdiff.b[z] & bb)
  1099. addmove(r, bn, rn, 1);
  1100. r->regu |= rb;
  1101. if(r->refbehind.b[z] & bb)
  1102. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1103. if(r1->refahead.b[z] & bb)
  1104. paint3(r1, bn, rb, rn);
  1105. if(!(r->refahead.b[z] & bb))
  1106. break;
  1107. r1 = r->s2;
  1108. if(r1 != R)
  1109. if(r1->refbehind.b[z] & bb)
  1110. paint3(r1, bn, rb, rn);
  1111. r = r->s1;
  1112. if(r == R)
  1113. break;
  1114. if(r->act.b[z] & bb)
  1115. break;
  1116. if(!(r->refbehind.b[z] & bb))
  1117. break;
  1118. }
  1119. }
  1120. void
  1121. addreg(Adr *a, int rn)
  1122. {
  1123. a->sym = 0;
  1124. a->offset = 0;
  1125. a->type = rn;
  1126. }
  1127. long
  1128. RtoB(int r)
  1129. {
  1130. if(r < D_AX || r > D_DI)
  1131. return 0;
  1132. return 1L << (r-D_AX);
  1133. }
  1134. int
  1135. BtoR(long b)
  1136. {
  1137. b &= 0xffL;
  1138. if(b == 0)
  1139. return 0;
  1140. return bitno(b) + D_AX;
  1141. }