reg.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->costr;
  22. if(p2->costa > c1)
  23. c1 = p2->costa;
  24. c2 = p1->costr;
  25. if(p1->costa > c2)
  26. c2 = p1->costa;
  27. if(c1 -= c2)
  28. return c1;
  29. return p2->varno - p1->varno;
  30. }
  31. void
  32. regopt(Prog *p)
  33. {
  34. Reg *r, *r1, *r2;
  35. Prog *p1;
  36. int i, z;
  37. long val, initpc, npc;
  38. ulong vreg;
  39. Bits bit;
  40. Var *v;
  41. struct {
  42. long m;
  43. long c;
  44. Reg* p;
  45. } log5[6], *lp;
  46. firstr = R;
  47. lastr = R;
  48. nvar = 0;
  49. for(z=0; z<BITS; z++) {
  50. externs.b[z] = 0;
  51. params.b[z] = 0;
  52. addrs.b[z] = 0;
  53. }
  54. regbits = RtoB(0) | /* return reg */
  55. AtoB(6) | AtoB(7) | /* sp and sb */
  56. FtoB(0) | FtoB(1); /* floating return reg */
  57. for(i=0; i<NREG; i++) {
  58. if(regused[i])
  59. regbits |= RtoB(i);
  60. if(fregused[i])
  61. regbits |= FtoB(i);
  62. if(aregused[i])
  63. regbits |= AtoB(i);
  64. }
  65. /*
  66. * pass 1
  67. * build aux data structure
  68. * allocate pcs
  69. * find use and set of variables
  70. */
  71. val = 5L * 5L * 5L * 5L * 5L;
  72. lp = log5;
  73. for(i=0; i<5; i++) {
  74. lp->m = val;
  75. lp->c = 0;
  76. lp->p = R;
  77. val /= 5L;
  78. lp++;
  79. }
  80. val = 0;
  81. for(; p != P; p = p->link) {
  82. switch(p->as) {
  83. case ADATA:
  84. case AGLOBL:
  85. case ANAME:
  86. case ASIGNAME:
  87. continue;
  88. }
  89. r = rega();
  90. if(firstr == R) {
  91. firstr = r;
  92. lastr = r;
  93. } else {
  94. lastr->link = r;
  95. r->p1 = lastr;
  96. lastr->s1 = r;
  97. lastr = r;
  98. }
  99. r->prog = p;
  100. r->pc = val;
  101. val++;
  102. lp = log5;
  103. for(i=0; i<5; i++) {
  104. lp->c--;
  105. if(lp->c <= 0) {
  106. lp->c = lp->m;
  107. if(lp->p != R)
  108. lp->p->log5 = r;
  109. lp->p = r;
  110. (lp+1)->c = 0;
  111. break;
  112. }
  113. lp++;
  114. }
  115. r1 = r->p1;
  116. if(r1 != R)
  117. switch(r1->prog->as) {
  118. case ABRA:
  119. case ARTS:
  120. case ARTE:
  121. r->p1 = R;
  122. r1->s1 = R;
  123. }
  124. bit = mkvar(&p->from, AGOK);
  125. if(bany(&bit))
  126. switch(p->as) {
  127. case ALEA:
  128. if(!(mvbits & B_INDIR))
  129. for(z=0; z<BITS; z++)
  130. addrs.b[z] |= bit.b[z];
  131. default:
  132. if(mvbits & B_ADDR)
  133. for(z=0; z<BITS; z++)
  134. addrs.b[z] |= bit.b[z];
  135. for(z=0; z<BITS; z++)
  136. r->use1.b[z] |= bit.b[z];
  137. }
  138. bit = mkvar(&p->to, p->as);
  139. if(bany(&bit))
  140. switch(p->as) {
  141. case ABSR: /* funny */
  142. for(z=0; z<BITS; z++)
  143. addrs.b[z] |= bit.b[z];
  144. goto def;
  145. case APEA:
  146. if(!(mvbits & B_INDIR))
  147. for(z=0; z<BITS; z++)
  148. addrs.b[z] |= bit.b[z];
  149. def:
  150. case ACMPB: case ACMPW: case ACMPL:
  151. case AFCMPF: case AFCMPD:
  152. case ATSTB: case ATSTW: case ATSTL:
  153. case AFTSTF: case AFTSTD:
  154. case ABFEXTU: case ABFEXTS:
  155. if(mvbits & B_ADDR)
  156. for(z=0; z<BITS; z++)
  157. addrs.b[z] |= bit.b[z];
  158. for(z=0; z<BITS; z++)
  159. r->use2.b[z] |= bit.b[z];
  160. break;
  161. default:
  162. diag(Z, "reg: unknown asop: %A", p->as);
  163. case AADDB: case AADDW: case AADDL:
  164. case ASUBB: case ASUBW: case ASUBL:
  165. case AANDB: case AANDW: case AANDL:
  166. case AORB: case AORW: case AORL:
  167. case AEORB: case AEORW: case AEORL:
  168. case ABFINS:
  169. for(z=0; z<BITS; z++)
  170. r->use2.b[z] |= bit.b[z];
  171. case ANOP:
  172. case AMOVB: case AMOVW: case AMOVL:
  173. case AFMOVEB: case AFMOVEW: case AFMOVEL:
  174. case ACLRB: case ACLRW: case ACLRL:
  175. case AFMOVEF: case AFMOVED:
  176. if(mvbits & B_INDIR)
  177. for(z=0; z<BITS; z++)
  178. r->use2.b[z] |= bit.b[z];
  179. else
  180. for(z=0; z<BITS; z++)
  181. r->set.b[z] |= bit.b[z];
  182. break;
  183. }
  184. }
  185. if(firstr == R)
  186. return;
  187. initpc = pc - val;
  188. npc = val;
  189. /*
  190. * pass 2
  191. * turn branch references to pointers
  192. * build back pointers
  193. */
  194. for(r = firstr; r != R; r = r->link) {
  195. p = r->prog;
  196. if(p->to.type == D_BRANCH) {
  197. val = p->to.offset - initpc;
  198. r1 = firstr;
  199. while(r1 != R) {
  200. r2 = r1->log5;
  201. if(r2 != R && val >= r2->pc) {
  202. r1 = r2;
  203. continue;
  204. }
  205. if(r1->pc == val)
  206. break;
  207. r1 = r1->link;
  208. }
  209. if(r1 == R) {
  210. diag(Z, "ref not found\n%L:%P", p->lineno, p);
  211. continue;
  212. }
  213. if(r1 == r) {
  214. diag(Z, "ref to self");
  215. continue;
  216. }
  217. r->s2 = r1;
  218. r->p2link = r1->p2;
  219. r1->p2 = r;
  220. }
  221. }
  222. if(debug['R'])
  223. print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
  224. /*
  225. * pass 2.5
  226. * find looping structure
  227. */
  228. for(r = firstr; r != R; r = r->link)
  229. r->active = 0;
  230. changer = 0;
  231. loopit(firstr, npc);
  232. if(debug['R'] && debug['v']) {
  233. print("\nlooping structure:\n");
  234. for(r = firstr; r != R; r = r->link) {
  235. print("%ld:%P", r->loop, r->prog);
  236. for(z=0; z<BITS; z++)
  237. bit.b[z] = r->use1.b[z] |
  238. r->use2.b[z] | r->set.b[z];
  239. if(bany(&bit)) {
  240. print("\t");
  241. if(bany(&r->use1))
  242. print(" u1=%B", r->use1);
  243. if(bany(&r->use2))
  244. print(" u2=%B", r->use2);
  245. if(bany(&r->set))
  246. print(" st=%B", r->set);
  247. }
  248. print("\n");
  249. }
  250. }
  251. /*
  252. * pass 3
  253. * iterate propagating usage
  254. * back until flow graph is complete
  255. */
  256. loop1:
  257. changer = 0;
  258. for(r = firstr; r != R; r = r->link)
  259. r->active = 0;
  260. for(r = firstr; r != R; r = r->link)
  261. if(r->prog->as == ARTS)
  262. prop(r, zbits, zbits);
  263. loop11:
  264. /* pick up unreachable code */
  265. i = 0;
  266. for(r = firstr; r != R; r = r1) {
  267. r1 = r->link;
  268. if(r1 && r1->active && !r->active) {
  269. prop(r, zbits, zbits);
  270. i = 1;
  271. }
  272. }
  273. if(i)
  274. goto loop11;
  275. if(changer)
  276. goto loop1;
  277. /*
  278. * pass 4
  279. * iterate propagating register/variable synchrony
  280. * forward until graph is complete
  281. */
  282. loop2:
  283. changer = 0;
  284. for(r = firstr; r != R; r = r->link)
  285. r->active = 0;
  286. synch(firstr, zbits);
  287. if(changer)
  288. goto loop2;
  289. /*
  290. * pass 5
  291. * isolate regions
  292. * calculate costs (paint1)
  293. */
  294. r = firstr;
  295. if(r) {
  296. for(z=0; z<BITS; z++)
  297. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  298. ~(externs.b[z] | params.b[z] | addrs.b[z]);
  299. if(bany(&bit)) {
  300. nearln = r->prog->lineno;
  301. warn(Z, "used and not set: %B", bit);
  302. if(debug['R'] && !debug['w'])
  303. print("used and not set: %B\n", bit);
  304. /*
  305. * 68040 'feature':
  306. * load of a denormalized fp will trap
  307. */
  308. while(bany(&bit)) {
  309. i = bnum(bit);
  310. bit.b[i/32] &= ~(1L << (i%32));
  311. v = var + i;
  312. if(v->type == D_AUTO) {
  313. r->set.b[i/32] |= (1L << (i%32));
  314. if(typefd[v->etype])
  315. addmove(r, i, NREG+NREG, 1);
  316. }
  317. }
  318. }
  319. }
  320. if(debug['R'] && debug['v'])
  321. print("\nprop structure:\n");
  322. for(r = firstr; r != R; r = r->link) {
  323. if(debug['R'] && debug['v'])
  324. print("%P\n set = %B; rah = %B; cal = %B\n",
  325. r->prog, r->set, r->refahead, r->calahead);
  326. r->act = zbits;
  327. }
  328. rgp = region;
  329. nregion = 0;
  330. for(r = firstr; r != R; r = r->link) {
  331. for(z=0; z<BITS; z++)
  332. bit.b[z] = r->set.b[z] &
  333. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  334. if(bany(&bit)) {
  335. nearln = r->prog->lineno;
  336. warn(Z, "set and not used: %B", bit);
  337. if(debug['R'])
  338. print("set an not used: %B\n", bit);
  339. excise(r);
  340. }
  341. for(z=0; z<BITS; z++)
  342. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  343. while(bany(&bit)) {
  344. i = bnum(bit);
  345. rgp->enter = r;
  346. rgp->varno = i;
  347. changer = 0;
  348. changea = 0;
  349. if(debug['R'] && debug['v'])
  350. print("\n");
  351. paint1(r, i);
  352. bit.b[i/32] &= ~(1L<<(i%32));
  353. if(changer <= 0 && changea <= 0) {
  354. if(debug['R'])
  355. print("%L$%d.%d: %B\n",
  356. r->prog->lineno,
  357. changer, changea, blsh(i));
  358. continue;
  359. }
  360. rgp->costr = changer;
  361. rgp->costa = changea;
  362. nregion++;
  363. if(nregion >= NRGN) {
  364. warn(Z, "too many regions");
  365. goto brk;
  366. }
  367. rgp++;
  368. }
  369. }
  370. brk:
  371. qsort(region, nregion, sizeof(region[0]), rcmp);
  372. /*
  373. * pass 6
  374. * determine used registers (paint2)
  375. * replace code (paint3)
  376. */
  377. rgp = region;
  378. for(i=0; i<nregion; i++) {
  379. bit = blsh(rgp->varno);
  380. vreg = paint2(rgp->enter, rgp->varno);
  381. vreg = allreg(vreg, rgp);
  382. if(debug['R'])
  383. print("%L$%d.%d %R: %B\n",
  384. rgp->enter->prog->lineno,
  385. rgp->costr, rgp->costa,
  386. rgp->regno,
  387. bit);
  388. if(rgp->regno != D_NONE)
  389. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  390. rgp++;
  391. }
  392. /*
  393. * pass 7
  394. * peep-hole on basic block
  395. */
  396. if(!debug['R'] || debug['P'])
  397. peep();
  398. /*
  399. * pass 8
  400. * recalculate pc
  401. */
  402. val = initpc;
  403. for(r = firstr; r != R; r = r1) {
  404. r->pc = val;
  405. p = r->prog;
  406. p1 = P;
  407. r1 = r->link;
  408. if(r1 != R)
  409. p1 = r1->prog;
  410. for(; p != p1; p = p->link) {
  411. switch(p->as) {
  412. default:
  413. val++;
  414. break;
  415. case ANOP:
  416. case ADATA:
  417. case AGLOBL:
  418. case ANAME:
  419. case ASIGNAME:
  420. break;
  421. }
  422. }
  423. }
  424. pc = val;
  425. /*
  426. * fix up branches
  427. */
  428. if(debug['R'])
  429. if(bany(&addrs))
  430. print("addrs: %B\n", addrs);
  431. r1 = 0; /* set */
  432. for(r = firstr; r != R; r = r->link) {
  433. p = r->prog;
  434. if(p->to.type == D_BRANCH)
  435. p->to.offset = r->s2->pc;
  436. r1 = r;
  437. }
  438. /*
  439. * last pass
  440. * eliminate nops
  441. * free aux structures
  442. */
  443. for(p = firstr->prog; p != P; p = p->link){
  444. while(p->link && p->link->as == ANOP)
  445. p->link = p->link->link;
  446. }
  447. if(r1 != R) {
  448. r1->link = freer;
  449. freer = firstr;
  450. }
  451. }
  452. /*
  453. * add mov b,rn
  454. * just after r
  455. */
  456. void
  457. addmove(Reg *r, int bn, int rn, int f)
  458. {
  459. Prog *p, *p1;
  460. Var *v;
  461. int badccr;
  462. badccr = 0;
  463. p = r->prog;
  464. p1 = p->link;
  465. if(p1)
  466. switch(p1->as) {
  467. case AMOVW:
  468. if(p1->from.type == D_CCR)
  469. p = p1;
  470. break;
  471. case ABEQ:
  472. case ABNE:
  473. case ABLE:
  474. case ABLS:
  475. case ABLT:
  476. case ABMI:
  477. case ABGE:
  478. case ABPL:
  479. case ABGT:
  480. case ABHI:
  481. case ABCC:
  482. case ABCS:
  483. p1 = prg();
  484. p1->link = p->link;
  485. p->link = p1;
  486. p1->lineno = p->lineno;
  487. p1->from.type = D_CCR;
  488. p1->to.type = D_TOS;
  489. p1->as = AMOVW;
  490. p = p1;
  491. badccr = 1;
  492. }
  493. p1 = prg();
  494. p1->link = p->link;
  495. p->link = p1;
  496. p1->lineno = p->lineno;
  497. v = var + bn;
  498. p1->from.sym = v->sym;
  499. p1->from.type = v->type;
  500. p1->from.offset = v->offset;
  501. p1->from.etype = v->etype;
  502. p1->to.type = rn;
  503. if(f) {
  504. p1->to = p1->from;
  505. p1->from = zprog.from;
  506. p1->from.type = rn;
  507. }
  508. p1->as = opxt[OAS][v->etype];
  509. if(badccr) {
  510. p = p1;
  511. p1 = prg();
  512. p1->link = p->link;
  513. p->link = p1;
  514. p1->lineno = p->lineno;
  515. p1->from.type = D_TOS;
  516. p1->to.type = D_CCR;
  517. p1->as = AMOVW;
  518. }
  519. if(debug['R'])
  520. print("%P\t.a%P\n", p, p1);
  521. }
  522. Bits
  523. mkvar(Adr *a, int as)
  524. {
  525. Var *v;
  526. int i, t, z;
  527. long o;
  528. Bits bit;
  529. Sym *s;
  530. mvbits = 0;
  531. t = a->type & D_MASK;
  532. switch(t) {
  533. default:
  534. if(t >= D_R0 && t < D_R0+NREG) {
  535. regbits |= RtoB(t-D_R0);
  536. if(as == ADIVUL || as == ADIVSL)
  537. regbits |= RtoB(t-D_R0+1);
  538. }
  539. if(t >= D_A0 && t < D_A0+NREG)
  540. regbits |= AtoB(t-D_A0);
  541. if(t >= D_F0 && t < D_F0+NREG)
  542. regbits |= FtoB(t-D_F0);
  543. goto none;
  544. case D_EXTERN:
  545. case D_STATIC:
  546. case D_AUTO:
  547. case D_PARAM:
  548. break;
  549. }
  550. s = a->sym;
  551. if(s == S)
  552. goto none;
  553. if((a->type & I_MASK) == I_ADDR)
  554. mvbits |= B_ADDR;
  555. switch(a->index & I_MASK) {
  556. case I_INDEX1:
  557. mvbits |= B_ADDR;
  558. break;
  559. case I_INDEX2:
  560. case I_INDEX3:
  561. mvbits |= B_INDIR;
  562. break;
  563. }
  564. o = a->offset;
  565. v = var;
  566. for(i=0; i<nvar; i++) {
  567. if(s == v->sym)
  568. if(t == v->type)
  569. if(o == v->offset)
  570. goto out;
  571. v++;
  572. }
  573. if(s)
  574. if(s->name[0] == '.')
  575. goto none;
  576. if(nvar >= NVAR) {
  577. if(debug['w'] > 1 && s)
  578. warn(Z, "variable not optimized: %s", s->name);
  579. goto none;
  580. }
  581. i = nvar;
  582. nvar++;
  583. v = &var[i];
  584. v->sym = s;
  585. v->offset = o;
  586. v->etype = a->etype;
  587. v->type = t;
  588. if(debug['R'])
  589. print("bit=%2d et=%2d %s (%p,%d,%ld)\n",
  590. i, a->etype, s->name,
  591. v->sym, v->type, v->offset);
  592. out:
  593. bit = blsh(i);
  594. if(t == D_EXTERN || t == D_STATIC)
  595. for(z=0; z<BITS; z++)
  596. externs.b[z] |= bit.b[z];
  597. if(t == D_PARAM)
  598. for(z=0; z<BITS; z++)
  599. params.b[z] |= bit.b[z];
  600. if(a->etype != v->etype || !typechlpfd[a->etype])
  601. for(z=0; z<BITS; z++)
  602. addrs.b[z] |= bit.b[z]; /* funny punning */
  603. return bit;
  604. none:
  605. return zbits;
  606. }
  607. void
  608. prop(Reg *r, Bits ref, Bits cal)
  609. {
  610. Reg *r1, *r2;
  611. int z;
  612. for(r1 = r; r1 != R; r1 = r1->p1) {
  613. for(z=0; z<BITS; z++) {
  614. ref.b[z] |= r1->refahead.b[z];
  615. if(ref.b[z] != r1->refahead.b[z]) {
  616. r1->refahead.b[z] = ref.b[z];
  617. changer++;
  618. }
  619. cal.b[z] |= r1->calahead.b[z];
  620. if(cal.b[z] != r1->calahead.b[z]) {
  621. r1->calahead.b[z] = cal.b[z];
  622. changer++;
  623. }
  624. }
  625. switch(r1->prog->as) {
  626. case ABSR:
  627. for(z=0; z<BITS; z++) {
  628. cal.b[z] |= ref.b[z] | externs.b[z];
  629. ref.b[z] = 0;
  630. }
  631. break;
  632. case ATEXT:
  633. for(z=0; z<BITS; z++) {
  634. cal.b[z] = 0;
  635. ref.b[z] = 0;
  636. }
  637. break;
  638. case ARTS:
  639. for(z=0; z<BITS; z++) {
  640. cal.b[z] = externs.b[z];
  641. ref.b[z] = 0;
  642. }
  643. }
  644. for(z=0; z<BITS; z++) {
  645. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  646. r1->use1.b[z] | r1->use2.b[z];
  647. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  648. r1->refbehind.b[z] = ref.b[z];
  649. r1->calbehind.b[z] = cal.b[z];
  650. }
  651. if(r1->active)
  652. break;
  653. r1->active = 1;
  654. }
  655. for(; r != r1; r = r->p1)
  656. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  657. prop(r2, r->refbehind, r->calbehind);
  658. }
  659. /*
  660. * find looping structure
  661. *
  662. * 1) find reverse postordering
  663. * 2) find approximate dominators,
  664. * the actual dominators if the flow graph is reducible
  665. * otherwise, dominators plus some other non-dominators.
  666. * See Matthew S. Hecht and Jeffrey D. Ullman,
  667. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  668. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  669. * Oct. 1-3, 1973, pp. 207-217.
  670. * 3) find all nodes with a predecessor dominated by the current node.
  671. * such a node is a loop head.
  672. * recursively, all preds with a greater rpo number are in the loop
  673. */
  674. long
  675. postorder(Reg *r, Reg **rpo2r, long n)
  676. {
  677. Reg *r1;
  678. r->rpo = 1;
  679. r1 = r->s1;
  680. if(r1 && !r1->rpo)
  681. n = postorder(r1, rpo2r, n);
  682. r1 = r->s2;
  683. if(r1 && !r1->rpo)
  684. n = postorder(r1, rpo2r, n);
  685. rpo2r[n] = r;
  686. n++;
  687. return n;
  688. }
  689. long
  690. rpolca(long *idom, long rpo1, long rpo2)
  691. {
  692. long t;
  693. if(rpo1 == -1)
  694. return rpo2;
  695. while(rpo1 != rpo2){
  696. if(rpo1 > rpo2){
  697. t = rpo2;
  698. rpo2 = rpo1;
  699. rpo1 = t;
  700. }
  701. while(rpo1 < rpo2){
  702. t = idom[rpo2];
  703. if(t >= rpo2)
  704. fatal(Z, "bad idom");
  705. rpo2 = t;
  706. }
  707. }
  708. return rpo1;
  709. }
  710. int
  711. doms(long *idom, long r, long s)
  712. {
  713. while(s > r)
  714. s = idom[s];
  715. return s == r;
  716. }
  717. int
  718. loophead(long *idom, Reg *r)
  719. {
  720. long src;
  721. src = r->rpo;
  722. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  723. return 1;
  724. for(r = r->p2; r != R; r = r->p2link)
  725. if(doms(idom, src, r->rpo))
  726. return 1;
  727. return 0;
  728. }
  729. void
  730. loopmark(Reg **rpo2r, long head, Reg *r)
  731. {
  732. if(r->rpo < head || r->active == head)
  733. return;
  734. r->active = head;
  735. r->loop += LOOP;
  736. if(r->p1 != R)
  737. loopmark(rpo2r, head, r->p1);
  738. for(r = r->p2; r != R; r = r->p2link)
  739. loopmark(rpo2r, head, r);
  740. }
  741. void
  742. loopit(Reg *r, long nr)
  743. {
  744. Reg *r1;
  745. long i, d, me;
  746. if(nr > maxnr) {
  747. rpo2r = alloc(nr * sizeof(Reg*));
  748. idom = alloc(nr * sizeof(long));
  749. maxnr = nr;
  750. }
  751. d = postorder(r, rpo2r, 0);
  752. if(d > nr)
  753. fatal(Z, "too many reg nodes");
  754. nr = d;
  755. for(i = 0; i < nr / 2; i++){
  756. r1 = rpo2r[i];
  757. rpo2r[i] = rpo2r[nr - 1 - i];
  758. rpo2r[nr - 1 - i] = r1;
  759. }
  760. for(i = 0; i < nr; i++)
  761. rpo2r[i]->rpo = i;
  762. idom[0] = 0;
  763. for(i = 0; i < nr; i++){
  764. r1 = rpo2r[i];
  765. me = r1->rpo;
  766. d = -1;
  767. if(r1->p1 != R && r1->p1->rpo < me)
  768. d = r1->p1->rpo;
  769. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  770. if(r1->rpo < me)
  771. d = rpolca(idom, d, r1->rpo);
  772. idom[i] = d;
  773. }
  774. for(i = 0; i < nr; i++){
  775. r1 = rpo2r[i];
  776. r1->loop++;
  777. if(r1->p2 != R && loophead(idom, r1))
  778. loopmark(rpo2r, i, r1);
  779. }
  780. }
  781. void
  782. synch(Reg *r, Bits dif)
  783. {
  784. Reg *r1;
  785. int z;
  786. for(r1 = r; r1 != R; r1 = r1->s1) {
  787. for(z=0; z<BITS; z++) {
  788. dif.b[z] = (dif.b[z] &
  789. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  790. r1->set.b[z] | r1->regdiff.b[z];
  791. if(dif.b[z] != r1->regdiff.b[z]) {
  792. r1->regdiff.b[z] = dif.b[z];
  793. changer++;
  794. }
  795. }
  796. if(r1->active)
  797. break;
  798. r1->active = 1;
  799. for(z=0; z<BITS; z++)
  800. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  801. if(r1->s2 != R)
  802. synch(r1->s2, dif);
  803. }
  804. }
  805. ulong
  806. allreg(ulong b, Rgn *r)
  807. {
  808. Var *v;
  809. int i, j;
  810. v = var + r->varno;
  811. r->regno = D_NONE;
  812. switch(v->etype) {
  813. default:
  814. diag(Z, "unknown etype");
  815. break;
  816. case TCHAR:
  817. case TUCHAR:
  818. case TSHORT:
  819. case TUSHORT:
  820. case TINT:
  821. case TUINT:
  822. case TLONG:
  823. case TULONG:
  824. case TIND:
  825. i = BtoR(~b);
  826. j = BtoA(~b);
  827. if(r->costa == r->costr)
  828. if(i > j)
  829. i = NREG;
  830. if(j < NREG && r->costa > 0)
  831. if(r->costa > r->costr || i >= NREG) {
  832. r->regno = D_A0 + j;
  833. return AtoB(j);
  834. }
  835. if(i < NREG && r->costr > 0) {
  836. r->regno = D_R0 + i;
  837. return RtoB(i);
  838. }
  839. break;
  840. case TDOUBLE:
  841. case TFLOAT:
  842. i = BtoF(~b);
  843. if(i < NREG) {
  844. r->regno = D_F0 + i;
  845. return FtoB(i);
  846. }
  847. break;
  848. }
  849. return 0;
  850. }
  851. void
  852. paint1(Reg *r, int bn)
  853. {
  854. Reg *r1;
  855. Prog *p;
  856. int z;
  857. ulong bb;
  858. int x;
  859. z = bn/32;
  860. bb = 1L<<(bn%32);
  861. if(r->act.b[z] & bb)
  862. return;
  863. for(;;) {
  864. if(!(r->refbehind.b[z] & bb))
  865. break;
  866. r1 = r->p1;
  867. if(r1 == R)
  868. break;
  869. if(!(r1->refahead.b[z] & bb))
  870. break;
  871. if(r1->act.b[z] & bb)
  872. break;
  873. r = r1;
  874. }
  875. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  876. changer -= CLOAD * r->loop;
  877. changea -= CLOAD * r->loop;
  878. if(debug['R'] && debug['v'])
  879. print("%ld%P\tld %B $%d.%d\n", r->loop,
  880. r->prog, blsh(bn), changer, changea);
  881. }
  882. for(;;) {
  883. r->act.b[z] |= bb;
  884. p = r->prog;
  885. if(r->use1.b[z] & bb) {
  886. changer += CREF * r->loop;
  887. changea += CREF * r->loop;
  888. x = p->from.index;
  889. if(x == D_NONE) {
  890. switch(p->as) {
  891. default:
  892. changea = -CINF;
  893. case AADDL:
  894. case ASUBL:
  895. case AMOVL:
  896. case ACMPL:
  897. break;
  898. }
  899. } else {
  900. changer += (CXREF-CREF) * r->loop;
  901. if(x != (I_INDEX3|D_NONE))
  902. changer = -CINF;
  903. if((x&I_MASK) == I_INDEX1)
  904. changea = -CINF;
  905. }
  906. if(p->as == AMOVL) {
  907. x = p->to.type;
  908. if(x >= D_R0 && x < D_R0+NREG)
  909. changer += r->loop;
  910. if(x >= D_A0 && x < D_A0+NREG)
  911. changea += r->loop;
  912. }
  913. if(debug['R'] && debug['v'])
  914. print("%ld%P\tu1 %B $%d.%d\n", r->loop,
  915. p, blsh(bn), changer, changea);
  916. }
  917. if((r->use2.b[z]|r->set.b[z]) & bb) {
  918. changer += CREF * r->loop;
  919. changea += CREF * r->loop;
  920. x = p->to.index;
  921. if(x == D_NONE)
  922. switch(p->as) {
  923. default:
  924. changea = -CINF;
  925. break;
  926. case AMOVL:
  927. case AADDL:
  928. case ACMPL:
  929. case ASUBL:
  930. case ACLRL: /* can be faked */
  931. case ATSTL: /* can be faked */
  932. break;
  933. }
  934. else {
  935. changer += (CXREF-CREF) * r->loop;
  936. if(x != (I_INDEX3|D_NONE))
  937. changer = -CINF;
  938. if((x&I_MASK) == I_INDEX1)
  939. changea = -CINF;
  940. }
  941. if(p->as == AMOVL) {
  942. x = p->from.type;
  943. if(x >= D_R0 && x < D_R0+NREG)
  944. changer += r->loop;
  945. if(x >= D_A0 && x < D_A0+NREG)
  946. changea += r->loop;
  947. }
  948. if(debug['R'] && debug['v'])
  949. print("%ld%P\tu2 %B $%d.%d\n", r->loop,
  950. p, blsh(bn), changer, changea);
  951. }
  952. if(STORE(r) & r->regdiff.b[z] & bb) {
  953. changer -= CLOAD * r->loop;
  954. changea -= CLOAD * r->loop;
  955. if(debug['R'] && debug['v'])
  956. print("%ld%P\tst %B $%d.%d\n", r->loop,
  957. p, blsh(bn), changer, changea);
  958. }
  959. if(r->refbehind.b[z] & bb)
  960. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  961. if(r1->refahead.b[z] & bb)
  962. paint1(r1, bn);
  963. if(!(r->refahead.b[z] & bb))
  964. break;
  965. r1 = r->s2;
  966. if(r1 != R)
  967. if(r1->refbehind.b[z] & bb)
  968. paint1(r1, bn);
  969. r = r->s1;
  970. if(r == R)
  971. break;
  972. if(r->act.b[z] & bb)
  973. break;
  974. if(!(r->refbehind.b[z] & bb))
  975. break;
  976. }
  977. }
  978. ulong
  979. paint2(Reg *r, int bn)
  980. {
  981. Reg *r1;
  982. int z;
  983. ulong bb, vreg;
  984. z = bn/32;
  985. bb = 1L << (bn%32);
  986. vreg = regbits;
  987. if(!(r->act.b[z] & bb))
  988. return vreg;
  989. for(;;) {
  990. if(!(r->refbehind.b[z] & bb))
  991. break;
  992. r1 = r->p1;
  993. if(r1 == R)
  994. break;
  995. if(!(r1->refahead.b[z] & bb))
  996. break;
  997. if(!(r1->act.b[z] & bb))
  998. break;
  999. r = r1;
  1000. }
  1001. for(;;) {
  1002. r->act.b[z] &= ~bb;
  1003. vreg |= r->regu;
  1004. if(r->refbehind.b[z] & bb)
  1005. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1006. if(r1->refahead.b[z] & bb)
  1007. vreg |= paint2(r1, bn);
  1008. if(!(r->refahead.b[z] & bb))
  1009. break;
  1010. r1 = r->s2;
  1011. if(r1 != R)
  1012. if(r1->refbehind.b[z] & bb)
  1013. vreg |= paint2(r1, bn);
  1014. r = r->s1;
  1015. if(r == R)
  1016. break;
  1017. if(!(r->act.b[z] & bb))
  1018. break;
  1019. if(!(r->refbehind.b[z] & bb))
  1020. break;
  1021. }
  1022. return vreg;
  1023. }
  1024. void
  1025. paint3(Reg *r, int bn, ulong rb, int rn)
  1026. {
  1027. Reg *r1;
  1028. Prog *p;
  1029. int z;
  1030. ulong bb;
  1031. z = bn/32;
  1032. bb = 1L << (bn%32);
  1033. if(r->act.b[z] & bb)
  1034. return;
  1035. for(;;) {
  1036. if(!(r->refbehind.b[z] & bb))
  1037. break;
  1038. r1 = r->p1;
  1039. if(r1 == R)
  1040. break;
  1041. if(!(r1->refahead.b[z] & bb))
  1042. break;
  1043. if(r1->act.b[z] & bb)
  1044. break;
  1045. r = r1;
  1046. }
  1047. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1048. addmove(r, bn, rn, 0);
  1049. for(;;) {
  1050. r->act.b[z] |= bb;
  1051. p = r->prog;
  1052. if(r->use1.b[z] & bb) {
  1053. if(debug['R'])
  1054. print("%P", p);
  1055. addreg(&p->from, rn);
  1056. if(debug['R'])
  1057. print("\t.c%P\n", p);
  1058. }
  1059. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1060. if(debug['R'])
  1061. print("%P", p);
  1062. addreg(&p->to, rn);
  1063. if(debug['R'])
  1064. print("\t.c%P\n", p);
  1065. }
  1066. if(STORE(r) & r->regdiff.b[z] & bb)
  1067. addmove(r, bn, rn, 1);
  1068. r->regu |= rb;
  1069. if(r->refbehind.b[z] & bb)
  1070. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1071. if(r1->refahead.b[z] & bb)
  1072. paint3(r1, bn, rb, rn);
  1073. if(!(r->refahead.b[z] & bb))
  1074. break;
  1075. r1 = r->s2;
  1076. if(r1 != R)
  1077. if(r1->refbehind.b[z] & bb)
  1078. paint3(r1, bn, rb, rn);
  1079. r = r->s1;
  1080. if(r == R)
  1081. break;
  1082. if(r->act.b[z] & bb)
  1083. break;
  1084. if(!(r->refbehind.b[z] & bb))
  1085. break;
  1086. }
  1087. }
  1088. void
  1089. addreg(Adr *a, int rn)
  1090. {
  1091. int x;
  1092. a->sym = 0;
  1093. x = a->index;
  1094. if(rn >= D_R0 && rn < D_R0+NREG)
  1095. goto addr;
  1096. if(x == (I_INDEX3|D_NONE)) {
  1097. a->type = rn | I_INDIR;
  1098. a->index = D_NONE;
  1099. a->offset = a->displace;
  1100. a->displace = 0;
  1101. return;
  1102. }
  1103. if(x != D_NONE) {
  1104. a->type = rn | I_INDIR;
  1105. a->index += I_INDEX1 - I_INDEX2;
  1106. a->offset = a->displace;
  1107. a->displace = 0;
  1108. return;
  1109. }
  1110. a->type = rn | (a->type & I_INDIR);
  1111. return;
  1112. addr:
  1113. if(x == (I_INDEX3|D_NONE)) {
  1114. a->type = D_NONE|I_INDIR;
  1115. a->index += I_INDEX1 + rn - D_NONE - I_INDEX3;
  1116. a->scale = 4; /* .L*1 */
  1117. a->offset = a->displace;
  1118. a->displace = 0;
  1119. return;
  1120. }
  1121. a->type = rn | (a->type & I_INDIR);
  1122. }
  1123. /*
  1124. * bit reg
  1125. * 0-7 R0-R7
  1126. * 8-15 A0-A7
  1127. * 16-23 F0-F7
  1128. */
  1129. ulong
  1130. RtoB(int r)
  1131. {
  1132. if(r < 0 || r >= NREG)
  1133. return 0;
  1134. return 1L << (r + 0);
  1135. }
  1136. int
  1137. BtoR(ulong b)
  1138. {
  1139. b &= 0x0000ffL;
  1140. if(b == 0)
  1141. return NREG;
  1142. return bitno(b) - 0;
  1143. }
  1144. ulong
  1145. AtoB(int a)
  1146. {
  1147. if(a < 0 || a >= NREG)
  1148. return 0;
  1149. return 1L << (a + NREG);
  1150. }
  1151. int
  1152. BtoA(ulong b)
  1153. {
  1154. b &= 0x00ff00L;
  1155. if(b == 0)
  1156. return NREG;
  1157. return bitno(b) - NREG;
  1158. }
  1159. ulong
  1160. FtoB(int f)
  1161. {
  1162. if(f < 0 || f >= NREG)
  1163. return 0;
  1164. return 1L << (f + NREG+NREG);
  1165. }
  1166. int
  1167. BtoF(ulong b)
  1168. {
  1169. b &= 0xff0000L;
  1170. if(b == 0)
  1171. return NREG;
  1172. return bitno(b) - NREG-NREG;
  1173. }