reg.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->costr;
  22. if(p2->costa > c1)
  23. c1 = p2->costa;
  24. c2 = p1->costr;
  25. if(p1->costa > c2)
  26. c2 = p1->costa;
  27. if(c1 -= c2)
  28. return c1;
  29. return p2->varno - p1->varno;
  30. }
  31. void
  32. regopt(Prog *p)
  33. {
  34. Reg *r, *r1, *r2;
  35. Prog *p1;
  36. int i, z;
  37. long val, initpc, npc;
  38. ulong vreg;
  39. Bits bit;
  40. Var *v;
  41. struct {
  42. long m;
  43. long c;
  44. Reg* p;
  45. } log5[6], *lp;
  46. firstr = R;
  47. lastr = R;
  48. nvar = 0;
  49. for(z=0; z<BITS; z++) {
  50. externs.b[z] = 0;
  51. params.b[z] = 0;
  52. addrs.b[z] = 0;
  53. }
  54. regbits = RtoB(0) | /* return reg */
  55. AtoB(6) | AtoB(7) | /* sp and sb */
  56. FtoB(0) | FtoB(1); /* floating return reg */
  57. for(i=0; i<NREG; i++) {
  58. if(regused[i])
  59. regbits |= RtoB(i);
  60. if(fregused[i])
  61. regbits |= FtoB(i);
  62. if(aregused[i])
  63. regbits |= AtoB(i);
  64. }
  65. /*
  66. * pass 1
  67. * build aux data structure
  68. * allocate pcs
  69. * find use and set of variables
  70. */
  71. val = 5L * 5L * 5L * 5L * 5L;
  72. lp = log5;
  73. for(i=0; i<5; i++) {
  74. lp->m = val;
  75. lp->c = 0;
  76. lp->p = R;
  77. val /= 5L;
  78. lp++;
  79. }
  80. val = 0;
  81. for(; p != P; p = p->link) {
  82. switch(p->as) {
  83. case ADATA:
  84. case AGLOBL:
  85. case ANAME:
  86. case ASIGNAME:
  87. continue;
  88. }
  89. r = rega();
  90. if(firstr == R) {
  91. firstr = r;
  92. lastr = r;
  93. } else {
  94. lastr->link = r;
  95. r->p1 = lastr;
  96. lastr->s1 = r;
  97. lastr = r;
  98. }
  99. r->prog = p;
  100. r->pc = val;
  101. val++;
  102. lp = log5;
  103. for(i=0; i<5; i++) {
  104. lp->c--;
  105. if(lp->c <= 0) {
  106. lp->c = lp->m;
  107. if(lp->p != R)
  108. lp->p->log5 = r;
  109. lp->p = r;
  110. (lp+1)->c = 0;
  111. break;
  112. }
  113. lp++;
  114. }
  115. r1 = r->p1;
  116. if(r1 != R)
  117. switch(r1->prog->as) {
  118. case ABRA:
  119. case ARTS:
  120. case ARTE:
  121. r->p1 = R;
  122. r1->s1 = R;
  123. }
  124. bit = mkvar(&p->from, AGOK);
  125. if(bany(&bit))
  126. switch(p->as) {
  127. case ALEA:
  128. if(!(mvbits & B_INDIR))
  129. for(z=0; z<BITS; z++)
  130. addrs.b[z] |= bit.b[z];
  131. default:
  132. if(mvbits & B_ADDR)
  133. for(z=0; z<BITS; z++)
  134. addrs.b[z] |= bit.b[z];
  135. for(z=0; z<BITS; z++)
  136. r->use1.b[z] |= bit.b[z];
  137. }
  138. bit = mkvar(&p->to, p->as);
  139. if(bany(&bit))
  140. switch(p->as) {
  141. case ABSR: /* funny */
  142. for(z=0; z<BITS; z++)
  143. addrs.b[z] |= bit.b[z];
  144. goto def;
  145. case APEA:
  146. if(!(mvbits & B_INDIR))
  147. for(z=0; z<BITS; z++)
  148. addrs.b[z] |= bit.b[z];
  149. def:
  150. case ACMPB: case ACMPW: case ACMPL:
  151. case AFCMPF: case AFCMPD:
  152. case ATSTB: case ATSTW: case ATSTL:
  153. case AFTSTF: case AFTSTD:
  154. case ABFEXTU: case ABFEXTS:
  155. if(mvbits & B_ADDR)
  156. for(z=0; z<BITS; z++)
  157. addrs.b[z] |= bit.b[z];
  158. for(z=0; z<BITS; z++)
  159. r->use2.b[z] |= bit.b[z];
  160. break;
  161. default:
  162. diag(Z, "reg: unknown asop: %A", p->as);
  163. case AADDB: case AADDW: case AADDL:
  164. case ASUBB: case ASUBW: case ASUBL:
  165. case AANDB: case AANDW: case AANDL:
  166. case AORB: case AORW: case AORL:
  167. case AEORB: case AEORW: case AEORL:
  168. case ABFINS:
  169. for(z=0; z<BITS; z++)
  170. r->use2.b[z] |= bit.b[z];
  171. case ANOP:
  172. case AMOVB: case AMOVW: case AMOVL:
  173. case AFMOVEB: case AFMOVEW: case AFMOVEL:
  174. case ACLRB: case ACLRW: case ACLRL:
  175. case AFMOVEF: case AFMOVED:
  176. if(mvbits & B_INDIR)
  177. for(z=0; z<BITS; z++)
  178. r->use2.b[z] |= bit.b[z];
  179. else
  180. for(z=0; z<BITS; z++)
  181. r->set.b[z] |= bit.b[z];
  182. break;
  183. }
  184. }
  185. if(firstr == R)
  186. return;
  187. initpc = pc - val;
  188. npc = val;
  189. /*
  190. * pass 2
  191. * turn branch references to pointers
  192. * build back pointers
  193. */
  194. for(r = firstr; r != R; r = r->link) {
  195. p = r->prog;
  196. if(p->to.type == D_BRANCH) {
  197. val = p->to.offset - initpc;
  198. r1 = firstr;
  199. while(r1 != R) {
  200. r2 = r1->log5;
  201. if(r2 != R && val >= r2->pc) {
  202. r1 = r2;
  203. continue;
  204. }
  205. if(r1->pc == val)
  206. break;
  207. r1 = r1->link;
  208. }
  209. if(r1 == R) {
  210. diag(Z, "ref not found\n%L:%P", p->lineno, p);
  211. continue;
  212. }
  213. if(r1 == r) {
  214. diag(Z, "ref to self");
  215. continue;
  216. }
  217. r->s2 = r1;
  218. r->p2link = r1->p2;
  219. r1->p2 = r;
  220. }
  221. }
  222. if(debug['R'])
  223. print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
  224. /*
  225. * pass 2.5
  226. * find looping structure
  227. */
  228. for(r = firstr; r != R; r = r->link)
  229. r->active = 0;
  230. changer = 0;
  231. loopit(firstr, npc);
  232. if(debug['R'] && debug['v']) {
  233. print("\nlooping structure:\n");
  234. for(r = firstr; r != R; r = r->link) {
  235. print("%ld:%P", r->loop, r->prog);
  236. for(z=0; z<BITS; z++)
  237. bit.b[z] = r->use1.b[z] |
  238. r->use2.b[z] | r->set.b[z];
  239. if(bany(&bit)) {
  240. print("\t");
  241. if(bany(&r->use1))
  242. print(" u1=%B", r->use1);
  243. if(bany(&r->use2))
  244. print(" u2=%B", r->use2);
  245. if(bany(&r->set))
  246. print(" st=%B", r->set);
  247. }
  248. print("\n");
  249. }
  250. }
  251. /*
  252. * pass 3
  253. * iterate propagating usage
  254. * back until flow graph is complete
  255. */
  256. loop1:
  257. changer = 0;
  258. for(r = firstr; r != R; r = r->link)
  259. r->active = 0;
  260. for(r = firstr; r != R; r = r->link)
  261. if(r->prog->as == ARTS)
  262. prop(r, zbits, zbits);
  263. loop11:
  264. /* pick up unreachable code */
  265. i = 0;
  266. for(r = firstr; r != R; r = r1) {
  267. r1 = r->link;
  268. if(r1 && r1->active && !r->active) {
  269. prop(r, zbits, zbits);
  270. i = 1;
  271. }
  272. }
  273. if(i)
  274. goto loop11;
  275. if(changer)
  276. goto loop1;
  277. /*
  278. * pass 4
  279. * iterate propagating register/variable synchrony
  280. * forward until graph is complete
  281. */
  282. loop2:
  283. changer = 0;
  284. for(r = firstr; r != R; r = r->link)
  285. r->active = 0;
  286. synch(firstr, zbits);
  287. if(changer)
  288. goto loop2;
  289. /*
  290. * pass 5
  291. * isolate regions
  292. * calculate costs (paint1)
  293. */
  294. r = firstr;
  295. if(r) {
  296. for(z=0; z<BITS; z++)
  297. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  298. ~(externs.b[z] | params.b[z] | addrs.b[z]);
  299. if(bany(&bit)) {
  300. nearln = r->prog->lineno;
  301. warn(Z, "used and not set: %B", bit);
  302. if(debug['R'] && !debug['w'])
  303. print("used and not set: %B\n", bit);
  304. /*
  305. * 68040 'feature':
  306. * load of a denormalized fp will trap
  307. */
  308. while(bany(&bit)) {
  309. i = bnum(bit);
  310. bit.b[i/32] &= ~(1L << (i%32));
  311. v = var + i;
  312. if(v->type == D_AUTO) {
  313. r->set.b[i/32] |= (1L << (i%32));
  314. if(typefd[v->etype])
  315. addmove(r, i, NREG+NREG, 1);
  316. }
  317. }
  318. }
  319. }
  320. if(debug['R'] && debug['v'])
  321. print("\nprop structure:\n");
  322. for(r = firstr; r != R; r = r->link) {
  323. if(debug['R'] && debug['v'])
  324. print("%P\n set = %B; rah = %B; cal = %B\n",
  325. r->prog, r->set, r->refahead, r->calahead);
  326. r->act = zbits;
  327. }
  328. rgp = region;
  329. nregion = 0;
  330. for(r = firstr; r != R; r = r->link) {
  331. for(z=0; z<BITS; z++)
  332. bit.b[z] = r->set.b[z] &
  333. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  334. if(bany(&bit)) {
  335. nearln = r->prog->lineno;
  336. warn(Z, "set and not used: %B", bit);
  337. if(debug['R'])
  338. print("set an not used: %B\n", bit);
  339. excise(r);
  340. }
  341. for(z=0; z<BITS; z++)
  342. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  343. while(bany(&bit)) {
  344. i = bnum(bit);
  345. rgp->enter = r;
  346. rgp->varno = i;
  347. changer = 0;
  348. changea = 0;
  349. if(debug['R'] && debug['v'])
  350. print("\n");
  351. paint1(r, i);
  352. bit.b[i/32] &= ~(1L<<(i%32));
  353. if(changer <= 0 && changea <= 0) {
  354. if(debug['R'])
  355. print("%L$%d.%d: %B\n",
  356. r->prog->lineno,
  357. changer, changea, blsh(i));
  358. continue;
  359. }
  360. rgp->costr = changer;
  361. rgp->costa = changea;
  362. nregion++;
  363. if(nregion >= NRGN) {
  364. warn(Z, "too many regions");
  365. goto brk;
  366. }
  367. rgp++;
  368. }
  369. }
  370. brk:
  371. qsort(region, nregion, sizeof(region[0]), rcmp);
  372. /*
  373. * pass 6
  374. * determine used registers (paint2)
  375. * replace code (paint3)
  376. */
  377. rgp = region;
  378. for(i=0; i<nregion; i++) {
  379. bit = blsh(rgp->varno);
  380. vreg = paint2(rgp->enter, rgp->varno);
  381. vreg = allreg(vreg, rgp);
  382. if(debug['R'])
  383. print("%L$%d.%d %R: %B\n",
  384. rgp->enter->prog->lineno,
  385. rgp->costr, rgp->costa,
  386. rgp->regno,
  387. bit);
  388. if(rgp->regno != D_NONE)
  389. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  390. rgp++;
  391. }
  392. /*
  393. * pass 7
  394. * peep-hole on basic block
  395. */
  396. if(!debug['R'] || debug['P'])
  397. peep();
  398. /*
  399. * pass 8
  400. * recalculate pc
  401. */
  402. val = initpc;
  403. for(r = firstr; r != R; r = r1) {
  404. r->pc = val;
  405. p = r->prog;
  406. p1 = P;
  407. r1 = r->link;
  408. if(r1 != R)
  409. p1 = r1->prog;
  410. for(; p != p1; p = p->link) {
  411. switch(p->as) {
  412. default:
  413. val++;
  414. break;
  415. case ANOP:
  416. case ADATA:
  417. case AGLOBL:
  418. case ANAME:
  419. case ASIGNAME:
  420. break;
  421. }
  422. }
  423. }
  424. pc = val;
  425. /*
  426. * fix up branches
  427. */
  428. if(debug['R'])
  429. if(bany(&addrs))
  430. print("addrs: %B\n", addrs);
  431. r1 = 0; /* set */
  432. for(r = firstr; r != R; r = r->link) {
  433. p = r->prog;
  434. if(p->to.type == D_BRANCH)
  435. p->to.offset = r->s2->pc;
  436. r1 = r;
  437. }
  438. /*
  439. * last pass
  440. * eliminate nops
  441. * free aux structures
  442. */
  443. for(p = firstr->prog; p != P; p = p->link){
  444. while(p->link && p->link->as == ANOP)
  445. p->link = p->link->link;
  446. }
  447. if(r1 != R) {
  448. r1->link = freer;
  449. freer = firstr;
  450. }
  451. }
  452. /*
  453. * add mov b,rn
  454. * just after r
  455. */
  456. void
  457. addmove(Reg *r, int bn, int rn, int f)
  458. {
  459. Prog *p, *p1;
  460. Var *v;
  461. int badccr;
  462. badccr = 0;
  463. p = r->prog;
  464. p1 = p->link;
  465. if(p1)
  466. switch(p1->as) {
  467. case AMOVW:
  468. if(p1->from.type == D_CCR)
  469. p = p1;
  470. break;
  471. case ABEQ:
  472. case ABNE:
  473. case ABLE:
  474. case ABLS:
  475. case ABLT:
  476. case ABMI:
  477. case ABGE:
  478. case ABPL:
  479. case ABGT:
  480. case ABHI:
  481. case ABCC:
  482. case ABCS:
  483. p1 = prg();
  484. p1->link = p->link;
  485. p->link = p1;
  486. p1->lineno = p->lineno;
  487. p1->from.type = D_CCR;
  488. p1->to.type = D_TOS;
  489. p1->as = AMOVW;
  490. p = p1;
  491. badccr = 1;
  492. }
  493. p1 = prg();
  494. p1->link = p->link;
  495. p->link = p1;
  496. p1->lineno = p->lineno;
  497. v = var + bn;
  498. p1->from.sym = v->sym;
  499. p1->from.type = v->type;
  500. p1->from.offset = v->offset;
  501. p1->from.etype = v->etype;
  502. p1->to.type = rn;
  503. if(f) {
  504. p1->to = p1->from;
  505. p1->from = zprog.from;
  506. p1->from.type = rn;
  507. }
  508. p1->as = opxt[OAS][v->etype];
  509. if(badccr) {
  510. p = p1;
  511. p1 = prg();
  512. p1->link = p->link;
  513. p->link = p1;
  514. p1->lineno = p->lineno;
  515. p1->from.type = D_TOS;
  516. p1->to.type = D_CCR;
  517. p1->as = AMOVW;
  518. }
  519. if(debug['R'])
  520. print("%P\t.a%P\n", p, p1);
  521. }
  522. Bits
  523. mkvar(Adr *a, int as)
  524. {
  525. Var *v;
  526. int i, t, z;
  527. long o;
  528. Bits bit;
  529. Sym *s;
  530. mvbits = 0;
  531. t = a->type & D_MASK;
  532. switch(t) {
  533. default:
  534. if(t >= D_R0 && t < D_R0+NREG) {
  535. regbits |= RtoB(t-D_R0);
  536. if(as == ADIVUL || as == ADIVSL)
  537. regbits |= RtoB(t-D_R0+1);
  538. }
  539. if(t >= D_A0 && t < D_A0+NREG)
  540. regbits |= AtoB(t-D_A0);
  541. if(t >= D_F0 && t < D_F0+NREG)
  542. regbits |= FtoB(t-D_F0);
  543. goto none;
  544. case D_EXTERN:
  545. case D_STATIC:
  546. case D_AUTO:
  547. case D_PARAM:
  548. break;
  549. }
  550. s = a->sym;
  551. if(s == S)
  552. goto none;
  553. if((a->type & I_MASK) == I_ADDR)
  554. mvbits |= B_ADDR;
  555. o = a->offset;
  556. v = var;
  557. for(i=0; i<nvar; i++) {
  558. if(s == v->sym)
  559. if(t == v->type)
  560. if(o == v->offset)
  561. goto out;
  562. v++;
  563. }
  564. if(s)
  565. if(s->name[0] == '.')
  566. goto none;
  567. if(nvar >= NVAR) {
  568. if(debug['w'] > 1 && s)
  569. warn(Z, "variable not optimized: %s", s->name);
  570. goto none;
  571. }
  572. i = nvar;
  573. nvar++;
  574. v = &var[i];
  575. v->sym = s;
  576. v->offset = o;
  577. v->etype = a->etype;
  578. v->type = t;
  579. if(debug['R'])
  580. print("bit=%2d et=%2d %s (%p,%d,%ld)\n",
  581. i, a->etype, s->name,
  582. v->sym, v->type, v->offset);
  583. out:
  584. bit = blsh(i);
  585. if(t == D_EXTERN || t == D_STATIC)
  586. for(z=0; z<BITS; z++)
  587. externs.b[z] |= bit.b[z];
  588. if(t == D_PARAM)
  589. for(z=0; z<BITS; z++)
  590. params.b[z] |= bit.b[z];
  591. if(a->etype != v->etype || !typechlpfd[a->etype])
  592. for(z=0; z<BITS; z++)
  593. addrs.b[z] |= bit.b[z]; /* funny punning */
  594. return bit;
  595. none:
  596. return zbits;
  597. }
  598. void
  599. prop(Reg *r, Bits ref, Bits cal)
  600. {
  601. Reg *r1, *r2;
  602. int z;
  603. for(r1 = r; r1 != R; r1 = r1->p1) {
  604. for(z=0; z<BITS; z++) {
  605. ref.b[z] |= r1->refahead.b[z];
  606. if(ref.b[z] != r1->refahead.b[z]) {
  607. r1->refahead.b[z] = ref.b[z];
  608. changer++;
  609. }
  610. cal.b[z] |= r1->calahead.b[z];
  611. if(cal.b[z] != r1->calahead.b[z]) {
  612. r1->calahead.b[z] = cal.b[z];
  613. changer++;
  614. }
  615. }
  616. switch(r1->prog->as) {
  617. case ABSR:
  618. for(z=0; z<BITS; z++) {
  619. cal.b[z] |= ref.b[z] | externs.b[z];
  620. ref.b[z] = 0;
  621. }
  622. break;
  623. case ATEXT:
  624. for(z=0; z<BITS; z++) {
  625. cal.b[z] = 0;
  626. ref.b[z] = 0;
  627. }
  628. break;
  629. case ARTS:
  630. for(z=0; z<BITS; z++) {
  631. cal.b[z] = externs.b[z];
  632. ref.b[z] = 0;
  633. }
  634. }
  635. for(z=0; z<BITS; z++) {
  636. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  637. r1->use1.b[z] | r1->use2.b[z];
  638. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  639. r1->refbehind.b[z] = ref.b[z];
  640. r1->calbehind.b[z] = cal.b[z];
  641. }
  642. if(r1->active)
  643. break;
  644. r1->active = 1;
  645. }
  646. for(; r != r1; r = r->p1)
  647. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  648. prop(r2, r->refbehind, r->calbehind);
  649. }
  650. /*
  651. * find looping structure
  652. *
  653. * 1) find reverse postordering
  654. * 2) find approximate dominators,
  655. * the actual dominators if the flow graph is reducible
  656. * otherwise, dominators plus some other non-dominators.
  657. * See Matthew S. Hecht and Jeffrey D. Ullman,
  658. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  659. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  660. * Oct. 1-3, 1973, pp. 207-217.
  661. * 3) find all nodes with a predecessor dominated by the current node.
  662. * such a node is a loop head.
  663. * recursively, all preds with a greater rpo number are in the loop
  664. */
  665. long
  666. postorder(Reg *r, Reg **rpo2r, long n)
  667. {
  668. Reg *r1;
  669. r->rpo = 1;
  670. r1 = r->s1;
  671. if(r1 && !r1->rpo)
  672. n = postorder(r1, rpo2r, n);
  673. r1 = r->s2;
  674. if(r1 && !r1->rpo)
  675. n = postorder(r1, rpo2r, n);
  676. rpo2r[n] = r;
  677. n++;
  678. return n;
  679. }
  680. long
  681. rpolca(long *idom, long rpo1, long rpo2)
  682. {
  683. long t;
  684. if(rpo1 == -1)
  685. return rpo2;
  686. while(rpo1 != rpo2){
  687. if(rpo1 > rpo2){
  688. t = rpo2;
  689. rpo2 = rpo1;
  690. rpo1 = t;
  691. }
  692. while(rpo1 < rpo2){
  693. t = idom[rpo2];
  694. if(t >= rpo2)
  695. fatal(Z, "bad idom");
  696. rpo2 = t;
  697. }
  698. }
  699. return rpo1;
  700. }
  701. int
  702. doms(long *idom, long r, long s)
  703. {
  704. while(s > r)
  705. s = idom[s];
  706. return s == r;
  707. }
  708. int
  709. loophead(long *idom, Reg *r)
  710. {
  711. long src;
  712. src = r->rpo;
  713. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  714. return 1;
  715. for(r = r->p2; r != R; r = r->p2link)
  716. if(doms(idom, src, r->rpo))
  717. return 1;
  718. return 0;
  719. }
  720. void
  721. loopmark(Reg **rpo2r, long head, Reg *r)
  722. {
  723. if(r->rpo < head || r->active == head)
  724. return;
  725. r->active = head;
  726. r->loop += LOOP;
  727. if(r->p1 != R)
  728. loopmark(rpo2r, head, r->p1);
  729. for(r = r->p2; r != R; r = r->p2link)
  730. loopmark(rpo2r, head, r);
  731. }
  732. void
  733. loopit(Reg *r, long nr)
  734. {
  735. Reg *r1;
  736. long i, d, me;
  737. if(nr > maxnr) {
  738. rpo2r = alloc(nr * sizeof(Reg*));
  739. idom = alloc(nr * sizeof(long));
  740. maxnr = nr;
  741. }
  742. d = postorder(r, rpo2r, 0);
  743. if(d > nr)
  744. fatal(Z, "too many reg nodes");
  745. nr = d;
  746. for(i = 0; i < nr / 2; i++){
  747. r1 = rpo2r[i];
  748. rpo2r[i] = rpo2r[nr - 1 - i];
  749. rpo2r[nr - 1 - i] = r1;
  750. }
  751. for(i = 0; i < nr; i++)
  752. rpo2r[i]->rpo = i;
  753. idom[0] = 0;
  754. for(i = 0; i < nr; i++){
  755. r1 = rpo2r[i];
  756. me = r1->rpo;
  757. d = -1;
  758. if(r1->p1 != R && r1->p1->rpo < me)
  759. d = r1->p1->rpo;
  760. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  761. if(r1->rpo < me)
  762. d = rpolca(idom, d, r1->rpo);
  763. idom[i] = d;
  764. }
  765. for(i = 0; i < nr; i++){
  766. r1 = rpo2r[i];
  767. r1->loop++;
  768. if(r1->p2 != R && loophead(idom, r1))
  769. loopmark(rpo2r, i, r1);
  770. }
  771. }
  772. void
  773. synch(Reg *r, Bits dif)
  774. {
  775. Reg *r1;
  776. int z;
  777. for(r1 = r; r1 != R; r1 = r1->s1) {
  778. for(z=0; z<BITS; z++) {
  779. dif.b[z] = (dif.b[z] &
  780. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  781. r1->set.b[z] | r1->regdiff.b[z];
  782. if(dif.b[z] != r1->regdiff.b[z]) {
  783. r1->regdiff.b[z] = dif.b[z];
  784. changer++;
  785. }
  786. }
  787. if(r1->active)
  788. break;
  789. r1->active = 1;
  790. for(z=0; z<BITS; z++)
  791. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  792. if(r1->s2 != R)
  793. synch(r1->s2, dif);
  794. }
  795. }
  796. ulong
  797. allreg(ulong b, Rgn *r)
  798. {
  799. Var *v;
  800. int i, j;
  801. v = var + r->varno;
  802. r->regno = D_NONE;
  803. switch(v->etype) {
  804. default:
  805. diag(Z, "unknown etype");
  806. break;
  807. case TCHAR:
  808. case TUCHAR:
  809. case TSHORT:
  810. case TUSHORT:
  811. case TINT:
  812. case TUINT:
  813. case TLONG:
  814. case TULONG:
  815. case TIND:
  816. i = BtoR(~b);
  817. j = BtoA(~b);
  818. if(r->costa == r->costr)
  819. if(i > j)
  820. i = NREG;
  821. if(j < NREG && r->costa > 0)
  822. if(r->costa > r->costr || i >= NREG) {
  823. r->regno = D_A0 + j;
  824. return AtoB(j);
  825. }
  826. if(i < NREG && r->costr > 0) {
  827. r->regno = D_R0 + i;
  828. return RtoB(i);
  829. }
  830. break;
  831. case TDOUBLE:
  832. case TFLOAT:
  833. i = BtoF(~b);
  834. if(i < NREG) {
  835. r->regno = D_F0 + i;
  836. return FtoB(i);
  837. }
  838. break;
  839. }
  840. return 0;
  841. }
  842. void
  843. paint1(Reg *r, int bn)
  844. {
  845. Reg *r1;
  846. Prog *p;
  847. int z;
  848. ulong bb;
  849. int x;
  850. z = bn/32;
  851. bb = 1L<<(bn%32);
  852. if(r->act.b[z] & bb)
  853. return;
  854. for(;;) {
  855. if(!(r->refbehind.b[z] & bb))
  856. break;
  857. r1 = r->p1;
  858. if(r1 == R)
  859. break;
  860. if(!(r1->refahead.b[z] & bb))
  861. break;
  862. if(r1->act.b[z] & bb)
  863. break;
  864. r = r1;
  865. }
  866. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  867. changer -= CLOAD * r->loop;
  868. changea -= CLOAD * r->loop;
  869. if(debug['R'] && debug['v'])
  870. print("%ld%P\tld %B $%d.%d\n", r->loop,
  871. r->prog, blsh(bn), changer, changea);
  872. }
  873. for(;;) {
  874. r->act.b[z] |= bb;
  875. p = r->prog;
  876. if(r->use1.b[z] & bb) {
  877. changer += CREF * r->loop;
  878. changea += CREF * r->loop;
  879. switch(p->as) {
  880. default:
  881. changea = -CINF;
  882. case AADDL:
  883. case ASUBL:
  884. case AMOVL:
  885. case ACMPL:
  886. break;
  887. }
  888. if(p->as == AMOVL) {
  889. x = p->to.type;
  890. if(x >= D_R0 && x < D_R0+NREG)
  891. changer += r->loop;
  892. if(x >= D_A0 && x < D_A0+NREG)
  893. changea += r->loop;
  894. }
  895. if(debug['R'] && debug['v'])
  896. print("%ld%P\tu1 %B $%d.%d\n", r->loop,
  897. p, blsh(bn), changer, changea);
  898. }
  899. if((r->use2.b[z]|r->set.b[z]) & bb) {
  900. changer += CREF * r->loop;
  901. changea += CREF * r->loop;
  902. switch(p->as) {
  903. default:
  904. changea = -CINF;
  905. break;
  906. case AMOVL:
  907. case AADDL:
  908. case ACMPL:
  909. case ASUBL:
  910. case ACLRL: /* can be faked */
  911. case ATSTL: /* can be faked */
  912. break;
  913. }
  914. if(p->as == AMOVL) {
  915. x = p->from.type;
  916. if(x >= D_R0 && x < D_R0+NREG)
  917. changer += r->loop;
  918. if(x >= D_A0 && x < D_A0+NREG)
  919. changea += r->loop;
  920. }
  921. if(debug['R'] && debug['v'])
  922. print("%ld%P\tu2 %B $%d.%d\n", r->loop,
  923. p, blsh(bn), changer, changea);
  924. }
  925. if(STORE(r) & r->regdiff.b[z] & bb) {
  926. changer -= CLOAD * r->loop;
  927. changea -= CLOAD * r->loop;
  928. if(debug['R'] && debug['v'])
  929. print("%ld%P\tst %B $%d.%d\n", r->loop,
  930. p, blsh(bn), changer, changea);
  931. }
  932. if(r->refbehind.b[z] & bb)
  933. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  934. if(r1->refahead.b[z] & bb)
  935. paint1(r1, bn);
  936. if(!(r->refahead.b[z] & bb))
  937. break;
  938. r1 = r->s2;
  939. if(r1 != R)
  940. if(r1->refbehind.b[z] & bb)
  941. paint1(r1, bn);
  942. r = r->s1;
  943. if(r == R)
  944. break;
  945. if(r->act.b[z] & bb)
  946. break;
  947. if(!(r->refbehind.b[z] & bb))
  948. break;
  949. }
  950. }
  951. ulong
  952. paint2(Reg *r, int bn)
  953. {
  954. Reg *r1;
  955. int z;
  956. ulong bb, vreg;
  957. z = bn/32;
  958. bb = 1L << (bn%32);
  959. vreg = regbits;
  960. if(!(r->act.b[z] & bb))
  961. return vreg;
  962. for(;;) {
  963. if(!(r->refbehind.b[z] & bb))
  964. break;
  965. r1 = r->p1;
  966. if(r1 == R)
  967. break;
  968. if(!(r1->refahead.b[z] & bb))
  969. break;
  970. if(!(r1->act.b[z] & bb))
  971. break;
  972. r = r1;
  973. }
  974. for(;;) {
  975. r->act.b[z] &= ~bb;
  976. vreg |= r->regu;
  977. if(r->refbehind.b[z] & bb)
  978. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  979. if(r1->refahead.b[z] & bb)
  980. vreg |= paint2(r1, bn);
  981. if(!(r->refahead.b[z] & bb))
  982. break;
  983. r1 = r->s2;
  984. if(r1 != R)
  985. if(r1->refbehind.b[z] & bb)
  986. vreg |= paint2(r1, bn);
  987. r = r->s1;
  988. if(r == R)
  989. break;
  990. if(!(r->act.b[z] & bb))
  991. break;
  992. if(!(r->refbehind.b[z] & bb))
  993. break;
  994. }
  995. return vreg;
  996. }
  997. void
  998. paint3(Reg *r, int bn, ulong rb, int rn)
  999. {
  1000. Reg *r1;
  1001. Prog *p;
  1002. int z;
  1003. ulong bb;
  1004. z = bn/32;
  1005. bb = 1L << (bn%32);
  1006. if(r->act.b[z] & bb)
  1007. return;
  1008. for(;;) {
  1009. if(!(r->refbehind.b[z] & bb))
  1010. break;
  1011. r1 = r->p1;
  1012. if(r1 == R)
  1013. break;
  1014. if(!(r1->refahead.b[z] & bb))
  1015. break;
  1016. if(r1->act.b[z] & bb)
  1017. break;
  1018. r = r1;
  1019. }
  1020. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1021. addmove(r, bn, rn, 0);
  1022. for(;;) {
  1023. r->act.b[z] |= bb;
  1024. p = r->prog;
  1025. if(r->use1.b[z] & bb) {
  1026. if(debug['R'])
  1027. print("%P", p);
  1028. addreg(&p->from, rn);
  1029. if(debug['R'])
  1030. print("\t.c%P\n", p);
  1031. }
  1032. if((r->use2.b[z]|r->set.b[z]) & bb) {
  1033. if(debug['R'])
  1034. print("%P", p);
  1035. addreg(&p->to, rn);
  1036. if(debug['R'])
  1037. print("\t.c%P\n", p);
  1038. }
  1039. if(STORE(r) & r->regdiff.b[z] & bb)
  1040. addmove(r, bn, rn, 1);
  1041. r->regu |= rb;
  1042. if(r->refbehind.b[z] & bb)
  1043. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1044. if(r1->refahead.b[z] & bb)
  1045. paint3(r1, bn, rb, rn);
  1046. if(!(r->refahead.b[z] & bb))
  1047. break;
  1048. r1 = r->s2;
  1049. if(r1 != R)
  1050. if(r1->refbehind.b[z] & bb)
  1051. paint3(r1, bn, rb, rn);
  1052. r = r->s1;
  1053. if(r == R)
  1054. break;
  1055. if(r->act.b[z] & bb)
  1056. break;
  1057. if(!(r->refbehind.b[z] & bb))
  1058. break;
  1059. }
  1060. }
  1061. void
  1062. addreg(Adr *a, int rn)
  1063. {
  1064. a->sym = 0;
  1065. if(rn >= D_R0 && rn < D_R0+NREG)
  1066. goto addr;
  1067. a->type = rn | (a->type & I_INDIR);
  1068. return;
  1069. addr:
  1070. a->type = rn | (a->type & I_INDIR);
  1071. }
  1072. /*
  1073. * bit reg
  1074. * 0-7 R0-R7
  1075. * 8-15 A0-A7
  1076. * 16-23 F0-F7
  1077. */
  1078. ulong
  1079. RtoB(int r)
  1080. {
  1081. if(r < 0 || r >= NREG)
  1082. return 0;
  1083. return 1L << (r + 0);
  1084. }
  1085. int
  1086. BtoR(ulong b)
  1087. {
  1088. b &= 0x0000ffL;
  1089. if(b == 0)
  1090. return NREG;
  1091. return bitno(b) - 0;
  1092. }
  1093. ulong
  1094. AtoB(int a)
  1095. {
  1096. if(a < 0 || a >= NREG)
  1097. return 0;
  1098. return 1L << (a + NREG);
  1099. }
  1100. int
  1101. BtoA(ulong b)
  1102. {
  1103. b &= 0x00ff00L;
  1104. if(b == 0)
  1105. return NREG;
  1106. return bitno(b) - NREG;
  1107. }
  1108. ulong
  1109. FtoB(int f)
  1110. {
  1111. if(f < 0 || f >= NREG)
  1112. return 0;
  1113. return 1L << (f + NREG+NREG);
  1114. }
  1115. int
  1116. BtoF(ulong b)
  1117. {
  1118. b &= 0xff0000L;
  1119. if(b == 0)
  1120. return NREG;
  1121. return bitno(b) - NREG-NREG;
  1122. }